It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter.cpp
new file mode 100644
index 0000000..586472c
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter.cpp
@@ -0,0 +1,1210 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cerrno>
+using namespace llvm;
+
+static cl::opt<bool>
+AsmVerbose("asm-verbose", cl::Hidden, cl::desc("Add comments to directives."));
+
+char AsmPrinter::ID = 0;
+AsmPrinter::AsmPrinter(std::ostream &o, TargetMachine &tm,
+                       const TargetAsmInfo *T)
+  : MachineFunctionPass((intptr_t)&ID), FunctionNumber(0), O(o), TM(tm), TAI(T)
+{}
+
+std::string AsmPrinter::getSectionForFunction(const Function &F) const {
+  return TAI->getTextSection();
+}
+
+
+/// SwitchToTextSection - Switch to the specified text section of the executable
+/// if we are not already in it!
+///
+void AsmPrinter::SwitchToTextSection(const char *NewSection,
+                                     const GlobalValue *GV) {
+  std::string NS;
+  if (GV && GV->hasSection())
+    NS = TAI->getSwitchToSectionDirective() + GV->getSection();
+  else
+    NS = NewSection;
+  
+  // If we're already in this section, we're done.
+  if (CurrentSection == NS) return;
+
+  // Close the current section, if applicable.
+  if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+    O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << "\n";
+
+  CurrentSection = NS;
+
+  if (!CurrentSection.empty())
+    O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n';
+}
+
+/// SwitchToDataSection - Switch to the specified data section of the executable
+/// if we are not already in it!
+///
+void AsmPrinter::SwitchToDataSection(const char *NewSection,
+                                     const GlobalValue *GV) {
+  std::string NS;
+  if (GV && GV->hasSection())
+    NS = TAI->getSwitchToSectionDirective() + GV->getSection();
+  else
+    NS = NewSection;
+  
+  // If we're already in this section, we're done.
+  if (CurrentSection == NS) return;
+
+  // Close the current section, if applicable.
+  if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+    O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << "\n";
+
+  CurrentSection = NS;
+  
+  if (!CurrentSection.empty())
+    O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n';
+}
+
+
+bool AsmPrinter::doInitialization(Module &M) {
+  Mang = new Mangler(M, TAI->getGlobalPrefix());
+  
+  if (!M.getModuleInlineAsm().empty())
+    O << TAI->getCommentString() << " Start of file scope inline assembly\n"
+      << M.getModuleInlineAsm()
+      << "\n" << TAI->getCommentString()
+      << " End of file scope inline assembly\n";
+
+  SwitchToDataSection("");   // Reset back to no section.
+  
+  if (MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>()) {
+    MMI->AnalyzeModule(M);
+  }
+  
+  return false;
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+  if (TAI->getWeakRefDirective()) {
+    if (!ExtWeakSymbols.empty())
+      SwitchToDataSection("");
+
+    for (std::set<const GlobalValue*>::iterator i = ExtWeakSymbols.begin(),
+         e = ExtWeakSymbols.end(); i != e; ++i) {
+      const GlobalValue *GV = *i;
+      std::string Name = Mang->getValueName(GV);
+      O << TAI->getWeakRefDirective() << Name << "\n";
+    }
+  }
+
+  if (TAI->getSetDirective()) {
+    if (!M.alias_empty())
+      SwitchToTextSection(TAI->getTextSection());
+
+    O << "\n";
+    for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+         I!=E; ++I) {
+      std::string Name = Mang->getValueName(I);
+      std::string Target;
+      
+      if (const GlobalValue *GV = I->getAliasedGlobal())
+        Target = Mang->getValueName(GV);
+      else
+        assert(0 && "Unsupported aliasee");
+      
+      if (I->hasExternalLinkage())
+        O << "\t.globl\t" << Name << "\n";
+      else if (I->hasWeakLinkage())
+        O << TAI->getWeakRefDirective() << Name << "\n";
+      else if (!I->hasInternalLinkage())
+        assert(0 && "Invalid alias linkage");
+      
+      O << TAI->getSetDirective() << Name << ", " << Target << "\n";
+    }
+  }
+
+  delete Mang; Mang = 0;
+  return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+  // What's my mangled name?
+  CurrentFnName = Mang->getValueName(MF.getFunction());
+  IncrementFunctionNumber();
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) {
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // Some targets require 4-, 8-, and 16-byte constant literals to be placed
+  // in special sections.
+  std::vector<std::pair<MachineConstantPoolEntry,unsigned> > FourByteCPs;
+  std::vector<std::pair<MachineConstantPoolEntry,unsigned> > EightByteCPs;
+  std::vector<std::pair<MachineConstantPoolEntry,unsigned> > SixteenByteCPs;
+  std::vector<std::pair<MachineConstantPoolEntry,unsigned> > OtherCPs;
+  std::vector<std::pair<MachineConstantPoolEntry,unsigned> > TargetCPs;
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = CP[i];
+    const Type *Ty = CPE.getType();
+    if (TAI->getFourByteConstantSection() &&
+        TM.getTargetData()->getTypeSize(Ty) == 4)
+      FourByteCPs.push_back(std::make_pair(CPE, i));
+    else if (TAI->getEightByteConstantSection() &&
+             TM.getTargetData()->getTypeSize(Ty) == 8)
+      EightByteCPs.push_back(std::make_pair(CPE, i));
+    else if (TAI->getSixteenByteConstantSection() &&
+             TM.getTargetData()->getTypeSize(Ty) == 16)
+      SixteenByteCPs.push_back(std::make_pair(CPE, i));
+    else
+      OtherCPs.push_back(std::make_pair(CPE, i));
+  }
+
+  unsigned Alignment = MCP->getConstantPoolAlignment();
+  EmitConstantPool(Alignment, TAI->getFourByteConstantSection(), FourByteCPs);
+  EmitConstantPool(Alignment, TAI->getEightByteConstantSection(), EightByteCPs);
+  EmitConstantPool(Alignment, TAI->getSixteenByteConstantSection(),
+                   SixteenByteCPs);
+  EmitConstantPool(Alignment, TAI->getConstantPoolSection(), OtherCPs);
+}
+
+void AsmPrinter::EmitConstantPool(unsigned Alignment, const char *Section,
+               std::vector<std::pair<MachineConstantPoolEntry,unsigned> > &CP) {
+  if (CP.empty()) return;
+
+  SwitchToDataSection(Section);
+  EmitAlignment(Alignment);
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+      << CP[i].second << ":\t\t\t\t\t" << TAI->getCommentString() << " ";
+    WriteTypeSymbolic(O, CP[i].first.getType(), 0) << '\n';
+    if (CP[i].first.isMachineConstantPoolEntry())
+      EmitMachineConstantPoolValue(CP[i].first.Val.MachineCPVal);
+    else
+      EmitGlobalConstant(CP[i].first.Val.ConstVal);
+    if (i != e-1) {
+      const Type *Ty = CP[i].first.getType();
+      unsigned EntSize =
+        TM.getTargetData()->getTypeSize(Ty);
+      unsigned ValEnd = CP[i].first.getOffset() + EntSize;
+      // Emit inter-object padding for alignment.
+      EmitZeros(CP[i+1].first.getOffset()-ValEnd);
+    }
+  }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.  
+///
+void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
+                                   MachineFunction &MF) {
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+  bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+  
+  // Use JumpTableDirective if the target provides one; otherwise honor the
+  // entry size from the jump table info.
+  const char *JTEntryDirective = TAI->getJumpTableDirective();
+  bool HadJTEntryDirective = JTEntryDirective != NULL;
+  if (!HadJTEntryDirective) {
+    JTEntryDirective = MJTI->getEntrySize() == 4 ?
+      TAI->getData32bitsDirective() : TAI->getData64bitsDirective();
+  }
+  
+  // Pick the directive to use to print the jump table entries, and switch to 
+  // the appropriate section.
+  TargetLowering *LoweringInfo = TM.getTargetLowering();
+
+  const char* JumpTableDataSection = TAI->getJumpTableDataSection();  
+  if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) ||
+     !JumpTableDataSection) {
+    // In PIC mode, we need to emit the jump table to the same section as the
+    // function body itself; otherwise the label differences won't make sense.
+    // We should also do this if the section name is NULL.
+    const Function *F = MF.getFunction();
+    SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+  } else {
+    SwitchToDataSection(JumpTableDataSection);
+  }
+  
+  EmitAlignment(Log2_32(MJTI->getAlignment()));
+  
+  for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+    const std::vector<MachineBasicBlock*> &JTBBs = JT[i].MBBs;
+    
+    // If this jump table was deleted, ignore it. 
+    if (JTBBs.empty()) continue;
+
+    // For PIC codegen, if possible we want to use the SetDirective to reduce
+    // the number of relocations the assembler will generate for the jump table.
+    // Set directives are all printed before the jump table itself.
+    std::set<MachineBasicBlock*> EmittedSets;
+    if (TAI->getSetDirective() && IsPic)
+      for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+        if (EmittedSets.insert(JTBBs[ii]).second)
+          printSetLabel(i, JTBBs[ii]);
+    
+    // On some targets (e.g. darwin) we want to emit two consecutive labels
+    // before each jump table.  The first label is never referenced, but tells
+    // the assembler and linker the extents of the jump table object.  The
+    // second label is actually referenced by the code.
+    if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
+      O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+    
+    O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() 
+      << '_' << i << ":\n";
+    
+    for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+      O << JTEntryDirective << ' ';
+      // If we have emitted set directives for the jump table entries, print 
+      // them rather than the entries themselves.  If we're emitting PIC, then
+      // emit the table entries as differences between two text section labels.
+      // If we're emitting non-PIC code, then emit the entries as direct
+      // references to the target basic blocks.
+      if (!EmittedSets.empty()) {
+        O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+          << '_' << i << "_set_" << JTBBs[ii]->getNumber();
+      } else if (IsPic) {
+        printBasicBlockLabel(JTBBs[ii], false, false);
+        // If the target uses a custom jump table directive, don't emit the
+        // entry relative to the jump table label.
+        if (!HadJTEntryDirective) 
+          O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+            << getFunctionNumber() << '_' << i;
+      } else {
+        printBasicBlockLabel(JTBBs[ii], false, false);
+      }
+      O << '\n';
+    }
+  }
+}
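+
+// As a rough illustration (directives and label prefixes vary by target), a
+// PIC jump table for table 0 of function 0 whose first entry targets basic
+// block #3 might come out as:
+//     .set L0_0_set_3,LBB0_3-LJTI0_0
+// LJTI0_0:
+//     .long L0_0_set_3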
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM.  If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+  // Ignore debug and non-emitted data.
+  if (GV->getSection() == "llvm.metadata") return true;
+  
+  if (!GV->hasAppendingLinkage()) return false;
+
+  assert(GV->hasInitializer() && "Not a special LLVM global!");
+  
+  if (GV->getName() == "llvm.used") {
+    if (TAI->getUsedDirective() != 0)    // No need to emit this at all.
+      EmitLLVMUsedList(GV->getInitializer());
+    return true;
+  }
+
+  const TargetData *TD = TM.getTargetData();
+  unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+  if (GV->getName() == "llvm.global_ctors" && GV->use_empty()) {
+    SwitchToDataSection(TAI->getStaticCtorsSection());
+    EmitAlignment(Align, 0);
+    EmitXXStructorList(GV->getInitializer());
+    return true;
+  } 
+  
+  if (GV->getName() == "llvm.global_dtors" && GV->use_empty()) {
+    SwitchToDataSection(TAI->getStaticDtorsSection());
+    EmitAlignment(Align, 0);
+    EmitXXStructorList(GV->getInitializer());
+    return true;
+  }
+  
+  return false;
+}
+
+/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each
+/// global in the specified llvm.used list as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+  const char *Directive = TAI->getUsedDirective();
+
+  // Should be an array of 'i8*'.
+  ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  if (InitList == 0) return;
+  
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+    O << Directive;
+    EmitConstantValueOnly(InitList->getOperand(i));
+    O << "\n";
+  }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list.  This just prints out the 
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(Constant *List) {
+  // Should be an array of '{ int, void ()* }' structs.  The first value is the
+  // init priority, which we ignore.
+  if (!isa<ConstantArray>(List)) return;
+  ConstantArray *InitList = cast<ConstantArray>(List);
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+      if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
+
+      if (CS->getOperand(1)->isNullValue())
+        return;  // Found a null terminator, exit printing.
+      // Emit the function pointer.
+      EmitGlobalConstant(CS->getOperand(1));
+    }
+}
+
+/// getGlobalLinkName - Returns the asm/link name of the specified
+/// global variable.  Should be overridden by each target asm printer to
+/// generate the appropriate value.
+const std::string AsmPrinter::getGlobalLinkName(const GlobalVariable *GV) const{
+  std::string LinkName;
+  
+  if (isa<Function>(GV)) {
+    LinkName += TAI->getFunctionAddrPrefix();
+    LinkName += Mang->getValueName(GV);
+    LinkName += TAI->getFunctionAddrSuffix();
+  } else {
+    LinkName += TAI->getGlobalVarAddrPrefix();
+    LinkName += Mang->getValueName(GV);
+    LinkName += TAI->getGlobalVarAddrSuffix();
+  }  
+  
+  return LinkName;
+}
+
+/// EmitExternalGlobal - Emit the external reference to a global variable.
+/// Should be overridden if an indirect reference should be used.
+void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
+  O << getGlobalLinkName(GV);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+/// LEB 128 number encoding.
+
+/// PrintULEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing an unsigned leb128 value.
+void AsmPrinter::PrintULEB128(unsigned Value) const {
+  do {
+    unsigned Byte = Value & 0x7f;
+    Value >>= 7;
+    if (Value) Byte |= 0x80;
+    O << "0x" << std::hex << Byte << std::dec;
+    if (Value) O << ", ";
+  } while (Value);
+}
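+
+// For example, PrintULEB128(300) prints "0xac, 0x2": the low seven bits
+// (0x2c) go out first with the continuation bit set, followed by the
+// remaining bits (300 >> 7 == 2) with the continuation bit clear.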
+
+/// SizeULEB128 - Compute the number of bytes required for an unsigned leb128
+/// value.
+unsigned AsmPrinter::SizeULEB128(unsigned Value) {
+  unsigned Size = 0;
+  do {
+    Value >>= 7;
+    Size += sizeof(int8_t);
+  } while (Value);
+  return Size;
+}
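+
+// For example, SizeULEB128(300) returns 2, matching the two bytes printed by
+// PrintULEB128 above.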
+
+/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing a signed leb128 value.
+void AsmPrinter::PrintSLEB128(int Value) const {
+  int Sign = Value >> (8 * sizeof(Value) - 1);
+  bool IsMore;
+  
+  do {
+    unsigned Byte = Value & 0x7f;
+    Value >>= 7;
+    IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+    if (IsMore) Byte |= 0x80;
+    O << "0x" << std::hex << Byte << std::dec;
+    if (IsMore) O << ", ";
+  } while (IsMore);
+}
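+
+// For example, PrintSLEB128(-2) prints the single byte "0x7e": bit 6 of that
+// byte already carries the sign, so no continuation byte is needed.
+// PrintSLEB128(2) prints "0x2" for the same reason.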
+
+/// SizeSLEB128 - Compute the number of bytes required for a signed leb128
+/// value.
+unsigned AsmPrinter::SizeSLEB128(int Value) {
+  unsigned Size = 0;
+  int Sign = Value >> (8 * sizeof(Value) - 1);
+  bool IsMore;
+  
+  do {
+    unsigned Byte = Value & 0x7f;
+    Value >>= 7;
+    IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+    Size += sizeof(int8_t);
+  } while (IsMore);
+  return Size;
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// PrintHex - Print a value in hexadecimal.
+///
+void AsmPrinter::PrintHex(int Value) const { 
+  O << "0x" << std::hex << Value << std::dec;
+}
+
+/// EOL - Print a newline character to asm stream.  If a comment is present
+/// then it will be printed first.  Comments should not contain '\n'.
+void AsmPrinter::EOL() const {
+  O << "\n";
+}
+void AsmPrinter::EOL(const std::string &Comment) const {
+  if (AsmVerbose && !Comment.empty()) {
+    O << "\t"
+      << TAI->getCommentString()
+      << " "
+      << Comment;
+  }
+  O << "\n";
+}
+
+/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an
+/// unsigned leb128 value.
+void AsmPrinter::EmitULEB128Bytes(unsigned Value) const {
+  if (TAI->hasLEB128()) {
+    O << "\t.uleb128\t"
+      << Value;
+  } else {
+    O << TAI->getData8bitsDirective();
+    PrintULEB128(Value);
+  }
+}
+
+/// EmitSLEB128Bytes - Emit an assembler byte data directive to compose a
+/// signed leb128 value.
+void AsmPrinter::EmitSLEB128Bytes(int Value) const {
+  if (TAI->hasLEB128()) {
+    O << "\t.sleb128\t"
+      << Value;
+  } else {
+    O << TAI->getData8bitsDirective();
+    PrintSLEB128(Value);
+  }
+}
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+  O << TAI->getData8bitsDirective();
+  PrintHex(Value & 0xFF);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+  O << TAI->getData16bitsDirective();
+  PrintHex(Value & 0xFFFF);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+  O << TAI->getData32bitsDirective();
+  PrintHex(Value);
+}
+
+/// EmitInt64 - Emit a long long directive and value.
+///
+void AsmPrinter::EmitInt64(uint64_t Value) const {
+  if (TAI->getData64bitsDirective()) {
+    O << TAI->getData64bitsDirective();
+    PrintHex(Value);
+  } else {
+    if (TM.getTargetData()->isBigEndian()) {
+      EmitInt32(unsigned(Value >> 32)); O << "\n";
+      EmitInt32(unsigned(Value));
+    } else {
+      EmitInt32(unsigned(Value)); O << "\n";
+      EmitInt32(unsigned(Value >> 32));
+    }
+  }
+}
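+
+// For example, on a big-endian target with no 64-bit data directive,
+// EmitInt64(0x1122334455667788ULL) is split into two 32-bit words, emitting
+// 0x11223344 first and 0x55667788 second; a little-endian target emits them
+// in the opposite order.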
+
+/// toOctal - Convert the low order bits of X into an octal digit.
+///
+static inline char toOctal(int X) {
+  return (X&7)+'0';
+}
+
+/// printStringChar - Print a char, escaped if necessary.
+///
+static void printStringChar(std::ostream &O, unsigned char C) {
+  if (C == '"') {
+    O << "\\\"";
+  } else if (C == '\\') {
+    O << "\\\\";
+  } else if (isprint(C)) {
+    O << C;
+  } else {
+    switch(C) {
+    case '\b': O << "\\b"; break;
+    case '\f': O << "\\f"; break;
+    case '\n': O << "\\n"; break;
+    case '\r': O << "\\r"; break;
+    case '\t': O << "\\t"; break;
+    default:
+      O << '\\';
+      O << toOctal(C >> 6);
+      O << toOctal(C >> 3);
+      O << toOctal(C >> 0);
+      break;
+    }
+  }
+}
+
+/// EmitString - Emit a string with quotes and a null terminator.
+/// Special characters are emitted properly.
+/// \literal (Eg. '\t') \endliteral
+void AsmPrinter::EmitString(const std::string &String) const {
+  const char* AscizDirective = TAI->getAscizDirective();
+  if (AscizDirective)
+    O << AscizDirective;
+  else
+    O << TAI->getAsciiDirective();
+  O << "\"";
+  for (unsigned i = 0, N = String.size(); i < N; ++i) {
+    unsigned char C = String[i];
+    printStringChar(O, C);
+  }
+  if (AscizDirective)
+    O << "\"";
+  else
+    O << "\\0\"";
+}
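+
+// As a rough illustration (the exact directive text is target-dependent), a
+// target that defines an .asciz directive emits EmitString("a\tb") as
+//     .asciz "a\tb"
+// while a target with only .ascii gets an explicit terminator appended:
+//     .ascii "a\tb\0"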
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary.  For example, if you pass in 3 here, you will get an 8
+// byte alignment.  If a global value is specified, and if that global has
+// an explicit alignment requested, it will unconditionally override the
+// alignment request.  However, if ForcedAlignBits is specified, this value
+// has final say: the ultimate alignment will be the max of ForcedAlignBits
+// and the alignment computed with NumBits and the global.
+//
+// The algorithm is:
+//     Align = NumBits;
+//     if (GV && GV->getAlignment()) Align = Log2_32(GV->getAlignment());
+//     Align = std::max(Align, ForcedAlignBits);
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
+                               unsigned ForcedAlignBits) const {
+  if (GV && GV->getAlignment())
+    NumBits = Log2_32(GV->getAlignment());
+  NumBits = std::max(NumBits, ForcedAlignBits);
+  
+  if (NumBits == 0) return;   // No need to emit alignment.
+  if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits;
+  O << TAI->getAlignDirective() << NumBits << "\n";
+}
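+
+// For example, EmitAlignment(3) requests an 8-byte boundary: a target whose
+// align directive takes a power of two emits ".align 3", while a target with
+// AlignmentIsInBytes set emits ".align 8".  If the global carries an explicit
+// alignment of 16 bytes, NumBits is first raised to 4.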
+
+    
+/// EmitZeros - Emit a block of zeros.
+///
+void AsmPrinter::EmitZeros(uint64_t NumZeros) const {
+  if (NumZeros) {
+    if (TAI->getZeroDirective()) {
+      O << TAI->getZeroDirective() << NumZeros;
+      if (TAI->getZeroDirectiveSuffix())
+        O << TAI->getZeroDirectiveSuffix();
+      O << "\n";
+    } else {
+      for (; NumZeros; --NumZeros)
+        O << TAI->getData8bitsDirective() << "0\n";
+    }
+  }
+}
+
+// Print out the specified constant, without a storage class.  Only the
+// constants valid in constant expressions can occur here.
+void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
+  if (CV->isNullValue() || isa<UndefValue>(CV))
+    O << "0";
+  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    O << CI->getZExtValue();
+  } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+    // This is a constant address for a global variable or function. Use the
+    // name of the variable or function as the address value, possibly
+    // decorating it with GlobalVarAddrPrefix/Suffix or
+    // FunctionAddrPrefix/Suffix (these all default to "").
+    if (isa<Function>(GV)) {
+      O << TAI->getFunctionAddrPrefix()
+        << Mang->getValueName(GV)
+        << TAI->getFunctionAddrSuffix();
+    } else {
+      O << TAI->getGlobalVarAddrPrefix()
+        << Mang->getValueName(GV)
+        << TAI->getGlobalVarAddrSuffix();
+    }
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    const TargetData *TD = TM.getTargetData();
+    unsigned Opcode = CE->getOpcode();    
+    switch (Opcode) {
+    case Instruction::GetElementPtr: {
+      // generate a symbolic expression for the byte address
+      const Constant *ptrVal = CE->getOperand(0);
+      SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+      if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+                                                idxVec.size())) {
+        if (Offset)
+          O << "(";
+        EmitConstantValueOnly(ptrVal);
+        if (Offset > 0)
+          O << ") + " << Offset;
+        else if (Offset < 0)
+          O << ") - " << -Offset;
+      } else {
+        EmitConstantValueOnly(ptrVal);
+      }
+      break;
+    }
+    case Instruction::Trunc:
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPTrunc:
+    case Instruction::FPExt:
+    case Instruction::UIToFP:
+    case Instruction::SIToFP:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+      assert(0 && "FIXME: Don't yet support this kind of constant cast expr");
+      break;
+    case Instruction::BitCast:
+      return EmitConstantValueOnly(CE->getOperand(0));
+
+    case Instruction::IntToPtr: {
+      // Handle casts to pointers by changing them into casts to the appropriate
+      // integer type.  This promotes constant folding and simplifies this code.
+      Constant *Op = CE->getOperand(0);
+      Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/);
+      return EmitConstantValueOnly(Op);
+    }
+      
+      
+    case Instruction::PtrToInt: {
+      // Support only foldable casts to/from pointers that can be eliminated by
+      // changing the pointer to the appropriately sized integer type.
+      Constant *Op = CE->getOperand(0);
+      const Type *Ty = CE->getType();
+
+      // We can emit the pointer value into this slot if the slot is an
+      // integer slot greater or equal to the size of the pointer.
+      if (Ty->isInteger() &&
+          TD->getTypeSize(Ty) >= TD->getTypeSize(Op->getType()))
+        return EmitConstantValueOnly(Op);
+      
+      assert(0 && "FIXME: Don't yet support this kind of constant cast expr");
+      EmitConstantValueOnly(Op);
+      break;
+    }
+    case Instruction::Add:
+    case Instruction::Sub:
+      O << "(";
+      EmitConstantValueOnly(CE->getOperand(0));
+      O << (Opcode==Instruction::Add ? ") + (" : ") - (");
+      EmitConstantValueOnly(CE->getOperand(1));
+      O << ")";
+      break;
+    default:
+      assert(0 && "Unsupported operator!");
+    }
+  } else {
+    assert(0 && "Unknown constant value!");
+  }
+}
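+
+// As a rough illustration (ignoring any target name-mangling prefix), a
+// constant GEP into a global @buf with a byte offset of 8 prints as the
+// symbolic expression "(buf) + 8", and a ptrtoint of a global emitted into a
+// pointer-sized (or wider) integer slot prints just the global's name.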
+
+/// printAsCString - Print the specified array as a C compatible string, only if
+/// the predicate isString is true.
+///
+static void printAsCString(std::ostream &O, const ConstantArray *CVA,
+                           unsigned LastElt) {
+  assert(CVA->isString() && "Array is not string compatible!");
+
+  O << "\"";
+  for (unsigned i = 0; i != LastElt; ++i) {
+    unsigned char C =
+        (unsigned char)cast<ConstantInt>(CVA->getOperand(i))->getZExtValue();
+    printStringChar(O, C);
+  }
+  O << "\"";
+}
+
+/// EmitString - Emit a zero-byte-terminated string constant.
+///
+void AsmPrinter::EmitString(const ConstantArray *CVA) const {
+  unsigned NumElts = CVA->getNumOperands();
+  if (TAI->getAscizDirective() && NumElts && 
+      cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) {
+    O << TAI->getAscizDirective();
+    printAsCString(O, CVA, NumElts-1);
+  } else {
+    O << TAI->getAsciiDirective();
+    printAsCString(O, CVA, NumElts);
+  }
+  O << "\n";
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+///
+void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
+  const TargetData *TD = TM.getTargetData();
+
+  if (CV->isNullValue() || isa<UndefValue>(CV)) {
+    EmitZeros(TD->getTypeSize(CV->getType()));
+    return;
+  } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+    if (CVA->isString()) {
+      EmitString(CVA);
+    } else { // Not a string.  Print the values in successive locations
+      for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+        EmitGlobalConstant(CVA->getOperand(i));
+    }
+    return;
+  } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+    // Print the fields in successive locations. Pad to align if needed!
+    const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+    uint64_t sizeSoFar = 0;
+    for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+      const Constant* field = CVS->getOperand(i);
+
+      // Check if padding is needed and insert one or more 0s.
+      uint64_t fieldSize = TD->getTypeSize(field->getType());
+      uint64_t padSize = ((i == e-1? cvsLayout->getSizeInBytes()
+                           : cvsLayout->getElementOffset(i+1))
+                          - cvsLayout->getElementOffset(i)) - fieldSize;
+      sizeSoFar += fieldSize + padSize;
+
+      // Now print the actual field value
+      EmitGlobalConstant(field);
+
+      // Insert the field padding unless it's zero bytes...
+      EmitZeros(padSize);
+    }
+    assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+           "Layout of constant struct may be incorrect!");
+    return;
+  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+    // FP Constants are printed as integer constants to avoid losing
+    // precision...
+    double Val = CFP->getValue();
+    if (CFP->getType() == Type::DoubleTy) {
+      if (TAI->getData64bitsDirective())
+        O << TAI->getData64bitsDirective() << DoubleToBits(Val) << "\t"
+          << TAI->getCommentString() << " double value: " << Val << "\n";
+      else if (TD->isBigEndian()) {
+        O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val) >> 32)
+          << "\t" << TAI->getCommentString()
+          << " double most significant word " << Val << "\n";
+        O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val))
+          << "\t" << TAI->getCommentString()
+          << " double least significant word " << Val << "\n";
+      } else {
+        O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val))
+          << "\t" << TAI->getCommentString()
+          << " double least significant word " << Val << "\n";
+        O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val) >> 32)
+          << "\t" << TAI->getCommentString()
+          << " double most significant word " << Val << "\n";
+      }
+      return;
+    } else {
+      O << TAI->getData32bitsDirective() << FloatToBits(Val)
+        << "\t" << TAI->getCommentString() << " float " << Val << "\n";
+      return;
+    }
+  } else if (CV->getType() == Type::Int64Ty) {
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+      uint64_t Val = CI->getZExtValue();
+
+      if (TAI->getData64bitsDirective())
+        O << TAI->getData64bitsDirective() << Val << "\n";
+      else if (TD->isBigEndian()) {
+        O << TAI->getData32bitsDirective() << unsigned(Val >> 32)
+          << "\t" << TAI->getCommentString()
+          << " Double-word most significant word " << Val << "\n";
+        O << TAI->getData32bitsDirective() << unsigned(Val)
+          << "\t" << TAI->getCommentString()
+          << " Double-word least significant word " << Val << "\n";
+      } else {
+        O << TAI->getData32bitsDirective() << unsigned(Val)
+          << "\t" << TAI->getCommentString()
+          << " Double-word least significant word " << Val << "\n";
+        O << TAI->getData32bitsDirective() << unsigned(Val >> 32)
+          << "\t" << TAI->getCommentString()
+          << " Double-word most significant word " << Val << "\n";
+      }
+      return;
+    }
+  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+    const VectorType *PTy = CP->getType();
+    
+    for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+      EmitGlobalConstant(CP->getOperand(I));
+    
+    return;
+  }
+
+  const Type *type = CV->getType();
+  printDataDirective(type);
+  EmitConstantValueOnly(CV);
+  O << "\n";
+}
+
+void
+AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+  // Target doesn't support this yet!
+  abort();
+}
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself.  This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings.  The
+/// syntax used is ${:comment}.  Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) {
+  if (!strcmp(Code, "private")) {
+    O << TAI->getPrivateGlobalPrefix();
+  } else if (!strcmp(Code, "comment")) {
+    O << TAI->getCommentString();
+  } else if (!strcmp(Code, "uid")) {
+    // Assign a unique ID to this machine instruction.
+    static const MachineInstr *LastMI = 0;
+    static const Function *F = 0;
+    static unsigned Counter = 0U-1;
+
+    // Comparing the address of MI isn't sufficient, because machineinstrs may
+    // be allocated to the same address across functions.
+    const Function *ThisF = MI->getParent()->getParent()->getFunction();
+    
+    // If this is a new machine instruction, bump the counter.
+    if (LastMI != MI || F != ThisF) {
+      ++Counter;
+      LastMI = MI;
+      F = ThisF;
+    }
+    O << Counter;
+  } else {
+    cerr << "Unknown special formatter '" << Code
+         << "' for machine instr: " << *MI;
+    exit(1);
+  }    
+}
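+
+// For example, PrintSpecial(MI, "comment") prints the target's comment
+// string, and PrintSpecial(MI, "uid") prints a counter that is bumped each
+// time a different machine instruction (or function) is seen.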
+
+
+/// printInlineAsm - This method formats and prints the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
+  unsigned NumOperands = MI->getNumOperands();
+  
+  // Count the number of register definitions.
+  unsigned NumDefs = 0;
+  for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+       ++NumDefs)
+    assert(NumDefs != NumOperands-1 && "No asm string?");
+  
+  assert(MI->getOperand(NumDefs).isExternalSymbol() && "No asm string?");
+
+  // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+  const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+  // If this asmstr is empty, don't bother printing the #APP/#NOAPP markers.
+  if (AsmStr[0] == 0) {
+    O << "\n";  // Tab already printed, avoid double indenting next instr.
+    return;
+  }
+  
+  O << TAI->getInlineAsmStart() << "\n\t";
+
+  // The variant of the current asmprinter.
+  int AsmPrinterVariant = TAI->getAssemblerDialect();
+
+  int CurVariant = -1;            // The number of the {.|.|.} region we are in.
+  const char *LastEmitted = AsmStr; // One past the last character emitted.
+  
+  while (*LastEmitted) {
+    switch (*LastEmitted) {
+    default: {
+      // Not a special case, emit the string section literally.
+      const char *LiteralEnd = LastEmitted+1;
+      while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+             *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+        ++LiteralEnd;
+      if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+        O.write(LastEmitted, LiteralEnd-LastEmitted);
+      LastEmitted = LiteralEnd;
+      break;
+    }
+    case '\n':
+      ++LastEmitted;   // Consume newline character.
+      O << "\n";       // Indent code with newline.
+      break;
+    case '$': {
+      ++LastEmitted;   // Consume '$' character.
+      bool Done = true;
+
+      // Handle escapes.
+      switch (*LastEmitted) {
+      default: Done = false; break;
+      case '$':     // $$ -> $
+        if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+          O << '$';
+        ++LastEmitted;  // Consume second '$' character.
+        break;
+      case '(':             // $( -> same as GCC's { character.
+        ++LastEmitted;      // Consume '(' character.
+        if (CurVariant != -1) {
+          cerr << "Nested variants found in inline asm string: '"
+               << AsmStr << "'\n";
+          exit(1);
+        }
+        CurVariant = 0;     // We're in the first variant now.
+        break;
+      case '|':
+        ++LastEmitted;  // consume '|' character.
+        if (CurVariant == -1) {
+          cerr << "Found '|' character outside of variant in inline asm "
+               << "string: '" << AsmStr << "'\n";
+          exit(1);
+        }
+        ++CurVariant;   // We're in the next variant.
+        break;
+      case ')':         // $) -> same as GCC's } char.
+        ++LastEmitted;  // consume ')' character.
+        if (CurVariant == -1) {
+          cerr << "Found '}' character outside of variant in inline asm "
+               << "string: '" << AsmStr << "'\n";
+          exit(1);
+        }
+        CurVariant = -1;
+        break;
+      }
+      if (Done) break;
+      
+      bool HasCurlyBraces = false;
+      if (*LastEmitted == '{') {     // ${variable}
+        ++LastEmitted;               // Consume '{' character.
+        HasCurlyBraces = true;
+      }
+      
+      const char *IDStart = LastEmitted;
+      char *IDEnd;
+      errno = 0;
+      long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs.
+      if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) {
+        cerr << "Bad $ operand number in inline asm string: '" 
+             << AsmStr << "'\n";
+        exit(1);
+      }
+      LastEmitted = IDEnd;
+      
+      char Modifier[2] = { 0, 0 };
+      
+      if (HasCurlyBraces) {
+        // If we have curly braces, check for a modifier character.  This
+        // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+        if (*LastEmitted == ':') {
+          ++LastEmitted;    // Consume ':' character.
+          if (*LastEmitted == 0) {
+            cerr << "Bad ${:} expression in inline asm string: '" 
+                 << AsmStr << "'\n";
+            exit(1);
+          }
+          
+          Modifier[0] = *LastEmitted;
+          ++LastEmitted;    // Consume modifier character.
+        }
+        
+        if (*LastEmitted != '}') {
+          cerr << "Bad ${} expression in inline asm string: '" 
+               << AsmStr << "'\n";
+          exit(1);
+        }
+        ++LastEmitted;    // Consume '}' character.
+      }
+      
+      if ((unsigned)Val >= NumOperands-1) {
+        cerr << "Invalid $ operand number in inline asm string: '" 
+             << AsmStr << "'\n";
+        exit(1);
+      }
+      
+      // Okay, we finally have a value number.  Ask the target to print this
+      // operand!
+      if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+        unsigned OpNo = 1;
+
+        bool Error = false;
+
+        // Scan to find the machine operand number for the operand.
+        for (; Val; --Val) {
+          if (OpNo >= MI->getNumOperands()) break;
+          unsigned OpFlags = MI->getOperand(OpNo).getImmedValue();
+          OpNo += (OpFlags >> 3) + 1;
+        }
+
+        if (OpNo >= MI->getNumOperands()) {
+          Error = true;
+        } else {
+          unsigned OpFlags = MI->getOperand(OpNo).getImmedValue();
+          ++OpNo;  // Skip over the ID number.
+
+          AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+          if ((OpFlags & 7) == 4 /*ADDR MODE*/) {
+            Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+                                              Modifier[0] ? Modifier : 0);
+          } else {
+            Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+                                        Modifier[0] ? Modifier : 0);
+          }
+        }
+        if (Error) {
+          cerr << "Invalid operand found in inline asm: '"
+               << AsmStr << "'\n";
+          MI->dump();
+          exit(1);
+        }
+      }
+      break;
+    }
+    }
+  }
+  O << "\n\t" << TAI->getInlineAsmEnd() << "\n";
+}
+
+/// printLabel - This method prints a local label used by debug and
+/// exception handling tables.
+void AsmPrinter::printLabel(const MachineInstr *MI) const {
+  O << "\n"
+    << TAI->getPrivateGlobalPrefix()
+    << "label"
+    << MI->getOperand(0).getImmedValue()
+    << ":\n";
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant.  Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                 unsigned AsmVariant, const char *ExtraCode) {
+  // Target doesn't support this yet!
+  return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode) {
+  // Target doesn't support this yet!
+  return true;
+}
+
+/// printBasicBlockLabel - This method prints the label for the specified
+/// MachineBasicBlock
+void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB,
+                                      bool printColon,
+                                      bool printComment) const {
+  O << TAI->getPrivateGlobalPrefix() << "BB" << FunctionNumber << "_"
+    << MBB->getNumber();
+  if (printColon)
+    O << ':';
+  if (printComment && MBB->getBasicBlock())
+    O << '\t' << TAI->getCommentString() << MBB->getBasicBlock()->getName();
+}
+
+/// printSetLabel - This method prints a set label for the specified
+/// MachineBasicBlock
+void AsmPrinter::printSetLabel(unsigned uid, 
+                               const MachineBasicBlock *MBB) const {
+  if (!TAI->getSetDirective())
+    return;
+  
+  O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+    << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+  printBasicBlockLabel(MBB, false, false);
+  O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() 
+    << '_' << uid << '\n';
+}
+
+void AsmPrinter::printSetLabel(unsigned uid, unsigned uid2,
+                               const MachineBasicBlock *MBB) const {
+  if (!TAI->getSetDirective())
+    return;
+  
+  O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+    << getFunctionNumber() << '_' << uid << '_' << uid2
+    << "_set_" << MBB->getNumber() << ',';
+  printBasicBlockLabel(MBB, false, false);
+  O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() 
+    << '_' << uid << '_' << uid2 << '\n';
+}
+
+/// printDataDirective - This method prints the asm directive for the
+/// specified type.
+void AsmPrinter::printDataDirective(const Type *type) {
+  const TargetData *TD = TM.getTargetData();
+  switch (type->getTypeID()) {
+  case Type::IntegerTyID: {
+    unsigned BitWidth = cast<IntegerType>(type)->getBitWidth();
+    if (BitWidth <= 8)
+      O << TAI->getData8bitsDirective();
+    else if (BitWidth <= 16)
+      O << TAI->getData16bitsDirective();
+    else if (BitWidth <= 32)
+      O << TAI->getData32bitsDirective();
+    else if (BitWidth <= 64) {
+      assert(TAI->getData64bitsDirective() &&
+             "Target cannot handle 64-bit constant exprs!");
+      O << TAI->getData64bitsDirective();
+    }
+    break;
+  }
+  case Type::PointerTyID:
+    if (TD->getPointerSize() == 8) {
+      assert(TAI->getData64bitsDirective() &&
+             "Target cannot handle 64-bit pointer exprs!");
+      O << TAI->getData64bitsDirective();
+    } else {
+      O << TAI->getData32bitsDirective();
+    }
+    break;
+  case Type::FloatTyID: case Type::DoubleTyID:
+    assert (0 && "Should have already output floating point constant.");
+  default:
+    assert (0 && "Can't handle printing this type of thing");
+    break;
+  }
+}
+
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..0fca985
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1093 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block.  This pass often results in dead MBBs, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", 
+                              cl::init(cl::BOU_UNSET), cl::Hidden);
+namespace {
+  // Throttle for huge numbers of predecessors (compile speed problems)
+  cl::opt<unsigned>
+  TailMergeThreshold("tail-merge-threshold", 
+            cl::desc("Max number of predecessors to consider tail merging"),
+            cl::init(100), cl::Hidden);
+
+  struct BranchFolder : public MachineFunctionPass {
+    static char ID;
+    BranchFolder(bool defaultEnableTailMerge) : 
+        MachineFunctionPass((intptr_t)&ID) {
+          switch (FlagEnableTailMerge) {
+          case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+          case cl::BOU_TRUE: EnableTailMerge = true; break;
+          case cl::BOU_FALSE: EnableTailMerge = false; break;
+          }
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+    const TargetInstrInfo *TII;
+    MachineModuleInfo *MMI;
+    bool MadeChange;
+  private:
+    // Tail Merging.
+    bool EnableTailMerge;
+    bool TailMergeBlocks(MachineFunction &MF);
+    bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+                        MachineBasicBlock* PredBB);
+    void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                 MachineBasicBlock *NewDest);
+    MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+                                  MachineBasicBlock::iterator BBI1);
+
+    std::vector<std::pair<unsigned,MachineBasicBlock*> > MergePotentials;
+    const MRegisterInfo *RegInfo;
+    RegScavenger *RS;
+    // Branch optimization.
+    bool OptimizeBranches(MachineFunction &MF);
+    void OptimizeBlock(MachineBasicBlock *MBB);
+    void RemoveDeadBlock(MachineBasicBlock *MBB);
+    
+    bool CanFallThrough(MachineBasicBlock *CurBB);
+    bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
+                        MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+                        const std::vector<MachineOperand> &Cond);
+  };
+  char BranchFolder::ID = 0;
+}
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+  return new BranchFolder(DefaultEnableTailMerge);
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+  assert(MBB->pred_empty() && "MBB must be dead!");
+  DOUT << "\nRemoving MBB: " << *MBB;
+  
+  MachineFunction *MF = MBB->getParent();
+  // drop all successors.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_end()-1);
+  
+  // If there is active DWARF info, check to see if there are any LABEL
+  // records in the basic block.  If so, unregister them from MachineModuleInfo.
+  if (MMI && !MBB->empty()) {
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      if ((unsigned)I->getOpcode() == TargetInstrInfo::LABEL) {
+        // The label ID # is always operand #0, an immediate.
+        MMI->InvalidateLabel(I->getOperand(0).getImm());
+      }
+    }
+  }
+  
+  // Remove the block.
+  MF->getBasicBlockList().erase(MBB);
+}
+
+bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
+  TII = MF.getTarget().getInstrInfo();
+  if (!TII) return false;
+
+  // Fix CFG.  The later algorithms expect it to be right.
+  bool EverMadeChange = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+    MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+    std::vector<MachineOperand> Cond;
+    if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+      EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+  }
+
+  RegInfo = MF.getTarget().getRegisterInfo();
+  RS = RegInfo->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+  MMI = getAnalysisToUpdate<MachineModuleInfo>();
+
+  bool MadeChangeThisIteration = true;
+  while (MadeChangeThisIteration) {
+    MadeChangeThisIteration = false;
+    MadeChangeThisIteration |= TailMergeBlocks(MF);
+    MadeChangeThisIteration |= OptimizeBranches(MF);
+    EverMadeChange |= MadeChangeThisIteration;
+  }
+
+  // See if any jump tables have become mergeable or dead as the code generator
+  // did its thing.
+  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
+  if (!JTs.empty()) {
+    // Figure out how these jump tables should be merged.
+    std::vector<unsigned> JTMapping;
+    JTMapping.reserve(JTs.size());
+    
+    // We always keep the 0th jump table.
+    JTMapping.push_back(0);
+
+    // Scan the jump tables, seeing if there are any duplicates.  Note that this
+    // is N^2, which should be fixed someday.
+    for (unsigned i = 1, e = JTs.size(); i != e; ++i)
+      JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
+    
+    // If a jump table was merged with another one, walk the function rewriting
+    // references to jump tables to reference the new JT IDs.  Keep track of
+    // whether we see a jump table index; if not, we can delete the JT.
+    std::vector<bool> JTIsLive;
+    JTIsLive.resize(JTs.size());
+    for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+         BB != E; ++BB) {
+      for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+           I != E; ++I)
+        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+          MachineOperand &Op = I->getOperand(op);
+          if (!Op.isJumpTableIndex()) continue;
+          unsigned NewIdx = JTMapping[Op.getJumpTableIndex()];
+          Op.setJumpTableIndex(NewIdx);
+
+          // Remember that this JT is live.
+          JTIsLive[NewIdx] = true;
+        }
+    }
+   
+    // Finally, remove dead jump tables.  This happens either because the
+    // indirect jump was unreachable (and thus deleted) or because the jump
+    // table was merged with some other one.
+    for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+      if (!JTIsLive[i]) {
+        JTI->RemoveJumpTable(i);
+        EverMadeChange = true;
+      }
+  }
+  
+  delete RS;
+  return EverMadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//  Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+  unsigned Hash = MI->getOpcode();
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &Op = MI->getOperand(i);
+    
+    // Merge in bits from the operand if easy.
+    unsigned OperandHash = 0;
+    switch (Op.getType()) {
+    case MachineOperand::MO_Register:          OperandHash = Op.getReg(); break;
+    case MachineOperand::MO_Immediate:         OperandHash = Op.getImm(); break;
+    case MachineOperand::MO_MachineBasicBlock:
+      OperandHash = Op.getMachineBasicBlock()->getNumber();
+      break;
+    case MachineOperand::MO_FrameIndex: OperandHash = Op.getFrameIndex(); break;
+    case MachineOperand::MO_ConstantPoolIndex:
+      OperandHash = Op.getConstantPoolIndex();
+      break;
+    case MachineOperand::MO_JumpTableIndex:
+      OperandHash = Op.getJumpTableIndex();
+      break;
+    case MachineOperand::MO_GlobalAddress:
+    case MachineOperand::MO_ExternalSymbol:
+      // Global address / external symbol are too hard, don't bother, but do
+      // pull in the offset.
+      OperandHash = Op.getOffset();
+      break;
+    default: break;
+    }
+    
+    Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+  }
+  return Hash;
+}
+
+/// HashEndOfMBB - Hash the last few instructions in the MBB.  For blocks
+/// with no successors, we hash two instructions, because cross-jumping 
+/// only saves code when at least two instructions are removed (since a 
+/// branch must be inserted).  For blocks with a successor, one of the
+/// two blocks to be tail-merged will end with a branch already, so it
+/// pays to cross-jump even for a single instruction.
+
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
+                             unsigned minCommonTailLength) {
+  MachineBasicBlock::const_iterator I = MBB->end();
+  if (I == MBB->begin())
+    return 0;   // Empty MBB.
+  
+  --I;
+  unsigned Hash = HashMachineInstr(I);
+    
+  if (I == MBB->begin() || minCommonTailLength == 1)
+    return Hash;   // Single instr MBB.
+  
+  --I;
+  // Hash in the second-to-last instruction.
+  Hash ^= HashMachineInstr(I) << 2;
+  return Hash;
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their end.  Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+                                        MachineBasicBlock *MBB2,
+                                        MachineBasicBlock::iterator &I1,
+                                        MachineBasicBlock::iterator &I2) {
+  I1 = MBB1->end();
+  I2 = MBB2->end();
+  
+  unsigned TailLen = 0;
+  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+    --I1; --I2;
+    if (!I1->isIdenticalTo(I2)) {
+      ++I1; ++I2;
+      break;
+    }
+    ++TailLen;
+  }
+  return TailLen;
+}
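+
+// For example (illustrative pseudo-assembly), if MBB1 ends with
+//     add r1, r2
+//     ret
+// and MBB2 ends with
+//     mov r3, 0
+//     add r1, r2
+//     ret
+// the common tail length is 2, and I1/I2 are left pointing at the matching
+// "add" instructions.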
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                           MachineBasicBlock *NewDest) {
+  MachineBasicBlock *OldBB = OldInst->getParent();
+  
+  // Remove all the old successors of OldBB from the CFG.
+  while (!OldBB->succ_empty())
+    OldBB->removeSuccessor(OldBB->succ_begin());
+  
+  // Remove all the dead instructions from the end of OldBB.
+  OldBB->erase(OldInst, OldBB->end());
+
+  // If OldBB isn't immediately before NewDest, insert a branch to NewDest.
+  if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
+    TII->InsertBranch(*OldBB, NewDest, 0, std::vector<MachineOperand>());
+  OldBB->addSuccessor(NewDest);
+  ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls through into the part
+/// starting at the iterator.  This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+                                            MachineBasicBlock::iterator BBI1) {
+  // Create the fall-through block.
+  MachineFunction::iterator MBBI = &CurMBB;
+  MachineBasicBlock *NewMBB = new MachineBasicBlock(CurMBB.getBasicBlock());
+  CurMBB.getParent()->getBasicBlockList().insert(++MBBI, NewMBB);
+
+  // Move all the successors of this block to the specified block.
+  while (!CurMBB.succ_empty()) {
+    MachineBasicBlock *S = *(CurMBB.succ_end()-1);
+    NewMBB->addSuccessor(S);
+    CurMBB.removeSuccessor(S);
+  }
+ 
+  // Add an edge from CurMBB to NewMBB for the fall-through.
+  CurMBB.addSuccessor(NewMBB);
+  
+  // Splice the code over.
+  NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+  // For targets that use the register scavenger, we must maintain LiveIns.
+  if (RS) {
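+    // Walk the scavenger to the end of what remains of CurMBB (the split
+    // point) and record every register still live there as a live-in of NewMBB.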
+    RS->enterBasicBlock(&CurMBB);
+    if (!CurMBB.empty())
+      RS->forward(prior(CurMBB.end()));
+    BitVector RegsLiveAtExit(RegInfo->getNumRegs());
+    RS->getRegsUsed(RegsLiveAtExit, false);
+    for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++)
+      if (RegsLiveAtExit[i])
+        NewMBB->addLiveIn(i);
+  }
+
+  return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+                                MachineBasicBlock::iterator E,
+                                const TargetInstrInfo *TII) {
+  unsigned Time = 0;
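+  // Very rough cost model: calls are expensive, memory operations somewhat
+  // expensive, and everything else costs one unit.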
+  for (; I != E; ++I) {
+    const TargetInstrDescriptor &TID = TII->get(I->getOpcode());
+    if (TID.Flags & M_CALL_FLAG)
+      Time += 10;
+    else if (TID.Flags & (M_LOAD_FLAG|M_STORE_FLAG))
+      Time += 2;
+    else
+      ++Time;
+  }
+  return Time;
+}
+
+/// ShouldSplitFirstBlock - We need to either split MBB1 at MBB1I or MBB2 at
+/// MBB2I and then insert an unconditional branch in the other block.  Determine
+/// which is the better block to split.
+static bool ShouldSplitFirstBlock(MachineBasicBlock *MBB1,
+                                  MachineBasicBlock::iterator MBB1I,
+                                  MachineBasicBlock *MBB2,
+                                  MachineBasicBlock::iterator MBB2I,
+                                  const TargetInstrInfo *TII,
+                                  MachineBasicBlock *PredBB) {
+  // If one block is the entry block, split the other one; we can't generate
+  // a branch to the entry block, as its label is not emitted.
+  MachineBasicBlock *Entry = MBB1->getParent()->begin();
+  if (MBB1 == Entry)
+    return false;
+  if (MBB2 == Entry)
+    return true;
+
+  // If one block falls through into the common successor, choose that
+  // one to split; splitting it costs one instruction less.
+  if (PredBB) {
+    if (MBB1 == PredBB)
+      return true;
+    else if (MBB2 == PredBB)
+      return false;
+  }
+  // TODO: if we had some notion of which block was hotter, we could split
+  // the hot block, so it is the fall-through.  Since we don't have profile
+  // info, make a decision based on which block will hurt the most to split.
+  unsigned MBB1Time = EstimateRuntime(MBB1->begin(), MBB1I, TII);
+  unsigned MBB2Time = EstimateRuntime(MBB2->begin(), MBB2I, TII);
+  
+  // If the MBB1 prefix takes "less time" to run than the MBB2 prefix, split the
+  // MBB1 block so it falls through.  This will penalize the MBB2 path, but will
+  // have a lower overall impact on the program execution.
+  return MBB1Time < MBB2Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccMBB (we removed these
+// branches temporarily for tail merging).  In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccMBB instead.
+
+static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
+                    const TargetInstrInfo *TII) {
+  MachineFunction *MF = CurMBB->getParent();
+  MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  std::vector<MachineOperand> Cond;
+  if (I != MF->end() &&
+      !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond)) {
+    MachineBasicBlock *NextBB = I;
+    if (TBB == NextBB && Cond.size() && !FBB) {
+      if (!TII->ReverseBranchCondition(Cond)) {
+        TII->RemoveBranch(*CurMBB);
+        TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
+        return;
+      }
+    }
+  }
+  TII->InsertBranch(*CurMBB, SuccBB, NULL, std::vector<MachineOperand>());
+}
+
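+// MergeCompare - Comparison predicate used to sort MergePotentials.  Order by
+// hash value, breaking ties with the block number so the sort (and therefore
+// the merging) is deterministic.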
+static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
+                         const std::pair<unsigned,MachineBasicBlock*> &q) {
+  if (p.first < q.first)
+    return true;
+  else if (p.first > q.first)
+    return false;
+  else if (p.second->getNumber() < q.second->getNumber())
+    return true;
+  else if (p.second->getNumber() > q.second->getNumber())
+    return false;
+  else {
+    // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+    // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+    assert(0 && "Predecessor appears twice");
+#endif
+    return false;
+  }
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged.  If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).  
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
+                                  MachineBasicBlock* PredBB) {
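+  // A merge must save more code than the unconditional branch it may force us
+  // to insert, so blocks with no common successor need a common tail of at
+  // least two instructions (see HashEndOfMBB).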
+  unsigned minCommonTailLength = (SuccBB ? 1 : 2);
+  MadeChange = false;
+  
+  // Sort by hash value so that blocks with identical end sequences sort
+  // together.
+  std::stable_sort(MergePotentials.begin(), MergePotentials.end(), MergeCompare);
+
+  // Walk through equivalence sets looking for actual exact matches.
+  while (MergePotentials.size() > 1) {
+    unsigned CurHash  = (MergePotentials.end()-1)->first;
+    unsigned PrevHash = (MergePotentials.end()-2)->first;
+    MachineBasicBlock *CurMBB = (MergePotentials.end()-1)->second;
+    
+    // If there is nothing that matches the hash of the current basic block,
+    // give up.
+    if (CurHash != PrevHash) {
+      if (SuccBB && CurMBB != PredBB)
+        FixTail(CurMBB, SuccBB, TII);
+      MergePotentials.pop_back();
+      continue;
+    }
+    
+    // Look through all the pairs of blocks that have the same hash as this
+    // one, and find the pair that has the largest number of instructions in
+    // common.  Since instructions may get combined later (e.g. single stores
+    // into store multiple) this measure is not particularly accurate.
+    MachineBasicBlock::iterator BBI1, BBI2;
+    
+    unsigned FoundI = ~0U, FoundJ = ~0U;
+    unsigned maxCommonTailLength = 0U;
+    for (int i = MergePotentials.size()-1;
+         i != -1 && MergePotentials[i].first == CurHash; --i) {
+      for (int j = i-1; 
+           j != -1 && MergePotentials[j].first == CurHash; --j) {
+        MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+        unsigned CommonTailLen = ComputeCommonTailLength(
+                                                MergePotentials[i].second,
+                                                MergePotentials[j].second,
+                                                TrialBBI1, TrialBBI2);
+        if (CommonTailLen >= minCommonTailLength &&
+            CommonTailLen > maxCommonTailLength) {
+          FoundI = i;
+          FoundJ = j;
+          maxCommonTailLength = CommonTailLen;
+          BBI1 = TrialBBI1;
+          BBI2 = TrialBBI2;
+        }
+      }
+    }
+
+    // If we didn't find any pair that has at least minCommonTailLength 
+    // instructions in common, bail out.  All entries with this
+    // hash code can go away now.
+    if (FoundI == ~0U) {
+      for (int i = MergePotentials.size()-1;
+           i != -1 && MergePotentials[i].first == CurHash; --i) {
+        // Put the unconditional branch back, if we need one.
+        CurMBB = MergePotentials[i].second;
+        if (SuccBB && CurMBB != PredBB)
+          FixTail(CurMBB, SuccBB, TII);
+        MergePotentials.pop_back();
+      }
+      continue;
+    }
+
+    // Otherwise, move the block(s) to the right position(s), so that the
+    // entry at FoundI becomes the last element and the entry at FoundJ the
+    // next-to-last; this keeps BBI1/BBI2 paired with the right blocks.
+    if (FoundI != MergePotentials.size()-1)
+      std::swap(MergePotentials[FoundI], *(MergePotentials.end()-1));
+    if (FoundJ != MergePotentials.size()-2)
+      std::swap(MergePotentials[FoundJ], *(MergePotentials.end()-2));
+
+    CurMBB = (MergePotentials.end()-1)->second;
+    MachineBasicBlock *MBB2 = (MergePotentials.end()-2)->second;
+
+    // If neither block is the entire common tail, split the tail of one block
+    // to make it redundant with the other tail.  Also, we cannot jump to the
+    // entry block, so if one block is the entry block, split the other one.
+    MachineBasicBlock *Entry = CurMBB->getParent()->begin();
+    if (CurMBB->begin() == BBI1 && CurMBB != Entry)
+      ;   // CurMBB is common tail
+    else if (MBB2->begin() == BBI2 && MBB2 != Entry)
+      ;   // MBB2 is common tail
+    else {
+      if (0) { // Enable this to disable partial tail merges.
+        MergePotentials.pop_back();
+        continue;
+      }
+      
+      // Decide whether we want to split CurMBB or MBB2.
+      if (ShouldSplitFirstBlock(CurMBB, BBI1, MBB2, BBI2, TII, PredBB)) {
+        CurMBB = SplitMBBAt(*CurMBB, BBI1);
+        BBI1 = CurMBB->begin();
+        MergePotentials.back().second = CurMBB;
+      } else {
+        MBB2 = SplitMBBAt(*MBB2, BBI2);
+        BBI2 = MBB2->begin();
+        (MergePotentials.end()-2)->second = MBB2;
+      }
+    }
+    
+    if (MBB2->begin() == BBI2 && MBB2 != Entry) {
+      // Hack the end off CurMBB, making it jump to MBB2 instead.
+      ReplaceTailWithBranchTo(BBI1, MBB2);
+      // This modifies CurMBB, so remove it from the worklist.
+      MergePotentials.pop_back();
+    } else {
+      assert(CurMBB->begin() == BBI1 && CurMBB != Entry && 
+             "Didn't split block correctly?");
+      // Hack the end off MBB2, making it jump to CurMBB instead.
+      ReplaceTailWithBranchTo(BBI2, CurMBB);
+      // This modifies MBB2, so remove it from the worklist.
+      MergePotentials.erase(MergePotentials.end()-2);
+    }
+    MadeChange = true;
+  }
+  return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+  if (!EnableTailMerge) return false;
+ 
+  MadeChange = false;
+
+  // First find blocks with no successors.
+  MergePotentials.clear();
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    if (I->succ_empty())
+      MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
+  }
+  // See if we can do any tail merging on those.
+  if (MergePotentials.size() < TailMergeThreshold)
+    MadeChange |= TryMergeBlocks(NULL, NULL);
+
+  // Look at blocks (IBB) with multiple predecessors (PBB).
+  // We change each predecessor to a canonical form, by
+  // (1) temporarily removing any unconditional branch from the predecessor
+  // to IBB, and
+  // (2) altering conditional branches so they branch to the other block,
+  // not IBB; this may require adding back an unconditional branch to IBB
+  // later, where there wasn't one coming in.  E.g.
+  //   Bcc IBB
+  //   fallthrough to QBB
+  // here becomes
+  //   Bncc QBB
+  // with a conceptual B to IBB after that, which never actually exists.
+  // With those changes, we see whether the predecessors' tails match,
+  // and merge them if so.  We change things out of canonical form and
+  // back to the way they were later in the process.  (OptimizeBranches
+  // would undo some of this, but we can't use it, because we'd get into
+  // a compile-time infinite loop repeatedly doing and undoing the same
+  // transformations.)
+
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    if (!I->succ_empty() && I->pred_size() >= 2 && 
+         I->pred_size() < TailMergeThreshold) {
+      MachineBasicBlock *IBB = I;
+      MachineBasicBlock *PredBB = prior(I);
+      MergePotentials.clear();
+      for (MachineBasicBlock::pred_iterator P = I->pred_begin(), 
+                                            E2 = I->pred_end();
+           P != E2; ++P) {
+        MachineBasicBlock* PBB = *P;
+        // Skip blocks that loop to themselves; we can't tail-merge these.
+        if (PBB==IBB)
+          continue;
+        MachineBasicBlock *TBB = 0, *FBB = 0;
+        std::vector<MachineOperand> Cond;
+        if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond)) {
+          // Failing case:  IBB is the target of a cbr, and
+          // we cannot reverse the branch.
+          std::vector<MachineOperand> NewCond(Cond);
+          if (Cond.size() && TBB==IBB) {
+            if (TII->ReverseBranchCondition(NewCond))
+              continue;
+            // This is the QBB case described above
+            if (!FBB)
+              FBB = next(MachineFunction::iterator(PBB));
+          }
+          // Failing case:  the only way IBB can be reached from PBB is via
+          // exception handling.  Happens for landing pads.  Would be nice
+          // to have a bit in the edge so we didn't have to do all this.
+          if (IBB->isLandingPad()) {
+            MachineFunction::iterator IP = PBB;  IP++;
+            MachineBasicBlock* PredNextBB = NULL;
+            if (IP!=MF.end())
+              PredNextBB = IP;
+            if (TBB==NULL) {
+              if (IBB!=PredNextBB)      // fallthrough
+                continue;
+            } else if (FBB) {
+              if (TBB!=IBB && FBB!=IBB)   // cbr then ubr
+                continue;
+            } else if (Cond.size() == 0) {
+              if (TBB!=IBB)               // ubr
+                continue;
+            } else {
+              if (TBB!=IBB && IBB!=PredNextBB)  // cbr
+                continue;
+            }
+          }
+          // Remove the unconditional branch at the end, if any.
+          if (TBB && (Cond.size()==0 || FBB)) {
+            TII->RemoveBranch(*PBB);
+            if (Cond.size())
+              // reinsert conditional branch only, for now
+              TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+          }
+          MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
+        }
+      }
+      if (MergePotentials.size() >= 2)
+        MadeChange |= TryMergeBlocks(I, PredBB);
+      // Reinsert an unconditional branch if needed.
+      // The 1 below can be either an original single predecessor, or a result
+      // of removing blocks in TryMergeBlocks.
+      PredBB = prior(I);      // this may have been changed in TryMergeBlocks
+      if (MergePotentials.size() == 1 &&
+          (MergePotentials.begin())->second != PredBB)
+        FixTail((MergePotentials.begin())->second, I, TII);
+    }
+  }
+  return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//  Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+  MadeChange = false;
+  
+  // Make sure blocks are numbered in order
+  MF.RenumberBlocks();
+
+  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+    MachineBasicBlock *MBB = I++;
+    OptimizeBlock(MBB);
+    
+    // If it is dead, remove it.
+    if (MBB->pred_empty()) {
+      RemoveDeadBlock(MBB);
+      MadeChange = true;
+      ++NumDeadBlocks;
+    }
+  }
+  return MadeChange;
+}
+
+
+/// CanFallThrough - Return true if the specified block (with the specified
+/// branch condition) can implicitly transfer control to the block after it by
+/// falling off the end of it.  This should return false if it can reach the
+/// block after it, but it uses an explicit branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
+                                  bool BranchUnAnalyzable,
+                                  MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+                                  const std::vector<MachineOperand> &Cond) {
+  MachineFunction::iterator Fallthrough = CurBB;
+  ++Fallthrough;
+  // If FallthroughBlock is off the end of the function, it can't fall through.
+  if (Fallthrough == CurBB->getParent()->end())
+    return false;
+  
+  // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
+  if (!CurBB->isSuccessor(Fallthrough))
+    return false;
+  
+  // If we couldn't analyze the branch, assume it could fall through.
+  if (BranchUnAnalyzable) return true;
+  
+  // If there is no branch, control always falls through.
+  if (TBB == 0) return true;
+
+  // If there is some explicit branch to the fallthrough block, it can obviously
+  // reach it, even though the branch should get folded to fall through
+  // implicitly.
+  if (MachineFunction::iterator(TBB) == Fallthrough ||
+      MachineFunction::iterator(FBB) == Fallthrough)
+    return true;
+  
+  // If it's an unconditional branch to some block not the fall through, it 
+  // doesn't fall through.
+  if (Cond.empty()) return false;
+  
+  // Otherwise, if it is conditional and has no explicit false block, it falls
+  // through.
+  return FBB == 0;
+}
+
+/// CanFallThrough - Return true if the specified block can implicitly transfer
+/// control to the block after it by falling off the end of it.  This should
+/// return false if it can reach the block after it, but it uses an explicit
+/// branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  std::vector<MachineOperand> Cond;
+  bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond);
+  return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2.  This has to define
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1)
+/// would result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1, 
+                                MachineBasicBlock *MBB2,
+                                const TargetInstrInfo &TII) {
+  // Right now, we use a simple heuristic.  If MBB2 ends with a call, and
+  // MBB1 doesn't, we prefer to fall through into MBB1.  This allows us to
+  // optimize branches that branch to either a return block or an assert block
+  // into a fallthrough to the return.
+  if (MBB1->empty() || MBB2->empty()) return false;
+
+  MachineInstr *MBB1I = --MBB1->end();
+  MachineInstr *MBB2I = --MBB2->end();
+  return TII.isCall(MBB2I->getOpcode()) && !TII.isCall(MBB1I->getOpcode());
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block.  This is never called on the entry block.
+void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+  MachineFunction::iterator FallThrough = MBB;
+  ++FallThrough;
+  
+  // If this block is empty, make everyone use its fall-through, not the block
+  // explicitly.  Landing pads should not do this since the landing-pad table
+  // points to this block.
+  if (MBB->empty() && !MBB->isLandingPad()) {
+    // Dead block?  Leave for cleanup later.
+    if (MBB->pred_empty()) return;
+    
+    if (FallThrough == MBB->getParent()->end()) {
+      // TODO: Simplify preds to not branch here if possible!
+    } else {
+      // Rewrite all predecessors of the old block to go to the fallthrough
+      // instead.
+      while (!MBB->pred_empty()) {
+        MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+        Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+      }
+      
+      // If MBB was the target of a jump table, update jump tables to go to the
+      // fallthrough instead.
+      MBB->getParent()->getJumpTableInfo()->
+        ReplaceMBBInJumpTables(MBB, FallThrough);
+      MadeChange = true;
+    }
+    return;
+  }
+
+  // Check to see if we can simplify the terminator of the block before this
+  // one.
+  MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+  MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+  std::vector<MachineOperand> PriorCond;
+  bool PriorUnAnalyzable =
+    TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+  if (!PriorUnAnalyzable) {
+    // If the CFG for the prior block has extra edges, remove them.
+    MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+                                              !PriorCond.empty());
+    
+    // If the previous branch is conditional and both conditions go to the same
+    // destination, remove the branch, replacing it with an unconditional one or
+    // a fall-through.
+    if (PriorTBB && PriorTBB == PriorFBB) {
+      TII->RemoveBranch(PrevBB);
+      PriorCond.clear(); 
+      if (PriorTBB != MBB)
+        TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+      MadeChange = true;
+      ++NumBranchOpts;
+      return OptimizeBlock(MBB);
+    }
+    
+    // If the previous branch *only* branches to *this* block (conditional or
+    // not) remove the branch.
+    if (PriorTBB == MBB && PriorFBB == 0) {
+      TII->RemoveBranch(PrevBB);
+      MadeChange = true;
+      ++NumBranchOpts;
+      return OptimizeBlock(MBB);
+    }
+    
+    // If the prior block branches somewhere else on the condition and here if
+    // the condition is false, remove the uncond second branch.
+    if (PriorFBB == MBB) {
+      TII->RemoveBranch(PrevBB);
+      TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+      MadeChange = true;
+      ++NumBranchOpts;
+      return OptimizeBlock(MBB);
+    }
+    
+    // If the prior block branches here on true and somewhere else on false, and
+    // if the branch condition is reversible, reverse the branch to create a
+    // fall-through.
+    if (PriorTBB == MBB) {
+      std::vector<MachineOperand> NewPriorCond(PriorCond);
+      if (!TII->ReverseBranchCondition(NewPriorCond)) {
+        TII->RemoveBranch(PrevBB);
+        TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
+        MadeChange = true;
+        ++NumBranchOpts;
+        return OptimizeBlock(MBB);
+      }
+    }
+    
+    // If this block doesn't fall through (e.g. it ends with an uncond branch or
+    // has no successors) and if the pred falls through into this block, and if
+    // it would otherwise fall through into the block after this, move this
+    // block to the end of the function.
+    //
+    // We consider it more likely that execution will stay in the function (e.g.
+    // due to loops) than it is to exit it.  This helps for cases like asserts
+    // inside loops, moving the assert's failure block out of the loop body.
+    if (!PriorCond.empty() && PriorFBB == 0 &&
+        MachineFunction::iterator(PriorTBB) == FallThrough &&
+        !CanFallThrough(MBB)) {
+      bool DoTransform = true;
+      
+      // We have to be careful that the succs of PredBB aren't both no-successor
+      // blocks.  If neither has successors and PredBB is the second-from-last
+      // block in the function, we'd just keep swapping those two blocks to the
+      // end.  Only do the swap if one is clearly better to fall through than
+      // the other.
+      if (FallThrough == --MBB->getParent()->end() &&
+          !IsBetterFallthrough(PriorTBB, MBB, *TII))
+        DoTransform = false;
+
+      // We don't want to do this transformation if we have control flow like:
+      //   br cond BB2
+      // BB1:
+      //   ..
+      //   jmp BBX
+      // BB2:
+      //   ..
+      //   ret
+      //
+      // In this case, we could actually be moving the return block *into* a
+      // loop!
+      if (DoTransform && !MBB->succ_empty() &&
+          (!CanFallThrough(PriorTBB) || PriorTBB->empty()))
+        DoTransform = false;
+      
+      
+      if (DoTransform) {
+        // Reverse the branch so we will fall through on the previous true cond.
+        std::vector<MachineOperand> NewPriorCond(PriorCond);
+        if (!TII->ReverseBranchCondition(NewPriorCond)) {
+          DOUT << "\nMoving MBB: " << *MBB;
+          DOUT << "To make fallthrough to: " << *PriorTBB << "\n";
+          
+          TII->RemoveBranch(PrevBB);
+          TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
+
+          // Move this block to the end of the function.
+          MBB->moveAfter(--MBB->getParent()->end());
+          MadeChange = true;
+          ++NumBranchOpts;
+          return;
+        }
+      }
+    }
+  }
+  
+  // Analyze the branch in the current block.
+  MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+  std::vector<MachineOperand> CurCond;
+  bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond);
+  if (!CurUnAnalyzable) {
+    // If the CFG for this block has extra edges, remove them.
+    MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+    // If this is a two-way branch, and the FBB branches to this block, reverse 
+    // the condition so the single-basic-block loop is faster.  Instead of:
+    //    Loop: xxx; jcc Out; jmp Loop
+    // we want:
+    //    Loop: xxx; jncc Loop; jmp Out
+    if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+      std::vector<MachineOperand> NewCond(CurCond);
+      if (!TII->ReverseBranchCondition(NewCond)) {
+        TII->RemoveBranch(*MBB);
+        TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+        MadeChange = true;
+        ++NumBranchOpts;
+        return OptimizeBlock(MBB);
+      }
+    }
+    
+    
+    // If this branch is the only thing in its block, see if we can forward
+    // other blocks across it.
+    if (CurTBB && CurCond.empty() && CurFBB == 0 && 
+        TII->isBranch(MBB->begin()->getOpcode()) && CurTBB != MBB) {
+      // This block may contain just an unconditional branch.  Because there can
+      // be 'non-branch terminators' in the block, try removing the branch and
+      // then seeing if the block is empty.
+      TII->RemoveBranch(*MBB);
+
+      // If this block is just an unconditional branch to CurTBB, we can
+      // usually completely eliminate the block.  The only case we cannot
+      // completely eliminate the block is when the block before this one
+      // falls through into MBB and we can't understand the prior block's branch
+      // condition.
+      if (MBB->empty()) {
+        bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB);
+        if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+            !PrevBB.isSuccessor(MBB)) {
+          // If the prior block falls through into us, turn it into an
+          // explicit branch to us to make updates simpler.
+          if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && 
+              PriorTBB != MBB && PriorFBB != MBB) {
+            if (PriorTBB == 0) {
+              assert(PriorCond.empty() && PriorFBB == 0 &&
+                     "Bad branch analysis");
+              PriorTBB = MBB;
+            } else {
+              assert(PriorFBB == 0 && "Machine CFG out of date!");
+              PriorFBB = MBB;
+            }
+            TII->RemoveBranch(PrevBB);
+            TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+          }
+
+          // Iterate through all the predecessors, revectoring each in turn.
+          size_t PI = 0;
+          bool DidChange = false;
+          bool HasBranchToSelf = false;
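+          // Note that pred_size() can shrink as predecessors are revectored,
+          // so it is re-evaluated on every iteration.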
+          while (PI != MBB->pred_size()) {
+            MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+            if (PMBB == MBB) {
+              // If this block has an uncond branch to itself, leave it.
+              ++PI;
+              HasBranchToSelf = true;
+            } else {
+              DidChange = true;
+              PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+            }
+          }
+
+          // Change any jumptables to go to the new MBB.
+          MBB->getParent()->getJumpTableInfo()->
+            ReplaceMBBInJumpTables(MBB, CurTBB);
+          if (DidChange) {
+            ++NumBranchOpts;
+            MadeChange = true;
+            if (!HasBranchToSelf) return;
+          }
+        }
+      }
+      
+      // Add the branch back if the block is more than just an uncond branch.
+      TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+    }
+  }
+
+  // If the prior block doesn't fall through into this block, and if this
+  // block doesn't fall through into some other block, see if we can find a
+  // place to move this block where a fall-through will happen.
+  if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
+                      PriorTBB, PriorFBB, PriorCond)) {
+    // Now we know that there was no fall-through into this block, check to
+    // see if it has a fall-through into its successor.
+    bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, 
+                                       CurCond);
+
+    if (!MBB->isLandingPad()) {
+      // Check all the predecessors of this block.  If one of them has no fall
+      // throughs, move this block right after it.
+      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+           E = MBB->pred_end(); PI != E; ++PI) {
+        // Analyze the branch at the end of the pred.
+        MachineBasicBlock *PredBB = *PI;
+        MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+        if (PredBB != MBB && !CanFallThrough(PredBB)
+            && (!CurFallsThru || !CurTBB || !CurFBB)
+            && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+          // If the current block doesn't fall through, just move it.
+          // If the current block can fall through and does not end with a
+          // conditional branch, we need to append an unconditional jump to 
+          // the (current) next block.  To avoid a possible compile-time
+          // infinite loop, move blocks only backward in this case.
+          // Also, if there are already 2 branches here, we cannot add a third;
+          // this means we have the case
+          // Bcc next
+          // B elsewhere
+          // next:
+          if (CurFallsThru) {
+            MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+            CurCond.clear();
+            TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+          }
+          MBB->moveAfter(PredBB);
+          MadeChange = true;
+          return OptimizeBlock(MBB);
+        }
+      }
+    }
+        
+    if (!CurFallsThru) {
+      // Check all successors to see if we can move this block before it.
+      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+           E = MBB->succ_end(); SI != E; ++SI) {
+        // Analyze the branch at the end of the block before the succ.
+        MachineBasicBlock *SuccBB = *SI;
+        MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+        std::vector<MachineOperand> SuccPrevCond;
+        
+        // If this block doesn't already fall-through to that successor, and if
+        // the succ doesn't already have a block that can fall through into it,
+        // and if the successor isn't an EH destination, we can arrange for the
+        // fallthrough to happen.
+        if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+            !SuccBB->isLandingPad()) {
+          MBB->moveBefore(SuccBB);
+          MadeChange = true;
+          return OptimizeBlock(MBB);
+        }
+      }
+      
+      // Okay, there is no really great place to put this block.  If, however,
+      // the block before this one would be a fall-through if this block were
+      // removed, move this block to the end of the function.
+      if (FallThrough != MBB->getParent()->end() &&
+          PrevBB.isSuccessor(FallThrough)) {
+        MBB->moveAfter(--MBB->getParent()->end());
+        MadeChange = true;
+        return;
+      }
+    }
+  }
+}
diff --git a/lib/CodeGen/DwarfWriter.cpp b/lib/CodeGen/DwarfWriter.cpp
new file mode 100644
index 0000000..257a776
--- /dev/null
+++ b/lib/CodeGen/DwarfWriter.cpp
@@ -0,0 +1,3616 @@
+//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DwarfWriter.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <ostream>
+#include <string>
+using namespace llvm;
+using namespace llvm::dwarf;
+
+namespace llvm {
+  
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitDiesSetSize          = 9; // 512
+static const unsigned InitAbbreviationsSetSize = 9; // 512
+static const unsigned InitValuesSetSize        = 9; // 512
+
+//===----------------------------------------------------------------------===//
+/// Forward declarations.
+///
+class DIE;
+class DIEValue;
+
+//===----------------------------------------------------------------------===//
+/// DWLabel - Labels are used to track locations in the assembler file.
+/// Labels appear in the form <prefix><Tag><Number>, where the tag is a
+/// category of label (Ex. location) and number is a value unique in that
+/// category.
+class DWLabel {
+public:
+  /// Tag - Label category tag. Should always be a statically declared C string.
+  ///
+  const char *Tag;
+  
+  /// Number - Value to make label unique.
+  ///
+  unsigned    Number;
+
+  DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {}
+  
+  void Profile(FoldingSetNodeID &ID) const {
+    ID.AddString(std::string(Tag));
+    ID.AddInteger(Number);
+  }
+  
+#ifndef NDEBUG
+  void print(std::ostream *O) const {
+    if (O) print(*O);
+  }
+  void print(std::ostream &O) const {
+    O << "." << Tag;
+    if (Number) O << Number;
+  }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
+/// Dwarf abbreviation.
+class DIEAbbrevData {
+private:
+  /// Attribute - Dwarf attribute code.
+  ///
+  unsigned Attribute;
+  
+  /// Form - Dwarf form code.
+  ///              
+  unsigned Form;                      
+  
+public:
+  DIEAbbrevData(unsigned A, unsigned F)
+  : Attribute(A)
+  , Form(F)
+  {}
+  
+  // Accessors.
+  unsigned getAttribute() const { return Attribute; }
+  unsigned getForm()      const { return Form; }
+
+  /// Profile - Used to gather unique data for the abbreviation folding set.
+  ///
+  void Profile(FoldingSetNodeID &ID) const {
+    ID.AddInteger(Attribute);
+    ID.AddInteger(Form);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+/// information object.
+class DIEAbbrev : public FoldingSetNode {
+private:
+  /// Tag - Dwarf tag code.
+  ///
+  unsigned Tag;
+  
+  /// Unique number for node.
+  ///
+  unsigned Number;
+
+  /// ChildrenFlag - Dwarf children flag.
+  ///
+  unsigned ChildrenFlag;
+
+  /// Data - Raw data bytes for abbreviation.
+  ///
+  std::vector<DIEAbbrevData> Data;
+
+public:
+
+  DIEAbbrev(unsigned T, unsigned C)
+  : Tag(T)
+  , ChildrenFlag(C)
+  , Data()
+  {}
+  ~DIEAbbrev() {}
+  
+  // Accessors.
+  unsigned getTag()                           const { return Tag; }
+  unsigned getNumber()                        const { return Number; }
+  unsigned getChildrenFlag()                  const { return ChildrenFlag; }
+  const std::vector<DIEAbbrevData> &getData() const { return Data; }
+  void setTag(unsigned T)                           { Tag = T; }
+  void setChildrenFlag(unsigned CF)                 { ChildrenFlag = CF; }
+  void setNumber(unsigned N)                        { Number = N; }
+  
+  /// AddAttribute - Adds another set of attribute information to the
+  /// abbreviation.
+  void AddAttribute(unsigned Attribute, unsigned Form) {
+    Data.push_back(DIEAbbrevData(Attribute, Form));
+  }
+  
+  /// AddFirstAttribute - Adds a set of attribute information to the front
+  /// of the abbreviation.
+  void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+    Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+  }
+  
+  /// Profile - Used to gather unique data for the abbreviation folding set.
+  ///
+  void Profile(FoldingSetNodeID &ID) {
+    ID.AddInteger(Tag);
+    ID.AddInteger(ChildrenFlag);
+    
+    // For each attribute description.
+    for (unsigned i = 0, N = Data.size(); i < N; ++i)
+      Data[i].Profile(ID);
+  }
+  
+  /// Emit - Print the abbreviation using the specified Dwarf writer.
+  ///
+  void Emit(const DwarfDebug &DD) const; 
+      
+#ifndef NDEBUG
+  void print(std::ostream *O) {
+    if (O) print(*O);
+  }
+  void print(std::ostream &O);
+  void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIE - A structured debug information entry.  Has an abbreviation which
+/// describes its organization.
+class DIE : public FoldingSetNode {
+protected:
+  /// Abbrev - Buffer for constructing abbreviation.
+  ///
+  DIEAbbrev Abbrev;
+  
+  /// Offset - Offset in debug info section.
+  ///
+  unsigned Offset;
+  
+  /// Size - Size of instance + children.
+  ///
+  unsigned Size;
+  
+  /// Children DIEs.
+  ///
+  std::vector<DIE *> Children;
+  
+  /// Attribute values.
+  ///
+  std::vector<DIEValue *> Values;
+  
+public:
+  DIE(unsigned Tag)
+  : Abbrev(Tag, DW_CHILDREN_no)
+  , Offset(0)
+  , Size(0)
+  , Children()
+  , Values()
+  {}
+  virtual ~DIE();
+  
+  // Accessors.
+  DIEAbbrev &getAbbrev()                           { return Abbrev; }
+  unsigned   getAbbrevNumber()               const {
+    return Abbrev.getNumber();
+  }
+  unsigned getTag()                          const { return Abbrev.getTag(); }
+  unsigned getOffset()                       const { return Offset; }
+  unsigned getSize()                         const { return Size; }
+  const std::vector<DIE *> &getChildren()    const { return Children; }
+  std::vector<DIEValue *> &getValues()       { return Values; }
+  void setTag(unsigned Tag)                  { Abbrev.setTag(Tag); }
+  void setOffset(unsigned O)                 { Offset = O; }
+  void setSize(unsigned S)                   { Size = S; }
+  
+  /// AddValue - Add a value and attributes to a DIE.
+  ///
+  void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+    Abbrev.AddAttribute(Attribute, Form);
+    Values.push_back(Value);
+  }
+  
+  /// SiblingOffset - Return the offset of the debug information entry's
+  /// sibling.
+  unsigned SiblingOffset() const { return Offset + Size; }
+  
+  /// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+  ///
+  void AddSiblingOffset();
+
+  /// AddChild - Add a child to the DIE.
+  ///
+  void AddChild(DIE *Child) {
+    Abbrev.setChildrenFlag(DW_CHILDREN_yes);
+    Children.push_back(Child);
+  }
+  
+  /// Detach - Detaches objects connected to it after copying.
+  ///
+  void Detach() {
+    Children.clear();
+  }
+  
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  void Profile(FoldingSetNodeID &ID);
+      
+#ifndef NDEBUG
+  void print(std::ostream *O, unsigned IncIndent = 0) {
+    if (O) print(*O, IncIndent);
+  }
+  void print(std::ostream &O, unsigned IncIndent = 0);
+  void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEValue - A debug information entry value.
+///
+class DIEValue : public FoldingSetNode {
+public:
+  enum {
+    isInteger,
+    isString,
+    isLabel,
+    isAsIsLabel,
+    isDelta,
+    isEntry,
+    isBlock
+  };
+  
+  /// Type - Type of data stored in the value.
+  ///
+  unsigned Type;
+  
+  DIEValue(unsigned T)
+  : Type(T)
+  {}
+  virtual ~DIEValue() {}
+  
+  // Accessors
+  unsigned getType()  const { return Type; }
+  
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *) { return true; }
+  
+  /// EmitValue - Emit value via the Dwarf writer.
+  ///
+  virtual void EmitValue(DwarfDebug &DD, unsigned Form) = 0;
+  
+  /// SizeOf - Return the size of a value in bytes.
+  ///
+  virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const = 0;
+  
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  virtual void Profile(FoldingSetNodeID &ID) = 0;
+      
+#ifndef NDEBUG
+  void print(std::ostream *O) {
+    if (O) print(*O);
+  }
+  virtual void print(std::ostream &O) = 0;
+  void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEInteger - An integer value DIE.
+/// 
+class DIEInteger : public DIEValue {
+private:
+  uint64_t Integer;
+  
+public:
+  DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEInteger *) { return true; }
+  static bool classof(const DIEValue *I)  { return I->Type == isInteger; }
+  
+  /// BestForm - Choose the best form for integer.
+  ///
+  static unsigned BestForm(bool IsSigned, uint64_t Integer) {
+    if (IsSigned) {
+      if ((char)Integer == (signed)Integer)   return DW_FORM_data1;
+      if ((short)Integer == (signed)Integer)  return DW_FORM_data2;
+      if ((int)Integer == (signed)Integer)    return DW_FORM_data4;
+    } else {
+      if ((unsigned char)Integer == Integer)  return DW_FORM_data1;
+      if ((unsigned short)Integer == Integer) return DW_FORM_data2;
+      if ((unsigned int)Integer == Integer)   return DW_FORM_data4;
+    }
+    return DW_FORM_data8;
+  }
+    
+  /// EmitValue - Emit integer of appropriate size.
+  ///
+  virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+  
+  /// SizeOf - Determine size of integer value in bytes.
+  ///
+  virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+  
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  static void Profile(FoldingSetNodeID &ID, unsigned Integer) {
+    ID.AddInteger(isInteger);
+    ID.AddInteger(Integer);
+  }
+  virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Integer); }
+  
+#ifndef NDEBUG
+  virtual void print(std::ostream &O) {
+    O << "Int: " << (int64_t)Integer
+      << "  0x" << std::hex << Integer << std::dec;
+  }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEString - A string value DIE.
+/// 
+class DIEString : public DIEValue {
+public:
+  const std::string String;
+  
+  DIEString(const std::string &S) : DIEValue(isString), String(S) {}
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEString *) { return true; }
+  static bool classof(const DIEValue *S) { return S->Type == isString; }
+  
+  /// EmitValue - Emit string value.
+  ///
+  virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+  
+  /// SizeOf - Determine size of string value in bytes.
+  ///
+  virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const {
+    return String.size() + sizeof(char); // sizeof('\0');
+  }
+  
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  static void Profile(FoldingSetNodeID &ID, const std::string &String) {
+    ID.AddInteger(isString);
+    ID.AddString(String);
+  }
+  virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, String); }
+  
+#ifndef NDEBUG
+  virtual void print(std::ostream &O) {
+    O << "Str: \"" << String << "\"";
+  }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEDwarfLabel - A Dwarf internal label expression DIE.
+//
+class DIEDwarfLabel : public DIEValue {
+public:
+
+  const DWLabel Label;
+  
+  DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEDwarfLabel *)  { return true; }
+  static bool classof(const DIEValue *L) { return L->Type == isLabel; }
+  
+  /// EmitValue - Emit label value.
+  ///
+  virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+  
+  /// SizeOf - Determine size of label value in bytes.
+  ///
+  virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+  
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  static void Profile(FoldingSetNodeID &ID, const DWLabel &Label) {
+    ID.AddInteger(isLabel);
+    Label.Profile(ID);
+  }
+  virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Label); }
+  
+#ifndef NDEBUG
+  virtual void print(std::ostream &O) {
+    O << "Lbl: ";
+    Label.print(O);
+  }
+#endif
+};
+
+
+//===----------------------------------------------------------------------===//
+/// DIEObjectLabel - A label to an object in code or data.
+//
+class DIEObjectLabel : public DIEValue {
+public:
+  const std::string Label;
+  
+  DIEObjectLabel(const std::string &L) : DIEValue(isAsIsLabel), Label(L) {}
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEObjectLabel *) { return true; }
+  static bool classof(const DIEValue *L)    { return L->Type == isAsIsLabel; }
+  
+  /// EmitValue - Emit label value.
+  ///
+  virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+  
+  /// SizeOf - Determine size of label value in bytes.
+  ///
+  virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+  
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  static void Profile(FoldingSetNodeID &ID, const std::string &Label) {
+    ID.AddInteger(isAsIsLabel);
+    ID.AddString(Label);
+  }
+  virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Label); }
+
+#ifndef NDEBUG
+  virtual void print(std::ostream &O) {
+    O << "Obj: " << Label;
+  }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEDelta - A simple label difference DIE.
+/// 
+class DIEDelta : public DIEValue {
+public:
+  const DWLabel LabelHi;
+  const DWLabel LabelLo;
+  
+  DIEDelta(const DWLabel &Hi, const DWLabel &Lo)
+  : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEDelta *)  { return true; }
+  static bool classof(const DIEValue *D) { return D->Type == isDelta; }
+  
+  /// EmitValue - Emit delta value.
+  ///
+  virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+  
+  /// SizeOf - Determine size of delta value in bytes.
+  ///
+  virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+  
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+                                            const DWLabel &LabelLo) {
+    ID.AddInteger(isDelta);
+    LabelHi.Profile(ID);
+    LabelLo.Profile(ID);
+  }
+  virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, LabelHi, LabelLo); }
+
+#ifndef NDEBUG
+  virtual void print(std::ostream &O) {
+    O << "Del: ";
+    LabelHi.print(O);
+    O << "-";
+    LabelLo.print(O);
+  }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEntry - A pointer to another debug information entry.  An instance of this
+/// class can also be used as a proxy for a debug information entry not yet
+/// defined (i.e., types).
+class DIEntry : public DIEValue {
+public:
+  DIE *Entry;
+  
+  DIEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+  
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEntry *)   { return true; }
+  static bool classof(const DIEValue *E) { return E->Type == isEntry; }
+  
+  /// EmitValue - Emit debug information entry offset.
+  ///
+  virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+  
+  /// SizeOf - Determine size of debug information entry in bytes.
+  ///
+  virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const {
+    return sizeof(int32_t);
+  }
+  
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  static void Profile(FoldingSetNodeID &ID, DIE *Entry) {
+    ID.AddInteger(isEntry);
+    ID.AddPointer(Entry);
+  }
+  virtual void Profile(FoldingSetNodeID &ID) {
+    ID.AddInteger(isEntry);
+    
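+    // If the referenced DIE is not known yet (a forward reference), profile
+    // with this proxy's own address so the node stays unique.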
+    if (Entry) {
+      ID.AddPointer(Entry);
+    } else {
+      ID.AddPointer(this);
+    }
+  }
+  
+#ifndef NDEBUG
+  virtual void print(std::ostream &O) {
+    O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec;
+  }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEBlock - A block of values.  Primarily used for location expressions.
+//
+class DIEBlock : public DIEValue, public DIE {
+public:
+  unsigned Size;                        // Size in bytes excluding size header.
+  
+  DIEBlock()
+  : DIEValue(isBlock)
+  , DIE(0)
+  , Size(0)
+  {}
+  ~DIEBlock()  {
+  }
+  
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEBlock *)  { return true; }
+  static bool classof(const DIEValue *E) { return E->Type == isBlock; }
+  
+  /// ComputeSize - calculate the size of the block.
+  ///
+  unsigned ComputeSize(DwarfDebug &DD);
+  
+  /// BestForm - Choose the best form for data.
+  ///
+  unsigned BestForm() const {
+    if ((unsigned char)Size == Size)  return DW_FORM_block1;
+    if ((unsigned short)Size == Size) return DW_FORM_block2;
+    if ((unsigned int)Size == Size)   return DW_FORM_block4;
+    return DW_FORM_block;
+  }
+
+  /// EmitValue - Emit block data.
+  ///
+  virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+  
+  /// SizeOf - Determine size of block data in bytes.
+  ///
+  virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+  
+
+  /// Profile - Used to gather unique data for the value folding set.
+  ///
+  virtual void Profile(FoldingSetNodeID &ID) {
+    ID.AddInteger(isBlock);
+    DIE::Profile(ID);
+  }
+  
+#ifndef NDEBUG
+  virtual void print(std::ostream &O) {
+    O << "Blk: ";
+    DIE::print(O, 5);
+  }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information associated
+/// with a source file.
+class CompileUnit {
+private:
+  /// Desc - Compile unit debug descriptor.
+  ///
+  CompileUnitDesc *Desc;
+  
+  /// ID - File identifier for source.
+  ///
+  unsigned ID;
+  
+  /// Die - Compile unit debug information entry.
+  ///
+  DIE *Die;
+  
+  /// DescToDieMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries.
+  std::map<DebugInfoDesc *, DIE *> DescToDieMap;
+
+  /// DescToDIEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEntry proxy.
+  std::map<DebugInfoDesc *, DIEntry *> DescToDIEntryMap;
+
+  /// Globals - A map of globally visible named entities for this unit.
+  ///
+  std::map<std::string, DIE *> Globals;
+
+  /// DiesSet - Used to uniquely define dies within the compile unit.
+  ///
+  FoldingSet<DIE> DiesSet;
+  
+  /// Dies - List of all dies in the compile unit.
+  ///
+  std::vector<DIE *> Dies;
+  
+public:
+  CompileUnit(CompileUnitDesc *CUD, unsigned I, DIE *D)
+  : Desc(CUD)
+  , ID(I)
+  , Die(D)
+  , DescToDieMap()
+  , DescToDIEntryMap()
+  , Globals()
+  , DiesSet(InitDiesSetSize)
+  , Dies()
+  {}
+  
+  ~CompileUnit() {
+    delete Die;
+    
+    for (unsigned i = 0, N = Dies.size(); i < N; ++i)
+      delete Dies[i];
+  }
+  
+  // Accessors.
+  CompileUnitDesc *getDesc() const { return Desc; }
+  unsigned getID()           const { return ID; }
+  DIE* getDie()              const { return Die; }
+  std::map<std::string, DIE *> &getGlobals() { return Globals; }
+
+  /// hasContent - Return true if this compile unit has something to write out.
+  ///
+  bool hasContent() const {
+    return !Die->getChildren().empty();
+  }
+
+  /// AddGlobal - Add a new global entity to the compile unit.
+  ///
+  void AddGlobal(const std::string &Name, DIE *Die) {
+    Globals[Name] = Die;
+  }
+  
+  /// getDieMapSlotFor - Returns the debug information entry map slot for the
+  /// specified debug descriptor.
+  DIE *&getDieMapSlotFor(DebugInfoDesc *DID) {
+    return DescToDieMap[DID];
+  }
+  
+  /// getDIEntrySlotFor - Returns the debug information entry proxy slot for the
+  /// specified debug descriptor.
+  DIEntry *&getDIEntrySlotFor(DebugInfoDesc *DID) {
+    return DescToDIEntryMap[DID];
+  }
+  
+  /// AddDie - Adds or interns the DIE to the compile unit.
+  ///
+  DIE *AddDie(DIE &Buffer) {
+    FoldingSetNodeID ID;
+    Buffer.Profile(ID);
+    void *Where;
+    DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where);
+    
+    if (!Die) {
+      Die = new DIE(Buffer);
+      DiesSet.InsertNode(Die, Where);
+      this->Die->AddChild(Die);
+      Buffer.Detach();
+    }
+    
+    return Die;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// Dwarf - Emits general Dwarf directives. 
+///
+class Dwarf {
+
+protected:
+
+  //===--------------------------------------------------------------------===//
+  // Core attributes used by the Dwarf writer.
+  //
+  
+  //
+  /// O - Stream to .s file.
+  ///
+  std::ostream &O;
+
+  /// Asm - Target of Dwarf emission.
+  ///
+  AsmPrinter *Asm;
+  
+  /// TAI - Target asm information.
+  const TargetAsmInfo *TAI;
+  
+  /// TD - Target data.
+  const TargetData *TD;
+  
+  /// RI - Register Information.
+  const MRegisterInfo *RI;
+  
+  /// M - Current module.
+  ///
+  Module *M;
+  
+  /// MF - Current machine function.
+  ///
+  MachineFunction *MF;
+  
+  /// MMI - Collected machine module information.
+  ///
+  MachineModuleInfo *MMI;
+  
+  /// SubprogramCount - The running count of functions being compiled.
+  ///
+  unsigned SubprogramCount;
+
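+  /// SetCounter - A unique number for each .set label emitted by
+  /// EmitDifference and EmitSectionOffset.
+  ///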
+  unsigned SetCounter;
+  Dwarf(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+  : O(OS)
+  , Asm(A)
+  , TAI(T)
+  , TD(Asm->TM.getTargetData())
+  , RI(Asm->TM.getRegisterInfo())
+  , M(NULL)
+  , MF(NULL)
+  , MMI(NULL)
+  , SubprogramCount(0)
+  , SetCounter(1)
+  {
+  }
+
+public:
+
+  //===--------------------------------------------------------------------===//
+  // Accessors.
+  //
+  AsmPrinter *getAsm() const { return Asm; }
+  MachineModuleInfo *getMMI() const { return MMI; }
+  const TargetAsmInfo *getTargetAsmInfo() const { return TAI; }
+
+  /// PrintLabelName - Print label name in form used by Dwarf writer.
+  ///
+  void PrintLabelName(DWLabel Label) const {
+    PrintLabelName(Label.Tag, Label.Number);
+  }
+  void PrintLabelName(const char *Tag, unsigned Number,
+                      bool isInSection = false) const {
+    if (isInSection && TAI->getDwarfSectionOffsetDirective())
+      O << TAI->getDwarfSectionOffsetDirective() << Tag;
+    else
+      O << TAI->getPrivateGlobalPrefix() << Tag;
+    if (Number) O << Number;
+  }
+  
+  /// EmitLabel - Emit location label for internal use by Dwarf.
+  ///
+  void EmitLabel(DWLabel Label) const {
+    EmitLabel(Label.Tag, Label.Number);
+  }
+  void EmitLabel(const char *Tag, unsigned Number) const {
+    PrintLabelName(Tag, Number);
+    O << ":\n";
+  }
+  
+  /// EmitReference - Emit a reference to a label.
+  ///
+  void EmitReference(DWLabel Label, bool IsPCRelative = false) const {
+    EmitReference(Label.Tag, Label.Number, IsPCRelative);
+  }
+  void EmitReference(const char *Tag, unsigned Number,
+                     bool IsPCRelative = false) const {
+    if (TAI->getAddressSize() == sizeof(int32_t))
+      O << TAI->getData32bitsDirective();
+    else
+      O << TAI->getData64bitsDirective();
+      
+    PrintLabelName(Tag, Number);
+    
+    if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+  }
+  void EmitReference(const std::string &Name, bool IsPCRelative = false) const {
+    if (TAI->getAddressSize() == sizeof(int32_t))
+      O << TAI->getData32bitsDirective();
+    else
+      O << TAI->getData64bitsDirective();
+      
+    O << Name;
+    
+    if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+  }
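+  // EmitReference produces the target's 32- or 64-bit data directive followed
+  // by the label, e.g. roughly "\t.long\t.Llabel3" on a 32-bit target, with
+  // "-<pc symbol>" appended when IsPCRelative is set.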
+
+  /// EmitDifference - Emit the difference between two labels.  Some
+  /// assemblers do not behave well with absolute expressions in data
+  /// directives, so there is an option (needsSet) to use an intermediary
+  /// 'set' expression.
+  void EmitDifference(DWLabel LabelHi, DWLabel LabelLo,
+                      bool IsSmall = false) {
+    EmitDifference(LabelHi.Tag, LabelHi.Number,
+                   LabelLo.Tag, LabelLo.Number,
+                   IsSmall);
+  }
+  void EmitDifference(const char *TagHi, unsigned NumberHi,
+                      const char *TagLo, unsigned NumberLo,
+                      bool IsSmall = false) {
+    if (TAI->needsSet()) {
+      O << "\t.set\t";
+      PrintLabelName("set", SetCounter);
+      O << ",";
+      PrintLabelName(TagHi, NumberHi);
+      O << "-";
+      PrintLabelName(TagLo, NumberLo);
+      O << "\n";
+      
+      if (IsSmall || TAI->getAddressSize() == sizeof(int32_t))
+        O << TAI->getData32bitsDirective();
+      else
+        O << TAI->getData64bitsDirective();
+        
+      PrintLabelName("set", SetCounter);
+      
+      ++SetCounter;
+    } else {
+      if (IsSmall || TAI->getAddressSize() == sizeof(int32_t))
+        O << TAI->getData32bitsDirective();
+      else
+        O << TAI->getData64bitsDirective();
+        
+      PrintLabelName(TagHi, NumberHi);
+      O << "-";
+      PrintLabelName(TagLo, NumberLo);
+    }
+  }
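+  // On an assembler that needs 'set' (e.g. Darwin's), the difference is
+  // emitted in two steps, roughly:
+  //   .set .Lset1, .Lhi_label - .Llo_label
+  //   .long .Lset1
+  // Otherwise the difference expression follows the data directive directly.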
+
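+  /// EmitSectionOffset - Emit the offset of a label relative to the start of
+  /// its section, or the label itself when the target wants absolute section
+  /// offsets (configurable separately for EH and debug sections).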
+  void EmitSectionOffset(const char* Label, const char* Section,
+                         unsigned LabelNumber, unsigned SectionNumber,
+                         bool IsSmall = false, bool isEH = false) {
+    bool printAbsolute = false;
+    if (TAI->needsSet()) {
+      O << "\t.set\t";
+      PrintLabelName("set", SetCounter);
+      O << ",";
+      PrintLabelName(Label, LabelNumber, true);
+
+      if (isEH)
+        printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+      else
+        printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+      
+      if (!printAbsolute) {
+        O << "-";
+        PrintLabelName(Section, SectionNumber);
+      }      
+      O << "\n";
+      
+      if (IsSmall || TAI->getAddressSize() == sizeof(int32_t))
+        O << TAI->getData32bitsDirective();
+      else
+        O << TAI->getData64bitsDirective();
+        
+      PrintLabelName("set", SetCounter);
+      ++SetCounter;
+    } else {
+      if (IsSmall || TAI->getAddressSize() == sizeof(int32_t))
+        O << TAI->getData32bitsDirective();
+      else
+        O << TAI->getData64bitsDirective();
+        
+      PrintLabelName(Label, LabelNumber, true);
+
+      if (isEH)
+        printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+      else
+        printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+
+      if (!printAbsolute) {
+        O << "-";
+        PrintLabelName(Section, SectionNumber);
+      }
+    }    
+  }
+  
+  /// EmitFrameMoves - Emit frame instructions to describe the layout of the
+  /// frame.
+  void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+                      const std::vector<MachineMove> &Moves) {
+    int stackGrowth =
+        Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+          TargetFrameInfo::StackGrowsUp ?
+            TAI->getAddressSize() : -TAI->getAddressSize();
+    bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
+
+    for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+      const MachineMove &Move = Moves[i];
+      unsigned LabelID = Move.getLabelID();
+      
+      if (LabelID) {
+        LabelID = MMI->MappedLabel(LabelID);
+      
+        // Throw out move if the label is invalid.
+        if (!LabelID) continue;
+      }
+      
+      const MachineLocation &Dst = Move.getDestination();
+      const MachineLocation &Src = Move.getSource();
+      
+      // Advance row if new location.
+      if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+        Asm->EmitInt8(DW_CFA_advance_loc4);
+        Asm->EOL("DW_CFA_advance_loc4");
+        EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true);
+        Asm->EOL();
+        
+        BaseLabelID = LabelID;
+        BaseLabel = "label";
+        IsLocal = true;
+      }
+      
+      // If advancing cfa.
+      if (Dst.isRegister() && Dst.getRegister() == MachineLocation::VirtualFP) {
+        if (!Src.isRegister()) {
+          if (Src.getRegister() == MachineLocation::VirtualFP) {
+            Asm->EmitInt8(DW_CFA_def_cfa_offset);
+            Asm->EOL("DW_CFA_def_cfa_offset");
+          } else {
+            Asm->EmitInt8(DW_CFA_def_cfa);
+            Asm->EOL("DW_CFA_def_cfa");
+            Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Src.getRegister()));
+            Asm->EOL("Register");
+          }
+          
+          int Offset = -Src.getOffset();
+          
+          Asm->EmitULEB128Bytes(Offset);
+          Asm->EOL("Offset");
+        } else {
+          assert(0 && "Machine move no supported yet.");
+        }
+      } else if (Src.isRegister() &&
+        Src.getRegister() == MachineLocation::VirtualFP) {
+        if (Dst.isRegister()) {
+          Asm->EmitInt8(DW_CFA_def_cfa_register);
+          Asm->EOL("DW_CFA_def_cfa_register");
+          Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getRegister()));
+          Asm->EOL("Register");
+        } else {
+          assert(0 && "Machine move no supported yet.");
+        }
+      } else {
+        unsigned Reg = RI->getDwarfRegNum(Src.getRegister());
+        int Offset = Dst.getOffset() / stackGrowth;
+        
+        if (Offset < 0) {
+          Asm->EmitInt8(DW_CFA_offset_extended_sf);
+          Asm->EOL("DW_CFA_offset_extended_sf");
+          Asm->EmitULEB128Bytes(Reg);
+          Asm->EOL("Reg");
+          Asm->EmitSLEB128Bytes(Offset);
+          Asm->EOL("Offset");
+        } else if (Reg < 64) {
+          Asm->EmitInt8(DW_CFA_offset + Reg);
+          Asm->EOL("DW_CFA_offset + Reg");
+          Asm->EmitULEB128Bytes(Offset);
+          Asm->EOL("Offset");
+        } else {
+          Asm->EmitInt8(DW_CFA_offset_extended);
+          Asm->EOL("DW_CFA_offset_extended");
+          Asm->EmitULEB128Bytes(Reg);
+          Asm->EOL("Reg");
+          Asm->EmitULEB128Bytes(Offset);
+          Asm->EOL("Offset");
+        }
+      }
+    }
+  }
+
+};
+
+//===----------------------------------------------------------------------===//
+/// DwarfDebug - Emits Dwarf debug directives. 
+///
+class DwarfDebug : public Dwarf {
+
+private:
+  //===--------------------------------------------------------------------===//
+  // Attributes used to construct specific Dwarf sections.
+  //
+  
+  /// CompileUnits - All the compile units involved in this build.  The index
+  /// of each entry in this vector corresponds to the sources in MMI.
+  std::vector<CompileUnit *> CompileUnits;
+  
+  /// AbbreviationsSet - Used to uniquely define abbreviations.
+  ///
+  FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+  /// Abbreviations - A list of all the unique abbreviations in use.
+  ///
+  std::vector<DIEAbbrev *> Abbreviations;
+  
+  /// ValuesSet - Used to uniquely define values.
+  ///
+  FoldingSet<DIEValue> ValuesSet;
+  
+  /// Values - A list of all the unique values in use.
+  ///
+  std::vector<DIEValue *> Values;
+  
+  /// StringPool - A UniqueVector of strings used by indirect references.
+  ///
+  UniqueVector<std::string> StringPool;
+
+  /// DescToUnitMap - Map a debug information descriptor to its compile unit.
+  ///
+  std::map<DebugInfoDesc *, CompileUnit *> DescToUnitMap;
+  
+  /// SectionMap - Provides a unique id per text section.
+  ///
+  UniqueVector<std::string> SectionMap;
+  
+  /// SectionSourceLines - Tracks line numbers per text section.
+  ///
+  std::vector<std::vector<SourceLineInfo> > SectionSourceLines;
+
+  /// didInitial - Flag to indicate if initial emission has been done.
+  ///
+  bool didInitial;
+  
+  /// shouldEmit - Flag to indicate if debug information should be emitted.
+  ///
+  bool shouldEmit;
+
+  struct FunctionDebugFrameInfo {
+    unsigned Number;
+    std::vector<MachineMove> Moves;
+
+    FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M):
+      Number(Num), Moves(M) { };
+  };
+
+  std::vector<FunctionDebugFrameInfo> DebugFrames;
+  
+public:
+  
+  /// ShouldEmitDwarf - Returns true if Dwarf declarations should be made.
+  ///
+  bool ShouldEmitDwarf() const { return shouldEmit; }
+
+  /// AssignAbbrevNumber - Define a unique number for the abbreviation.
+  ///  
+  void AssignAbbrevNumber(DIEAbbrev &Abbrev) {
+    // Profile the node so that we can make it unique.
+    FoldingSetNodeID ID;
+    Abbrev.Profile(ID);
+    
+    // Check the set for priors.
+    DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+    
+    // If it's newly added.
+    if (InSet == &Abbrev) {
+      // Add to abbreviation list. 
+      Abbreviations.push_back(&Abbrev);
+      // Assign the vector position + 1 as its number.
+      Abbrev.setNumber(Abbreviations.size());
+    } else {
+      // Assign existing abbreviation number.
+      Abbrev.setNumber(InSet->getNumber());
+    }
+  }
+
+  /// NewString - Add a string to the string pool and return a label.
+  ///
+  DWLabel NewString(const std::string &String) {
+    unsigned StringID = StringPool.insert(String);
+    return DWLabel("string", StringID);
+  }
+  
+  /// NewDIEntry - Creates a new DIEntry to be a proxy for a debug information
+  /// entry.
+  DIEntry *NewDIEntry(DIE *Entry = NULL) {
+    DIEntry *Value;
+    
+    if (Entry) {
+      FoldingSetNodeID ID;
+      DIEntry::Profile(ID, Entry);
+      void *Where;
+      Value = static_cast<DIEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where));
+      
+      if (Value) return Value;
+      
+      Value = new DIEntry(Entry);
+      ValuesSet.InsertNode(Value, Where);
+    } else {
+      Value = new DIEntry(Entry);
+    }
+    
+    Values.push_back(Value);
+    return Value;
+  }
+  
+  /// SetDIEntry - Set a DIEntry once the debug information entry is defined.
+  ///
+  void SetDIEntry(DIEntry *Value, DIE *Entry) {
+    Value->Entry = Entry;
+    // Add to values set if not already there.  If it is, we merely have a
+    // duplicate in the values list (no harm.)
+    ValuesSet.GetOrInsertNode(Value);
+  }
+
+  /// AddUInt - Add an unsigned integer attribute data and value.
+  ///
+  void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer) {
+    if (!Form) Form = DIEInteger::BestForm(false, Integer);
+
+    FoldingSetNodeID ID;
+    DIEInteger::Profile(ID, Integer);
+    void *Where;
+    DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+    if (!Value) {
+      Value = new DIEInteger(Integer);
+      ValuesSet.InsertNode(Value, Where);
+      Values.push_back(Value);
+    }
+  
+    Die->AddValue(Attribute, Form, Value);
+  }
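+  // Passing Form == 0 lets DIEInteger::BestForm choose a suitable (presumably
+  // the smallest) data form for the value, e.g. the
+  // AddUInt(Die, DW_AT_byte_size, 0, Size) calls used elsewhere in this file.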
+      
+  /// AddSInt - Add a signed integer attribute data and value.
+  ///
+  void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer) {
+    if (!Form) Form = DIEInteger::BestForm(true, Integer);
+
+    FoldingSetNodeID ID;
+    DIEInteger::Profile(ID, (uint64_t)Integer);
+    void *Where;
+    DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+    if (!Value) {
+      Value = new DIEInteger(Integer);
+      ValuesSet.InsertNode(Value, Where);
+      Values.push_back(Value);
+    }
+  
+    Die->AddValue(Attribute, Form, Value);
+  }
+      
+  /// AddString - Add a std::string attribute data and value.
+  ///
+  void AddString(DIE *Die, unsigned Attribute, unsigned Form,
+                 const std::string &String) {
+    FoldingSetNodeID ID;
+    DIEString::Profile(ID, String);
+    void *Where;
+    DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+    if (!Value) {
+      Value = new DIEString(String);
+      ValuesSet.InsertNode(Value, Where);
+      Values.push_back(Value);
+    }
+  
+    Die->AddValue(Attribute, Form, Value);
+  }
+      
+  /// AddLabel - Add a Dwarf label attribute data and value.
+  ///
+  void AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                     const DWLabel &Label) {
+    FoldingSetNodeID ID;
+    DIEDwarfLabel::Profile(ID, Label);
+    void *Where;
+    DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+    if (!Value) {
+      Value = new DIEDwarfLabel(Label);
+      ValuesSet.InsertNode(Value, Where);
+      Values.push_back(Value);
+    }
+  
+    Die->AddValue(Attribute, Form, Value);
+  }
+      
+  /// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+  ///
+  void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                      const std::string &Label) {
+    FoldingSetNodeID ID;
+    DIEObjectLabel::Profile(ID, Label);
+    void *Where;
+    DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+    if (!Value) {
+      Value = new DIEObjectLabel(Label);
+      ValuesSet.InsertNode(Value, Where);
+      Values.push_back(Value);
+    }
+  
+    Die->AddValue(Attribute, Form, Value);
+  }
+      
+  /// AddDelta - Add a label delta attribute data and value.
+  ///
+  void AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+                          const DWLabel &Hi, const DWLabel &Lo) {
+    FoldingSetNodeID ID;
+    DIEDelta::Profile(ID, Hi, Lo);
+    void *Where;
+    DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+    if (!Value) {
+      Value = new DIEDelta(Hi, Lo);
+      ValuesSet.InsertNode(Value, Where);
+      Values.push_back(Value);
+    }
+  
+    Die->AddValue(Attribute, Form, Value);
+  }
+      
+  /// AddDIEntry - Add a DIE attribute data and value.
+  ///
+  void AddDIEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+    Die->AddValue(Attribute, Form, NewDIEntry(Entry));
+  }
+
+  /// AddBlock - Add block data.
+  ///
+  void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block) {
+    Block->ComputeSize(*this);
+    FoldingSetNodeID ID;
+    Block->Profile(ID);
+    void *Where;
+    DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+    if (!Value) {
+      Value = Block;
+      ValuesSet.InsertNode(Value, Where);
+      Values.push_back(Value);
+    } else {
+      delete Block;
+    }
+  
+    Die->AddValue(Attribute, Block->BestForm(), Value);
+  }
+
+private:
+
+  /// AddSourceLine - Add location information to specified debug information
+  /// entry.
+  void AddSourceLine(DIE *Die, CompileUnitDesc *File, unsigned Line) {
+    if (File && Line) {
+      CompileUnit *FileUnit = FindCompileUnit(File);
+      unsigned FileID = FileUnit->getID();
+      AddUInt(Die, DW_AT_decl_file, 0, FileID);
+      AddUInt(Die, DW_AT_decl_line, 0, Line);
+    }
+  }
+
+  /// AddAddress - Add an address attribute to a die based on the location
+  /// provided.
+  void AddAddress(DIE *Die, unsigned Attribute,
+                            const MachineLocation &Location) {
+    unsigned Reg = RI->getDwarfRegNum(Location.getRegister());
+    DIEBlock *Block = new DIEBlock();
+    
+    if (Location.isRegister()) {
+      if (Reg < 32) {
+        AddUInt(Block, 0, DW_FORM_data1, DW_OP_reg0 + Reg);
+      } else {
+        AddUInt(Block, 0, DW_FORM_data1, DW_OP_regx);
+        AddUInt(Block, 0, DW_FORM_udata, Reg);
+      }
+    } else {
+      if (Reg < 32) {
+        AddUInt(Block, 0, DW_FORM_data1, DW_OP_breg0 + Reg);
+      } else {
+        AddUInt(Block, 0, DW_FORM_data1, DW_OP_bregx);
+        AddUInt(Block, 0, DW_FORM_udata, Reg);
+      }
+      AddUInt(Block, 0, DW_FORM_sdata, Location.getOffset());
+    }
+    
+    AddBlock(Die, Attribute, 0, Block);
+  }
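+  // The resulting location expression is either "DW_OP_regN" / "DW_OP_regx N"
+  // for a value that lives in a register, or "DW_OP_bregN <offset>" /
+  // "DW_OP_bregx N <offset>" for a value at an offset from a base register
+  // (typically the frame register).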
+  
+  /// AddBasicType - Add a new basic type attribute to the specified entity.
+  ///
+  void AddBasicType(DIE *Entity, CompileUnit *Unit,
+                    const std::string &Name,
+                    unsigned Encoding, unsigned Size) {
+    DIE *Die = ConstructBasicType(Unit, Name, Encoding, Size);
+    AddDIEntry(Entity, DW_AT_type, DW_FORM_ref4, Die);
+  }
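+  // For example, AddType below falls back to
+  // AddBasicType(Entity, Unit, "", DW_ATE_signed, sizeof(int32_t)) when no
+  // type descriptor is available.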
+  
+  /// ConstructBasicType - Construct a new basic type.
+  ///
+  DIE *ConstructBasicType(CompileUnit *Unit,
+                          const std::string &Name,
+                          unsigned Encoding, unsigned Size) {
+    DIE Buffer(DW_TAG_base_type);
+    AddUInt(&Buffer, DW_AT_byte_size, 0, Size);
+    AddUInt(&Buffer, DW_AT_encoding, DW_FORM_data1, Encoding);
+    if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name);
+    return Unit->AddDie(Buffer);
+  }
+  
+  /// AddPointerType - Add a new pointer type attribute to the specified entity.
+  ///
+  void AddPointerType(DIE *Entity, CompileUnit *Unit, const std::string &Name) {
+    DIE *Die = ConstructPointerType(Unit, Name);
+    AddDIEntry(Entity, DW_AT_type, DW_FORM_ref4, Die);
+  }
+  
+  /// ConstructPointerType - Construct a new pointer type.
+  ///
+  DIE *ConstructPointerType(CompileUnit *Unit, const std::string &Name) {
+    DIE Buffer(DW_TAG_pointer_type);
+    AddUInt(&Buffer, DW_AT_byte_size, 0, TAI->getAddressSize());
+    if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name);
+    return Unit->AddDie(Buffer);
+  }
+  
+  /// AddType - Add a new type attribute to the specified entity.
+  ///
+  void AddType(DIE *Entity, TypeDesc *TyDesc, CompileUnit *Unit) {
+    if (!TyDesc) {
+      AddBasicType(Entity, Unit, "", DW_ATE_signed, sizeof(int32_t));
+    } else {
+      // Check for pre-existence.
+      DIEntry *&Slot = Unit->getDIEntrySlotFor(TyDesc);
+      
+      // If it exists then use the existing value.
+      if (Slot) {
+        Entity->AddValue(DW_AT_type, DW_FORM_ref4, Slot);
+        return;
+      }
+      
+      if (SubprogramDesc *SubprogramTy = dyn_cast<SubprogramDesc>(TyDesc)) {
+        // FIXME - Not sure why programs and variables are coming through here.
+        // Short cut for handling subprogram types (not really a TyDesc.)
+        AddPointerType(Entity, Unit, SubprogramTy->getName());
+      } else if (GlobalVariableDesc *GlobalTy =
+                                         dyn_cast<GlobalVariableDesc>(TyDesc)) {
+        // FIXME - Not sure why programs and variables are coming through here.
+        // Short cut for handling global variable types (not really a TyDesc.)
+        AddPointerType(Entity, Unit, GlobalTy->getName());
+      } else {  
+        // Set up proxy.
+        Slot = NewDIEntry();
+        
+        // Construct type.
+        DIE Buffer(DW_TAG_base_type);
+        ConstructType(Buffer, TyDesc, Unit);
+        
+        // Add debug information entry to entity and unit.
+        DIE *Die = Unit->AddDie(Buffer);
+        SetDIEntry(Slot, Die);
+        Entity->AddValue(DW_AT_type, DW_FORM_ref4, Slot);
+      }
+    }
+  }
+  
+  /// ConstructType - Adds all the required attributes to the type.
+  ///
+  void ConstructType(DIE &Buffer, TypeDesc *TyDesc, CompileUnit *Unit) {
+    // Get core information.
+    const std::string &Name = TyDesc->getName();
+    uint64_t Size = TyDesc->getSize() >> 3;
+    
+    if (BasicTypeDesc *BasicTy = dyn_cast<BasicTypeDesc>(TyDesc)) {
+      // Fundamental types like int, float, bool
+      Buffer.setTag(DW_TAG_base_type);
+      AddUInt(&Buffer, DW_AT_encoding,  DW_FORM_data1, BasicTy->getEncoding());
+    } else if (DerivedTypeDesc *DerivedTy = dyn_cast<DerivedTypeDesc>(TyDesc)) {
+      // Fetch tag.
+      unsigned Tag = DerivedTy->getTag();
+      // FIXME - Workaround for templates.
+      if (Tag == DW_TAG_inheritance) Tag = DW_TAG_reference_type;
+      // Pointers, typedefs et al. 
+      Buffer.setTag(Tag);
+      // Map to the main type; void will not have a type.
+      if (TypeDesc *FromTy = DerivedTy->getFromType())
+        AddType(&Buffer, FromTy, Unit);
+    } else if (CompositeTypeDesc *CompTy = dyn_cast<CompositeTypeDesc>(TyDesc)){
+      // Fetch tag.
+      unsigned Tag = CompTy->getTag();
+      
+      // Set tag accordingly.
+      if (Tag == DW_TAG_vector_type)
+        Buffer.setTag(DW_TAG_array_type);
+      else 
+        Buffer.setTag(Tag);
+
+      std::vector<DebugInfoDesc *> &Elements = CompTy->getElements();
+      
+      switch (Tag) {
+      case DW_TAG_vector_type:
+        AddUInt(&Buffer, DW_AT_GNU_vector, DW_FORM_flag, 1);
+        // Fall thru
+      case DW_TAG_array_type: {
+        // Add element type.
+        if (TypeDesc *FromTy = CompTy->getFromType())
+          AddType(&Buffer, FromTy, Unit);
+        
+        // Don't emit size attribute.
+        Size = 0;
+        
+        // Construct an anonymous type for index type.
+        DIE *IndexTy = ConstructBasicType(Unit, "", DW_ATE_signed,
+                                          sizeof(int32_t));
+      
+        // Add subranges to array type.
+        for(unsigned i = 0, N = Elements.size(); i < N; ++i) {
+          SubrangeDesc *SRD = cast<SubrangeDesc>(Elements[i]);
+          int64_t Lo = SRD->getLo();
+          int64_t Hi = SRD->getHi();
+          DIE *Subrange = new DIE(DW_TAG_subrange_type);
+          
+          // If a range is available.
+          if (Lo != Hi) {
+            AddDIEntry(Subrange, DW_AT_type, DW_FORM_ref4, IndexTy);
+            // Only add low if non-zero.
+            if (Lo) AddSInt(Subrange, DW_AT_lower_bound, 0, Lo);
+            AddSInt(Subrange, DW_AT_upper_bound, 0, Hi);
+          }
+          
+          Buffer.AddChild(Subrange);
+        }
+        break;
+      }
+      case DW_TAG_structure_type:
+      case DW_TAG_union_type: {
+        // Add elements to structure type.
+        for(unsigned i = 0, N = Elements.size(); i < N; ++i) {
+          DebugInfoDesc *Element = Elements[i];
+          
+          if (DerivedTypeDesc *MemberDesc = dyn_cast<DerivedTypeDesc>(Element)){
+            // Add field or base class.
+            
+            unsigned Tag = MemberDesc->getTag();
+          
+            // Extract the basic information.
+            const std::string &Name = MemberDesc->getName();
+            uint64_t Size = MemberDesc->getSize();
+            uint64_t Align = MemberDesc->getAlign();
+            uint64_t Offset = MemberDesc->getOffset();
+       
+            // Construct member debug information entry.
+            DIE *Member = new DIE(Tag);
+            
+            // Add name if not "".
+            if (!Name.empty())
+              AddString(Member, DW_AT_name, DW_FORM_string, Name);
+            // Add location if available.
+            AddSourceLine(Member, MemberDesc->getFile(), MemberDesc->getLine());
+            
+            // Most of the time the field info is the same as the member's.
+            uint64_t FieldSize = Size;
+            uint64_t FieldAlign = Align;
+            uint64_t FieldOffset = Offset;
+            
+            // Set the member type.
+            TypeDesc *FromTy = MemberDesc->getFromType();
+            AddType(Member, FromTy, Unit);
+            
+            // Walk up typedefs until a real size is found.
+            while (FromTy) {
+              if (FromTy->getTag() != DW_TAG_typedef) {
+                FieldSize = FromTy->getSize();
+                FieldAlign = FromTy->getSize();
+                break;
+              }
+              
+              FromTy = dyn_cast<DerivedTypeDesc>(FromTy)->getFromType();
+            }
+            
+            // Unless we have a bit field.
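+            // For instance (illustrative numbers): a 3-bit member at bit
+            // offset 5 of a 32-bit aligned int has FieldSize = 32 and
+            // Size = 3, giving FieldOffset = 0 and, on a little-endian
+            // target, DW_AT_bit_offset = 32 - (5 + 3) = 24 (DWARF counts
+            // bit offsets from the most significant bit of the field).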
+            if (Tag == DW_TAG_member && FieldSize != Size) {
+              // Construct the alignment mask.
+              uint64_t AlignMask = ~(FieldAlign - 1);
+              // Determine the high bit + 1 of the declared size.
+              uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+              // Work backwards to determine the base offset of the field.
+              FieldOffset = HiMark - FieldSize;
+              // Now normalize offset to the field.
+              Offset -= FieldOffset;
+              
+              // Maybe we need to work from the other end.
+              if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
+              
+              // Add size and offset.
+              AddUInt(Member, DW_AT_byte_size, 0, FieldSize >> 3);
+              AddUInt(Member, DW_AT_bit_size, 0, Size);
+              AddUInt(Member, DW_AT_bit_offset, 0, Offset);
+            }
+            
+            // Add computation for offset.
+            DIEBlock *Block = new DIEBlock();
+            AddUInt(Block, 0, DW_FORM_data1, DW_OP_plus_uconst);
+            AddUInt(Block, 0, DW_FORM_udata, FieldOffset >> 3);
+            AddBlock(Member, DW_AT_data_member_location, 0, Block);
+
+            // Add accessibility (public by default unless it is a base class).
+            if (MemberDesc->isProtected()) {
+              AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_protected);
+            } else if (MemberDesc->isPrivate()) {
+              AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_private);
+            } else if (Tag == DW_TAG_inheritance) {
+              AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_public);
+            }
+            
+            Buffer.AddChild(Member);
+          } else if (GlobalVariableDesc *StaticDesc =
+                                        dyn_cast<GlobalVariableDesc>(Element)) {
+            // Add static member.
+            
+            // Construct member debug information entry.
+            DIE *Static = new DIE(DW_TAG_variable);
+            
+            // Add name and mangled name.
+            const std::string &Name = StaticDesc->getName();
+            const std::string &LinkageName = StaticDesc->getLinkageName();
+            AddString(Static, DW_AT_name, DW_FORM_string, Name);
+            if (!LinkageName.empty()) {
+              AddString(Static, DW_AT_MIPS_linkage_name, DW_FORM_string,
+                                LinkageName);
+            }
+            
+            // Add location.
+            AddSourceLine(Static, StaticDesc->getFile(), StaticDesc->getLine());
+           
+            // Add type.
+            if (TypeDesc *StaticTy = StaticDesc->getType())
+              AddType(Static, StaticTy, Unit);
+            
+            // Add flags.
+            if (!StaticDesc->isStatic())
+              AddUInt(Static, DW_AT_external, DW_FORM_flag, 1);
+            AddUInt(Static, DW_AT_declaration, DW_FORM_flag, 1);
+            
+            Buffer.AddChild(Static);
+          } else if (SubprogramDesc *MethodDesc =
+                                            dyn_cast<SubprogramDesc>(Element)) {
+            // Add member function.
+            
+            // Construct member debug information entry.
+            DIE *Method = new DIE(DW_TAG_subprogram);
+           
+            // Add name and mangled name.
+            const std::string &Name = MethodDesc->getName();
+            const std::string &LinkageName = MethodDesc->getLinkageName();
+            
+            AddString(Method, DW_AT_name, DW_FORM_string, Name);            
+            bool IsCTor = TyDesc->getName() == Name;
+            
+            if (!LinkageName.empty()) {
+              AddString(Method, DW_AT_MIPS_linkage_name, DW_FORM_string,
+                                LinkageName);
+            }
+            
+            // Add location.
+            AddSourceLine(Method, MethodDesc->getFile(), MethodDesc->getLine());
+           
+            // Add type.
+            if (CompositeTypeDesc *MethodTy =
+                   dyn_cast_or_null<CompositeTypeDesc>(MethodDesc->getType())) {
+              // Get argument information.
+              std::vector<DebugInfoDesc *> &Args = MethodTy->getElements();
+             
+              // If not a ctor.
+              if (!IsCTor) {
+                // Add return type.
+                AddType(Method, dyn_cast<TypeDesc>(Args[0]), Unit);
+              }
+              
+              // Add arguments.
+              for(unsigned i = 1, N = Args.size(); i < N; ++i) {
+                DIE *Arg = new DIE(DW_TAG_formal_parameter);
+                AddType(Arg, cast<TypeDesc>(Args[i]), Unit);
+                AddUInt(Arg, DW_AT_artificial, DW_FORM_flag, 1);
+                Method->AddChild(Arg);
+              }
+            }
+
+            // Add flags.
+            if (!MethodDesc->isStatic())
+              AddUInt(Method, DW_AT_external, DW_FORM_flag, 1);
+            AddUInt(Method, DW_AT_declaration, DW_FORM_flag, 1);
+              
+            Buffer.AddChild(Method);
+          }
+        }
+        break;
+      }
+      case DW_TAG_enumeration_type: {
+        // Add enumerators to enumeration type.
+        for(unsigned i = 0, N = Elements.size(); i < N; ++i) {
+          EnumeratorDesc *ED = cast<EnumeratorDesc>(Elements[i]);
+          const std::string &Name = ED->getName();
+          int64_t Value = ED->getValue();
+          DIE *Enumerator = new DIE(DW_TAG_enumerator);
+          AddString(Enumerator, DW_AT_name, DW_FORM_string, Name);
+          AddSInt(Enumerator, DW_AT_const_value, DW_FORM_sdata, Value);
+          Buffer.AddChild(Enumerator);
+        }
+
+        break;
+      }
+      case DW_TAG_subroutine_type: {
+        // Add prototype flag.
+        AddUInt(&Buffer, DW_AT_prototyped, DW_FORM_flag, 1);
+        // Add return type.
+        AddType(&Buffer, dyn_cast<TypeDesc>(Elements[0]), Unit);
+        
+        // Add arguments.
+        for(unsigned i = 1, N = Elements.size(); i < N; ++i) {
+          DIE *Arg = new DIE(DW_TAG_formal_parameter);
+          AddType(Arg, cast<TypeDesc>(Elements[i]), Unit);
+          Buffer.AddChild(Arg);
+        }
+        
+        break;
+      }
+      default: break;
+      }
+    }
+   
+    // Add size if non-zero (derived types don't have a size.)
+    if (Size) AddUInt(&Buffer, DW_AT_byte_size, 0, Size);
+    // Add name if not anonymous or intermediate type.
+    if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name);
+    // Add source line info if available.
+    AddSourceLine(&Buffer, TyDesc->getFile(), TyDesc->getLine());
+  }
+
+  /// NewCompileUnit - Create a new compile unit and its debug information entry.
+  ///
+  CompileUnit *NewCompileUnit(CompileUnitDesc *UnitDesc, unsigned ID) {
+    // Construct debug information entry.
+    DIE *Die = new DIE(DW_TAG_compile_unit);
+    if (TAI->isAbsoluteDebugSectionOffsets())
+      AddLabel(Die, DW_AT_stmt_list, DW_FORM_data4, DWLabel("section_line", 0));
+    else
+      AddDelta(Die, DW_AT_stmt_list, DW_FORM_data4, DWLabel("section_line", 0),
+               DWLabel("section_line", 0));      
+    AddString(Die, DW_AT_producer,  DW_FORM_string, UnitDesc->getProducer());
+    AddUInt  (Die, DW_AT_language,  DW_FORM_data1,  UnitDesc->getLanguage());
+    AddString(Die, DW_AT_name,      DW_FORM_string, UnitDesc->getFileName());
+    AddString(Die, DW_AT_comp_dir,  DW_FORM_string, UnitDesc->getDirectory());
+    
+    // Construct compile unit.
+    CompileUnit *Unit = new CompileUnit(UnitDesc, ID, Die);
+    
+    // Add Unit to compile unit map.
+    DescToUnitMap[UnitDesc] = Unit;
+    
+    return Unit;
+  }
+
+  /// GetBaseCompileUnit - Get the main compile unit.
+  ///
+  CompileUnit *GetBaseCompileUnit() const {
+    CompileUnit *Unit = CompileUnits[0];
+    assert(Unit && "Missing compile unit.");
+    return Unit;
+  }
+
+  /// FindCompileUnit - Get the compile unit for the given descriptor.
+  ///
+  CompileUnit *FindCompileUnit(CompileUnitDesc *UnitDesc) {
+    CompileUnit *Unit = DescToUnitMap[UnitDesc];
+    assert(Unit && "Missing compile unit.");
+    return Unit;
+  }
+
+  /// NewGlobalVariable - Add a new global variable DIE.
+  ///
+  DIE *NewGlobalVariable(GlobalVariableDesc *GVD) {
+    // Get the compile unit context.
+    CompileUnitDesc *UnitDesc =
+      static_cast<CompileUnitDesc *>(GVD->getContext());
+    CompileUnit *Unit = GetBaseCompileUnit();
+
+    // Check for pre-existence.
+    DIE *&Slot = Unit->getDieMapSlotFor(GVD);
+    if (Slot) return Slot;
+    
+    // Get the global variable itself.
+    GlobalVariable *GV = GVD->getGlobalVariable();
+
+    const std::string &Name = GVD->getName();
+    const std::string &FullName = GVD->getFullName();
+    const std::string &LinkageName = GVD->getLinkageName();
+    // Create the global's variable DIE.
+    DIE *VariableDie = new DIE(DW_TAG_variable);
+    AddString(VariableDie, DW_AT_name, DW_FORM_string, Name);
+    if (!LinkageName.empty()) {
+      AddString(VariableDie, DW_AT_MIPS_linkage_name, DW_FORM_string,
+                             LinkageName);
+    }
+    AddType(VariableDie, GVD->getType(), Unit);
+    if (!GVD->isStatic())
+      AddUInt(VariableDie, DW_AT_external, DW_FORM_flag, 1);
+    
+    // Add source line info if available.
+    AddSourceLine(VariableDie, UnitDesc, GVD->getLine());
+    
+    // Add address.
+    DIEBlock *Block = new DIEBlock();
+    AddUInt(Block, 0, DW_FORM_data1, DW_OP_addr);
+    AddObjectLabel(Block, 0, DW_FORM_udata, Asm->getGlobalLinkName(GV));
+    AddBlock(VariableDie, DW_AT_location, 0, Block);
+    
+    // Add to map.
+    Slot = VariableDie;
+   
+    // Add to context owner.
+    Unit->getDie()->AddChild(VariableDie);
+    
+    // Expose as global.
+    // FIXME - need to check external flag.
+    Unit->AddGlobal(FullName, VariableDie);
+    
+    return VariableDie;
+  }
+
+  /// NewSubprogram - Add a new subprogram DIE.
+  ///
+  DIE *NewSubprogram(SubprogramDesc *SPD) {
+    // Get the compile unit context.
+    CompileUnitDesc *UnitDesc =
+      static_cast<CompileUnitDesc *>(SPD->getContext());
+    CompileUnit *Unit = GetBaseCompileUnit();
+
+    // Check for pre-existence.
+    DIE *&Slot = Unit->getDieMapSlotFor(SPD);
+    if (Slot) return Slot;
+    
+    // Gather the details (simplifies the add-attribute code).
+    const std::string &Name = SPD->getName();
+    const std::string &FullName = SPD->getFullName();
+    const std::string &LinkageName = SPD->getLinkageName();
+                                      
+    DIE *SubprogramDie = new DIE(DW_TAG_subprogram);
+    AddString(SubprogramDie, DW_AT_name, DW_FORM_string, Name);
+    if (!LinkageName.empty()) {
+      AddString(SubprogramDie, DW_AT_MIPS_linkage_name, DW_FORM_string,
+                               LinkageName);
+    }
+    if (SPD->getType()) AddType(SubprogramDie, SPD->getType(), Unit);
+    if (!SPD->isStatic())
+      AddUInt(SubprogramDie, DW_AT_external, DW_FORM_flag, 1);
+    AddUInt(SubprogramDie, DW_AT_prototyped, DW_FORM_flag, 1);
+    
+    // Add source line info if available.
+    AddSourceLine(SubprogramDie, UnitDesc, SPD->getLine());
+
+    // Add to map.
+    Slot = SubprogramDie;
+   
+    // Add to context owner.
+    Unit->getDie()->AddChild(SubprogramDie);
+    
+    // Expose as global.
+    Unit->AddGlobal(FullName, SubprogramDie);
+    
+    return SubprogramDie;
+  }
+
+  /// NewScopeVariable - Create a new scope variable.
+  ///
+  DIE *NewScopeVariable(DebugVariable *DV, CompileUnit *Unit) {
+    // Get the descriptor.
+    VariableDesc *VD = DV->getDesc();
+
+    // Translate tag to proper Dwarf tag.  The result variable is dropped for
+    // now.
+    unsigned Tag;
+    switch (VD->getTag()) {
+    case DW_TAG_return_variable:  return NULL;
+    case DW_TAG_arg_variable:     Tag = DW_TAG_formal_parameter; break;
+    case DW_TAG_auto_variable:    // fall thru
+    default:                      Tag = DW_TAG_variable; break;
+    }
+
+    // Define variable debug information entry.
+    DIE *VariableDie = new DIE(Tag);
+    AddString(VariableDie, DW_AT_name, DW_FORM_string, VD->getName());
+
+    // Add source line info if available.
+    AddSourceLine(VariableDie, VD->getFile(), VD->getLine());
+    
+    // Add variable type.
+    AddType(VariableDie, VD->getType(), Unit); 
+    
+    // Add variable address.
+    MachineLocation Location;
+    RI->getLocation(*MF, DV->getFrameIndex(), Location);
+    AddAddress(VariableDie, DW_AT_location, Location);
+
+    return VariableDie;
+  }
+
+  /// ConstructScope - Construct the components of a scope.
+  ///
+  void ConstructScope(DebugScope *ParentScope,
+                      unsigned ParentStartID, unsigned ParentEndID,
+                      DIE *ParentDie, CompileUnit *Unit) {
+    // Add variables to scope.
+    std::vector<DebugVariable *> &Variables = ParentScope->getVariables();
+    for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+      DIE *VariableDie = NewScopeVariable(Variables[i], Unit);
+      if (VariableDie) ParentDie->AddChild(VariableDie);
+    }
+    
+    // Add nested scopes.
+    std::vector<DebugScope *> &Scopes = ParentScope->getScopes();
+    for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+      // Define the Scope debug information entry.
+      DebugScope *Scope = Scopes[j];
+      // FIXME - Ignore inlined functions for the time being.
+      if (!Scope->getParent()) continue;
+      
+      unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+      unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+      // Ignore empty scopes.
+      if (StartID == EndID && StartID != 0) continue;
+      if (Scope->getScopes().empty() && Scope->getVariables().empty()) continue;
+      
+      if (StartID == ParentStartID && EndID == ParentEndID) {
+        // Just add stuff to the parent scope.
+        ConstructScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
+      } else {
+        DIE *ScopeDie = new DIE(DW_TAG_lexical_block);
+        
+        // Add the scope bounds.
+        if (StartID) {
+          AddLabel(ScopeDie, DW_AT_low_pc, DW_FORM_addr,
+                             DWLabel("label", StartID));
+        } else {
+          AddLabel(ScopeDie, DW_AT_low_pc, DW_FORM_addr,
+                             DWLabel("func_begin", SubprogramCount));
+        }
+        if (EndID) {
+          AddLabel(ScopeDie, DW_AT_high_pc, DW_FORM_addr,
+                             DWLabel("label", EndID));
+        } else {
+          AddLabel(ScopeDie, DW_AT_high_pc, DW_FORM_addr,
+                             DWLabel("func_end", SubprogramCount));
+        }
+                           
+        // Add the scope contents.
+        ConstructScope(Scope, StartID, EndID, ScopeDie, Unit);
+        ParentDie->AddChild(ScopeDie);
+      }
+    }
+  }
+
+  /// ConstructRootScope - Construct the scope for the subprogram.
+  ///
+  void ConstructRootScope(DebugScope *RootScope) {
+    // Exit if there is no root scope.
+    if (!RootScope) return;
+    
+    // Get the subprogram debug information entry. 
+    SubprogramDesc *SPD = cast<SubprogramDesc>(RootScope->getDesc());
+    
+    // Get the compile unit context.
+    CompileUnit *Unit = GetBaseCompileUnit();
+    
+    // Get the subprogram die.
+    DIE *SPDie = Unit->getDieMapSlotFor(SPD);
+    assert(SPDie && "Missing subprogram descriptor");
+    
+    // Add the function bounds.
+    AddLabel(SPDie, DW_AT_low_pc, DW_FORM_addr,
+                    DWLabel("func_begin", SubprogramCount));
+    AddLabel(SPDie, DW_AT_high_pc, DW_FORM_addr,
+                    DWLabel("func_end", SubprogramCount));
+    MachineLocation Location(RI->getFrameRegister(*MF));
+    AddAddress(SPDie, DW_AT_frame_base, Location);
+
+    ConstructScope(RootScope, 0, 0, SPDie, Unit);
+  }
+
+  /// EmitInitial - Emit initial Dwarf declarations.  This is necessary for cc
+  /// tools to recognize the object file contains Dwarf information.
+  void EmitInitial() {
+    // Check to see if we already emitted initial headers.
+    if (didInitial) return;
+    didInitial = true;
+    
+    // Dwarf sections base addresses.
+    if (TAI->doesDwarfRequireFrameSection()) {
+      Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+      EmitLabel("section_debug_frame", 0);
+    }
+    Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+    EmitLabel("section_info", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+    EmitLabel("section_abbrev", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+    EmitLabel("section_aranges", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+    EmitLabel("section_macinfo", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+    EmitLabel("section_line", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+    EmitLabel("section_loc", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+    EmitLabel("section_pubnames", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+    EmitLabel("section_str", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+    EmitLabel("section_ranges", 0);
+
+    Asm->SwitchToTextSection(TAI->getTextSection());
+    EmitLabel("text_begin", 0);
+    Asm->SwitchToDataSection(TAI->getDataSection());
+    EmitLabel("data_begin", 0);
+  }
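+  // These "section_*", "text_begin" and "data_begin" labels are the anchors
+  // that EmitSectionOffset and the various EmitDifference calls later compute
+  // offsets against.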
+
+  /// EmitDIE - Recursively emits a debug information entry.
+  ///
+  void EmitDIE(DIE *Die) {
+    // Get the abbreviation for this DIE.
+    unsigned AbbrevNumber = Die->getAbbrevNumber();
+    const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+    
+    Asm->EOL();
+
+    // Emit the code (index) for the abbreviation.
+    Asm->EmitULEB128Bytes(AbbrevNumber);
+    Asm->EOL(std::string("Abbrev [" +
+             utostr(AbbrevNumber) +
+             "] 0x" + utohexstr(Die->getOffset()) +
+             ":0x" + utohexstr(Die->getSize()) + " " +
+             TagString(Abbrev->getTag())));
+    
+    std::vector<DIEValue *> &Values = Die->getValues();
+    const std::vector<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+    
+    // Emit the DIE attribute values.
+    for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+      unsigned Attr = AbbrevData[i].getAttribute();
+      unsigned Form = AbbrevData[i].getForm();
+      assert(Form && "Too many attributes for DIE (check abbreviation)");
+      
+      switch (Attr) {
+      case DW_AT_sibling: {
+        Asm->EmitInt32(Die->SiblingOffset());
+        break;
+      }
+      default: {
+        // Emit an attribute using the defined form.
+        Values[i]->EmitValue(*this, Form);
+        break;
+      }
+      }
+      
+      Asm->EOL(AttributeString(Attr));
+    }
+    
+    // Emit the DIE children if any.
+    if (Abbrev->getChildrenFlag() == DW_CHILDREN_yes) {
+      const std::vector<DIE *> &Children = Die->getChildren();
+      
+      for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+        EmitDIE(Children[j]);
+      }
+      
+      Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
+    }
+  }
+
+  /// SizeAndOffsetDie - Compute the size and offset of a DIE.
+  ///
+  unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+    // Get the children.
+    const std::vector<DIE *> &Children = Die->getChildren();
+    
+    // If not last sibling and has children then add sibling offset attribute.
+    if (!Last && !Children.empty()) Die->AddSiblingOffset();
+
+    // Record the abbreviation.
+    AssignAbbrevNumber(Die->getAbbrev());
+   
+    // Get the abbreviation for this DIE.
+    unsigned AbbrevNumber = Die->getAbbrevNumber();
+    const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+    // Set DIE offset
+    Die->setOffset(Offset);
+    
+    // Start the size with the size of abbreviation code.
+    Offset += Asm->SizeULEB128(AbbrevNumber);
+    
+    const std::vector<DIEValue *> &Values = Die->getValues();
+    const std::vector<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+    // Size the DIE attribute values.
+    for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+      // Size attribute value.
+      Offset += Values[i]->SizeOf(*this, AbbrevData[i].getForm());
+    }
+    
+    // Size the DIE children if any.
+    if (!Children.empty()) {
+      assert(Abbrev->getChildrenFlag() == DW_CHILDREN_yes &&
+             "Children flag not set");
+      
+      for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+        Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+      }
+      
+      // End of children marker.
+      Offset += sizeof(int8_t);
+    }
+
+    Die->setSize(Offset - Die->getOffset());
+    return Offset;
+  }
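+  // The offsets assigned here are what DW_AT_sibling (see EmitDIE) and the
+  // DW_FORM_ref4 references added via AddDIEntry are expected to resolve to,
+  // so sizing must run before any of the info section is emitted.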
+
+  /// SizeAndOffsets - Compute the size and offset of all the DIEs.
+  ///
+  void SizeAndOffsets() {
+    // Process base compile unit.
+    CompileUnit *Unit = GetBaseCompileUnit();
+    // Compute size of compile unit header
+    unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info
+                      sizeof(int16_t) + // DWARF version number
+                      sizeof(int32_t) + // Offset Into Abbrev. Section
+                      sizeof(int8_t);   // Pointer Size (in bytes)
+    SizeAndOffsetDie(Unit->getDie(), Offset, true);
+  }
+
+  /// EmitDebugInfo - Emit the debug info section.
+  ///
+  void EmitDebugInfo() {
+    // Start debug info section.
+    Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+    
+    CompileUnit *Unit = GetBaseCompileUnit();
+    DIE *Die = Unit->getDie();
+    // Emit the compile units header.
+    EmitLabel("info_begin", Unit->getID());
+    // Emit size of content not including length itself
+    unsigned ContentSize = Die->getSize() +
+                           sizeof(int16_t) + // DWARF version number
+                           sizeof(int32_t) + // Offset Into Abbrev. Section
+                           sizeof(int8_t) +  // Pointer Size (in bytes)
+                           sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+                           
+    Asm->EmitInt32(ContentSize);  Asm->EOL("Length of Compilation Unit Info");
+    Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF version number");
+    EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
+    Asm->EOL("Offset Into Abbrev. Section");
+    Asm->EmitInt8(TAI->getAddressSize()); Asm->EOL("Address Size (in bytes)");
+  
+    EmitDIE(Die);
+    // FIXME - extra padding for gdb bug.
+    Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+    Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+    Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+    Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+    EmitLabel("info_end", Unit->getID());
+    
+    Asm->EOL();
+  }
+
+  /// EmitAbbreviations - Emit the abbreviation section.
+  ///
+  void EmitAbbreviations() const {
+    // Check to see if it is worth the effort.
+    if (!Abbreviations.empty()) {
+      // Start the debug abbrev section.
+      Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+      
+      EmitLabel("abbrev_begin", 0);
+      
+      // For each abbreviation.
+      for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+        // Get abbreviation data
+        const DIEAbbrev *Abbrev = Abbreviations[i];
+        
+        // Emit the abbreviation's code (base 1 index).
+        Asm->EmitULEB128Bytes(Abbrev->getNumber());
+        Asm->EOL("Abbreviation Code");
+        
+        // Emit the abbreviations data.
+        Abbrev->Emit(*this);
+    
+        Asm->EOL();
+      }
+      
+      // Mark end of abbreviations.
+      Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(3)");
+
+      EmitLabel("abbrev_end", 0);
+    
+      Asm->EOL();
+    }
+  }
+
+  /// EmitDebugLines - Emit source line information.
+  ///
+  void EmitDebugLines() {
+    // Minimum line delta (the line base for special opcodes); deltas thus
+    // range from -10..(255-10).
+    const int MinLineDelta = -(DW_LNS_fixed_advance_pc + 1);
+    // Maximum line delta encodable by a special opcode.
+    const int MaxLineDelta = 255 + MinLineDelta;
+
+    // Start the dwarf line section.
+    Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+    
+    // Construct the section header.
+    
+    EmitDifference("line_end", 0, "line_begin", 0, true);
+    Asm->EOL("Length of Source Line Info");
+    EmitLabel("line_begin", 0);
+    
+    Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF version number");
+    
+    EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true);
+    Asm->EOL("Prolog Length");
+    EmitLabel("line_prolog_begin", 0);
+    
+    Asm->EmitInt8(1); Asm->EOL("Minimum Instruction Length");
+
+    Asm->EmitInt8(1); Asm->EOL("Default is_stmt_start flag");
+
+    Asm->EmitInt8(MinLineDelta); Asm->EOL("Line Base Value (Special Opcodes)");
+    
+    Asm->EmitInt8(MaxLineDelta); Asm->EOL("Line Range Value (Special Opcodes)");
+
+    Asm->EmitInt8(-MinLineDelta); Asm->EOL("Special Opcode Base");
+    
+    // Line number standard opcode encodings argument count
+    Asm->EmitInt8(0); Asm->EOL("DW_LNS_copy arg count");
+    Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_pc arg count");
+    Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_line arg count");
+    Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_file arg count");
+    Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_column arg count");
+    Asm->EmitInt8(0); Asm->EOL("DW_LNS_negate_stmt arg count");
+    Asm->EmitInt8(0); Asm->EOL("DW_LNS_set_basic_block arg count");
+    Asm->EmitInt8(0); Asm->EOL("DW_LNS_const_add_pc arg count");
+    Asm->EmitInt8(1); Asm->EOL("DW_LNS_fixed_advance_pc arg count");
+
+    const UniqueVector<std::string> &Directories = MMI->getDirectories();
+    const UniqueVector<SourceFileInfo>
+      &SourceFiles = MMI->getSourceFiles();
+
+    // Emit directories.
+    for (unsigned DirectoryID = 1, NDID = Directories.size();
+                  DirectoryID <= NDID; ++DirectoryID) {
+      Asm->EmitString(Directories[DirectoryID]); Asm->EOL("Directory");
+    }
+    Asm->EmitInt8(0); Asm->EOL("End of directories");
+    
+    // Emit files.
+    for (unsigned SourceID = 1, NSID = SourceFiles.size();
+                 SourceID <= NSID; ++SourceID) {
+      const SourceFileInfo &SourceFile = SourceFiles[SourceID];
+      Asm->EmitString(SourceFile.getName());
+      Asm->EOL("Source");
+      Asm->EmitULEB128Bytes(SourceFile.getDirectoryID());
+      Asm->EOL("Directory #");
+      Asm->EmitULEB128Bytes(0);
+      Asm->EOL("Mod date");
+      Asm->EmitULEB128Bytes(0);
+      Asm->EOL("File size");
+    }
+    Asm->EmitInt8(0); Asm->EOL("End of files");
+    
+    EmitLabel("line_prolog_end", 0);
+    
+    // A sequence for each text section.
+    for (unsigned j = 0, M = SectionSourceLines.size(); j < M; ++j) {
+      // Isolate current section's line info.
+      const std::vector<SourceLineInfo> &LineInfos = SectionSourceLines[j];
+      
+      Asm->EOL(std::string("Section ") + SectionMap[j + 1]);
+
+      // Dwarf assumes we start with first line of first source file.
+      unsigned Source = 1;
+      unsigned Line = 1;
+      
+      // Construct rows of the address, source, line, column matrix.
+      for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
+        const SourceLineInfo &LineInfo = LineInfos[i];
+        unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID());
+        if (!LabelID) continue;
+        
+        unsigned SourceID = LineInfo.getSourceID();
+        const SourceFileInfo &SourceFile = SourceFiles[SourceID];
+        unsigned DirectoryID = SourceFile.getDirectoryID();
+        Asm->EOL(Directories[DirectoryID]
+          + SourceFile.getName()
+          + ":"
+          + utostr_32(LineInfo.getLine()));
+
+        // Define the line address.
+        Asm->EmitInt8(0); Asm->EOL("Extended Op");
+        Asm->EmitInt8(TAI->getAddressSize() + 1); Asm->EOL("Op size");
+        Asm->EmitInt8(DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+        EmitReference("label",  LabelID); Asm->EOL("Location label");
+        
+        // If change of source, then switch to the new source.
+        if (Source != LineInfo.getSourceID()) {
+          Source = LineInfo.getSourceID();
+          Asm->EmitInt8(DW_LNS_set_file); Asm->EOL("DW_LNS_set_file");
+          Asm->EmitULEB128Bytes(Source); Asm->EOL("New Source");
+        }
+        
+        // If change of line.
+        if (Line != LineInfo.getLine()) {
+          // Determine offset.
+          int Offset = LineInfo.getLine() - Line;
+          int Delta = Offset - MinLineDelta;
+          
+          // Update line.
+          Line = LineInfo.getLine();
+          
+          // If delta is small enough and in range...
+          if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
+            // ... then use fast opcode.
+            Asm->EmitInt8(Delta - MinLineDelta); Asm->EOL("Line Delta");
+          } else {
+            // ... otherwise use long hand.
+            Asm->EmitInt8(DW_LNS_advance_line); Asm->EOL("DW_LNS_advance_line");
+            Asm->EmitSLEB128Bytes(Offset); Asm->EOL("Line Offset");
+            Asm->EmitInt8(DW_LNS_copy); Asm->EOL("DW_LNS_copy");
+          }
+        } else {
+          // Copy the previous row (different address or source)
+          Asm->EmitInt8(DW_LNS_copy); Asm->EOL("DW_LNS_copy");
+        }
+      }
+
+      // Define last address of section.
+      Asm->EmitInt8(0); Asm->EOL("Extended Op");
+      Asm->EmitInt8(TAI->getAddressSize() + 1); Asm->EOL("Op size");
+      Asm->EmitInt8(DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+      EmitReference("section_end", j + 1); Asm->EOL("Section end label");
+
+      // Mark end of matrix.
+      Asm->EmitInt8(0); Asm->EOL("DW_LNE_end_sequence");
+      Asm->EmitULEB128Bytes(1); Asm->EOL();
+      Asm->EmitInt8(1); Asm->EOL();
+    }
+    
+    EmitLabel("line_end", 0);
+    
+    Asm->EOL();
+  }
+    
+  /// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+  ///
+  void EmitCommonDebugFrame() {
+    if (!TAI->doesDwarfRequireFrameSection())
+      return;
+
+    int stackGrowth =
+        Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+          TargetFrameInfo::StackGrowsUp ?
+        TAI->getAddressSize() : -TAI->getAddressSize();
+
+    // Start the dwarf frame section.
+    Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+
+    EmitLabel("debug_frame_common", 0);
+    EmitDifference("debug_frame_common_end", 0,
+                   "debug_frame_common_begin", 0, true);
+    Asm->EOL("Length of Common Information Entry");
+
+    EmitLabel("debug_frame_common_begin", 0);
+    Asm->EmitInt32((int)DW_CIE_ID);
+    Asm->EOL("CIE Identifier Tag");
+    Asm->EmitInt8(DW_CIE_VERSION);
+    Asm->EOL("CIE Version");
+    Asm->EmitString("");
+    Asm->EOL("CIE Augmentation");
+    Asm->EmitULEB128Bytes(1);
+    Asm->EOL("CIE Code Alignment Factor");
+    Asm->EmitSLEB128Bytes(stackGrowth);
+    Asm->EOL("CIE Data Alignment Factor");   
+    Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister()));
+    Asm->EOL("CIE RA Column");
+    
+    std::vector<MachineMove> Moves;
+    RI->getInitialFrameState(Moves);
+
+    EmitFrameMoves(NULL, 0, Moves);
+
+    Asm->EmitAlignment(2);
+    EmitLabel("debug_frame_common_end", 0);
+    
+    Asm->EOL();
+  }
+
+  /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+  /// section.
+  void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
+    if (!TAI->doesDwarfRequireFrameSection())
+      return;
+       
+    // Start the dwarf frame section.
+    Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+    
+    EmitDifference("debug_frame_end", DebugFrameInfo.Number,
+                   "debug_frame_begin", DebugFrameInfo.Number, true);
+    Asm->EOL("Length of Frame Information Entry");
+    
+    EmitLabel("debug_frame_begin", DebugFrameInfo.Number);
+
+    EmitSectionOffset("debug_frame_common", "section_debug_frame",
+                      0, 0, true, false);
+    Asm->EOL("FDE CIE offset");
+
+    EmitReference("func_begin", DebugFrameInfo.Number);
+    Asm->EOL("FDE initial location");
+    EmitDifference("func_end", DebugFrameInfo.Number,
+                   "func_begin", DebugFrameInfo.Number);
+    Asm->EOL("FDE address range");
+    
+    EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves);
+    
+    Asm->EmitAlignment(2);
+    EmitLabel("debug_frame_end", DebugFrameInfo.Number);
+
+    Asm->EOL();
+  }
+
+  /// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+  ///
+  void EmitDebugPubNames() {
+    // Start the dwarf pubnames section.
+    Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+      
+    CompileUnit *Unit = GetBaseCompileUnit(); 
+ 
+    EmitDifference("pubnames_end", Unit->getID(),
+                   "pubnames_begin", Unit->getID(), true);
+    Asm->EOL("Length of Public Names Info");
+    
+    EmitLabel("pubnames_begin", Unit->getID());
+    
+    Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF Version");
+
+    EmitSectionOffset("info_begin", "section_info",
+                      Unit->getID(), 0, true, false);
+    Asm->EOL("Offset of Compilation Unit Info");
+
+    EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(),true);
+    Asm->EOL("Compilation Unit Length");
+    
+    std::map<std::string, DIE *> &Globals = Unit->getGlobals();
+    
+    for (std::map<std::string, DIE *>::iterator GI = Globals.begin(),
+                                                GE = Globals.end();
+         GI != GE; ++GI) {
+      const std::string &Name = GI->first;
+      DIE * Entity = GI->second;
+      
+      Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset");
+      Asm->EmitString(Name); Asm->EOL("External Name");
+    }
+  
+    Asm->EmitInt32(0); Asm->EOL("End Mark");
+    EmitLabel("pubnames_end", Unit->getID());
+  
+    Asm->EOL();
+  }
+
+  /// EmitDebugStr - Emit visible names into a debug str section.
+  ///
+  void EmitDebugStr() {
+    // Check to see if it is worth the effort.
+    if (!StringPool.empty()) {
+      // Start the dwarf str section.
+      Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+      
+      // For each of the strings in the string pool.
+      for (unsigned StringID = 1, N = StringPool.size();
+           StringID <= N; ++StringID) {
+        // Emit a label for reference from debug information entries.
+        EmitLabel("string", StringID);
+        // Emit the string itself.
+        const std::string &String = StringPool[StringID];
+        Asm->EmitString(String); Asm->EOL();
+      }
+    
+      Asm->EOL();
+    }
+  }
+
+  /// EmitDebugLoc - Emit location lists into a debug loc section.
+  ///
+  void EmitDebugLoc() {
+    // Start the dwarf loc section.
+    Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+    
+    Asm->EOL();
+  }
+
+  /// EmitDebugARanges - Emit address ranges into a debug aranges section.
+  ///
+  void EmitDebugARanges() {
+    // Start the dwarf aranges section.
+    Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+    
+    // FIXME - Mock up
+  #if 0
+    CompileUnit *Unit = GetBaseCompileUnit(); 
+      
+    // Don't include size of length
+    Asm->EmitInt32(0x1c); Asm->EOL("Length of Address Ranges Info");
+    
+    Asm->EmitInt16(DWARF_VERSION); Asm->EOL("Dwarf Version");
+    
+    EmitReference("info_begin", Unit->getID());
+    Asm->EOL("Offset of Compilation Unit Info");
+
+    Asm->EmitInt8(TAI->getAddressSize()); Asm->EOL("Size of Address");
+
+    Asm->EmitInt8(0); Asm->EOL("Size of Segment Descriptor");
+
+    Asm->EmitInt16(0);  Asm->EOL("Pad (1)");
+    Asm->EmitInt16(0);  Asm->EOL("Pad (2)");
+
+    // Range 1
+    EmitReference("text_begin", 0); Asm->EOL("Address");
+    EmitDifference("text_end", 0, "text_begin", 0, true); Asm->EOL("Length");
+
+    Asm->EmitInt32(0); Asm->EOL("EOM (1)");
+    Asm->EmitInt32(0); Asm->EOL("EOM (2)");
+    
+    Asm->EOL();
+  #endif
+  }
+
+  /// EmitDebugRanges - Emit address range lists into a debug ranges section.
+  ///
+  void EmitDebugRanges() {
+    // Start the dwarf ranges section.
+    Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+    
+    Asm->EOL();
+  }
+
+  /// EmitDebugMacInfo - Emit macro information into a debug macinfo section.
+  ///
+  void EmitDebugMacInfo() {
+    // Start the dwarf macinfo section.
+    Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+    
+    Asm->EOL();
+  }
+
+  /// ConstructCompileUnitDIEs - Create a compile unit DIE for each source and
+  /// header file.
+  void ConstructCompileUnitDIEs() {
+    const UniqueVector<CompileUnitDesc *> CUW = MMI->getCompileUnits();
+    
+    for (unsigned i = 1, N = CUW.size(); i <= N; ++i) {
+      unsigned ID = MMI->RecordSource(CUW[i]);
+      CompileUnit *Unit = NewCompileUnit(CUW[i], ID);
+      CompileUnits.push_back(Unit);
+    }
+  }
+
+  /// ConstructGlobalDIEs - Create DIEs for each of the externally visible
+  /// global variables.
+  void ConstructGlobalDIEs() {
+    std::vector<GlobalVariableDesc *> GlobalVariables =
+        MMI->getAnchoredDescriptors<GlobalVariableDesc>(*M);
+    
+    for (unsigned i = 0, N = GlobalVariables.size(); i < N; ++i) {
+      GlobalVariableDesc *GVD = GlobalVariables[i];
+      NewGlobalVariable(GVD);
+    }
+  }
+
+  /// ConstructSubprogramDIEs - Create DIEs for each of the externally visible
+  /// subprograms.
+  void ConstructSubprogramDIEs() {
+    std::vector<SubprogramDesc *> Subprograms =
+        MMI->getAnchoredDescriptors<SubprogramDesc>(*M);
+    
+    for (unsigned i = 0, N = Subprograms.size(); i < N; ++i) {
+      SubprogramDesc *SPD = Subprograms[i];
+      NewSubprogram(SPD);
+    }
+  }
+
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfDebug(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+  : Dwarf(OS, A, T)
+  , CompileUnits()
+  , AbbreviationsSet(InitAbbreviationsSetSize)
+  , Abbreviations()
+  , ValuesSet(InitValuesSetSize)
+  , Values()
+  , StringPool()
+  , DescToUnitMap()
+  , SectionMap()
+  , SectionSourceLines()
+  , didInitial(false)
+  , shouldEmit(false)
+  {
+  }
+  virtual ~DwarfDebug() {
+    for (unsigned i = 0, N = CompileUnits.size(); i < N; ++i)
+      delete CompileUnits[i];
+    for (unsigned j = 0, M = Values.size(); j < M; ++j)
+      delete Values[j];
+  }
+
+  /// SetModuleInfo - Set machine module information when it's known that the
+  /// pass manager has created it.  Set by the target AsmPrinter.
+  void SetModuleInfo(MachineModuleInfo *mmi) {
+    // Make sure initial declarations are made.
+    if (!MMI && mmi->hasDebugInfo()) {
+      MMI = mmi;
+      shouldEmit = true;
+      
+      // Emit initial sections
+      EmitInitial();
+    
+      // Create all the compile unit DIEs.
+      ConstructCompileUnitDIEs();
+      
+      // Create DIEs for each of the externally visible global variables.
+      ConstructGlobalDIEs();
+
+      // Create DIEs for each of the externally visible subprograms.
+      ConstructSubprogramDIEs();
+      
+      // Prime section data.
+      SectionMap.insert(TAI->getTextSection());
+    }
+  }
+
+  /// BeginModule - Emit all Dwarf sections that should come prior to the
+  /// content.
+  void BeginModule(Module *M) {
+    this->M = M;
+    
+    if (!ShouldEmitDwarf()) return;
+  }
+
+  /// EndModule - Emit all Dwarf sections that should come after the content.
+  ///
+  void EndModule() {
+    if (!ShouldEmitDwarf()) return;
+    
+    // Standard sections final addresses.
+    Asm->SwitchToTextSection(TAI->getTextSection());
+    EmitLabel("text_end", 0);
+    Asm->SwitchToDataSection(TAI->getDataSection());
+    EmitLabel("data_end", 0);
+    
+    // End text sections.
+    for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+      Asm->SwitchToTextSection(SectionMap[i].c_str());
+      EmitLabel("section_end", i);
+    }
+
+    // Emit common frame information.
+    EmitCommonDebugFrame();
+
+    // Emit function debug frame information
+    for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
+           E = DebugFrames.end(); I != E; ++I)
+      EmitFunctionDebugFrame(*I);
+
+    // Compute DIE offsets and sizes.
+    SizeAndOffsets();
+    
+    // Emit all the DIEs into a debug info section
+    EmitDebugInfo();
+    
+    // Corresponding abbreviations into a abbrev section.
+    EmitAbbreviations();
+    
+    // Emit source line correspondence into a debug line section.
+    EmitDebugLines();
+    
+    // Emit info into a debug pubnames section.
+    EmitDebugPubNames();
+    
+    // Emit info into a debug str section.
+    EmitDebugStr();
+    
+    // Emit info into a debug loc section.
+    EmitDebugLoc();
+    
+    // Emit info into a debug aranges section.
+    EmitDebugARanges();
+    
+    // Emit info into a debug ranges section.
+    EmitDebugRanges();
+    
+    // Emit info into a debug macinfo section.
+    EmitDebugMacInfo();
+  }
+
+  /// BeginFunction - Gather pre-function debug information.  Assumes it is
+  /// emitted immediately after the function entry point.
+  void BeginFunction(MachineFunction *MF) {
+    this->MF = MF;
+    
+    if (!ShouldEmitDwarf()) return;
+
+    // Begin accumulating function debug information.
+    MMI->BeginFunction(MF);
+    
+    // Assumes in correct section after the entry point.
+    EmitLabel("func_begin", ++SubprogramCount);
+  }
+  
+  /// EndFunction - Gather and emit post-function debug information.
+  ///
+  void EndFunction() {
+    if (!ShouldEmitDwarf()) return;
+    
+    // Define end label for subprogram.
+    EmitLabel("func_end", SubprogramCount);
+      
+    // Get function line info.
+    const std::vector<SourceLineInfo> &LineInfos = MMI->getSourceLines();
+
+    if (!LineInfos.empty()) {
+      // Get section line info.
+      unsigned ID = SectionMap.insert(Asm->CurrentSection);
+      if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+      std::vector<SourceLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+      // Append the function info to section info.
+      SectionLineInfos.insert(SectionLineInfos.end(),
+                              LineInfos.begin(), LineInfos.end());
+    }
+    
+    // Construct scopes for subprogram.
+    ConstructRootScope(MMI->getRootScope());
+
+    DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+                                                 MMI->getFrameMoves()));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives. 
+///
+class DwarfException : public Dwarf  {
+
+private:
+  struct FunctionEHFrameInfo {
+    std::string FnName;
+    unsigned Number;
+    unsigned PersonalityIndex;
+    bool hasCalls;
+    bool hasLandingPads;
+    std::vector<MachineMove> Moves;
+
+    FunctionEHFrameInfo(const std::string &FN, unsigned Num, unsigned P,
+                        bool hC, bool hL,
+                        const std::vector<MachineMove> &M):
+      FnName(FN), Number(Num), PersonalityIndex(P),
+      hasCalls(hC), hasLandingPads(hL), Moves(M) { };
+  };
+
+  std::vector<FunctionEHFrameInfo> EHFrames;
+    
+  /// shouldEmit - Flag to indicate if exception information should be emitted.
+  ///
+  bool shouldEmit;
+  
+  /// EmitCommonEHFrame - Emit the common eh unwind frame.
+  ///
+  void EmitCommonEHFrame(const Function *Personality, unsigned Index) {
+    // Size and sign of stack growth.
+    int stackGrowth =
+        Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+          TargetFrameInfo::StackGrowsUp ?
+        TAI->getAddressSize() : -TAI->getAddressSize();
+
+    // Begin eh frame section.
+    Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+    O << "EH_frame" << Index << ":\n";
+    EmitLabel("section_eh_frame", Index);
+
+    // Define base labels.
+    EmitLabel("eh_frame_common", Index);
+    
+    // Define the eh frame length.
+    EmitDifference("eh_frame_common_end", Index,
+                   "eh_frame_common_begin", Index, true);
+    Asm->EOL("Length of Common Information Entry");
+
+    // EH frame header.
+    EmitLabel("eh_frame_common_begin", Index);
+    Asm->EmitInt32((int)0);
+    Asm->EOL("CIE Identifier Tag");
+    Asm->EmitInt8(DW_CIE_VERSION);
+    Asm->EOL("CIE Version");
+    
+    // The personality presence indicates that language specific information
+    // will show up in the eh frame.
+    Asm->EmitString(Personality ? "zPLR" : "zR");
+    Asm->EOL("CIE Augmentation");
+    
+    // Round out reader.
+    Asm->EmitULEB128Bytes(1);
+    Asm->EOL("CIE Code Alignment Factor");
+    Asm->EmitSLEB128Bytes(stackGrowth);
+    Asm->EOL("CIE Data Alignment Factor");   
+    Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister()));
+    Asm->EOL("CIE RA Column");
+    
+    // If there is a personality, we need to indicate the function's location.
+    if (Personality) {
+      Asm->EmitULEB128Bytes(7);
+      Asm->EOL("Augmentation Size");
+      Asm->EmitInt8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);
+      Asm->EOL("Personality (pcrel sdata4)");
+      
+      O << TAI->getData32bitsDirective();
+      Asm->EmitExternalGlobal((const GlobalVariable *)(Personality));
+      O << "-" << TAI->getPCSymbol();
+      Asm->EOL("Personality");
+      
+      Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+      Asm->EOL("LSDA Encoding (pcrel)");
+      Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+      Asm->EOL("FDE Encoding (pcrel)");
+    } else {
+      Asm->EmitULEB128Bytes(1);
+      Asm->EOL("Augmentation Size");
+      Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+      Asm->EOL("FDE Encoding (pcrel)");
+    }
+
+    // Indicate locations of general callee saved registers in frame.
+    std::vector<MachineMove> Moves;
+    RI->getInitialFrameState(Moves);
+    EmitFrameMoves(NULL, 0, Moves);
+
+    Asm->EmitAlignment(2);
+    EmitLabel("eh_frame_common_end", Index);
+    
+    Asm->EOL();
+  }
+  
+  /// EmitEHFrame - Emit function exception frame information.
+  ///
+  void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
+    Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+
+    // Externally visible entry into the function's eh frame info.
+    if (const char *GlobalDirective = TAI->getGlobalDirective())
+      O << GlobalDirective << EHFrameInfo.FnName << ".eh\n";
+    
+    // If there are no calls then you can't unwind.
+    if (!EHFrameInfo.hasCalls) { 
+      O << EHFrameInfo.FnName << ".eh = 0\n";
+    } else {
+      O << EHFrameInfo.FnName << ".eh:\n";
+      
+      // EH frame header.
+      EmitDifference("eh_frame_end", EHFrameInfo.Number,
+                     "eh_frame_begin", EHFrameInfo.Number, true);
+      Asm->EOL("Length of Frame Information Entry");
+      
+      EmitLabel("eh_frame_begin", EHFrameInfo.Number);
+
+      EmitSectionOffset("eh_frame_begin", "eh_frame_common",
+                        EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
+                        true, true);
+      Asm->EOL("FDE CIE offset");
+
+      EmitReference("eh_func_begin", EHFrameInfo.Number, true);
+      Asm->EOL("FDE initial location");
+      EmitDifference("eh_func_end", EHFrameInfo.Number,
+                     "eh_func_begin", EHFrameInfo.Number);
+      Asm->EOL("FDE address range");
+      
+      // If there is a personality and landing pads then point to the language
+      // specific data area in the exception table.
+      if (EHFrameInfo.PersonalityIndex) {
+        Asm->EmitULEB128Bytes(4);
+        Asm->EOL("Augmentation size");
+        
+        if (EHFrameInfo.hasLandingPads) {
+          EmitReference("exception", EHFrameInfo.Number, true);
+        } else if (TAI->getAddressSize() == 8) {
+          Asm->EmitInt64((int)0);
+        } else {
+          Asm->EmitInt32((int)0);
+        }
+        Asm->EOL("Language Specific Data Area");
+      } else {
+        Asm->EmitULEB128Bytes(0);
+        Asm->EOL("Augmentation size");
+      }
+      
+      // Indicate locations of function specific  callee saved registers in
+      // frame.
+      EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves);
+      
+      Asm->EmitAlignment(2);
+      EmitLabel("eh_frame_end", EHFrameInfo.Number);
+    }
+    
+    if (const char *UsedDirective = TAI->getUsedDirective())
+      O << UsedDirective << EHFrameInfo.FnName << ".eh\n\n";
+  }
+
+  /// EmitExceptionTable - Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy.  First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try.  In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try.  There is also a reference to the landing
+  ///     pad that handles the exception once processed.  Finally an index into
+  ///     the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset.  Starting with the action index from the
+  ///     landing pad site, each type Id is checked for a match to the current
+  ///     exception.  If it matches then the exception and type id are passed
+  ///     on to the landing pad.  Otherwise the next action is looked up.  This
+  ///     chain is terminated with a next action of zero.  If no type id is
+  ///     found then the frame is unwound and handling continues.
+  ///  3. The type id table contains references to all the C++ typeinfo for
+  ///     all catches in the function.  This table is reverse indexed, base 1.
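+  ///
+  /// For example, a function containing a single invoke that catches one type
+  /// yields one call-site record whose action index is 1, a single action
+  /// record of (type index 1, next action 0) where the 0 terminates the
+  /// chain, and one entry in the type id table.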
+
+  /// SharedTypeIds - How many leading type ids two landing pads have in common.
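+  /// For example, pads with type ids {1, 2, 3} and {1, 2, 7} share two.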
+  static unsigned SharedTypeIds(const LandingPadInfo *L,
+                                const LandingPadInfo *R) {
+    const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+    unsigned LSize = LIds.size(), RSize = RIds.size();
+    unsigned MinSize = LSize < RSize ? LSize : RSize;
+    unsigned Count = 0;
+
+    for (; Count != MinSize; ++Count)
+      if (LIds[Count] != RIds[Count])
+        return Count;
+
+    return Count;
+  }
+
+  /// PadLT - Order landing pads lexicographically by type id.
+  static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+    const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+    unsigned LSize = LIds.size(), RSize = RIds.size();
+    unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+    for (unsigned i = 0; i != MinSize; ++i)
+      if (LIds[i] != RIds[i])
+        return LIds[i] < RIds[i];
+
+    return LSize < RSize;
+  }
+
+  struct KeyInfo {
+    static inline unsigned getEmptyKey() { return -1U; }
+    static inline unsigned getTombstoneKey() { return -2U; }
+    static unsigned getHashValue(const unsigned &Key) { return Key; }
+    static bool isPod() { return true; }
+  };
+
+  struct PadSite {
+    unsigned PadIndex;
+    unsigned SiteIndex;
+  };
+
+  typedef DenseMap<unsigned, PadSite, KeyInfo> PadMapType;
+
+  struct ActionEntry {
+    int ValueForTypeID; // The value to write - may not be equal to the type id.
+    int NextAction;
+    struct ActionEntry *Previous;
+  };
+
+  void EmitExceptionTable() {
+    // Map all labels and get rid of any dead landing pads.
+    MMI->TidyLandingPads();
+
+    const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+    const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+    const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+    if (PadInfos.empty()) return;
+
+    // Sort the landing pads in order of their type ids.  This is used to fold
+    // duplicate actions.
+    SmallVector<const LandingPadInfo *, 64> LandingPads;
+    LandingPads.reserve(PadInfos.size());
+    for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+      LandingPads.push_back(&PadInfos[i]);
+    std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+    // Gather first action index for each landing pad site.
+    SmallVector<unsigned, 64> FirstActions;
+    FirstActions.reserve(PadInfos.size());
+
+    // The actions table.
+    SmallVector<ActionEntry, 32> Actions;
+
+    // Negative type ids index into FilterIds, positive type ids index into
+    // TypeInfos.  The value written for a positive type id is just the type
+    // id itself.  For a negative type id, however, the value written is the
+    // (negative) byte offset of the corresponding FilterIds entry.  The byte
+    // offset is usually equal to the type id, because the FilterIds entries
+    // are written using a variable width encoding which outputs one byte per
+    // entry as long as the value written is not too large, but can differ.
+    // This kind of complication does not occur for positive type ids because
+    // type infos are output using a fixed width encoding.
+    // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
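+    // For example, FilterIds of {1, 200, 3} gives offsets of -1, -2 and -4,
+    // since 200 needs two ULEB128 bytes.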
+    SmallVector<int, 16> FilterOffsets;
+    FilterOffsets.reserve(FilterIds.size());
+    int Offset = -1;
+    for (std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+        E = FilterIds.end(); I != E; ++I) {
+      FilterOffsets.push_back(Offset);
+      Offset -= Asm->SizeULEB128(*I);
+    }
+
+    // Compute sizes for exception table.
+    unsigned SizeSites = 0;
+    unsigned SizeActions = 0;
+
+    // Look at each landing pad site to compute size.  We need the size of each
+    // landing pad site info and the size of the landing pad's actions.
+    int FirstAction = 0;
+
+    for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+      const LandingPadInfo *LP = LandingPads[i];
+      const std::vector<int> &TypeIds = LP->TypeIds;
+      const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+      unsigned SizeSiteActions = 0;
+
+      if (NumShared < TypeIds.size()) {
+        unsigned SizeAction = 0;
+        ActionEntry *PrevAction = 0;
+
+        if (NumShared) {
+          const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+          assert(Actions.size());
+          PrevAction = &Actions.back();
+          SizeAction = Asm->SizeSLEB128(PrevAction->NextAction) +
+            Asm->SizeSLEB128(PrevAction->ValueForTypeID);
+          for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+            SizeAction -= Asm->SizeSLEB128(PrevAction->ValueForTypeID);
+            SizeAction += -PrevAction->NextAction;
+            PrevAction = PrevAction->Previous;
+          }
+        }
+
+        // Compute the actions.
+        for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+          int TypeID = TypeIds[I];
+          assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+          int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+          unsigned SizeTypeID = Asm->SizeSLEB128(ValueForTypeID);
+
+          int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+          SizeAction = SizeTypeID + Asm->SizeSLEB128(NextAction);
+          SizeSiteActions += SizeAction;
+
+          ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+          Actions.push_back(Action);
+
+          PrevAction = &Actions.back();
+        }
+
+        // Record the first action of the landing pad site.
+        FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+      } // else identical - re-use previous FirstAction
+
+      FirstActions.push_back(FirstAction);
+
+      // Compute this site's contribution to size.
+      SizeActions += SizeSiteActions;
+      unsigned M = LP->BeginLabels.size();
+      SizeSites += M*(sizeof(int32_t) +               // Site start.
+                      sizeof(int32_t) +               // Site length.
+                      sizeof(int32_t) +               // Landing pad.
+                      Asm->SizeULEB128(FirstAction)); // Action.
+    }
+    
+    // Final tallies.
+    unsigned SizeTypes = TypeInfos.size() * TAI->getAddressSize();
+
+    unsigned TypeOffset = sizeof(int8_t) + // Call site format
+                          Asm->SizeULEB128(SizeSites) + // Call-site table length
+                          SizeSites + SizeActions + SizeTypes;
+
+    unsigned TotalSize = sizeof(int8_t) + // LPStart format
+                         sizeof(int8_t) + // TType format
+                         Asm->SizeULEB128(TypeOffset) + // TType base offset
+                         TypeOffset;
+
+    unsigned SizeAlign = (4 - TotalSize) & 3;
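+    // For example, a TotalSize of 10 gets 2 bytes of padding so that the end
+    // of the table remains 4 byte aligned.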
+
+    // Begin the exception table.
+    Asm->SwitchToDataSection(TAI->getDwarfExceptionSection());
+    O << "GCC_except_table" << SubprogramCount << ":\n";
+    Asm->EmitAlignment(2);
+    for (unsigned i = 0; i != SizeAlign; ++i) {
+      Asm->EmitInt8(0);
+      Asm->EOL("Padding");
+    }
+    EmitLabel("exception", SubprogramCount);
+
+    // Emit the header.
+    Asm->EmitInt8(DW_EH_PE_omit);
+    Asm->EOL("LPStart format (DW_EH_PE_omit)");
+    Asm->EmitInt8(DW_EH_PE_absptr);
+    Asm->EOL("TType format (DW_EH_PE_absptr)");
+    Asm->EmitULEB128Bytes(TypeOffset);
+    Asm->EOL("TType base offset");
+    Asm->EmitInt8(DW_EH_PE_udata4);
+    Asm->EOL("Call site format (DW_EH_PE_udata4)");
+    Asm->EmitULEB128Bytes(SizeSites);
+    Asm->EOL("Call-site table length");
+
+    // Emit the landing pad site information in order of address.
+    PadMapType PadMap;
+
+    for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+      const LandingPadInfo *LandingPad = LandingPads[i];
+      for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+        unsigned BeginLabel = LandingPad->BeginLabels[j];
+        assert(!PadMap.count(BeginLabel) && "duplicate landing pad labels!");
+        PadSite P = { i, j };
+        PadMap[BeginLabel] = P;
+      }
+    }
+
+    for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+         I != E; ++I) {
+      for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+           MI != E; ++MI) {
+        if (MI->getOpcode() != TargetInstrInfo::LABEL)
+          continue;
+
+        unsigned BeginLabel = MI->getOperand(0).getImmedValue();
+        PadMapType::iterator L = PadMap.find(BeginLabel);
+
+        if (L == PadMap.end())
+          continue;
+
+        PadSite P = L->second;
+        const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+        assert(BeginLabel == LandingPad->BeginLabels[P.SiteIndex] &&
+               "Inconsistent landing pad map!");
+
+        EmitSectionOffset("label", "eh_func_begin", BeginLabel, SubprogramCount,
+                          false, true);
+        Asm->EOL("Region start");
+
+        EmitDifference("label", LandingPad->EndLabels[P.SiteIndex],
+                       "label", BeginLabel);
+        Asm->EOL("Region length");
+
+        if (LandingPad->TypeIds.empty()) {
+          if (TAI->getAddressSize() == sizeof(int32_t))
+            Asm->EmitInt32(0);
+          else
+            Asm->EmitInt64(0);
+        } else {
+          EmitSectionOffset("label", "eh_func_begin",
+                            LandingPad->LandingPadLabel, SubprogramCount,
+                            false, true);
+        }
+        Asm->EOL("Landing pad");
+
+        Asm->EmitULEB128Bytes(FirstActions[P.PadIndex]);
+        Asm->EOL("Action");
+      }
+    }
+
+    // Emit the actions.
+    for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+      ActionEntry &Action = Actions[I];
+
+      Asm->EmitSLEB128Bytes(Action.ValueForTypeID);
+      Asm->EOL("TypeInfo index");
+      Asm->EmitSLEB128Bytes(Action.NextAction);
+      Asm->EOL("Next action");
+    }
+
+    // Emit the type ids.
+    for (unsigned M = TypeInfos.size(); M; --M) {
+      GlobalVariable *GV = TypeInfos[M - 1];
+      
+      if (TAI->getAddressSize() == sizeof(int32_t))
+        O << TAI->getData32bitsDirective();
+      else
+        O << TAI->getData64bitsDirective();
+
+      if (GV)
+        O << Asm->getGlobalLinkName(GV);
+      else
+        O << "0";
+      
+      Asm->EOL("TypeInfo");
+    }
+
+    // Emit the filter typeids.
+    for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+      unsigned TypeID = FilterIds[j];
+      Asm->EmitULEB128Bytes(TypeID);
+      Asm->EOL("Filter TypeInfo index");
+    }
+    
+    Asm->EmitAlignment(2);
+  }
+
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfException(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+  : Dwarf(OS, A, T)
+  , shouldEmit(false)
+  {}
+  
+  virtual ~DwarfException() {}
+
+  /// SetModuleInfo - Set machine module information when it's known that the
+  /// pass manager has created it.  Set by the target AsmPrinter.
+  void SetModuleInfo(MachineModuleInfo *mmi) {
+    MMI = mmi;
+  }
+
+  /// BeginModule - Emit all exception information that should come prior to the
+  /// content.
+  void BeginModule(Module *M) {
+    this->M = M;
+  }
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  void EndModule() {
+    if (!shouldEmit) return;
+
+    const std::vector<Function *> Personalities = MMI->getPersonalities();
+    for (unsigned i = 0; i < Personalities.size(); ++i)
+      EmitCommonEHFrame(Personalities[i], i);
+    
+    for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(),
+           E = EHFrames.end(); I != E; ++I)
+      EmitEHFrame(*I);
+  }
+
+  /// BeginFunction - Gather pre-function exception information.  Assumes it is
+  /// emitted immediately after the function entry point.
+  void BeginFunction(MachineFunction *MF) {
+    this->MF = MF;
+    
+    if (MMI &&
+        ExceptionHandling &&
+        TAI->doesSupportExceptionHandling()) {
+      shouldEmit = true;
+      // Assumes in correct section after the entry point.
+      EmitLabel("eh_func_begin", ++SubprogramCount);
+    }
+  }
+
+  /// EndFunction - Gather and emit post-function exception information.
+  ///
+  void EndFunction() {
+    if (!shouldEmit) return;
+
+    EmitLabel("eh_func_end", SubprogramCount);
+    EmitExceptionTable();
+
+    // Save EH frame information
+    EHFrames.push_back(FunctionEHFrameInfo(getAsm()->CurrentFnName,
+                                           SubprogramCount,
+                                           MMI->getPersonalityIndex(),
+                                           MF->getFrameInfo()->hasCalls(),
+                                           !MMI->getLandingPads().empty(),
+                                           MMI->getFrameMoves()));
+  }
+};
+
+} // End of namespace llvm
+
+//===----------------------------------------------------------------------===//
+
+/// Emit - Print the abbreviation using the specified Dwarf writer.
+///
+void DIEAbbrev::Emit(const DwarfDebug &DD) const {
+  // Emit its Dwarf tag type.
+  DD.getAsm()->EmitULEB128Bytes(Tag);
+  DD.getAsm()->EOL(TagString(Tag));
+  
+  // Emit whether it has children DIEs.
+  DD.getAsm()->EmitULEB128Bytes(ChildrenFlag);
+  DD.getAsm()->EOL(ChildrenString(ChildrenFlag));
+  
+  // For each attribute description.
+  for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+    const DIEAbbrevData &AttrData = Data[i];
+    
+    // Emit attribute type.
+    DD.getAsm()->EmitULEB128Bytes(AttrData.getAttribute());
+    DD.getAsm()->EOL(AttributeString(AttrData.getAttribute()));
+    
+    // Emit form type.
+    DD.getAsm()->EmitULEB128Bytes(AttrData.getForm());
+    DD.getAsm()->EOL(FormEncodingString(AttrData.getForm()));
+  }
+
+  // Mark end of abbreviation.
+  DD.getAsm()->EmitULEB128Bytes(0); DD.getAsm()->EOL("EOM(1)");
+  DD.getAsm()->EmitULEB128Bytes(0); DD.getAsm()->EOL("EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(std::ostream &O) {
+  O << "Abbreviation @"
+    << std::hex << (intptr_t)this << std::dec
+    << "  "
+    << TagString(Tag)
+    << " "
+    << ChildrenString(ChildrenFlag)
+    << "\n";
+  
+  for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+    O << "  "
+      << AttributeString(Data[i].getAttribute())
+      << "  "
+      << FormEncodingString(Data[i].getForm())
+      << "\n";
+  }
+}
+void DIEAbbrev::dump() { print(cerr); }
+#endif
+
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+  print(cerr);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(DwarfDebug &DD, unsigned Form) {
+  switch (Form) {
+  case DW_FORM_flag:  // Fall thru
+  case DW_FORM_ref1:  // Fall thru
+  case DW_FORM_data1: DD.getAsm()->EmitInt8(Integer);         break;
+  case DW_FORM_ref2:  // Fall thru
+  case DW_FORM_data2: DD.getAsm()->EmitInt16(Integer);        break;
+  case DW_FORM_ref4:  // Fall thru
+  case DW_FORM_data4: DD.getAsm()->EmitInt32(Integer);        break;
+  case DW_FORM_ref8:  // Fall thru
+  case DW_FORM_data8: DD.getAsm()->EmitInt64(Integer);        break;
+  case DW_FORM_udata: DD.getAsm()->EmitULEB128Bytes(Integer); break;
+  case DW_FORM_sdata: DD.getAsm()->EmitSLEB128Bytes(Integer); break;
+  default: assert(0 && "DIE Value form not supported yet");   break;
+  }
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+  switch (Form) {
+  case DW_FORM_flag:  // Fall thru
+  case DW_FORM_ref1:  // Fall thru
+  case DW_FORM_data1: return sizeof(int8_t);
+  case DW_FORM_ref2:  // Fall thru
+  case DW_FORM_data2: return sizeof(int16_t);
+  case DW_FORM_ref4:  // Fall thru
+  case DW_FORM_data4: return sizeof(int32_t);
+  case DW_FORM_ref8:  // Fall thru
+  case DW_FORM_data8: return sizeof(int64_t);
+  case DW_FORM_udata: return DD.getAsm()->SizeULEB128(Integer);
+  case DW_FORM_sdata: return DD.getAsm()->SizeSLEB128(Integer);
+  default: assert(0 && "DIE Value form not supported yet"); break;
+  }
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(DwarfDebug &DD, unsigned Form) {
+  DD.getAsm()->EmitString(String);
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEDwarfLabel::EmitValue(DwarfDebug &DD, unsigned Form) {
+  DD.EmitReference(Label);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEDwarfLabel::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+  return DD.getTargetAsmInfo()->getAddressSize();
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEObjectLabel::EmitValue(DwarfDebug &DD, unsigned Form) {
+  DD.EmitReference(Label);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEObjectLabel::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+  return DD.getTargetAsmInfo()->getAddressSize();
+}
+    
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(DwarfDebug &DD, unsigned Form) {
+  bool IsSmall = Form == DW_FORM_data4;
+  DD.EmitDifference(LabelHi, LabelLo, IsSmall);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+  if (Form == DW_FORM_data4) return 4;
+  return DD.getTargetAsmInfo()->getAddressSize();
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEntry::EmitValue(DwarfDebug &DD, unsigned Form) {
+  DD.getAsm()->EmitInt32(Entry->getOffset());
+}
+    
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - Calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(DwarfDebug &DD) {
+  if (!Size) {
+    const std::vector<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+    
+    for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+      Size += Values[i]->SizeOf(DD, AbbrevData[i].getForm());
+    }
+  }
+  return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(DwarfDebug &DD, unsigned Form) {
+  switch (Form) {
+  case DW_FORM_block1: DD.getAsm()->EmitInt8(Size);         break;
+  case DW_FORM_block2: DD.getAsm()->EmitInt16(Size);        break;
+  case DW_FORM_block4: DD.getAsm()->EmitInt32(Size);        break;
+  case DW_FORM_block:  DD.getAsm()->EmitULEB128Bytes(Size); break;
+  default: assert(0 && "Improper form for block");          break;
+  }
+  
+  const std::vector<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+    DD.getAsm()->EOL();
+    Values[i]->EmitValue(DD, AbbrevData[i].getForm());
+  }
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+  switch (Form) {
+  case DW_FORM_block1: return Size + sizeof(int8_t);
+  case DW_FORM_block2: return Size + sizeof(int16_t);
+  case DW_FORM_block4: return Size + sizeof(int32_t);
+  case DW_FORM_block: return Size + DD.getAsm()->SizeULEB128(Size);
+  default: assert(0 && "Improper form for block"); break;
+  }
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+/// DIE Implementation
+
+DIE::~DIE() {
+  for (unsigned i = 0, N = Children.size(); i < N; ++i)
+    delete Children[i];
+}
+  
+/// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+void DIE::AddSiblingOffset() {
+  DIEInteger *DI = new DIEInteger(0);
+  Values.insert(Values.begin(), DI);
+  Abbrev.AddFirstAttribute(DW_AT_sibling, DW_FORM_ref4);
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIE::Profile(FoldingSetNodeID &ID) {
+  Abbrev.Profile(ID);
+  
+  for (unsigned i = 0, N = Children.size(); i < N; ++i)
+    ID.AddPointer(Children[i]);
+
+  for (unsigned j = 0, M = Values.size(); j < M; ++j)
+    ID.AddPointer(Values[j]);
+}
+
+#ifndef NDEBUG
+void DIE::print(std::ostream &O, unsigned IncIndent) {
+  static unsigned IndentCount = 0;
+  IndentCount += IncIndent;
+  const std::string Indent(IndentCount, ' ');
+  bool isBlock = Abbrev.getTag() == 0;
+  
+  if (!isBlock) {
+    O << Indent
+      << "Die: "
+      << "0x" << std::hex << (intptr_t)this << std::dec
+      << ", Offset: " << Offset
+      << ", Size: " << Size
+      << "\n"; 
+    
+    O << Indent
+      << TagString(Abbrev.getTag())
+      << " "
+      << ChildrenString(Abbrev.getChildrenFlag());
+  } else {
+    O << "Size: " << Size;
+  }
+  O << "\n";
+
+  const std::vector<DIEAbbrevData> &Data = Abbrev.getData();
+  
+  IndentCount += 2;
+  for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+    O << Indent;
+    if (!isBlock) {
+      O << AttributeString(Data[i].getAttribute());
+    } else {
+      O << "Blk[" << i << "]";
+    }
+    O <<  "  "
+      << FormEncodingString(Data[i].getForm())
+      << " ";
+    Values[i]->print(O);
+    O << "\n";
+  }
+  IndentCount -= 2;
+
+  for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+    Children[j]->print(O, 4);
+  }
+  
+  if (!isBlock) O << "\n";
+  IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+  print(cerr);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+/// DwarfWriter Implementation
+///
+
+DwarfWriter::DwarfWriter(std::ostream &OS, AsmPrinter *A,
+                         const TargetAsmInfo *T) {
+  DE = new DwarfException(OS, A, T);
+  DD = new DwarfDebug(OS, A, T);
+}
+
+DwarfWriter::~DwarfWriter() {
+  delete DE;
+  delete DD;
+}
+
+/// SetModuleInfo - Set machine module info when it's known that the pass
+/// manager has created it.  Set by the target AsmPrinter.
+void DwarfWriter::SetModuleInfo(MachineModuleInfo *MMI) {
+  DD->SetModuleInfo(MMI);
+  DE->SetModuleInfo(MMI);
+}
+
+/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// content.
+void DwarfWriter::BeginModule(Module *M) {
+  DE->BeginModule(M);
+  DD->BeginModule(M);
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfWriter::EndModule() {
+  DE->EndModule();
+  DD->EndModule();
+}
+
+/// BeginFunction - Gather pre-function debug information.  Assumes it is
+/// emitted immediately after the function entry point.
+void DwarfWriter::BeginFunction(MachineFunction *MF) {
+  DE->BeginFunction(MF);
+  DD->BeginFunction(MF);
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfWriter::EndFunction() {
+  DD->EndFunction();
+  DE->EndFunction();
+  
+  if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI()) {
+    // Clear function debug information.
+    MMI->EndFunction();
+  }
+}
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 0000000..8ecddb8
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,547 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer.  This file writes out
+// the ELF file in the following order:
+//
+//  #1. ELF Header
+//  #2. '.text' section
+//  #3. '.data' section
+//  #4. '.bss' section  (conceptual position in file)
+//  ...
+//  #X. '.shstrtab' section
+//  #Y. Section Table
+//
+// The entries in the section table are laid out as:
+//  #0. Null entry [required]
+//  #1. ".text" entry - the program code
+//  #2. ".data" entry - global variables with initializers.     [ if needed ]
+//  #3. ".bss" entry  - global variables without initializers.  [ if needed ]
+//  ...
+//  #N. ".shstrtab" entry - String table for the section names.
+//
+// NOTE: This code should eventually be extended to support 64-bit ELF (this
+// won't be hard), but we haven't done so yet!
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFWriter.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Streams.h"
+#include <list>
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+/// AddELFWriter - Concrete function to add the ELF writer to the function pass
+/// manager.
+MachineCodeEmitter *llvm::AddELFWriter(FunctionPassManager &FPM,
+                                       std::ostream &O,
+                                       TargetMachine &TM) {
+  ELFWriter *EW = new ELFWriter(O, TM);
+  FPM.add(EW);
+  return &EW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+//                       ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+  /// ELFCodeEmitter - This class is used by the ELFWriter to emit the code for
+  /// functions to the ELF file.
+  class ELFCodeEmitter : public MachineCodeEmitter {
+    ELFWriter &EW;
+    TargetMachine &TM;
+    ELFWriter::ELFSection *ES;  // Section to write to.
+    std::vector<unsigned char> *OutBuffer;
+    size_t FnStart;
+  public:
+    ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {}
+
+    void startFunction(MachineFunction &F);
+    bool finishFunction(MachineFunction &F);
+
+    void addRelocation(const MachineRelocation &MR) {
+      assert(0 && "relo not handled yet!");
+    }
+    
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+    }
+
+    virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const {
+      assert(0 && "CP not implementated yet!");
+      return 0;
+    }
+    virtual intptr_t getJumpTableEntryAddress(unsigned Index) const {
+      assert(0 && "JT not implementated yet!");
+      return 0;
+    }
+
+    virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+      assert(0 && "JT not implementated yet!");
+      return 0;
+    }
+
+    /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+    void startFunctionStub(unsigned StubSize, unsigned Alignment = 1) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    void *finishFunctionStub(const Function *F) {
+      assert(0 && "JIT specific function called!");
+      abort();
+      return 0;
+    }
+  };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &F) {
+  // Align the output buffer to the appropriate alignment.
+  unsigned Align = 16;   // FIXME: GENERICIZE!!
+  // Get the ELF Section that this function belongs in.
+  ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS,
+                      ELFWriter::ELFSection::SHF_EXECINSTR |
+                      ELFWriter::ELFSection::SHF_ALLOC);
+  OutBuffer = &ES->SectionData;
+  cerr << "FIXME: This code needs to be updated for changes in the "
+       << "CodeEmitter interfaces.  In particular, this should set "
+       << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!";
+  abort();
+
+  // Upgrade the section alignment if required.
+  if (ES->Align < Align) ES->Align = Align;
+
+  // Add padding zeros to the end of the buffer to make sure that the
+  // function will start on the correct byte alignment within the section.
+  OutputBuffer OB(*OutBuffer,
+                  TM.getTargetData()->getPointerSizeInBits() == 64,
+                  TM.getTargetData()->isLittleEndian());
+  OB.align(Align);
+  FnStart = OutBuffer->size();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &F) {
+  // We now know the size of the function, add a symbol to represent it.
+  ELFWriter::ELFSym FnSym(F.getFunction());
+
+  // Figure out the binding (linkage) of the symbol.
+  switch (F.getFunction()->getLinkage()) {
+  default:
+    // appending linkage is illegal for functions.
+    assert(0 && "Unknown linkage type!");
+  case GlobalValue::ExternalLinkage:
+    FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL);
+    break;
+  case GlobalValue::LinkOnceLinkage:
+  case GlobalValue::WeakLinkage:
+    FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK);
+    break;
+  case GlobalValue::InternalLinkage:
+    FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL);
+    break;
+  }
+
+  ES->Size = OutBuffer->size();
+
+  FnSym.SetType(ELFWriter::ELFSym::STT_FUNC);
+  FnSym.SectionIdx = ES->SectionIdx;
+  FnSym.Value = FnStart;   // Value = Offset from start of Section.
+  FnSym.Size = OutBuffer->size()-FnStart;
+
+  // Finally, add it to the symtab.
+  EW.SymbolTable.push_back(FnSym);
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+//                          ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(std::ostream &o, TargetMachine &tm) 
+  : MachineFunctionPass((intptr_t)&ID), O(o), TM(tm) {
+  e_flags = 0;    // e_flags defaults to 0, no flags.
+
+  is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+  isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+  // Create the machine code emitter object for this target.
+  MCE = new ELFCodeEmitter(*this);
+  NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+  delete MCE;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+  Mang = new Mangler(M);
+
+  // Local alias to shorten the following code.
+  std::vector<unsigned char> &FH = FileHeader;
+  OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+  FHOut.outbyte(0x7F);                     // EI_MAG0
+  FHOut.outbyte('E');                      // EI_MAG1
+  FHOut.outbyte('L');                      // EI_MAG2
+  FHOut.outbyte('F');                      // EI_MAG3
+  FHOut.outbyte(is64Bit ? 2 : 1);          // EI_CLASS
+  FHOut.outbyte(isLittleEndian ? 1 : 2);   // EI_DATA
+  FHOut.outbyte(1);                        // EI_VERSION
+  FH.resize(16);                         // EI_PAD up to 16 bytes.
+
+  // This should change for shared objects.
+  FHOut.outhalf(1);                 // e_type = ET_REL
+  FHOut.outword(TM.getELFWriterInfo()->getEMachine()); // target-defined
+  FHOut.outword(1);                 // e_version = 1
+  FHOut.outaddr(0);                 // e_entry = 0 -> no entry point in .o file
+  FHOut.outaddr(0);                 // e_phoff = 0 -> no program header for .o
+
+  ELFHeader_e_shoff_Offset = FH.size();
+  FHOut.outaddr(0);                 // e_shoff
+  FHOut.outword(e_flags);           // e_flags = whatever the target wants
+
+  FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size
+  FHOut.outhalf(0);                 // e_phentsize = prog header entry size
+  FHOut.outhalf(0);                 // e_phnum     = # prog header entries = 0
+  FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size
+
+
+  ELFHeader_e_shnum_Offset = FH.size();
+  FHOut.outhalf(0);                 // e_shnum     = # of section header ents
+  ELFHeader_e_shstrndx_Offset = FH.size();
+  FHOut.outhalf(0);                 // e_shstrndx  = Section # of '.shstrtab'
+
+  // Add the null section, which is required to be first in the file.
+  getSection("", 0, 0);
+
+  // Start up the symbol table.  The first entry in the symtab is the null
+  // entry.
+  SymbolTable.push_back(ELFSym(0));
+
+  return false;
+}
+
+void ELFWriter::EmitGlobal(GlobalVariable *GV) {
+  // If this is an external global, emit it now.  TODO: Note that it would be
+  // better to ignore the symbol here and only add it to the symbol table if
+  // referenced.
+  if (!GV->hasInitializer()) {
+    ELFSym ExternalSym(GV);
+    ExternalSym.SetBind(ELFSym::STB_GLOBAL);
+    ExternalSym.SetType(ELFSym::STT_NOTYPE);
+    ExternalSym.SectionIdx = ELFSection::SHN_UNDEF;
+    SymbolTable.push_back(ExternalSym);
+    return;
+  }
+
+  const Type *GVType = (const Type*)GV->getType();
+  unsigned Align = TM.getTargetData()->getPrefTypeAlignment(GVType);
+  unsigned Size  = TM.getTargetData()->getTypeSize(GVType);
+
+  // If this global has a zero initializer, it is part of the .bss or common
+  // section.
+  if (GV->getInitializer()->isNullValue()) {
+    // If this global is part of the common block, add it now.  Variables are
+    // part of the common block if they are zero initialized and allowed to be
+    // merged with other symbols.
+    if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) {
+      ELFSym CommonSym(GV);
+      // Value for common symbols is the alignment required.
+      CommonSym.Value = Align;
+      CommonSym.Size  = Size;
+      CommonSym.SetBind(ELFSym::STB_GLOBAL);
+      CommonSym.SetType(ELFSym::STT_OBJECT);
+      // TODO SOMEDAY: add ELF visibility.
+      CommonSym.SectionIdx = ELFSection::SHN_COMMON;
+      SymbolTable.push_back(CommonSym);
+      return;
+    }
+
+    // Otherwise, this symbol is part of the .bss section.  Emit it now.
+
+    // Handle alignment.  Ensure section is aligned at least as much as required
+    // by this symbol.
+    ELFSection &BSSSection = getBSSSection();
+    BSSSection.Align = std::max(BSSSection.Align, Align);
+
+    // Within the section, emit enough virtual padding to get us to an alignment
+    // boundary.
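+    // For example, a Size of 10 with an Align of 8 is rounded up to 16.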
+    if (Align)
+      BSSSection.Size = (BSSSection.Size + Align - 1) & ~(Align-1);
+
+    ELFSym BSSSym(GV);
+    BSSSym.Value = BSSSection.Size;
+    BSSSym.Size = Size;
+    BSSSym.SetType(ELFSym::STT_OBJECT);
+
+    switch (GV->getLinkage()) {
+    default:  // weak/linkonce handled above
+      assert(0 && "Unexpected linkage type!");
+    case GlobalValue::AppendingLinkage:  // FIXME: This should be improved!
+    case GlobalValue::ExternalLinkage:
+      BSSSym.SetBind(ELFSym::STB_GLOBAL);
+      break;
+    case GlobalValue::InternalLinkage:
+      BSSSym.SetBind(ELFSym::STB_LOCAL);
+      break;
+    }
+
+    // Set the idx of the .bss section
+    BSSSym.SectionIdx = BSSSection.SectionIdx;
+    SymbolTable.push_back(BSSSym);
+
+    // Reserve space in the .bss section for this symbol.
+    BSSSection.Size += Size;
+    return;
+  }
+
+  // FIXME: handle .rodata
+  //assert(!GV->isConstant() && "unimp");
+
+  // FIXME: handle .data
+  //assert(0 && "unimp");
+}
+
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+  // Nothing to do here, this is all done through the MCE object above.
+  return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+  // Okay, the ELF header and .text sections have been completed, build the
+  // .data, .bss, and "common" sections next.
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    EmitGlobal(I);
+
+  // Emit the symbol table now, if non-empty.
+  EmitSymbolTable();
+
+  // FIXME: Emit the relocations now.
+
+  // Emit the string table for the sections in the ELF file we have.
+  EmitSectionTableStringTable();
+
+  // Emit the sections to the .o file, and emit the section table for the file.
+  OutputSectionsAndSectionTable();
+
+  // We are done with the abstract symbols.
+  SectionList.clear();
+  NumSections = 0;
+
+  // Release the name mangler object.
+  delete Mang; Mang = 0;
+  return false;
+}
+
+/// EmitSymbolTable - If the current symbol table is non-empty, emit the string
+/// table for it and then the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+  if (SymbolTable.size() == 1) return;  // Only the null entry.
+
+  // FIXME: compact all local symbols to the start of the symtab.
+  unsigned FirstNonLocalSymbol = 1;
+
+  ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0);
+  StrTab.Align = 1;
+
+  DataBuffer &StrTabBuf = StrTab.SectionData;
+  OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian);
+
+  // Set the zero'th symbol to a null byte, as required.
+  StrTabOut.outbyte(0);
+  SymbolTable[0].NameIdx = 0;
+  unsigned Index = 1;
+  for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) {
+    // Use the name mangler to uniquify the LLVM symbol.
+    std::string Name = Mang->getValueName(SymbolTable[i].GV);
+
+    if (Name.empty()) {
+      SymbolTable[i].NameIdx = 0;
+    } else {
+      SymbolTable[i].NameIdx = Index;
+
+      // Add the name to the output buffer, including the null terminator.
+      StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end());
+
+      // Add a null terminator.
+      StrTabBuf.push_back(0);
+
+      // Keep track of the number of bytes emitted to this section.
+      Index += Name.size()+1;
+    }
+  }
+  assert(Index == StrTabBuf.size());
+  StrTab.Size = Index;
+
+  // Now that we have emitted the string table and know the offset into the
+  // string table of each symbol, emit the symbol table itself.
+  ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+  SymTab.Align = is64Bit ? 8 : 4;
+  SymTab.Link = StrTab.SectionIdx;     // Section Index of .strtab.
+  SymTab.Info = FirstNonLocalSymbol;   // First non-STB_LOCAL symbol.
+  SymTab.EntSize = 16; // Size of each symtab entry. FIXME: wrong for ELF64
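+  // (An Elf32_Sym is 16 bytes; an Elf64_Sym would be 24 bytes.)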
+  DataBuffer &SymTabBuf = SymTab.SectionData;
+  OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian);
+
+  if (!is64Bit) {   // 32-bit and 64-bit formats are shuffled a bit.
+    for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+      ELFSym &Sym = SymbolTable[i];
+      SymTabOut.outword(Sym.NameIdx);
+      SymTabOut.outaddr32(Sym.Value);
+      SymTabOut.outword(Sym.Size);
+      SymTabOut.outbyte(Sym.Info);
+      SymTabOut.outbyte(Sym.Other);
+      SymTabOut.outhalf(Sym.SectionIdx);
+    }
+  } else {
+    for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+      ELFSym &Sym = SymbolTable[i];
+      SymTabOut.outword(Sym.NameIdx);
+      SymTabOut.outbyte(Sym.Info);
+      SymTabOut.outbyte(Sym.Other);
+      SymTabOut.outhalf(Sym.SectionIdx);
+      SymTabOut.outaddr64(Sym.Value);
+      SymTabOut.outxword(Sym.Size);
+    }
+  }
+
+  SymTab.Size = SymTabBuf.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+  // First step: add the section for the string table to the list of sections:
+  ELFSection &SHStrTab = getSection(".shstrtab", ELFSection::SHT_STRTAB, 0);
+
+  // Now that we know which section number is the .shstrtab section, update the
+  // e_shstrndx entry in the ELF header.
+  OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+  FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset);
+
+  // Set the NameIdx of each section in the string table and emit the bytes for
+  // the string table.
+  unsigned Index = 0;
+  DataBuffer &Buf = SHStrTab.SectionData;
+
+  for (std::list<ELFSection>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I) {
+    // Set the index into the table.  Note if we have lots of entries with
+    // common suffixes, we could memoize them here if we cared.
+    I->NameIdx = Index;
+
+    // Add the name to the output buffer, including the null terminator.
+    Buf.insert(Buf.end(), I->Name.begin(), I->Name.end());
+
+    // Add a null terminator.
+    Buf.push_back(0);
+
+    // Keep track of the number of bytes emitted to this section.
+    Index += I->Name.size()+1;
+  }
+
+  // Set the size of .shstrtab now that we know what it is.
+  assert(Index == Buf.size());
+  SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+  // Pass #1: Compute the file offset for each section.
+  size_t FileOff = FileHeader.size();   // File header first.
+
+  // Emit all of the section data in order.
+  for (std::list<ELFSection>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I) {
+    // Align FileOff to whatever the alignment restrictions of the section are.
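+    // The round-up below assumes the alignment is a power of two; e.g. with
+    // I->Align == 16 and FileOff == 70, (70+15) & ~15 yields 80.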
+    if (I->Align)
+      FileOff = (FileOff+I->Align-1) & ~(I->Align-1);
+    I->Offset = FileOff;
+    FileOff += I->SectionData.size();
+  }
+
+  // Align Section Header.
+  unsigned TableAlign = is64Bit ? 8 : 4;
+  FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+  // Now that we know where all of the sections will be emitted, set the e_shnum
+  // entry in the ELF header.
+  OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+  FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset);
+
+  // Now that we know the offset in the file of the section table, update the
+  // e_shoff address in the ELF header.
+  FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset);
+
+  // Now that we know all of the data in the file header, emit it and all of the
+  // sections!
+  O.write((char*)&FileHeader[0], FileHeader.size());
+  FileOff = FileHeader.size();
+  DataBuffer().swap(FileHeader);
+
+  DataBuffer Table;
+  OutputBuffer TableOut(Table, is64Bit, isLittleEndian);
+
+  // Emit all of the section data and build the section table itself.
+  while (!SectionList.empty()) {
+    const ELFSection &S = *SectionList.begin();
+
+    // Align FileOff to whatever the alignment restrictions of the section are.
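+    // Pad up to the aligned offset; the 0xAB filler value is arbitrary.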
+    if (S.Align)
+      for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+           FileOff != NewFileOff; ++FileOff)
+        O.put((char)0xAB);
+    O.write((char*)&S.SectionData[0], S.SectionData.size());
+    FileOff += S.SectionData.size();
+
+    TableOut.outword(S.NameIdx);  // sh_name - Symbol table name idx
+    TableOut.outword(S.Type);     // sh_type - Section contents & semantics
+    TableOut.outword(S.Flags);    // sh_flags - Section flags.
+    TableOut.outaddr(S.Addr);     // sh_addr - The mem addr this section is in.
+    TableOut.outaddr(S.Offset);   // sh_offset - Offset from the file start.
+    TableOut.outword(S.Size);     // sh_size - The section size.
+    TableOut.outword(S.Link);     // sh_link - Section header table index link.
+    TableOut.outword(S.Info);     // sh_info - Auxiliary information.
+    TableOut.outword(S.Align);    // sh_addralign - Alignment of section.
+    TableOut.outword(S.EntSize);  // sh_entsize - Size of entries in the section
+
+    SectionList.pop_front();
+  }
+
+  // Align output for the section table.
+  for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+       FileOff != NewFileOff; ++FileOff)
+    O.put((char)0xAB);
+
+  // Emit the section table itself.
+  O.write((char*)&Table[0], Table.size());
+}
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
new file mode 100644
index 0000000..f27d78f
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,228 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <list>
+
+namespace llvm {
+  class GlobalVariable;
+  class Mangler;
+  class MachineCodeEmitter;
+  class ELFCodeEmitter;
+
+  /// ELFWriter - This class implements the common target-independent code for
+  /// writing ELF files.  Targets should derive a class from this to
+  /// parameterize the output format.
+  ///
+  class ELFWriter : public MachineFunctionPass {
+    friend class ELFCodeEmitter;
+  public:
+    static char ID;
+
+    MachineCodeEmitter &getMachineCodeEmitter() const {
+      return *(MachineCodeEmitter*)MCE;
+    }
+
+    ELFWriter(std::ostream &O, TargetMachine &TM);
+    ~ELFWriter();
+
+    typedef std::vector<unsigned char> DataBuffer;
+
+  protected:
+    /// Output stream to send the resultant object file to.
+    ///
+    std::ostream &O;
+
+    /// Target machine description.
+    ///
+    TargetMachine &TM;
+
+    /// Mang - The object used to perform name mangling for this module.
+    ///
+    Mangler *Mang;
+
+    /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+    /// code for functions to the .o file.
+    ELFCodeEmitter *MCE;
+
+    //===------------------------------------------------------------------===//
+    // Properties to be set by the derived class ctor, used to configure the
+    // ELFWriter.
+
+    // e_machine - This field is the target specific value to emit as the
+    // e_machine member of the ELF header.
+    unsigned short e_machine;
+
+    // e_flags - The machine flags for the target.  This defaults to zero.
+    unsigned e_flags;
+
+    //===------------------------------------------------------------------===//
+    // Properties inferred automatically from the target machine.
+    //
+
+    /// is64Bit/isLittleEndian - This information is inferred from the target
+    /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+    bool is64Bit, isLittleEndian;
+
+    /// doInitialization - Emit the file header and all of the global variables
+    /// for the module to the ELF file.
+    bool doInitialization(Module &M);
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+
+    /// doFinalization - Now that the module has been completely processed, emit
+    /// the ELF file to 'O'.
+    bool doFinalization(Module &M);
+
+  private:
+    // The buffer we accumulate the file header into.  Note that this should be
+    // changed into something much more efficient later (and the bitcode writer
+    // as well!).
+    DataBuffer FileHeader;
+
+    /// ELFSection - This struct contains information about each section that is
+    /// emitted to the file.  This is eventually turned into the section header
+    /// table at the end of the file.
+    struct ELFSection {
+      std::string Name;       // Name of the section.
+      unsigned NameIdx;       // Index in .shstrtab of name, once emitted.
+      unsigned Type;
+      unsigned Flags;
+      uint64_t Addr;
+      unsigned Offset;
+      unsigned Size;
+      unsigned Link;
+      unsigned Info;
+      unsigned Align;
+      unsigned EntSize;
+
+      /// SectionIdx - The number of the section in the Section Table.
+      ///
+      unsigned short SectionIdx;
+
+      /// SectionData - The actual data for this section which we are building
+      /// up for emission to the file.
+      DataBuffer SectionData;
+
+      enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3,
+             SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7,
+             SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 };
+      enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 };
+      enum {   // SHF - ELF Section Header Flags
+        SHF_WRITE            = 1 << 0, // Writable
+        SHF_ALLOC            = 1 << 1, // Mapped into the process addr space
+        SHF_EXECINSTR        = 1 << 2, // Executable
+        SHF_MERGE            = 1 << 4, // Might be merged if equal
+        SHF_STRINGS          = 1 << 5, // Contains null-terminated strings
+        SHF_INFO_LINK        = 1 << 6, // 'sh_info' contains SHT index
+        SHF_LINK_ORDER       = 1 << 7, // Preserve order after combining
+        SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
+        SHF_GROUP            = 1 << 9, // Section is a member of a group
+        SHF_TLS              = 1 << 10 // Section holds thread-local data
+      };
+
+      ELFSection(const std::string &name)
+        : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
+          Link(0), Info(0), Align(0), EntSize(0) {
+      }
+    };
+
+    /// SectionList - This is the list of sections that we have emitted to the
+    /// file.  Once the file has been completely built, the section header table
+    /// is constructed from this info.
+    std::list<ELFSection> SectionList;
+    unsigned NumSections;   // Always = SectionList.size()
+
+    /// SectionLookup - This is a mapping from section name to section number in
+    /// the SectionList.
+    std::map<std::string, ELFSection*> SectionLookup;
+
+    /// getSection - Return the section with the specified name, creating a new
+    /// section if one does not already exist.
+    ELFSection &getSection(const std::string &Name,
+                           unsigned Type, unsigned Flags = 0) {
+      ELFSection *&SN = SectionLookup[Name];
+      if (SN) return *SN;
+
+      SectionList.push_back(Name);
+      SN = &SectionList.back();
+      SN->SectionIdx = NumSections++;
+      SN->Type = Type;
+      SN->Flags = Flags;
+      return *SN;
+    }
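+    // Example use: getSection(".text", ELFSection::SHT_PROGBITS,
+    //                         ELFSection::SHF_ALLOC | ELFSection::SHF_EXECINSTR)
+    // returns the existing .text entry, or creates it on the first call.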
+
+    ELFSection &getDataSection() {
+      return getSection(".data", ELFSection::SHT_PROGBITS,
+                        ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
+    }
+    ELFSection &getBSSSection() {
+      return getSection(".bss", ELFSection::SHT_NOBITS,
+                        ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
+    }
+
+    /// ELFSym - This struct contains information about each symbol that is
+    /// added to logical symbol table for the module.  This is eventually
+    /// turned into a real symbol table in the file.
+    struct ELFSym {
+      const GlobalValue *GV;    // The global value this corresponds to.
+      unsigned NameIdx;         // Index in .strtab of name, once emitted.
+      uint64_t Value;
+      unsigned Size;
+      unsigned char Info;
+      unsigned char Other;
+      unsigned short SectionIdx;
+
+      enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
+      enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3,
+             STT_FILE = 4 };
+      ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0),
+                                      Other(0), SectionIdx(0) {}
+
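+      // Info follows the ELF st_info encoding: the symbol binding lives in the
+      // high nibble and the type in the low nibble.  For example, a global
+      // function symbol carries Info == (STB_GLOBAL << 4) | STT_FUNC == 0x12.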
+      void SetBind(unsigned X) {
+        assert(X == (X & 0xF) && "Bind value out of range!");
+        Info = (Info & 0x0F) | (X << 4);
+      }
+      void SetType(unsigned X) {
+        assert(X == (X & 0xF) && "Type value out of range!");
+        Info = (Info & 0xF0) | X;
+      }
+    };
+
+    /// SymbolTable - This is the list of symbols we have emitted to the file.
+    /// This actually gets rearranged before emission to the file (to put the
+    /// local symbols first in the list).
+    std::vector<ELFSym> SymbolTable;
+
+    // As we complete the ELF file, we need to update fields in the ELF header
+    // (e.g. the location of the section table).  These members keep track of
+    // the offset in ELFHeader of these various pieces to update and other
+    // locations in the file.
+    unsigned ELFHeader_e_shoff_Offset;     // e_shoff    in ELF header.
+    unsigned ELFHeader_e_shstrndx_Offset;  // e_shstrndx in ELF header.
+    unsigned ELFHeader_e_shnum_Offset;     // e_shnum    in ELF header.
+  private:
+    void EmitGlobal(GlobalVariable *GV);
+
+    void EmitSymbolTable();
+
+    void EmitSectionTableStringTable();
+    void OutputSectionsAndSectionTable();
+  };
+}
+
+#endif
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 0000000..3bddc77
--- /dev/null
+++ b/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1226 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+namespace {
+  // Hidden options for help debugging.
+  cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+  cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+  cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+  cl::opt<bool> DisableSimple("disable-ifcvt-simple", 
+                              cl::init(false), cl::Hidden);
+  cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false", 
+                               cl::init(false), cl::Hidden);
+  cl::opt<bool> DisableTriangle("disable-ifcvt-triangle", 
+                                cl::init(false), cl::Hidden);
+  cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev", 
+                                 cl::init(false), cl::Hidden);
+  cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false", 
+                                 cl::init(false), cl::Hidden);
+  cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", 
+                                  cl::init(false), cl::Hidden);
+  cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", 
+                               cl::init(false), cl::Hidden);
+}
+
+STATISTIC(NumSimple,       "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse,  "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle,     "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev,  "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds,     "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs,    "Number of if-converted blocks");
+STATISTIC(NumDupBBs,       "Number of duplicated blocks");
+
+namespace {
+  class IfConverter : public MachineFunctionPass {
+    enum IfcvtKind {
+      ICNotClassfied,  // BB data valid, but not classified.
+      ICSimpleFalse,   // Same as ICSimple, but on the false path.
+      ICSimple,        // BB is entry of a one-split, no-rejoin sub-CFG.
+      ICTriangleFRev,  // Same as ICTriangleFalse, but false path rev condition.
+      ICTriangleRev,   // Same as ICTriangle, but true path rev condition.
+      ICTriangleFalse, // Same as ICTriangle, but on the false path.
+      ICTriangle,      // BB is entry of a triangle sub-CFG.
+      ICDiamond        // BB is entry of a diamond sub-CFG.
+    };
+
+    /// BBInfo - One per MachineBasicBlock, this is used to cache the results
+    /// of the if-conversion feasibility analysis. This includes the results
+    /// from TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), the
+    /// block's classification, the common tail block of its successors (if
+    /// it forms a diamond shape), its size, whether it's predicable, and
+    /// whether any instruction can clobber the 'would-be' predicate.
+    ///
+    /// IsDone          - True if BB is not to be considered for ifcvt.
+    /// IsBeingAnalyzed - True if BB is currently being analyzed.
+    /// IsAnalyzed      - True if BB has been analyzed (info is still valid).
+    /// IsEnqueued      - True if BB has been enqueued to be ifcvt'ed.
+    /// IsBrAnalyzable  - True if AnalyzeBranch() returns false.
+    /// HasFallThrough  - True if BB may fallthrough to the following BB.
+    /// IsUnpredicable  - True if BB is known to be unpredicable.
+    /// ClobbersPred    - True if BB could modify predicates (e.g. has
+    ///                   cmp, call, etc.)
+    /// NonPredSize     - Number of non-predicated instructions.
+    /// BB              - Corresponding MachineBasicBlock.
+    /// TrueBB / FalseBB- See AnalyzeBranch().
+    /// BrCond          - Conditions for end of block conditional branches.
+    /// Predicate       - Predicate used in the BB.
+    struct BBInfo {
+      bool IsDone          : 1;
+      bool IsBeingAnalyzed : 1;
+      bool IsAnalyzed      : 1;
+      bool IsEnqueued      : 1;
+      bool IsBrAnalyzable  : 1;
+      bool HasFallThrough  : 1;
+      bool IsUnpredicable  : 1;
+      bool CannotBeCopied  : 1;
+      bool ClobbersPred    : 1;
+      unsigned NonPredSize;
+      MachineBasicBlock *BB;
+      MachineBasicBlock *TrueBB;
+      MachineBasicBlock *FalseBB;
+      std::vector<MachineOperand> BrCond;
+      std::vector<MachineOperand> Predicate;
+      BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+                 IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+                 HasFallThrough(false), IsUnpredicable(false),
+                 CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+                 BB(0), TrueBB(0), FalseBB(0) {}
+    };
+
+    /// IfcvtToken - Record information about pending if-conversions to attempt:
+    /// BBI             - Corresponding BBInfo.
+    /// Kind            - Type of block. See IfcvtKind.
+    /// NeedSubsumsion  - True if the to-be-predicated BB has already been
+    ///                   predicated.
+    /// NumDups      - Number of instructions that would be duplicated due
+    ///                   to this if-conversion. (For diamonds, the number of
+    ///                   identical instructions at the beginnings of both
+    ///                   paths).
+    /// NumDups2     - For diamonds, the number of identical instructions
+    ///                   at the ends of both paths.
+    struct IfcvtToken {
+      BBInfo &BBI;
+      IfcvtKind Kind;
+      bool NeedSubsumsion;
+      unsigned NumDups;
+      unsigned NumDups2;
+      IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+        : BBI(b), Kind(k), NeedSubsumsion(s), NumDups(d), NumDups2(d2) {}
+    };
+
+    /// Roots - Basic blocks that do not have successors. These are the starting
+    /// points of the graph traversal.
+    std::vector<MachineBasicBlock*> Roots;
+
+    /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+    /// basic block number.
+    std::vector<BBInfo> BBAnalysis;
+
+    const TargetLowering *TLI;
+    const TargetInstrInfo *TII;
+    bool MadeChange;
+  public:
+    static char ID;
+    IfConverter() : MachineFunctionPass((intptr_t)&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const { return "If converter"; }
+
+  private:
+    bool ReverseBranchCondition(BBInfo &BBI);
+    bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const;
+    bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+                       bool FalseBranch, unsigned &Dups) const;
+    bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+                      unsigned &Dups1, unsigned &Dups2) const;
+    void ScanInstructions(BBInfo &BBI);
+    BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+                         std::vector<IfcvtToken*> &Tokens);
+    bool FeasibilityAnalysis(BBInfo &BBI, std::vector<MachineOperand> &Cond,
+                             bool isTriangle = false, bool RevBranch = false);
+    bool AnalyzeBlocks(MachineFunction &MF,
+                       std::vector<IfcvtToken*> &Tokens);
+    void InvalidatePreds(MachineBasicBlock *BB);
+    void RemoveExtraEdges(BBInfo &BBI);
+    bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+    bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+    bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+                          unsigned NumDups1, unsigned NumDups2);
+    void PredicateBlock(BBInfo &BBI,
+                        MachineBasicBlock::iterator E,
+                        std::vector<MachineOperand> &Cond);
+    void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+                               std::vector<MachineOperand> &Cond,
+                               bool IgnoreBr = false);
+    void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI);
+
+    bool MeetIfcvtSizeLimit(unsigned Size) const {
+      return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit();
+    }
+
+    // blockAlwaysFallThrough - Block ends without a terminator.
+    bool blockAlwaysFallThrough(BBInfo &BBI) const {
+      return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+    }
+
+    // IfcvtTokenCmp - Used to sort if-conversion candidates.
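+    // Note that candidates are popped off the back of the token vector, so
+    // the entries this comparator places last (e.g. diamonds, whose shared
+    // instructions give them a negative duplication cost) are attempted first.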
+    static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+      int Incr1 = (C1->Kind == ICDiamond)
+        ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+      int Incr2 = (C2->Kind == ICDiamond)
+        ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+      if (Incr1 > Incr2)
+        return true;
+      else if (Incr1 == Incr2) {
+        // Favors subsumption.
+        if (C1->NeedSubsumsion == false && C2->NeedSubsumsion == true)
+          return true;
+        else if (C1->NeedSubsumsion == C2->NeedSubsumsion) {
+          // Favors diamond over triangle, etc.
+          if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+            return true;
+          else if (C1->Kind == C2->Kind)
+            return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+        }
+      }
+      return false;
+    }
+  };
+
+  char IfConverter::ID = 0;
+}
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+  TLI = MF.getTarget().getTargetLowering();
+  TII = MF.getTarget().getInstrInfo();
+  if (!TII) return false;
+
+  static int FnNum = -1;
+  DOUT << "\nIfcvt: function (" << ++FnNum <<  ") \'"
+       << MF.getFunction()->getName() << "\'";
+
+  if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+    DOUT << " skipped\n";
+    return false;
+  }
+  DOUT << "\n";
+
+  MF.RenumberBlocks();
+  BBAnalysis.resize(MF.getNumBlockIDs());
+
+  // Look for root nodes, i.e. blocks without successors.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+    if (I->succ_size() == 0)
+      Roots.push_back(I);
+
+  std::vector<IfcvtToken*> Tokens;
+  MadeChange = false;
+  unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+    NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+  while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+    // Do an initial analysis for each basic block and find all the potential
+    // candidates to perform if-conversion.
+    bool Change = AnalyzeBlocks(MF, Tokens);
+    while (!Tokens.empty()) {
+      IfcvtToken *Token = Tokens.back();
+      Tokens.pop_back();
+      BBInfo &BBI = Token->BBI;
+      IfcvtKind Kind = Token->Kind;
+
+      // If the block has been evicted from the queue or has already been
+      // marked dead (due to it being predicated), then skip it.
+      if (BBI.IsDone)
+        BBI.IsEnqueued = false;
+      if (!BBI.IsEnqueued)
+        continue;
+
+      BBI.IsEnqueued = false;
+
+      bool RetVal = false;
+      switch (Kind) {
+      default: assert(false && "Unexpected!");
+        break;
+      case ICSimple:
+      case ICSimpleFalse: {
+        bool isFalse = Kind == ICSimpleFalse;
+        if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+        DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+             << "): BB#" << BBI.BB->getNumber() << " ("
+             << ((Kind == ICSimpleFalse)
+                 ? BBI.FalseBB->getNumber()
+                 : BBI.TrueBB->getNumber()) << ") ";
+        RetVal = IfConvertSimple(BBI, Kind);
+        DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+        if (RetVal)
+          if (isFalse) NumSimpleFalse++;
+          else         NumSimple++;
+        break;
+      }
+      case ICTriangle:
+      case ICTriangleRev:
+      case ICTriangleFalse:
+      case ICTriangleFRev: {
+        bool isFalse = Kind == ICTriangleFalse;
+        bool isRev   = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+        if (DisableTriangle && !isFalse && !isRev) break;
+        if (DisableTriangleR && !isFalse && isRev) break;
+        if (DisableTriangleF && isFalse && !isRev) break;
+        if (DisableTriangleFR && isFalse && isRev) break;
+        DOUT << "Ifcvt (Triangle";
+        if (isFalse)
+          DOUT << " false";
+        if (isRev)
+          DOUT << " rev";
+        DOUT << "): BB#" << BBI.BB->getNumber() << " (T:"
+             << BBI.TrueBB->getNumber() << ",F:"
+             << BBI.FalseBB->getNumber() << ") ";
+        RetVal = IfConvertTriangle(BBI, Kind);
+        DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+        if (RetVal) {
+          if (isFalse) {
+            if (isRev) NumTriangleFRev++;
+            else       NumTriangleFalse++;
+          } else {
+            if (isRev) NumTriangleRev++;
+            else       NumTriangle++;
+          }
+        }
+        break;
+      }
+      case ICDiamond: {
+        if (DisableDiamond) break;
+        DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+             << BBI.TrueBB->getNumber() << ",F:"
+             << BBI.FalseBB->getNumber() << ") ";
+        RetVal = IfConvertDiamond(BBI, Kind, Token->NumDups, Token->NumDups2);
+        DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+        if (RetVal) NumDiamonds++;
+        break;
+      }
+      }
+
+      Change |= RetVal;
+
+      NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+        NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+      if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+        break;
+    }
+
+    if (!Change)
+      break;
+    MadeChange |= Change;
+  }
+
+  // Delete tokens in case of early exit.
+  while (!Tokens.empty()) {
+    IfcvtToken *Token = Tokens.back();
+    Tokens.pop_back();
+    delete Token;
+  }
+
+  Tokens.clear();
+  Roots.clear();
+  BBAnalysis.clear();
+
+  return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+                                         MachineBasicBlock *TrueBB) {
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+         E = BB->succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    if (SuccBB != TrueBB)
+      return SuccBB;
+  }
+  return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the branch at the end of
+/// the block. Swap the block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+  if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+    TII->RemoveBranch(*BBI.BB);
+    TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond);
+    std::swap(BBI.TrueBB, BBI.FalseBB);
+    return true;
+  }
+  return false;
+}
+
+/// getNextBlock - Returns the next block in the function's block ordering.
+/// Returns NULL if BB is the last block.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+  MachineFunction::iterator I = BB;
+  MachineFunction::iterator E = BB->getParent()->end();
+  if (++I == E)
+    return NULL;
+  return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// Dups, the number of instructions that the ifcvt would need to duplicate
+/// if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+  Dups = 0;
+  if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+    return false;
+
+  if (TrueBBI.IsBrAnalyzable)
+    return false;
+
+  if (TrueBBI.BB->pred_size() > 1) {
+    if (TrueBBI.CannotBeCopied ||
+        TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit())
+      return false;
+    Dups = TrueBBI.NonPredSize;
+  }
+
+  return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks whether the 'true' block's false
+/// branch goes to the 'false' block rather than the other way around. It
+/// also returns, in 'Dups', the number of instructions that the ifcvt would
+/// need to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+                                bool FalseBranch, unsigned &Dups) const {
+  Dups = 0;
+  if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+    return false;
+
+  if (TrueBBI.BB->pred_size() > 1) {
+    if (TrueBBI.CannotBeCopied)
+      return false;
+
+    unsigned Size = TrueBBI.NonPredSize;
+    if (TrueBBI.IsBrAnalyzable) {
+      if (TrueBBI.TrueBB && TrueBBI.BrCond.size() == 0)
+        // End with an unconditional branch. It will be removed.
+        --Size;
+      else {
+        MachineBasicBlock *FExit = FalseBranch
+          ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+        if (FExit)
+          // Require a conditional branch
+          ++Size;
+      }
+    }
+    if (Size > TLI->getIfCvtDupBlockSizeLimit())
+      return false;
+    Dups = Size;
+  }
+
+  MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+  if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+    MachineFunction::iterator I = TrueBBI.BB;
+    if (++I == TrueBBI.BB->getParent()->end())
+      return false;
+    TExit = I;
+  }
+  return TExit && TExit == FalseBBI.BB;
+}
+
+static
+MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
+                                               const TargetInstrInfo *TII) {
+  MachineBasicBlock::iterator I = BB->end();
+  while (I != BB->begin()) {
+    --I;
+    const TargetInstrDescriptor *TID = I->getInstrDescriptor();
+    if ((TID->Flags & M_BRANCH_FLAG) == 0)
+      break;
+  }
+  return I;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+                               unsigned &Dups1, unsigned &Dups2) const {
+  Dups1 = Dups2 = 0;
+  if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+      FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+    return false;
+
+  MachineBasicBlock *TT = TrueBBI.TrueBB;
+  MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+  if (!TT && blockAlwaysFallThrough(TrueBBI))
+    TT = getNextBlock(TrueBBI.BB);
+  if (!FT && blockAlwaysFallThrough(FalseBBI))
+    FT = getNextBlock(FalseBBI.BB);
+  if (TT != FT)
+    return false;
+  if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+    return false;
+  if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+    return false;
+
+  // FIXME: Allow true block to have an early exit?
+  if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+      (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+    return false;
+
+  MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
+  MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+  while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) {
+    if (!TI->isIdenticalTo(FI))
+      break;
+    ++Dups1;
+    ++TI;
+    ++FI;
+  }
+
+  TI = firstNonBranchInst(TrueBBI.BB, TII);
+  FI = firstNonBranchInst(FalseBBI.BB, TII);
+  while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) {
+    if (!TI->isIdenticalTo(FI))
+      break;
+    ++Dups2;
+    --TI;
+    --FI;
+  }
+
+  return true;
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block have the M_PREDICABLE flag. It also checks whether the block
+/// contains any instruction which can clobber a predicate (e.g. a condition
+/// code register); if so, the block is not predicable unless that instruction
+/// is the last one.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+  if (BBI.IsDone)
+    return;
+
+  bool AlreadyPredicated = BBI.Predicate.size() > 0;
+  // First analyze the end of BB branches.
+  BBI.TrueBB = BBI.FalseBB = NULL;
+  BBI.BrCond.clear();
+  BBI.IsBrAnalyzable =
+    !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+  BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+  if (BBI.BrCond.size()) {
+    // No false branch. This BB must end with a conditional branch and a
+    // fallthrough.
+    if (!BBI.FalseBB)
+      BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);  
+    assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+  }
+
+  // Then scan all the instructions.
+  BBI.NonPredSize = 0;
+  BBI.ClobbersPred = false;
+  bool SeenCondBr = false;
+  for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+       I != E; ++I) {
+    const TargetInstrDescriptor *TID = I->getInstrDescriptor();
+    if ((TID->Flags & M_NOT_DUPLICABLE) != 0)
+      BBI.CannotBeCopied = true;
+
+    bool isPredicated = TII->isPredicated(I);
+    bool isCondBr = BBI.IsBrAnalyzable &&
+      (TID->Flags & M_BRANCH_FLAG) != 0 && (TID->Flags & M_BARRIER_FLAG) == 0;
+
+    if (!isCondBr) {
+      if (!isPredicated)
+        BBI.NonPredSize++;
+      else if (!AlreadyPredicated) {
+        // FIXME: This instruction is already predicated before the
+        // if-conversion pass. It's probably something like a conditional move.
+        // Mark this block unpredicable for now.
+        BBI.IsUnpredicable = true;
+        return;
+      }
+        
+    }
+
+    if (BBI.ClobbersPred && !isPredicated) {
+      // A predicate-modifying instruction should end the block (except for
+      // already-predicated instructions and end-of-block branches).
+      if (isCondBr) {
+        SeenCondBr = true;
+
+        // A conditional branch is not predicable, but it may be eliminated.
+        continue;
+      }
+
+      // The predicate may have been modified; the subsequent (currently)
+      // unpredicated instructions cannot be correctly predicated.
+      BBI.IsUnpredicable = true;
+      return;
+    }
+
+    // FIXME: Make use of PredDefs? e.g. ADDC, SUBC set predicates but are
+    // still potentially predicable.
+    std::vector<MachineOperand> PredDefs;
+    if (TII->DefinesPredicate(I, PredDefs))
+      BBI.ClobbersPred = true;
+
+    if ((TID->Flags & M_PREDICABLE) == 0) {
+      BBI.IsUnpredicable = true;
+      return;
+    }
+  }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+                                      std::vector<MachineOperand> &Pred,
+                                      bool isTriangle, bool RevBranch) {
+  // If the block is dead or unpredicable, then it cannot be predicated.
+  if (BBI.IsDone || BBI.IsUnpredicable)
+    return false;
+
+  // If it is already predicated, check if its predicate subsumes the new
+  // predicate.
+  if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+    return false;
+
+  if (BBI.BrCond.size()) {
+    if (!isTriangle)
+      return false;
+
+    // Test predicate subsumsion.
+    std::vector<MachineOperand> RevPred(Pred);
+    std::vector<MachineOperand> Cond(BBI.BrCond);
+    if (RevBranch) {
+      if (TII->ReverseBranchCondition(Cond))
+        return false;
+    }
+    if (TII->ReverseBranchCondition(RevPred) ||
+        !TII->SubsumesPredicate(Cond, RevPred))
+      return false;
+  }
+
+  return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+                                             std::vector<IfcvtToken*> &Tokens) {
+  BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+  if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+    return BBI;
+
+  BBI.BB = BB;
+  BBI.IsBeingAnalyzed = true;
+
+  ScanInstructions(BBI);
+
+  // Unanalyzable, or ends with a fallthrough or an unconditional branch.
+  if (!BBI.IsBrAnalyzable || BBI.BrCond.size() == 0) {
+    BBI.IsBeingAnalyzed = false;
+    BBI.IsAnalyzed = true;
+    return BBI;
+  }
+
+  // Do not ifcvt if either path is a back edge to the block itself.
+  if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+    BBI.IsBeingAnalyzed = false;
+    BBI.IsAnalyzed = true;
+    return BBI;
+  }
+
+  BBInfo &TrueBBI  = AnalyzeBlock(BBI.TrueBB, Tokens);
+  BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+  if (TrueBBI.IsDone && FalseBBI.IsDone) {
+    BBI.IsBeingAnalyzed = false;
+    BBI.IsAnalyzed = true;
+    return BBI;
+  }
+
+  std::vector<MachineOperand> RevCond(BBI.BrCond);
+  bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+  unsigned Dups = 0;
+  unsigned Dups2 = 0;
+  bool TNeedSub = TrueBBI.Predicate.size() > 0;
+  bool FNeedSub = FalseBBI.Predicate.size() > 0;
+  bool Enqueued = false;
+  if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+      MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) &&
+      MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) &&
+      FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+      FeasibilityAnalysis(FalseBBI, RevCond)) {
+    // Diamond:
+    //   EBB
+    //   / \_
+    //  |   |
+    // TBB FBB
+    //   \ /
+    //  TailBB
+    // Note TailBB can be empty.
+    Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+                                    Dups2));
+    Enqueued = true;
+  }
+
+  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
+      MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+      FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+    // Triangle:
+    //   EBB
+    //   | \_
+    //   |  |
+    //   | TBB
+    //   |  /
+    //   FBB
+    Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+    Enqueued = true;
+  }
+  
+  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
+      MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+      FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+    Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+    Enqueued = true;
+  }
+
+  if (ValidSimple(TrueBBI, Dups) &&
+      MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+      FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+    // Simple (split, no rejoin):
+    //   EBB
+    //   | \_
+    //   |  |
+    //   | TBB---> exit
+    //   |    
+    //   FBB
+    Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+    Enqueued = true;
+  }
+
+  if (CanRevCond) {
+    // Try the other path...
+    if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
+        MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+        FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+      Enqueued = true;
+    }
+
+    if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
+        MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+        FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+      Enqueued = true;
+    }
+
+    if (ValidSimple(FalseBBI, Dups) &&
+        MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+        FeasibilityAnalysis(FalseBBI, RevCond)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+      Enqueued = true;
+    }
+  }
+
+  BBI.IsEnqueued = Enqueued;
+  BBI.IsBeingAnalyzed = false;
+  BBI.IsAnalyzed = true;
+  return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates. It returns true if any CFG restructuring is done to expose more
+/// if-conversion opportunities.
+bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+                                std::vector<IfcvtToken*> &Tokens) {
+  bool Change = false;
+  std::set<MachineBasicBlock*> Visited;
+  for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
+    for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
+           E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
+      MachineBasicBlock *BB = *I;
+      AnalyzeBlock(BB, Tokens);
+    }
+  }
+
+  // Sort to favor more complex ifcvt scheme.
+  std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+
+  return Change;
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to
+/// its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+  MachineFunction::iterator I = BB;
+  MachineFunction::iterator TI = ToBB;
+  MachineFunction::iterator E = BB->getParent()->end();
+  while (++I != TI)
+    if (I == E || !I->empty())
+      return false;
+  return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine whether it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+  for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+         E = BB->pred_end(); PI != E; ++PI) {
+    BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+    if (PBBI.IsDone || PBBI.BB == BB)
+      continue;
+    PBBI.IsAnalyzed = false;
+    PBBI.IsEnqueued = false;
+  }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+                               const TargetInstrInfo *TII) {
+  std::vector<MachineOperand> NoCond;
+  TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+  MachineBasicBlock *TBB = NULL, *FBB = NULL;
+  std::vector<MachineOperand> Cond;
+  if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+    BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  BBInfo *CvtBBI = &TrueBBI;
+  BBInfo *NextBBI = &FalseBBI;
+
+  std::vector<MachineOperand> Cond(BBI.BrCond);
+  if (Kind == ICSimpleFalse)
+    std::swap(CvtBBI, NextBBI);
+
+  if (CvtBBI->IsDone ||
+      (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+    // Something has changed. It's no longer safe to predicate this block.
+    BBI.IsAnalyzed = false;
+    CvtBBI->IsAnalyzed = false;
+    return false;
+  }
+
+  if (Kind == ICSimpleFalse)
+    TII->ReverseBranchCondition(Cond);
+
+  if (CvtBBI->BB->pred_size() > 1) {
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    // Copy instructions in the true block, predicate them, and add them to
+    // the entry block.
+    CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+  } else {
+    PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+    // Merge converted block into entry block.
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    MergeBlocks(BBI, *CvtBBI);
+  }
+
+  bool IterIfcvt = true;
+  if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+    InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+    BBI.HasFallThrough = false;
+    // Now ifcvt'd block will look like this:
+    // BB:
+    // ...
+    // t, f = cmp
+    // if t op
+    // b BBf
+    //
+    // We cannot further ifcvt this block because the unconditional branch
+    // will have to be predicated on the new condition, which will not be
+    // available if the cmp executes.
+    IterIfcvt = false;
+  }
+
+  RemoveExtraEdges(BBI);
+
+  // Update block info. BB can be iteratively if-converted.
+  if (!IterIfcvt)
+    BBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+  CvtBBI->IsDone = true;
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+  BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  BBInfo *CvtBBI = &TrueBBI;
+  BBInfo *NextBBI = &FalseBBI;
+
+  std::vector<MachineOperand> Cond(BBI.BrCond);
+  if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+    std::swap(CvtBBI, NextBBI);
+
+  if (CvtBBI->IsDone ||
+      (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+    // Something has changed. It's no longer safe to predicate this block.
+    BBI.IsAnalyzed = false;
+    CvtBBI->IsAnalyzed = false;
+    return false;
+  }
+
+  if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+    TII->ReverseBranchCondition(Cond);
+
+  if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+    ReverseBranchCondition(*CvtBBI);
+    // BB has been changed, modify its predecessors (except for this
+    // one) so they don't get ifcvt'ed based on bad intel.
+    for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+           E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+      MachineBasicBlock *PBB = *PI;
+      if (PBB == BBI.BB)
+        continue;
+      BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+      if (PBBI.IsEnqueued) {
+        PBBI.IsAnalyzed = false;
+        PBBI.IsEnqueued = false;
+      }
+    }
+  }
+
+  bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+  bool DupBB = CvtBBI->BB->pred_size() > 1;
+  if (DupBB) {
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    // Copy instructions in the true block, predicate them, and add them to
+    // the entry block.
+    CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+  } else {
+    // Predicate the 'true' block after removing its branch.
+    CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+    PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+  }
+
+  if (!DupBB) {
+    // Now merge the entry of the triangle with the true block.
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    MergeBlocks(BBI, *CvtBBI);
+  }
+
+  // If 'true' block has a 'false' successor, add an exit branch to it.
+  if (HasEarlyExit) {
+    std::vector<MachineOperand> RevCond(CvtBBI->BrCond);
+    if (TII->ReverseBranchCondition(RevCond))
+      assert(false && "Unable to reverse branch condition!");
+    TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond);
+    BBI.BB->addSuccessor(CvtBBI->FalseBB);
+  }
+
+  // Merge in the 'false' block if the 'false' block has no other
+  // predecessors. Otherwise, add an unconditional branch to the 'false' block.
+  bool FalseBBDead = false;
+  bool IterIfcvt = true;
+  bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+  if (!isFallThrough) {
+    // Only merge them if the true block does not fall through to the false
+    // block. By not merging them, we make it possible to iteratively
+    // ifcvt the blocks.
+    if (!HasEarlyExit &&
+        NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+      MergeBlocks(BBI, *NextBBI);
+      FalseBBDead = true;
+    } else {
+      InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+      BBI.HasFallThrough = false;
+    }
+    // Mixed predicated and unpredicated code. This cannot be iteratively
+    // predicated.
+    IterIfcvt = false;
+  }
+
+  RemoveExtraEdges(BBI);
+
+  // Update block info. BB can be iteratively if-converted.
+  if (!IterIfcvt) 
+    BBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+  CvtBBI->IsDone = true;
+  if (FalseBBDead)
+    NextBBI->IsDone = true;
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+                                   unsigned NumDups1, unsigned NumDups2) {
+  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+  // The true block must fall through or end with an unanalyzable terminator.
+  if (!TailBB) {
+    if (blockAlwaysFallThrough(TrueBBI))
+      TailBB = FalseBBI.TrueBB;
+    assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+  }
+
+  if (TrueBBI.IsDone || FalseBBI.IsDone ||
+      TrueBBI.BB->pred_size() > 1 ||
+      FalseBBI.BB->pred_size() > 1) {
+    // Something has changed. It's no longer safe to predicate these blocks.
+    BBI.IsAnalyzed = false;
+    TrueBBI.IsAnalyzed = false;
+    FalseBBI.IsAnalyzed = false;
+    return false;
+  }
+
+  // Merge the 'true' and 'false' blocks by copying the instructions
+  // from the 'false' block to the 'true' block. That is, unless the true
+  // block would clobber the predicate, in which case do the opposite.
+  BBInfo *BBI1 = &TrueBBI;
+  BBInfo *BBI2 = &FalseBBI;
+  std::vector<MachineOperand> RevCond(BBI.BrCond);
+  TII->ReverseBranchCondition(RevCond);
+  std::vector<MachineOperand> *Cond1 = &BBI.BrCond;
+  std::vector<MachineOperand> *Cond2 = &RevCond;
+
+  // Figure out the more profitable ordering.
+  bool DoSwap = false;
+  if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+    DoSwap = true;
+  else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+    if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+      DoSwap = true;
+  }
+  if (DoSwap) {
+    std::swap(BBI1, BBI2);
+    std::swap(Cond1, Cond2);
+  }
+
+  // Remove the conditional branch from entry to the blocks.
+  BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+  // Remove the duplicated instructions at the beginnings of both paths.
+  MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+  MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+  BBI1->NonPredSize -= NumDups1;
+  BBI2->NonPredSize -= NumDups1;
+  while (NumDups1 != 0) {
+    ++DI1;
+    ++DI2;
+    --NumDups1;
+  }
+  BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+  BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+  // Predicate the 'true' block after removing its branch.
+  BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+  DI1 = BBI1->BB->end();
+  for (unsigned i = 0; i != NumDups2; ++i)
+    --DI1;
+  BBI1->BB->erase(DI1, BBI1->BB->end());
+  PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+
+  // Predicate the 'false' block.
+  BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+  DI2 = BBI2->BB->end();
+  while (NumDups2 != 0) {
+    --DI2;
+    --NumDups2;
+  }
+  PredicateBlock(*BBI2, DI2, *Cond2);
+
+  // Merge the true block into the entry of the diamond.
+  MergeBlocks(BBI, *BBI1);
+  MergeBlocks(BBI, *BBI2);
+
+  // If the if-converted block falls through or unconditionally branches into
+  // the tail block, and the tail block does not have other predecessors, then
+  // fold the tail block in as well. Otherwise, unless it falls through to the
+  // tail, add an unconditional branch to it.
+  if (TailBB) {
+    BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+    if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
+      BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+      MergeBlocks(BBI, TailBBI);
+      TailBBI.IsDone = true;
+    } else {
+      InsertUncondBranch(BBI.BB, TailBB, TII);
+      BBI.HasFallThrough = false;
+    }
+  }
+
+  RemoveExtraEdges(BBI);
+
+  // Update block info.
+  BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+                                 MachineBasicBlock::iterator E,
+                                 std::vector<MachineOperand> &Cond) {
+  for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+    if (TII->isPredicated(I))
+      continue;
+    if (!TII->PredicateInstruction(I, Cond)) {
+      cerr << "Unable to predicate " << *I << "!\n";
+      abort();
+    }
+  }
+
+  std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+  BBI.IsAnalyzed = false;
+  BBI.NonPredSize = 0;
+
+  NumIfConvBBs++;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from the source BB
+/// to the destination block. Skip end-of-block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+                                        std::vector<MachineOperand> &Cond,
+                                        bool IgnoreBr) {
+  for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+         E = FromBBI.BB->end(); I != E; ++I) {
+    const TargetInstrDescriptor *TID = I->getInstrDescriptor();
+    bool isPredicated = TII->isPredicated(I);
+    // Do not copy the end of the block branches.
+    if (IgnoreBr && !isPredicated && (TID->Flags & M_BRANCH_FLAG) != 0)
+      break;
+
+    MachineInstr *MI = I->clone();
+    ToBBI.BB->insert(ToBBI.BB->end(), MI);
+    ToBBI.NonPredSize++;
+
+    if (!isPredicated)
+      if (!TII->PredicateInstruction(MI, Cond)) {
+        cerr << "Unable to predicate " << *MI << "!\n";
+        abort();
+      }
+  }
+
+  std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+                                         FromBBI.BB->succ_end());
+  MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+  MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+  for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = Succs[i];
+    // Fallthrough edge can't be transferred.
+    if (Succ == FallThrough)
+      continue;
+    if (!ToBBI.BB->isSuccessor(Succ))
+      ToBBI.BB->addSuccessor(Succ);
+  }
+
+  std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+            std::back_inserter(ToBBI.Predicate));
+  std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+  ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+  ToBBI.IsAnalyzed = false;
+
+  NumDupBBs++;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+///
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
+  ToBBI.BB->splice(ToBBI.BB->end(),
+                   FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+  // Redirect all branches that targeted FromBB to target ToBB instead.
+  std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(),
+                                         FromBBI.BB->pred_end());
+  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+    MachineBasicBlock *Pred = Preds[i];
+    if (Pred == ToBBI.BB)
+      continue;
+    Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB);
+  }
+ 
+  std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+                                         FromBBI.BB->succ_end());
+  MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+  MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+  for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = Succs[i];
+    // Fallthrough edge can't be transferred.
+    if (Succ == FallThrough)
+      continue;
+    FromBBI.BB->removeSuccessor(Succ);
+    if (!ToBBI.BB->isSuccessor(Succ))
+      ToBBI.BB->addSuccessor(Succ);
+  }
+
+  // Now FromBBI always falls through to the next block!
+  if (NBB && !FromBBI.BB->isSuccessor(NBB))
+    FromBBI.BB->addSuccessor(NBB);
+
+  std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+            std::back_inserter(ToBBI.Predicate));
+  FromBBI.Predicate.clear();
+
+  ToBBI.NonPredSize += FromBBI.NonPredSize;
+  FromBBI.NonPredSize = 0;
+
+  ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+  ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+  ToBBI.IsAnalyzed = false;
+  FromBBI.IsAnalyzed = false;
+}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..8ae4df6
--- /dev/null
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,799 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+                                 ArgIt ArgBegin, ArgIt ArgEnd,
+                                 const Type *RetTy) {
+  // Insert a correctly-typed definition now.
+  std::vector<const Type *> ParamTys;
+  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+    ParamTys.push_back(I->getType());
+  M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function.  This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
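+/// For example, the memcpy/memmove/memset lowerings later in this file use it
+/// to turn the corresponding intrinsic calls into calls to the external C
+/// library symbols, reusing any prototype that is already in the module.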
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+                                 ArgIt ArgBegin, ArgIt ArgEnd,
+                                 const Type *RetTy, Constant *&FCache) {
+  if (!FCache) {
+    // If we haven't already looked up this function, check to see if the
+    // program already contains a function with this name.
+    Module *M = CI->getParent()->getParent()->getParent();
+    // Get or insert the definition now.
+    std::vector<const Type *> ParamTys;
+    for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+      ParamTys.push_back((*I)->getType());
+    FCache = M->getOrInsertFunction(NewFn,
+                                    FunctionType::get(RetTy, ParamTys, false));
+  }
+
+  SmallVector<Value*, 8> Operands(ArgBegin, ArgEnd);
+  CallInst *NewCI = new CallInst(FCache, &Operands[0], Operands.size(),
+                                 CI->getName(), CI);
+  if (!CI->use_empty())
+    CI->replaceAllUsesWith(NewCI);
+  return NewCI;
+}
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    if (I->isDeclaration() && !I->use_empty())
+      switch (I->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::setjmp:
+        EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+                             Type::Int32Ty);
+        break;
+      case Intrinsic::longjmp:
+        EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+                             Type::VoidTy);
+        break;
+      case Intrinsic::siglongjmp:
+        EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+                             Type::VoidTy);
+        break;
+      case Intrinsic::memcpy_i32:
+      case Intrinsic::memcpy_i64:
+        M.getOrInsertFunction("memcpy", PointerType::get(Type::Int8Ty),
+                              PointerType::get(Type::Int8Ty), 
+                              PointerType::get(Type::Int8Ty), 
+                              TD.getIntPtrType(), (Type *)0);
+        break;
+      case Intrinsic::memmove_i32:
+      case Intrinsic::memmove_i64:
+        M.getOrInsertFunction("memmove", PointerType::get(Type::Int8Ty),
+                              PointerType::get(Type::Int8Ty), 
+                              PointerType::get(Type::Int8Ty), 
+                              TD.getIntPtrType(), (Type *)0);
+        break;
+      case Intrinsic::memset_i32:
+      case Intrinsic::memset_i64:
+        M.getOrInsertFunction("memset", PointerType::get(Type::Int8Ty),
+                              PointerType::get(Type::Int8Ty), Type::Int32Ty, 
+                              TD.getIntPtrType(), (Type *)0);
+        break;
+      case Intrinsic::sqrt_f32:
+      case Intrinsic::sqrt_f64:
+        if (I->arg_begin()->getType() == Type::FloatTy)
+          EnsureFunctionExists(M, "sqrtf", I->arg_begin(), I->arg_end(),
+                               Type::FloatTy);
+        else
+          EnsureFunctionExists(M, "sqrt", I->arg_begin(), I->arg_end(),
+                               Type::DoubleTy);
+        break;
+      }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
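+///
+/// For illustration, for a 32-bit value V = 0x12345678 the expansion below
+/// computes (V<<24) | ((V<<8)&0xFF0000) | ((V>>8)&0xFF00) | (V>>24), i.e.
+/// 0x78563412.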
+static Value *LowerBSWAP(Value *V, Instruction *IP) {
+  assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
+
+  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+  
+  switch(BitSize) {
+  default: assert(0 && "Unhandled type size of value to byteswap!");
+  case 16: {
+    Value *Tmp1 = BinaryOperator::createShl(V,
+                                ConstantInt::get(V->getType(),8),"bswap.2",IP);
+    Value *Tmp2 = BinaryOperator::createLShr(V,
+                                ConstantInt::get(V->getType(),8),"bswap.1",IP);
+    V = BinaryOperator::createOr(Tmp1, Tmp2, "bswap.i16", IP);
+    break;
+  }
+  case 32: {
+    Value *Tmp4 = BinaryOperator::createShl(V,
+                              ConstantInt::get(V->getType(),24),"bswap.4", IP);
+    Value *Tmp3 = BinaryOperator::createShl(V,
+                              ConstantInt::get(V->getType(),8),"bswap.3",IP);
+    Value *Tmp2 = BinaryOperator::createLShr(V,
+                              ConstantInt::get(V->getType(),8),"bswap.2",IP);
+    Value *Tmp1 = BinaryOperator::createLShr(V,
+                              ConstantInt::get(V->getType(),24),"bswap.1", IP);
+    Tmp3 = BinaryOperator::createAnd(Tmp3, 
+                                     ConstantInt::get(Type::Int32Ty, 0xFF0000),
+                                     "bswap.and3", IP);
+    Tmp2 = BinaryOperator::createAnd(Tmp2, 
+                                     ConstantInt::get(Type::Int32Ty, 0xFF00),
+                                     "bswap.and2", IP);
+    Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or1", IP);
+    Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or2", IP);
+    V = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.i32", IP);
+    break;
+  }
+  case 64: {
+    Value *Tmp8 = BinaryOperator::createShl(V,
+                              ConstantInt::get(V->getType(),56),"bswap.8", IP);
+    Value *Tmp7 = BinaryOperator::createShl(V,
+                              ConstantInt::get(V->getType(),40),"bswap.7", IP);
+    Value *Tmp6 = BinaryOperator::createShl(V,
+                              ConstantInt::get(V->getType(),24),"bswap.6", IP);
+    Value *Tmp5 = BinaryOperator::createShl(V,
+                              ConstantInt::get(V->getType(),8),"bswap.5", IP);
+    Value* Tmp4 = BinaryOperator::createLShr(V,
+                              ConstantInt::get(V->getType(),8),"bswap.4", IP);
+    Value* Tmp3 = BinaryOperator::createLShr(V,
+                              ConstantInt::get(V->getType(),24),"bswap.3", IP);
+    Value* Tmp2 = BinaryOperator::createLShr(V,
+                              ConstantInt::get(V->getType(),40),"bswap.2", IP);
+    Value* Tmp1 = BinaryOperator::createLShr(V,
+                              ConstantInt::get(V->getType(),56),"bswap.1", IP);
+    Tmp7 = BinaryOperator::createAnd(Tmp7,
+                             ConstantInt::get(Type::Int64Ty, 
+                               0xFF000000000000ULL),
+                             "bswap.and7", IP);
+    Tmp6 = BinaryOperator::createAnd(Tmp6,
+                             ConstantInt::get(Type::Int64Ty, 0xFF0000000000ULL),
+                             "bswap.and6", IP);
+    Tmp5 = BinaryOperator::createAnd(Tmp5,
+                             ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
+                             "bswap.and5", IP);
+    Tmp4 = BinaryOperator::createAnd(Tmp4,
+                             ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
+                             "bswap.and4", IP);
+    Tmp3 = BinaryOperator::createAnd(Tmp3,
+                             ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
+                             "bswap.and3", IP);
+    Tmp2 = BinaryOperator::createAnd(Tmp2,
+                             ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
+                             "bswap.and2", IP);
+    Tmp8 = BinaryOperator::createOr(Tmp8, Tmp7, "bswap.or1", IP);
+    Tmp6 = BinaryOperator::createOr(Tmp6, Tmp5, "bswap.or2", IP);
+    Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or3", IP);
+    Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or4", IP);
+    Tmp8 = BinaryOperator::createOr(Tmp8, Tmp6, "bswap.or5", IP);
+    Tmp4 = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.or6", IP);
+    V = BinaryOperator::createOr(Tmp8, Tmp4, "bswap.i64", IP);
+    break;
+  }
+  }
+  return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
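+///
+/// The expansion uses the classic parallel bit-count: at each step adjacent
+/// groups of bits are masked with one of the MaskValues constants and added
+/// pairwise, doubling the group width, so e.g. ctpop(0xD6) yields 5.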
+static Value *LowerCTPOP(Value *V, Instruction *IP) {
+  assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
+
+  static const uint64_t MaskValues[6] = {
+    0x5555555555555555ULL, 0x3333333333333333ULL,
+    0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+    0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+  };
+
+  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+  unsigned WordSize = (BitSize + 63) / 64;
+  Value *Count = ConstantInt::get(V->getType(), 0);
+
+  for (unsigned n = 0; n < WordSize; ++n) {
+    Value *PartValue = V;
+    for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize); 
+         i <<= 1, ++ct) {
+      Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+      Value *LHS = BinaryOperator::createAnd(
+                     PartValue, MaskCst, "ctpop.and1", IP);
+      Value *VShift = BinaryOperator::createLShr(PartValue,
+                        ConstantInt::get(V->getType(), i), "ctpop.sh", IP);
+      Value *RHS = BinaryOperator::createAnd(VShift, MaskCst, "ctpop.and2", IP);
+      PartValue = BinaryOperator::createAdd(LHS, RHS, "ctpop.step", IP);
+    }
+    Count = BinaryOperator::createAdd(PartValue, Count, "ctpop.part", IP);
+    if (BitSize > 64) {
+      V = BinaryOperator::createLShr(V, ConstantInt::get(V->getType(), 64), 
+                                     "ctpop.part.sh", IP);
+      BitSize -= 64;
+    }
+  }
+
+  return CastInst::createIntegerCast(Count, Type::Int32Ty, false, "ctpop", IP);
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
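+///
+/// The lowering ORs the value with progressively larger right shifts to smear
+/// the highest set bit into all lower positions, then counts the bits of the
+/// complement.  For a 16-bit value 0x00F0 the smear gives 0x00FF, and
+/// ctpop(~0x00FF) = ctpop(0xFF00) = 8 leading zeros.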
+static Value *LowerCTLZ(Value *V, Instruction *IP) {
+
+  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+  for (unsigned i = 1; i < BitSize; i <<= 1) {
+    Value *ShVal = ConstantInt::get(V->getType(), i);
+    ShVal = BinaryOperator::createLShr(V, ShVal, "ctlz.sh", IP);
+    V = BinaryOperator::createOr(V, ShVal, "ctlz.step", IP);
+  }
+
+  V = BinaryOperator::createNot(V, "", IP);
+  return LowerCTPOP(V, IP);
+}
+
+/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes 
+/// three integer arguments. The first argument is the Value from which the
+/// bits will be selected. It may be of any bit width. The second and third
+/// arguments specify a range of bits to select with the second argument 
+/// specifying the low bit and the third argument specifying the high bit. Both
+/// must be type i32. The result is the corresponding selected bits from the
+/// Value in the same width as the Value (first argument). If the low bit index
+/// is higher than the high bit index then the inverse selection is done and 
+/// the bits are returned in inverse order. 
+/// @brief Lowering of llvm.part.select intrinsic.
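+///
+/// For illustration, part.select(i16 0xABCD, i32 4, i32 11) shifts the value
+/// right by 4 and masks with 0xFF, selecting bits [11,4] and yielding 0xBC.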
+static Instruction *LowerPartSelect(CallInst *CI) {
+  // Make sure we're dealing with a part select intrinsic here
+  Function *F = CI->getCalledFunction();
+  const FunctionType *FT = F->getFunctionType();
+  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+      FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
+      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
+    return CI;
+
+  // Get the intrinsic implementation function by converting all the . to _
+  // in the intrinsic's function name and then reconstructing the function
+  // declaration.
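+  // For example, an intrinsic named llvm.part.select.i17 would be lowered to a
+  // call to a weak implementation function named llvm_part_select_i17.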
+  std::string Name(F->getName());
+  for (unsigned i = 4; i < Name.length(); ++i)
+    if (Name[i] == '.')
+      Name[i] = '_';
+  Module* M = F->getParent();
+  F = cast<Function>(M->getOrInsertFunction(Name, FT));
+  F->setLinkage(GlobalValue::WeakLinkage);
+
+  // If we haven't defined the impl function yet, do so now
+  if (F->isDeclaration()) {
+
+    // Get the arguments to the function
+    Function::arg_iterator args = F->arg_begin();
+    Value* Val = args++; Val->setName("Val");
+    Value* Lo = args++; Lo->setName("Lo");
+    Value* Hi  = args++; Hi->setName("High");
+
+    // We want to select a range of bits here such that [Hi, Lo] is shifted
+    // down to the low bits. However, it is quite possible that Hi is smaller
+    // than Lo in which case the bits have to be reversed. 
+    
+    // Create the blocks we will need for the two cases (forward, reverse)
+    BasicBlock* CurBB   = new BasicBlock("entry", F);
+    BasicBlock *RevSize = new BasicBlock("revsize", CurBB->getParent());
+    BasicBlock *FwdSize = new BasicBlock("fwdsize", CurBB->getParent());
+    BasicBlock *Compute = new BasicBlock("compute", CurBB->getParent());
+    BasicBlock *Reverse = new BasicBlock("reverse", CurBB->getParent());
+    BasicBlock *RsltBlk = new BasicBlock("result",  CurBB->getParent());
+
+    // Cast Hi and Lo to the size of Val so the widths are all the same
+    if (Hi->getType() != Val->getType())
+      Hi = CastInst::createIntegerCast(Hi, Val->getType(), false, 
+                                         "tmp", CurBB);
+    if (Lo->getType() != Val->getType())
+      Lo = CastInst::createIntegerCast(Lo, Val->getType(), false, 
+                                          "tmp", CurBB);
+
+    // Compute a few things that both cases will need, up front.
+    Constant* Zero = ConstantInt::get(Val->getType(), 0);
+    Constant* One = ConstantInt::get(Val->getType(), 1);
+    Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
+
+    // Compare the Hi and Lo bit positions. This is used to determine 
+    // which case we have (forward or reverse)
+    ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, Hi, Lo, "less",CurBB);
+    new BranchInst(RevSize, FwdSize, Cmp, CurBB);
+
+    // First, compute the number of bits in the forward case.
+    Instruction* FBitSize = 
+      BinaryOperator::createSub(Hi, Lo,"fbits", FwdSize);
+    new BranchInst(Compute, FwdSize);
+
+    // Second, compute the number of bits in the reverse case.
+    Instruction* RBitSize = 
+      BinaryOperator::createSub(Lo, Hi, "rbits", RevSize);
+    new BranchInst(Compute, RevSize);
+
+    // Now, compute the bit range. Start by getting the bitsize and the shift
+    // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for 
+    // the number of bits we want in the range. We shift the bits down to the 
+    // least significant bits, apply the mask to zero out unwanted high bits, 
+    // and we have computed the "forward" result. It may still need to be 
+    // reversed.
+
+    // Get the BitSize from one of the two subtractions
+    PHINode *BitSize = new PHINode(Val->getType(), "bits", Compute);
+    BitSize->reserveOperandSpace(2);
+    BitSize->addIncoming(FBitSize, FwdSize);
+    BitSize->addIncoming(RBitSize, RevSize);
+
+    // Get the ShiftAmount as the smaller of Hi/Lo
+    PHINode *ShiftAmt = new PHINode(Val->getType(), "shiftamt", Compute);
+    ShiftAmt->reserveOperandSpace(2);
+    ShiftAmt->addIncoming(Lo, FwdSize);
+    ShiftAmt->addIncoming(Hi, RevSize);
+
+    // Increment the bit size
+    Instruction *BitSizePlusOne = 
+      BinaryOperator::createAdd(BitSize, One, "bits", Compute);
+
+    // Create a Mask to zero out the high order bits.
+    Instruction* Mask = 
+      BinaryOperator::createShl(AllOnes, BitSizePlusOne, "mask", Compute);
+    Mask = BinaryOperator::createNot(Mask, "mask", Compute);
+
+    // Shift the bits down and apply the mask
+    Instruction* FRes = 
+      BinaryOperator::createLShr(Val, ShiftAmt, "fres", Compute);
+    FRes = BinaryOperator::createAnd(FRes, Mask, "fres", Compute);
+    new BranchInst(Reverse, RsltBlk, Cmp, Compute);
+
+    // In the Reverse block we have the forward result in FRes, but we must
+    // reverse it by shifting bits out of FRes from the right and shifting them
+    // into RRes from the left.
+
+    // First set up our loop counters
+    PHINode *Count = new PHINode(Val->getType(), "count", Reverse);
+    Count->reserveOperandSpace(2);
+    Count->addIncoming(BitSizePlusOne, Compute);
+
+    // Next, get the value that we are shifting.
+    PHINode *BitsToShift   = new PHINode(Val->getType(), "val", Reverse);
+    BitsToShift->reserveOperandSpace(2);
+    BitsToShift->addIncoming(FRes, Compute);
+
+    // Finally, get the result of the last computation
+    PHINode *RRes  = new PHINode(Val->getType(), "rres", Reverse);
+    RRes->reserveOperandSpace(2);
+    RRes->addIncoming(Zero, Compute);
+
+    // Decrement the counter
+    Instruction *Decr = BinaryOperator::createSub(Count, One, "decr", Reverse);
+    Count->addIncoming(Decr, Reverse);
+
+    // Compute the Bit that we want to move
+    Instruction *Bit = 
+      BinaryOperator::createAnd(BitsToShift, One, "bit", Reverse);
+
+    // Compute the new value for next iteration.
+    Instruction *NewVal = 
+      BinaryOperator::createLShr(BitsToShift, One, "rshift", Reverse);
+    BitsToShift->addIncoming(NewVal, Reverse);
+
+    // Shift the bit into the low bits of the result.
+    Instruction *NewRes = 
+      BinaryOperator::createShl(RRes, One, "lshift", Reverse);
+    NewRes = BinaryOperator::createOr(NewRes, Bit, "addbit", Reverse);
+    RRes->addIncoming(NewRes, Reverse);
+    
+    // Terminate loop if we've moved all the bits.
+    ICmpInst *Cond = 
+      new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "cond", Reverse);
+    new BranchInst(RsltBlk, Reverse, Cond, Reverse);
+
+    // Finally, in the result block, select one of the two results with a PHI
+    // node and return the result.
+    CurBB = RsltBlk;
+    PHINode *BitSelect = new PHINode(Val->getType(), "part_select", CurBB);
+    BitSelect->reserveOperandSpace(2);
+    BitSelect->addIncoming(FRes, Compute);
+    BitSelect->addIncoming(NewRes, Reverse);
+    new ReturnInst(BitSelect, CurBB);
+  }
+
+  // Return a call to the implementation function
+  Value *Args[] = {
+    CI->getOperand(1),
+    CI->getOperand(2),
+    CI->getOperand(3)
+  };
+  return new CallInst(F, Args, sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
+}
+
+/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes 
+/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
+/// The first two arguments can be any bit width. The result is the same width
+/// as %Value. The operation replaces bits between %Low and %High with the value
+/// in %Replacement. If %Replacement is not the same width, it is truncated or
+/// zero extended as appropriate to fit the bits being replaced. If %Low is
+/// greater than %High then the inverse set of bits are replaced.
+/// @brief Lowering of llvm.part.set intrinsic.
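+///
+/// For illustration, part.set(i16 0xFFFF, i8 0, i32 4, i32 7) replaces bits
+/// [7,4] with zero, producing 0xFF0F.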
+static Instruction *LowerPartSet(CallInst *CI) {
+  // Make sure we're dealing with a part set intrinsic here
+  Function *F = CI->getCalledFunction();
+  const FunctionType *FT = F->getFunctionType();
+  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+      FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
+      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
+      !FT->getParamType(3)->isInteger())
+    return CI;
+
+  // Get the intrinsic implementation function by converting all the . to _
+  // in the intrinsic's function name and then reconstructing the function
+  // declaration.
+  std::string Name(F->getName());
+  for (unsigned i = 4; i < Name.length(); ++i)
+    if (Name[i] == '.')
+      Name[i] = '_';
+  Module* M = F->getParent();
+  F = cast<Function>(M->getOrInsertFunction(Name, FT));
+  F->setLinkage(GlobalValue::WeakLinkage);
+
+  // If we haven't defined the impl function yet, do so now
+  if (F->isDeclaration()) {
+    // Get the arguments for the function.
+    Function::arg_iterator args = F->arg_begin();
+    Value* Val = args++; Val->setName("Val");
+    Value* Rep = args++; Rep->setName("Rep");
+    Value* Lo  = args++; Lo->setName("Lo");
+    Value* Hi  = args++; Hi->setName("Hi");
+
+    // Get some types we need
+    const IntegerType* ValTy = cast<IntegerType>(Val->getType());
+    const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
+    uint32_t ValBits = ValTy->getBitWidth();
+    uint32_t RepBits = RepTy->getBitWidth();
+
+    // Constant Definitions
+    ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
+    ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
+    ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
+    ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
+    ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
+    ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
+    ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
+
+    // Basic blocks we fill in below.
+    BasicBlock* entry = new BasicBlock("entry", F, 0);
+    BasicBlock* large = new BasicBlock("large", F, 0);
+    BasicBlock* small = new BasicBlock("small", F, 0);
+    BasicBlock* reverse = new BasicBlock("reverse", F, 0);
+    BasicBlock* result = new BasicBlock("result", F, 0);
+
+    // BASIC BLOCK: entry
+    // First, get the number of bits that we're placing as an i32
+    ICmpInst* is_forward = 
+      new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
+    SelectInst* Hi_pn = new SelectInst(is_forward, Hi, Lo, "", entry);
+    SelectInst* Lo_pn = new SelectInst(is_forward, Lo, Hi, "", entry);
+    BinaryOperator* NumBits = BinaryOperator::createSub(Hi_pn, Lo_pn, "",entry);
+    NumBits = BinaryOperator::createAdd(NumBits, One, "", entry);
+    // Now, convert Lo to the bit width of ValTy
+    if (ValBits > 32) {
+      Lo = new ZExtInst(Lo_pn, ValTy, "", entry);
+    } else if (ValBits < 32) {
+      Lo = new TruncInst(Lo_pn, ValTy, "", entry);
+    }
+    // Determine if the replacement bits are larger than the number of bits we
+    // are replacing and deal with it.
+    ICmpInst* is_large = 
+      new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry);
+    new BranchInst(large, small, is_large, entry);
+
+    // BASIC BLOCK: large
+    Instruction* MaskBits = 
+      BinaryOperator::createSub(RepBitWidth, NumBits, "", large);
+    MaskBits = CastInst::createIntegerCast(MaskBits, RepMask->getType(), 
+                                           false, "", large);
+    BinaryOperator* Mask1 = 
+      BinaryOperator::createLShr(RepMask, MaskBits, "", large);
+    BinaryOperator* Rep2 = BinaryOperator::createAnd(Mask1, Rep, "", large);
+    new BranchInst(small, large);
+
+    // BASIC BLOCK: small
+    PHINode* Rep3 = new PHINode(RepTy, "", small);
+    Rep3->reserveOperandSpace(2);
+    Rep3->addIncoming(Rep2, large);
+    Rep3->addIncoming(Rep, entry);
+    Value* Rep4 = Rep3;
+    if (ValBits > RepBits)
+      Rep4 = new ZExtInst(Rep3, ValTy, "", small);
+    else if (ValBits < RepBits)
+      Rep4 = new TruncInst(Rep3, ValTy, "", small);
+    new BranchInst(result, reverse, is_forward, small);
+
+    // BASIC BLOCK: reverse (reverses the bits of the replacement)
+    // Set up our loop counter as a PHI so we can decrement on each iteration.
+    // We will loop for the number of bits in the replacement value.
+    PHINode *Count = new PHINode(Type::Int32Ty, "count", reverse);
+    Count->reserveOperandSpace(2);
+    Count->addIncoming(NumBits, small);
+
+    // Get the value that we are shifting bits out of as a PHI because
+    // we'll change this with each iteration.
+    PHINode *BitsToShift   = new PHINode(Val->getType(), "val", reverse);
+    BitsToShift->reserveOperandSpace(2);
+    BitsToShift->addIncoming(Rep4, small);
+
+    // Get the result of the last computation or zero on first iteration
+    PHINode *RRes  = new PHINode(Val->getType(), "rres", reverse);
+    RRes->reserveOperandSpace(2);
+    RRes->addIncoming(ValZero, small);
+
+    // Decrement the loop counter by one
+    Instruction *Decr = BinaryOperator::createSub(Count, One, "", reverse);
+    Count->addIncoming(Decr, reverse);
+
+    // Get the bit that we want to move into the result
+    Value *Bit = BinaryOperator::createAnd(BitsToShift, ValOne, "", reverse);
+
+    // Compute the new value of the bits to shift for the next iteration.
+    Value *NewVal = BinaryOperator::createLShr(BitsToShift, ValOne,"", reverse);
+    BitsToShift->addIncoming(NewVal, reverse);
+
+    // Shift the bit we extracted into the low bit of the result.
+    Instruction *NewRes = BinaryOperator::createShl(RRes, ValOne, "", reverse);
+    NewRes = BinaryOperator::createOr(NewRes, Bit, "", reverse);
+    RRes->addIncoming(NewRes, reverse);
+    
+    // Terminate loop if we've moved all the bits.
+    ICmpInst *Cond = new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "", reverse);
+    new BranchInst(result, reverse, Cond, reverse);
+
+    // BASIC BLOCK: result
+    PHINode *Rplcmnt  = new PHINode(Val->getType(), "", result);
+    Rplcmnt->reserveOperandSpace(2);
+    Rplcmnt->addIncoming(NewRes, reverse);
+    Rplcmnt->addIncoming(Rep4, small);
+    Value* t0   = CastInst::createIntegerCast(NumBits,ValTy,false,"",result);
+    Value* t1   = BinaryOperator::createShl(ValMask, Lo, "", result);
+    Value* t2   = BinaryOperator::createNot(t1, "", result);
+    Value* t3   = BinaryOperator::createShl(t1, t0, "", result);
+    Value* t4   = BinaryOperator::createOr(t2, t3, "", result);
+    Value* t5   = BinaryOperator::createAnd(t4, Val, "", result);
+    Value* t6   = BinaryOperator::createShl(Rplcmnt, Lo, "", result);
+    Value* Rslt = BinaryOperator::createOr(t5, t6, "part_set", result);
+    new ReturnInst(Rslt, result);
+  }
+
+  // Return a call to the implementation function
+  Value *Args[] = {
+    CI->getOperand(1),
+    CI->getOperand(2),
+    CI->getOperand(3),
+    CI->getOperand(4)
+  };
+  return new CallInst(F, Args, sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
+}
+
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+  Function *Callee = CI->getCalledFunction();
+  assert(Callee && "Cannot lower an indirect call!");
+
+  switch (Callee->getIntrinsicID()) {
+  case Intrinsic::not_intrinsic:
+    cerr << "Cannot lower a call to a non-intrinsic function '"
+         << Callee->getName() << "'!\n";
+    abort();
+  default:
+    cerr << "Error: Code generator does not support intrinsic function '"
+         << Callee->getName() << "'!\n";
+    abort();
+
+    // The setjmp/longjmp intrinsics should only exist in the code if it was
+    // never optimized (ie, right out of the CFE), or if it has been hacked on
+    // by the lowerinvoke pass.  In both cases, the right thing to do is to
+    // convert the call to an explicit setjmp or longjmp call.
+  case Intrinsic::setjmp: {
+    static Constant *SetjmpFCache = 0;
+    Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(),
+                               Type::Int32Ty, SetjmpFCache);
+    if (CI->getType() != Type::VoidTy)
+      CI->replaceAllUsesWith(V);
+    break;
+  }
+  case Intrinsic::sigsetjmp:
+     if (CI->getType() != Type::VoidTy)
+       CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+     break;
+
+  case Intrinsic::longjmp: {
+    static Constant *LongjmpFCache = 0;
+    ReplaceCallWith("longjmp", CI, CI->op_begin()+1, CI->op_end(),
+                    Type::VoidTy, LongjmpFCache);
+    break;
+  }
+
+  case Intrinsic::siglongjmp: {
+    // Insert the call to abort
+    static Constant *AbortFCache = 0;
+    ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), 
+                    Type::VoidTy, AbortFCache);
+    break;
+  }
+  case Intrinsic::ctpop:
+    CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
+    break;
+
+  case Intrinsic::bswap:
+    CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
+    break;
+    
+  case Intrinsic::ctlz:
+    CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
+    break;
+
+  case Intrinsic::cttz: {
+    // cttz(x) -> ctpop(~X & (X-1))
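+    // For example, for X = 0b0100: ~X & (X-1) = 0b1011 & 0b0011 = 0b0011,
+    // and ctpop(0b0011) = 2, the number of trailing zeros.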
+    Value *Src = CI->getOperand(1);
+    Value *NotSrc = BinaryOperator::createNot(Src, Src->getName()+".not", CI);
+    Value *SrcM1  = ConstantInt::get(Src->getType(), 1);
+    SrcM1 = BinaryOperator::createSub(Src, SrcM1, "", CI);
+    Src = LowerCTPOP(BinaryOperator::createAnd(NotSrc, SrcM1, "", CI), CI);
+    CI->replaceAllUsesWith(Src);
+    break;
+  }
+
+  case Intrinsic::part_select:
+    CI->replaceAllUsesWith(LowerPartSelect(CI));
+    break;
+
+  case Intrinsic::part_set:
+    CI->replaceAllUsesWith(LowerPartSet(CI));
+    break;
+
+  case Intrinsic::stacksave:
+  case Intrinsic::stackrestore: {
+    static bool Warned = false;
+    if (!Warned)
+      cerr << "WARNING: this target does not support the llvm.stack"
+           << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+               "save" : "restore") << " intrinsic.\n";
+    Warned = true;
+    if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+      CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+    break;
+  }
+    
+  case Intrinsic::returnaddress:
+  case Intrinsic::frameaddress:
+    cerr << "WARNING: this target does not support the llvm."
+         << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+             "return" : "frame") << "address intrinsic.\n";
+    CI->replaceAllUsesWith(ConstantPointerNull::get(
+                                            cast<PointerType>(CI->getType())));
+    break;
+
+  case Intrinsic::prefetch:
+    break;    // Simply strip out prefetches on unsupported architectures
+
+  case Intrinsic::pcmarker:
+    break;    // Simply strip out pcmarker on unsupported architectures
+  case Intrinsic::readcyclecounter: {
+    cerr << "WARNING: this target does not support the llvm.readcyclecoun"
+         << "ter intrinsic.  It is being lowered to a constant 0\n";
+    CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
+    break;
+  }
+
+  case Intrinsic::dbg_stoppoint:
+  case Intrinsic::dbg_region_start:
+  case Intrinsic::dbg_region_end:
+  case Intrinsic::dbg_func_start:
+  case Intrinsic::dbg_declare:
+    break;    // Simply strip out debugging intrinsics
+
+  case Intrinsic::eh_exception:
+  case Intrinsic::eh_selector:
+    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+    break;
+
+  case Intrinsic::eh_typeid_for:
+    // Return something different to eh_selector.
+    CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+    break;
+
+  case Intrinsic::var_annotation:
+    break;   // Strip out annotate intrinsic
+    
+  case Intrinsic::memcpy_i32:
+  case Intrinsic::memcpy_i64: {
+    static Constant *MemcpyFCache = 0;
+    Value *Size = CI->getOperand(3);
+    const Type *IntPtr = TD.getIntPtrType();
+    if (Size->getType()->getPrimitiveSizeInBits() <
+        IntPtr->getPrimitiveSizeInBits())
+      Size = new ZExtInst(Size, IntPtr, "", CI);
+    else if (Size->getType()->getPrimitiveSizeInBits() >
+             IntPtr->getPrimitiveSizeInBits())
+      Size = new TruncInst(Size, IntPtr, "", CI);
+    Value *Ops[3];
+    Ops[0] = CI->getOperand(1);
+    Ops[1] = CI->getOperand(2);
+    Ops[2] = Size;
+    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+                    MemcpyFCache);
+    break;
+  }
+  case Intrinsic::memmove_i32: 
+  case Intrinsic::memmove_i64: {
+    static Constant *MemmoveFCache = 0;
+    Value *Size = CI->getOperand(3);
+    const Type *IntPtr = TD.getIntPtrType();
+    if (Size->getType()->getPrimitiveSizeInBits() <
+        IntPtr->getPrimitiveSizeInBits())
+      Size = new ZExtInst(Size, IntPtr, "", CI);
+    else if (Size->getType()->getPrimitiveSizeInBits() >
+             IntPtr->getPrimitiveSizeInBits())
+      Size = new TruncInst(Size, IntPtr, "", CI);
+    Value *Ops[3];
+    Ops[0] = CI->getOperand(1);
+    Ops[1] = CI->getOperand(2);
+    Ops[2] = Size;
+    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+                    MemmoveFCache);
+    break;
+  }
+  case Intrinsic::memset_i32:
+  case Intrinsic::memset_i64: {
+    static Constant *MemsetFCache = 0;
+    Value *Size = CI->getOperand(3);
+    const Type *IntPtr = TD.getIntPtrType();
+    if (Size->getType()->getPrimitiveSizeInBits() <
+        IntPtr->getPrimitiveSizeInBits())
+      Size = new ZExtInst(Size, IntPtr, "", CI);
+    else if (Size->getType()->getPrimitiveSizeInBits() >
+             IntPtr->getPrimitiveSizeInBits())
+      Size = new TruncInst(Size, IntPtr, "", CI);
+    Value *Ops[3];
+    Ops[0] = CI->getOperand(1);
+    // Extend the amount to i32.
+    Ops[1] = new ZExtInst(CI->getOperand(2), Type::Int32Ty, "", CI);
+    Ops[2] = Size;
+    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+                    MemsetFCache);
+    break;
+  }
+  case Intrinsic::sqrt_f32: {
+    static Constant *sqrtfFCache = 0;
+    ReplaceCallWith("sqrtf", CI, CI->op_begin()+1, CI->op_end(),
+                    Type::FloatTy, sqrtfFCache);
+    break;
+  }
+  case Intrinsic::sqrt_f64: {
+    static Constant *sqrtFCache = 0;
+    ReplaceCallWith("sqrt", CI, CI->op_begin()+1, CI->op_end(),
+                    Type::DoubleTy, sqrtFCache);
+    break;
+  }
+  }
+
+  assert(CI->use_empty() &&
+         "Lowering should have eliminated any uses of the intrinsic call!");
+  CI->eraseFromParent();
+}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 0000000..b72704b
--- /dev/null
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,205 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+    cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+    cl::desc("Print LLVM IR input to isel pass"));
+
+FileModel::Model
+LLVMTargetMachine::addPassesToEmitFile(FunctionPassManager &PM,
+                                       std::ostream &Out,
+                                       CodeGenFileType FileType,
+                                       bool Fast) {
+  // Standard LLVM-Level Passes.
+  
+  // Run loop strength reduction before anything else.
+  if (!Fast) {
+    PM.add(createLoopStrengthReducePass(getTargetLowering()));
+    if (PrintLSR)
+      PM.add(new PrintFunctionPass("\n\n*** Code after LSR *** \n", &cerr));
+  }
+  
+  // FIXME: Implement efficient support for garbage collection intrinsics.
+  PM.add(createLowerGCPass());
+
+  if (!ExceptionHandling)
+    PM.add(createLowerInvokePass(getTargetLowering()));
+
+  // Make sure that no unreachable blocks are instruction selected.
+  PM.add(createUnreachableBlockEliminationPass());
+
+  if (!Fast)
+    PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+  if (PrintISelInput)
+    PM.add(new PrintFunctionPass("\n\n*** Final LLVM Code input to ISel *** \n",
+                                 &cerr));
+  
+  // Ask the target for an isel.
+  if (addInstSelector(PM, Fast))
+    return FileModel::Error;
+
+  // Print the instruction selected machine code...
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+  
+  // Perform register allocation to convert to a concrete machine representation
+  PM.add(createRegisterAllocator());
+  
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Run post-ra passes.
+  if (addPostRegAlloc(PM, Fast) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Insert prolog/epilog code.  Eliminate abstract frame index references...
+  PM.add(createPrologEpilogCodeInserter());
+  
+  // Second pass scheduler.
+  if (!Fast)
+    PM.add(createPostRAScheduler());
+
+  // Branch folding must be run after regalloc and prolog/epilog insertion.
+  if (!Fast)
+    PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+    
+  // Fold redundant debug labels.
+  PM.add(createDebugLabelFoldingPass());
+  
+  if (PrintMachineCode)  // Print the register-allocated code
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  if (addPreEmitPass(PM, Fast) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  switch (FileType) {
+  default:
+    break;
+  case TargetMachine::AssemblyFile:
+    if (addAssemblyEmitter(PM, Fast, Out))
+      return FileModel::Error;
+    return FileModel::AsmFile;
+  case TargetMachine::ObjectFile:
+    if (getMachOWriterInfo())
+      return FileModel::MachOFile;
+    else if (getELFWriterInfo())
+      return FileModel::ElfFile;
+  }
+
+  return FileModel::Error;
+}
+ 
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(FunctionPassManager &PM,
+                                                  MachineCodeEmitter *MCE,
+                                                  bool Fast) {
+  if (MCE)
+    addSimpleCodeEmitter(PM, Fast, *MCE);
+
+  // Delete machine code for this function
+  PM.add(createMachineCodeDeleter());
+
+  return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted.  This uses a MachineCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions.  This method returns true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(FunctionPassManager &PM,
+                                                   MachineCodeEmitter &MCE,
+                                                   bool Fast) {
+  // Standard LLVM-Level Passes.
+  
+  // Run loop strength reduction before anything else.
+  if (!Fast) {
+    PM.add(createLoopStrengthReducePass(getTargetLowering()));
+    if (PrintLSR)
+      PM.add(new PrintFunctionPass("\n\n*** Code after LSR *** \n", &cerr));
+  }
+  
+  // FIXME: Implement efficient support for garbage collection intrinsics.
+  PM.add(createLowerGCPass());
+  
+  // FIXME: Implement the invoke/unwind instructions!
+  PM.add(createLowerInvokePass(getTargetLowering()));
+  
+  // Make sure that no unreachable blocks are instruction selected.
+  PM.add(createUnreachableBlockEliminationPass());
+
+  if (!Fast)
+    PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+  if (PrintISelInput)
+    PM.add(new PrintFunctionPass("\n\n*** Final LLVM Code input to ISel *** \n",
+                                 &cerr));
+
+  // Ask the target for an isel.
+  if (addInstSelector(PM, Fast))
+    return true;
+
+  // Print the instruction selected machine code...
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+  
+  // Perform register allocation to convert to a concrete machine representation
+  PM.add(createRegisterAllocator());
+  
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Run post-ra passes.
+  if (addPostRegAlloc(PM, Fast) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Insert prolog/epilog code.  Eliminate abstract frame index references...
+  PM.add(createPrologEpilogCodeInserter());
+  
+  if (PrintMachineCode)  // Print the register-allocated code
+    PM.add(createMachineFunctionPrinterPass(cerr));
+  
+  // Second pass scheduler.
+  if (!Fast)
+    PM.add(createPostRAScheduler());
+
+  // Branch folding must be run after regalloc and prolog/epilog insertion.
+  if (!Fast)
+    PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+  
+  if (addPreEmitPass(PM, Fast) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  addCodeEmitter(PM, Fast, MCE);
+  
+  // Delete machine code for this function
+  PM.add(createMachineCodeDeleter());
+  
+  return false; // success!
+}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..45c1dd0
--- /dev/null
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,526 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes.  Given some
+// numbering of each the machine instructions an interval [i, j) is said to be a
+// live interval for register v if there is no instruction with number j' > j
+// such that v is live at j' abd there is no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001).  Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include <algorithm>
+#include <map>
+#include <ostream>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(unsigned I) const {
+  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+  if (r == ranges.begin())
+    return false;
+
+  --r;
+  return r->contains(I);
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+                                const_iterator StartPos) const {
+  const_iterator i = begin();
+  const_iterator ie = end();
+  const_iterator j = StartPos;
+  const_iterator je = other.end();
+
+  assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+         StartPos != other.end() && "Bogus start position hint!");
+
+  if (i->start < j->start) {
+    i = std::upper_bound(i, ie, j->start);
+    if (i != ranges.begin()) --i;
+  } else if (j->start < i->start) {
+    ++StartPos;
+    if (StartPos != other.end() && StartPos->start <= i->start) {
+      assert(StartPos < other.end() && i < end());
+      j = std::upper_bound(j, je, i->start);
+      if (j != other.ranges.begin()) --j;
+    }
+  } else {
+    return true;
+  }
+
+  if (j == je) return false;
+
+  while (i != ie) {
+    if (i->start > j->start) {
+      std::swap(i, j);
+      std::swap(ie, je);
+    }
+
+    if (i->end > j->start)
+      return true;
+    ++i;
+  }
+
+  return false;
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint.  To do this, we should
+/// merge and eliminate all ranges that this will overlap with.  The iterator is
+/// not invalidated.
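+///
+/// For example, extending [1,5:0) to end at 8 when it is followed by [6,10:0)
+/// with the same value number produces the single range [1,10:0).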
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) {
+  assert(I != ranges.end() && "Not a valid interval!");
+  unsigned ValId = I->ValId;
+
+  // Search for the first interval that we can't merge with.
+  Ranges::iterator MergeTo = next(I);
+  for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+    assert(MergeTo->ValId == ValId && "Cannot merge with differing values!");
+  }
+
+  // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+  I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+  // Erase any dead ranges.
+  ranges.erase(next(I), MergeTo);
+  
+  // If the newly formed range now touches the range after it and if they have
+  // the same value number, merge the two ranges into one range.
+  Ranges::iterator Next = next(I);
+  if (Next != ranges.end() && Next->start <= I->end && Next->ValId == ValId) {
+    I->end = Next->end;
+    ranges.erase(Next);
+  }
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint.  To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) {
+  assert(I != ranges.end() && "Not a valid interval!");
+  unsigned ValId = I->ValId;
+
+  // Search for the first interval that we can't merge with.
+  Ranges::iterator MergeTo = I;
+  do {
+    if (MergeTo == ranges.begin()) {
+      I->start = NewStart;
+      ranges.erase(MergeTo, I);
+      return I;
+    }
+    assert(MergeTo->ValId == ValId && "Cannot merge with differing values!");
+    --MergeTo;
+  } while (NewStart <= MergeTo->start);
+
+  // If we start in the middle of another interval, just delete a range and
+  // extend that interval.
+  if (MergeTo->end >= NewStart && MergeTo->ValId == ValId) {
+    MergeTo->end = I->end;
+  } else {
+    // Otherwise, extend the interval right after.
+    ++MergeTo;
+    MergeTo->start = NewStart;
+    MergeTo->end = I->end;
+  }
+
+  ranges.erase(next(MergeTo), next(I));
+  return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+  unsigned Start = LR.start, End = LR.end;
+  iterator it = std::upper_bound(From, ranges.end(), Start);
+
+  // If the inserted interval starts in the middle or right at the end of
+  // another interval, just extend that interval to contain the range of LR.
+  if (it != ranges.begin()) {
+    iterator B = prior(it);
+    if (LR.ValId == B->ValId) {
+      if (B->start <= Start && B->end >= Start) {
+        extendIntervalEndTo(B, End);
+        return B;
+      }
+    } else {
+      // Check to make sure that we are not overlapping two live ranges with
+      // different ValId's.
+      assert(B->end <= Start &&
+             "Cannot overlap two LiveRanges with differing ValID's"
+             " (did you def the same reg twice in a MachineInstr?)");
+    }
+  }
+
+  // Otherwise, if this range ends in the middle of, or right next to, another
+  // interval, merge it into that interval.
+  if (it != ranges.end())
+    if (LR.ValId == it->ValId) {
+      if (it->start <= End) {
+        it = extendIntervalStartTo(it, Start);
+
+        // If LR is a complete superset of an interval, we may need to grow its
+        // endpoint as well.
+        if (End > it->end)
+          extendIntervalEndTo(it, End);
+        return it;
+      }
+    } else {
+      // Check to make sure that we are not overlapping two live ranges with
+      // different ValId's.
+      assert(it->start >= End &&
+             "Cannot overlap two LiveRanges with differing ValID's");
+    }
+
+  // Otherwise, this is just a new range that doesn't interact with anything.
+  // Insert it.
+  return ranges.insert(it, LR);
+}
+
+
+/// removeRange - Remove the specified range from this interval.  Note that
+/// the range must already be in this interval in its entirety.
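+///
+/// For example, removing [3,5) from an interval whose only range is [1,8:0)
+/// splits it into the two ranges [1,3:0) and [5,8:0).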
+void LiveInterval::removeRange(unsigned Start, unsigned End) {
+  // Find the LiveRange containing this span.
+  Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+  assert(I != ranges.begin() && "Range is not in interval!");
+  --I;
+  assert(I->contains(Start) && I->contains(End-1) &&
+         "Range is not entirely in interval!");
+
+  // If the span we are removing is at the start of the LiveRange, adjust it.
+  if (I->start == Start) {
+    if (I->end == End)
+      ranges.erase(I);  // Removed the whole LiveRange.
+    else
+      I->start = End;
+    return;
+  }
+
+  // Otherwise if the span we are removing is at the end of the LiveRange,
+  // adjust the other way.
+  if (I->end == End) {
+    I->end = Start;
+    return;
+  }
+
+  // Otherwise, we are splitting the LiveRange into two pieces.
+  unsigned OldEnd = I->end;
+  I->end = Start;   // Trim the old interval.
+
+  // Insert the new one.
+  ranges.insert(next(I), LiveRange(End, OldEnd, I->ValId));
+}
+
+/// getLiveRangeContaining - Return the live range that contains the
+/// specified index, or null if there is none.
+LiveInterval::const_iterator 
+LiveInterval::FindLiveRangeContaining(unsigned Idx) const {
+  const_iterator It = std::upper_bound(begin(), end(), Idx);
+  if (It != ranges.begin()) {
+    --It;
+    if (It->contains(Idx))
+      return It;
+  }
+
+  return end();
+}
+
+LiveInterval::iterator 
+LiveInterval::FindLiveRangeContaining(unsigned Idx) {
+  iterator It = std::upper_bound(begin(), end(), Idx);
+  if (It != begin()) {
+    --It;
+    if (It->contains(Idx))
+      return It;
+  }
+  
+  return end();
+}
+
+/// join - Join two live intervals (this, and other) together.  This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified.  If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other, int *LHSValNoAssignments,
+                        int *RHSValNoAssignments, 
+                        SmallVector<std::pair<unsigned, 
+                                           unsigned>, 16> &NewValueNumberInfo) {
+  
+  // Try to do the least amount of work possible.  In particular, if there are
+  // more liverange chunks in the other set than there are in the 'this' set,
+// swap sets to merge in the fewest chunks possible.
+  //
+  // Also, if one range is a physreg and one is a vreg, we always merge from the
+  // vreg into the physreg, which leaves the vreg intervals pristine.
+  if ((Other.ranges.size() > ranges.size() &&
+      MRegisterInfo::isVirtualRegister(reg)) ||
+      MRegisterInfo::isPhysicalRegister(Other.reg)) {
+    swap(Other);
+    std::swap(LHSValNoAssignments, RHSValNoAssignments);
+  }
+
+  // Determine if any of our live range values are mapped.  This is uncommon, so
+  // we want to avoid the interval scan if not.
+  bool MustMapCurValNos = false;
+  for (unsigned i = 0, e = getNumValNums(); i != e; ++i) {
+    if (ValueNumberInfo[i].first == ~2U) continue;  // tombstone value #
+    if (i != (unsigned)LHSValNoAssignments[i]) {
+      MustMapCurValNos = true;
+      break;
+    }
+  }
+  
+  // If we have to apply a mapping to our base interval assignment, rewrite it
+  // now.
+  if (MustMapCurValNos) {
+    // Map the first live range.
+    iterator OutIt = begin();
+    OutIt->ValId = LHSValNoAssignments[OutIt->ValId];
+    ++OutIt;
+    for (iterator I = OutIt, E = end(); I != E; ++I) {
+      OutIt->ValId = LHSValNoAssignments[I->ValId];
+      
+      // If this live range has the same value # as its immediate predecessor,
+      // and if they are neighbors, remove one LiveRange.  This happens when we
+      // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+      if (OutIt->ValId == (OutIt-1)->ValId && (OutIt-1)->end == OutIt->start) {
+        (OutIt-1)->end = OutIt->end;
+      } else {
+        if (I != OutIt) {
+          OutIt->start = I->start;
+          OutIt->end = I->end;
+        }
+        
+        // Didn't merge, on to the next one.
+        ++OutIt;
+      }
+    }
+    
+    // If we merge some live ranges, chop off the end.
+    ranges.erase(OutIt, end());
+  }
+  
+  // Okay, now insert the RHS live ranges into the LHS.
+  iterator InsertPos = begin();
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) {
+    // Map the ValId in the other live range to the current live range.
+    I->ValId = RHSValNoAssignments[I->ValId];
+    InsertPos = addRangeFrom(*I, InsertPos);
+  }
+
+  ValueNumberInfo.clear();
+  ValueNumberInfo.append(NewValueNumberInfo.begin(), NewValueNumberInfo.end());
+  weight += Other.weight;
+  if (Other.preference && !preference)
+    preference = Other.preference;
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number.  The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, 
+                                        unsigned LHSValNo) {
+  // TODO: Make this more efficient.
+  iterator InsertPos = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    // Map the ValId in the other live range to the current live range.
+    LiveRange Tmp = *I;
+    Tmp.ValId = LHSValNo;
+    InsertPos = addRangeFrom(Tmp, InsertPos);
+  }
+}
+
+
+/// MergeInClobberRanges - For any live ranges that are not defined in the
+/// current interval, but are defined in the Clobbers interval, mark them
+/// used with an unknown definition value.
+void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers) {
+  if (Clobbers.begin() == Clobbers.end()) return;
+  
+  // Find a value # to use for the clobber ranges.  If there is already a value#
+  // for unknown values, use it.
+// FIXME: Use a single sentinel number for these!
+  unsigned ClobberValNo = getNextValue(~0U, 0);
+  
+  iterator IP = begin();
+  for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+    unsigned Start = I->start, End = I->end;
+    IP = std::upper_bound(IP, end(), Start);
+    
+    // If the start of this range overlaps with an existing liverange, trim it.
+    if (IP != begin() && IP[-1].end > Start) {
+      Start = IP[-1].end;
+      // Trimmed away the whole range?
+      if (Start >= End) continue;
+    }
+    // If the end of this range overlaps with an existing liverange, trim it.
+    if (IP != end() && End > IP->start) {
+      End = IP->start;
+      // If this trimmed away the whole range, ignore it.
+      if (Start == End) continue;
+    }
+    
+    // Insert the clobber interval.
+    IP = addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+  }
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent.  This eliminates V1, replacing all
+/// LiveRanges that have the V1 value number with the V2 value number.  This
+/// can cause merging of the V1/V2 value numbers and compaction of the value
+/// space.
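+/// For example (illustrative): on an interval [0,4:0)[4,8:1), merging value
+/// #1 into value #0 rewrites the second range to value #0 and, because the
+/// two ranges touch, folds them into the single range [0,8:0).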
+void LiveInterval::MergeValueNumberInto(unsigned V1, unsigned V2) {
+  assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+  // This code actually merges the (numerically) larger value number into the
+  // smaller value number, which is likely to allow us to compactify the value
+  // space.  The only thing we have to be careful of is to preserve the
+  // instruction that defines the result value.
+
+  // Make sure V2 is smaller than V1.
+  if (V1 < V2) {
+    setValueNumberInfo(V1, getValNumInfo(V2));
+    std::swap(V1, V2);
+  }
+
+  // Merge V1 live ranges into V2.
+  for (iterator I = begin(); I != end(); ) {
+    iterator LR = I++;
+    if (LR->ValId != V1) continue;  // Not a V1 LiveRange.
+    
+    // Okay, we found a V1 live range.  If it had a previous, touching, V2 live
+    // range, extend it.
+    if (LR != begin()) {
+      iterator Prev = LR-1;
+      if (Prev->ValId == V2 && Prev->end == LR->start) {
+        Prev->end = LR->end;
+
+        // Erase this live-range.
+        ranges.erase(LR);
+        I = Prev+1;
+        LR = Prev;
+      }
+    }
+    
+    // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+    // Ensure that it is a V2 live-range.
+    LR->ValId = V2;
+    
+    // If we can merge it into later V2 live ranges, do so now.  We ignore any
+    // following V1 live ranges, as they will be merged in subsequent iterations
+    // of the loop.
+    if (I != end()) {
+      if (I->start == LR->end && I->ValId == V2) {
+        LR->end = I->end;
+        ranges.erase(I);
+        I = LR+1;
+      }
+    }
+  }
+  
+  // Now that V1 is dead, remove it.  If it is the largest value number, just
+  // nuke it (and any other deleted values neighboring it), otherwise mark it as
+  // ~1U so it can be nuked later.
+  if (V1 == getNumValNums()-1) {
+    do {
+      ValueNumberInfo.pop_back();
+    } while (ValueNumberInfo.back().first == ~1U);
+  } else {
+    ValueNumberInfo[V1].first = ~1U;
+  }
+}
+
+unsigned LiveInterval::getSize() const {
+  unsigned Sum = 0;
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    Sum += I->end - I->start;
+  return Sum;
+}
+
+std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) {
+  return os << '[' << LR.start << ',' << LR.end << ':' << LR.ValId << ")";
+}
+
+void LiveRange::dump() const {
+  cerr << *this << "\n";
+}
+
+void LiveInterval::print(std::ostream &OS, const MRegisterInfo *MRI) const {
+  if (MRI && MRegisterInfo::isPhysicalRegister(reg))
+    OS << MRI->getName(reg);
+  else
+    OS << "%reg" << reg;
+
+  OS << ',' << weight;
+
+  if (empty())
+    OS << "EMPTY";
+  else {
+    OS << " = ";
+    for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+           E = ranges.end(); I != E; ++I)
+      OS << *I;
+  }
+  
+  // Print value number info.
+  if (getNumValNums()) {
+    OS << "  ";
+    for (unsigned i = 0; i != getNumValNums(); ++i) {
+      if (i) OS << " ";
+      OS << i << "@";
+      if (ValueNumberInfo[i].first == ~0U) {
+        OS << "?";
+      } else {
+        OS << ValueNumberInfo[i].first;
+      }
+    }
+  }
+}
+
+void LiveInterval::dump() const {
+  cerr << *this << "\n";
+}
+
+
+void LiveRange::print(std::ostream &os) const {
+  os << *this;
+}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 0000000..369493f
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,692 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numIntervals, "Number of original intervals");
+STATISTIC(numIntervalsAfter, "Number of intervals after coalescing");
+STATISTIC(numFolded   , "Number of loads/stores folded into instructions");
+
+char LiveIntervals::ID = 0;
+namespace {
+  RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+}
+
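+/// getAnalysisUsage - Declare the analyses this pass requires and preserves.
+/// A client pass that wants the computed intervals would declare a similar
+/// dependency; an illustrative sketch (MyRegAlloc is a hypothetical pass):
+///
+///   void MyRegAlloc::getAnalysisUsage(AnalysisUsage &AU) const {
+///     AU.addRequired<LiveIntervals>();
+///     MachineFunctionPass::getAnalysisUsage(AU);
+///   }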
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addPreserved<LiveVariables>();
+  AU.addRequired<LiveVariables>();
+  AU.addPreservedID(PHIEliminationID);
+  AU.addRequiredID(PHIEliminationID);
+  AU.addRequiredID(TwoAddressInstructionPassID);
+  AU.addRequired<LoopInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+  mi2iMap_.clear();
+  i2miMap_.clear();
+  r2iMap_.clear();
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  tm_ = &fn.getTarget();
+  mri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  lv_ = &getAnalysis<LiveVariables>();
+  allocatableRegs_ = mri_->getAllocatableSet(fn);
+
+  // Number MachineInstrs and MachineBasicBlocks.
+  // Initialize MBB indexes to a sentinel.
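+  // Instruction indices are spaced InstrSlots::NUM apart so that the load,
+  // use, def and store points of a single instruction can be addressed as
+  // distinct slots (see the getLoadIndex/getUseIndex/getDefIndex/
+  // getStoreIndex helpers used later in this file).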
+  MBB2IdxMap.resize(mf_->getNumBlockIDs(), ~0U);
+  
+  unsigned MIIndex = 0;
+  for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
+       MBB != E; ++MBB) {
+    // Set the MBB2IdxMap entry for this MBB.
+    MBB2IdxMap[MBB->getNumber()] = MIIndex;
+
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
+      assert(inserted && "multiple MachineInstr -> index mappings");
+      i2miMap_.push_back(I);
+      MIIndex += InstrSlots::NUM;
+    }
+  }
+
+  computeIntervals();
+
+  numIntervals += getNumIntervals();
+
+  DOUT << "********** INTERVALS **********\n";
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    I->second.print(DOUT, mri_);
+    DOUT << "\n";
+  }
+
+  numIntervalsAfter += getNumIntervals();
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(std::ostream &O, const Module* ) const {
+  O << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second.print(DOUT, mri_);
+    DOUT << "\n";
+  }
+
+  O << "********** MACHINEINSTRS **********\n";
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+    for (MachineBasicBlock::iterator mii = mbbi->begin(),
+           mie = mbbi->end(); mii != mie; ++mii) {
+      O << getInstructionIndex(mii) << '\t' << *mii;
+    }
+  }
+}
+
+// Not called?
+/// CreateNewLiveInterval - Create a new live interval with the given live
+/// ranges. The new live interval will have an infinite spill weight.
+LiveInterval&
+LiveIntervals::CreateNewLiveInterval(const LiveInterval *LI,
+                                     const std::vector<LiveRange> &LRs) {
+  const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(LI->reg);
+
+  // Create a new virtual register for the spill interval.
+  unsigned NewVReg = mf_->getSSARegMap()->createVirtualRegister(RC);
+
+  // Replace the old virtual registers in the machine operands with the shiny
+  // new one.
+  for (std::vector<LiveRange>::const_iterator
+         I = LRs.begin(), E = LRs.end(); I != E; ++I) {
+    unsigned Index = getBaseIndex(I->start);
+    unsigned End = getBaseIndex(I->end - 1) + InstrSlots::NUM;
+
+    for (; Index != End; Index += InstrSlots::NUM) {
+      // Skip deleted instructions
+      while (Index != End && !getInstructionFromIndex(Index))
+        Index += InstrSlots::NUM;
+
+      if (Index == End) break;
+
+      MachineInstr *MI = getInstructionFromIndex(Index);
+
+      for (unsigned J = 0, e = MI->getNumOperands(); J != e; ++J) {
+        MachineOperand &MOp = MI->getOperand(J);
+        if (MOp.isRegister() && MOp.getReg() == LI->reg)
+          MOp.setReg(NewVReg);
+      }
+    }
+  }
+
+  LiveInterval &NewLI = getOrCreateInterval(NewVReg);
+
+  // The spill weight is now infinity as it cannot be spilled again
+  NewLI.weight = float(HUGE_VAL);
+
+  for (std::vector<LiveRange>::const_iterator
+         I = LRs.begin(), E = LRs.end(); I != E; ++I) {
+    DOUT << "  Adding live range " << *I << " to new interval\n";
+    NewLI.addRange(*I);
+  }
+            
+  DOUT << "Created new live interval " << NewLI << "\n";
+  return NewLI;
+}
+
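+/// addIntervalsForSpills - Spill the specified interval to the given stack
+/// slot.  For every instruction that reads or writes li.reg, the memory
+/// reference is either folded directly into the instruction or a fresh
+/// virtual register is introduced, and a tiny interval covering just that
+/// use and/or def is added to the returned list so the allocator can assign
+/// it trivially.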
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li, VirtRegMap &vrm, int slot) {
+  // Since this is called after the analysis is done, we don't know if
+  // LiveVariables is available.
+  lv_ = getAnalysisToUpdate<LiveVariables>();
+
+  std::vector<LiveInterval*> added;
+
+  assert(li.weight != HUGE_VALF &&
+         "attempt to spill already spilled interval!");
+
+  DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+  li.print(DOUT, mri_);
+  DOUT << '\n';
+
+  const TargetRegisterClass* rc = mf_->getSSARegMap()->getRegClass(li.reg);
+
+  for (LiveInterval::Ranges::const_iterator
+         i = li.ranges.begin(), e = li.ranges.end(); i != e; ++i) {
+    unsigned index = getBaseIndex(i->start);
+    unsigned end = getBaseIndex(i->end-1) + InstrSlots::NUM;
+    for (; index != end; index += InstrSlots::NUM) {
+      // skip deleted instructions
+      while (index != end && !getInstructionFromIndex(index))
+        index += InstrSlots::NUM;
+      if (index == end) break;
+
+      MachineInstr *MI = getInstructionFromIndex(index);
+
+    RestartInstruction:
+      for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+        MachineOperand& mop = MI->getOperand(i);
+        if (mop.isRegister() && mop.getReg() == li.reg) {
+          MachineInstr *fmi = li.remat ? NULL
+            : mri_->foldMemoryOperand(MI, i, slot);
+          if (fmi) {
+            // Attempt to fold the memory reference into the instruction.  If we
+            // can do this, we don't need to insert spill code.
+            if (lv_)
+              lv_->instructionChanged(MI, fmi);
+            MachineBasicBlock &MBB = *MI->getParent();
+            vrm.virtFolded(li.reg, MI, i, fmi);
+            mi2iMap_.erase(MI);
+            i2miMap_[index/InstrSlots::NUM] = fmi;
+            mi2iMap_[fmi] = index;
+            MI = MBB.insert(MBB.erase(MI), fmi);
+            ++numFolded;
+            // Folding the load/store can completely change the instruction in
+            // unpredictable ways, rescan it from the beginning.
+            goto RestartInstruction;
+          } else {
+            // Create a new virtual register for the spill interval.
+            unsigned NewVReg = mf_->getSSARegMap()->createVirtualRegister(rc);
+            
+            // Scan all of the operands of this instruction rewriting operands
+            // to use NewVReg instead of li.reg as appropriate.  We do this for
+            // two reasons:
+            //
+            //   1. If the instr reads the same spilled vreg multiple times, we
+            //      want to reuse the NewVReg.
+            //   2. If the instr is a two-addr instruction, we are required to
+            //      keep the src/dst regs pinned.
+            //
+            // Keep track of whether we replace a use and/or def so that we can
+            // create the spill interval with the appropriate range. 
+            mop.setReg(NewVReg);
+            
+            bool HasUse = mop.isUse();
+            bool HasDef = mop.isDef();
+            for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
+              if (MI->getOperand(j).isReg() &&
+                  MI->getOperand(j).getReg() == li.reg) {
+                MI->getOperand(j).setReg(NewVReg);
+                HasUse |= MI->getOperand(j).isUse();
+                HasDef |= MI->getOperand(j).isDef();
+              }
+            }
+
+            // create a new register for this spill
+            vrm.grow();
+            if (li.remat)
+              vrm.setVirtIsReMaterialized(NewVReg, li.remat);
+            vrm.assignVirt2StackSlot(NewVReg, slot);
+            LiveInterval &nI = getOrCreateInterval(NewVReg);
+            nI.remat = li.remat;
+            assert(nI.empty());
+
+            // the spill weight is now infinity as it
+            // cannot be spilled again
+            nI.weight = HUGE_VALF;
+
+            if (HasUse) {
+              LiveRange LR(getLoadIndex(index), getUseIndex(index),
+                           nI.getNextValue(~0U, 0));
+              DOUT << " +" << LR;
+              nI.addRange(LR);
+            }
+            if (HasDef) {
+              LiveRange LR(getDefIndex(index), getStoreIndex(index),
+                           nI.getNextValue(~0U, 0));
+              DOUT << " +" << LR;
+              nI.addRange(LR);
+            }
+            
+            added.push_back(&nI);
+
+            // update live variables if it is available
+            if (lv_)
+              lv_->addVirtualRegisterKilled(NewVReg, MI);
+            
+            DOUT << "\t\t\t\tadded new interval: ";
+            nI.print(DOUT, mri_);
+            DOUT << '\n';
+          }
+        }
+      }
+    }
+  }
+
+  return added;
+}
+
+void LiveIntervals::printRegName(unsigned reg) const {
+  if (MRegisterInfo::isPhysicalRegister(reg))
+    cerr << mri_->getName(reg);
+  else
+    cerr << "%reg" << reg;
+}
+
+/// isReDefinedByTwoAddr - Returns true if the Reg re-definition is due to
+/// two-address elimination.
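+/// For example (illustrative), in a two-address instruction such as
+///   %reg1024 = ADD %reg1024, %reg1025
+/// the use of %reg1024 is tied (TIED_TO) to its def, so this kind of
+/// re-definition does not introduce an independent value.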
+static bool isReDefinedByTwoAddr(MachineInstr *MI, unsigned Reg,
+                                const TargetInstrInfo *TII) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO1 = MI->getOperand(i);
+    if (MO1.isRegister() && MO1.isDef() && MO1.getReg() == Reg) {
+      for (unsigned j = i+1; j < e; ++j) {
+        MachineOperand &MO2 = MI->getOperand(j);
+        if (MO2.isRegister() && MO2.isUse() && MO2.getReg() == Reg &&
+            MI->getInstrDescriptor()->
+            getOperandConstraint(j, TOI::TIED_TO) == (int)i)
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+                                             MachineBasicBlock::iterator mi,
+                                             unsigned MIIdx,
+                                             LiveInterval &interval) {
+  DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+  LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+
+  // Virtual registers may be defined multiple times (due to phi
+  // elimination and 2-addr elimination).  Much of what we do only has to be
+  // done once for the vreg.  We use an empty interval to detect the first
+  // time we see a vreg.
+  if (interval.empty()) {
+    // Remember if the definition can be rematerialized. All loads from fixed
+    // stack slots are re-materializable. The target may permit other
+    // instructions to be re-materialized as well.
+    int FrameIdx = 0;
+    if (vi.DefInst &&
+        (tii_->isTriviallyReMaterializable(vi.DefInst) ||
+         (tii_->isLoadFromStackSlot(vi.DefInst, FrameIdx) &&
+          mf_->getFrameInfo()->isFixedObjectIndex(FrameIdx))))
+      interval.remat = vi.DefInst;
+
+    // Get the Idx of the defining instruction.
+    unsigned defIndex = getDefIndex(MIIdx);
+
+    unsigned ValNum;
+    unsigned SrcReg, DstReg;
+    if (!tii_->isMoveInstr(*mi, SrcReg, DstReg))
+      ValNum = interval.getNextValue(~0U, 0);
+    else
+      ValNum = interval.getNextValue(defIndex, SrcReg);
+    
+    assert(ValNum == 0 && "First value in interval is not 0?");
+    ValNum = 0;  // Clue in the optimizer.
+
+    // Loop over all of the blocks that the vreg is defined in.  There are
+    // two cases we have to handle here.  The most common case is a vreg
+    // whose lifetime is contained within a basic block.  In this case there
+    // will be a single kill, in MBB, which comes after the definition.
+    if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+      // FIXME: what about dead vars?
+      unsigned killIdx;
+      if (vi.Kills[0] != mi)
+        killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+      else
+        killIdx = defIndex+1;
+
+      // If the kill happens after the definition, we have an intra-block
+      // live range.
+      if (killIdx > defIndex) {
+        assert(vi.AliveBlocks.none() &&
+               "Shouldn't be alive across any blocks!");
+        LiveRange LR(defIndex, killIdx, ValNum);
+        interval.addRange(LR);
+        DOUT << " +" << LR << "\n";
+        return;
+      }
+    }
+
+    // The other case we handle is when a virtual register lives to the end
+    // of the defining block, potentially live across some blocks, then is
+    // live into some number of blocks, but gets killed.  Start by adding a
+    // range that goes from this definition to the end of the defining block.
+    LiveRange NewLR(defIndex,
+                    getInstructionIndex(&mbb->back()) + InstrSlots::NUM,
+                    ValNum);
+    DOUT << " +" << NewLR;
+    interval.addRange(NewLR);
+
+    // Iterate over all of the blocks that the variable is completely
+    // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+    // live interval.
+    for (unsigned i = 0, e = vi.AliveBlocks.size(); i != e; ++i) {
+      if (vi.AliveBlocks[i]) {
+        MachineBasicBlock *MBB = mf_->getBlockNumbered(i);
+        if (!MBB->empty()) {
+          LiveRange LR(getMBBStartIdx(i),
+                       getInstructionIndex(&MBB->back()) + InstrSlots::NUM,
+                       ValNum);
+          interval.addRange(LR);
+          DOUT << " +" << LR;
+        }
+      }
+    }
+
+    // Finally, this virtual register is live from the start of any killing
+    // block to the 'use' slot of the killing instruction.
+    for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+      MachineInstr *Kill = vi.Kills[i];
+      LiveRange LR(getMBBStartIdx(Kill->getParent()),
+                   getUseIndex(getInstructionIndex(Kill))+1,
+                   ValNum);
+      interval.addRange(LR);
+      DOUT << " +" << LR;
+    }
+
+  } else {
+    // Can no longer safely assume definition is rematerializable.
+    interval.remat = NULL;
+
+    // If this is the second time we see a virtual register definition, it
+    // must be due to phi elimination or two addr elimination.  If this is
+    // the result of two address elimination, then the vreg is one of the
+    // def-and-use register operands.
+    if (isReDefinedByTwoAddr(mi, interval.reg, tii_)) {
+      // If this is a two-address definition, then we have already processed
+      // the live range.  The only problem is that we didn't realize there
+      // are actually two values in the live interval.  Because of this we
+      // need to take the LiveRange that defines this register and split it
+      // into two values.
+      unsigned DefIndex = getDefIndex(getInstructionIndex(vi.DefInst));
+      unsigned RedefIndex = getDefIndex(MIIdx);
+
+      // Delete the initial value, which should be short and continuous,
+      // because the 2-addr copy must be in the same MBB as the redef.
+      interval.removeRange(DefIndex, RedefIndex);
+
+      // Two-address vregs should only be redefined once.  This means
+      // that at this point, there should be exactly one value number in it.
+      assert(interval.containsOneValue() && "Unexpected 2-addr liveint!");
+
+      // The new value number (#1) is defined by the instruction we claimed
+      // defined value #0.
+      unsigned ValNo = interval.getNextValue(0, 0);
+      interval.setValueNumberInfo(1, interval.getValNumInfo(0));
+      
+      // Value#0 is now defined by the 2-addr instruction.
+      interval.setValueNumberInfo(0, std::make_pair(~0U, 0U));
+      
+      // Add the new live interval which replaces the range for the input copy.
+      LiveRange LR(DefIndex, RedefIndex, ValNo);
+      DOUT << " replace range with " << LR;
+      interval.addRange(LR);
+
+      // If this redefinition is dead, we need to add a dummy unit live
+      // range covering the def slot.
+      if (lv_->RegisterDefIsDead(mi, interval.reg))
+        interval.addRange(LiveRange(RedefIndex, RedefIndex+1, 0));
+
+      DOUT << " RESULT: ";
+      interval.print(DOUT, mri_);
+
+    } else {
+      // Otherwise, this must be because of phi elimination.  If this is the
+      // first redefinition of the vreg that we have seen, go back and change
+      // the live range in the PHI block to be a different value number.
+      if (interval.containsOneValue()) {
+        assert(vi.Kills.size() == 1 &&
+               "PHI elimination vreg should have one kill, the PHI itself!");
+
+        // Remove the old range that we now know has an incorrect number.
+        MachineInstr *Killer = vi.Kills[0];
+        unsigned Start = getMBBStartIdx(Killer->getParent());
+        unsigned End = getUseIndex(getInstructionIndex(Killer))+1;
+        DOUT << " Removing [" << Start << "," << End << "] from: ";
+        interval.print(DOUT, mri_); DOUT << "\n";
+        interval.removeRange(Start, End);
+        DOUT << " RESULT: "; interval.print(DOUT, mri_);
+
+        // Replace the interval with one of a NEW value number.  Note that this
+        // value number isn't actually defined by an instruction, weird huh? :)
+        LiveRange LR(Start, End, interval.getNextValue(~0U, 0));
+        DOUT << " replace range with " << LR;
+        interval.addRange(LR);
+        DOUT << " RESULT: "; interval.print(DOUT, mri_);
+      }
+
+      // In the case of PHI elimination, each variable definition is only
+      // live until the end of the block.  We've already taken care of the
+      // rest of the live range.
+      unsigned defIndex = getDefIndex(MIIdx);
+      
+      unsigned ValNum;
+      unsigned SrcReg, DstReg;
+      if (!tii_->isMoveInstr(*mi, SrcReg, DstReg))
+        ValNum = interval.getNextValue(~0U, 0);
+      else
+        ValNum = interval.getNextValue(defIndex, SrcReg);
+      
+      LiveRange LR(defIndex,
+                   getInstructionIndex(&mbb->back()) + InstrSlots::NUM, ValNum);
+      interval.addRange(LR);
+      DOUT << " +" << LR;
+    }
+  }
+
+  DOUT << '\n';
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+                                              MachineBasicBlock::iterator mi,
+                                              unsigned MIIdx,
+                                              LiveInterval &interval,
+                                              unsigned SrcReg) {
+  // A physical register cannot be live across basic blocks, so its
+  // lifetime must end somewhere in its defining basic block.
+  DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+
+  unsigned baseIndex = MIIdx;
+  unsigned start = getDefIndex(baseIndex);
+  unsigned end = start;
+
+  // If it is not used after definition, it is considered dead at
+  // the instruction defining it. Hence its interval is:
+  // [defSlot(def), defSlot(def)+1)
+  if (lv_->RegisterDefIsDead(mi, interval.reg)) {
+    DOUT << " dead";
+    end = getDefIndex(start) + 1;
+    goto exit;
+  }
+
+  // If it is not dead on definition, it must be killed by a
+  // subsequent instruction. Hence its interval is:
+  // [defSlot(def), useSlot(kill)+1)
+  while (++mi != MBB->end()) {
+    baseIndex += InstrSlots::NUM;
+    if (lv_->KillsRegister(mi, interval.reg)) {
+      DOUT << " killed";
+      end = getUseIndex(baseIndex) + 1;
+      goto exit;
+    } else if (lv_->ModifiesRegister(mi, interval.reg)) {
+      // Another instruction redefines the register before it is ever read.
+      // Then the register is essentially dead at the instruction that defines
+      // it. Hence its interval is:
+      // [defSlot(def), defSlot(def)+1)
+      DOUT << " dead";
+      end = getDefIndex(start) + 1;
+      goto exit;
+    }
+  }
+  
+  // The only case we should have a dead physreg here without a killing
+  // instruction, or any other point where we know it is dead, is if it is
+  // live-in to the function and never used.
+  assert(!SrcReg && "physreg was not killed in defining block!");
+  end = getDefIndex(start) + 1;  // It's dead.
+
+exit:
+  assert(start < end && "did not find end of interval?");
+
+  // Already exists? Extend old live interval.
+  LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
+  unsigned Id = (OldLR != interval.end())
+    ? OldLR->ValId
+    : interval.getNextValue(SrcReg != 0 ? start : ~0U, SrcReg);
+  LiveRange LR(start, end, Id);
+  interval.addRange(LR);
+  DOUT << " +" << LR << '\n';
+}
+
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+                                      MachineBasicBlock::iterator MI,
+                                      unsigned MIIdx,
+                                      unsigned reg) {
+  if (MRegisterInfo::isVirtualRegister(reg))
+    handleVirtualRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg));
+  else if (allocatableRegs_[reg]) {
+    unsigned SrcReg, DstReg;
+    if (!tii_->isMoveInstr(*MI, SrcReg, DstReg))
+      SrcReg = 0;
+    handlePhysicalRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg), SrcReg);
+    // Def of a register also defines its sub-registers.
+    for (const unsigned* AS = mri_->getSubRegisters(reg); *AS; ++AS)
+      // Avoid processing some defs more than once.
+      if (!MI->findRegisterDefOperand(*AS))
+        handlePhysicalRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(*AS), 0);
+  }
+}
+
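+/// handleLiveInRegister - Create an interval for a register that is live into
+/// MBB.  The range starts at MIIdx (the start of the block) and extends to
+/// the first kill or redefinition in the block; if the register is never
+/// touched it is treated as live through the block (or dead, for aliases).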
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+                                         unsigned MIIdx,
+                                         LiveInterval &interval, bool isAlias) {
+  DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg));
+
+  // Look for kills; if the register reaches a def before it is killed, then
+  // it shouldn't be considered a live-in.
+  MachineBasicBlock::iterator mi = MBB->begin();
+  unsigned baseIndex = MIIdx;
+  unsigned start = baseIndex;
+  unsigned end = start;
+  while (mi != MBB->end()) {
+    if (lv_->KillsRegister(mi, interval.reg)) {
+      DOUT << " killed";
+      end = getUseIndex(baseIndex) + 1;
+      goto exit;
+    } else if (lv_->ModifiesRegister(mi, interval.reg)) {
+      // Another instruction redefines the register before it is ever read.
+      // Then the register is essentially dead at the instruction that defines
+      // it. Hence its interval is:
+      // [defSlot(def), defSlot(def)+1)
+      DOUT << " dead";
+      end = getDefIndex(start) + 1;
+      goto exit;
+    }
+
+    baseIndex += InstrSlots::NUM;
+    ++mi;
+  }
+
+exit:
+  // Live-in register might not be used at all.
+  if (end == MIIdx) {
+    if (isAlias) {
+      DOUT << " dead";
+      end = getDefIndex(MIIdx) + 1;
+    } else {
+      DOUT << " live through";
+      end = baseIndex;
+    }
+  }
+
+  LiveRange LR(start, end, interval.getNextValue(~0U, 0));
+  DOUT << " +" << LR << '\n';
+  interval.addRange(LR);
+}
+
+/// computeIntervals - Compute the live intervals for virtual registers.  For
+/// some ordering of the machine instructions [1,N], a live interval is an
+/// interval [i, j), where 1 <= i <= j < N, for which a variable is live.
+void LiveIntervals::computeIntervals() {
+  DOUT << "********** COMPUTING LIVE INTERVALS **********\n"
+       << "********** Function: "
+       << ((Value*)mf_->getFunction())->getName() << '\n';
+  // Track the index of the current machine instr.
+  unsigned MIIndex = 0;
+  for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = MBBI;
+    DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+    MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+
+    if (MBB->livein_begin() != MBB->livein_end()) {
+      // Create intervals for live-ins to this BB first.
+      for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(),
+             LE = MBB->livein_end(); LI != LE; ++LI) {
+        handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+        // Multiple live-ins can alias the same register.
+        for (const unsigned* AS = mri_->getSubRegisters(*LI); *AS; ++AS)
+          if (!hasInterval(*AS))
+            handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+                                 true);
+      }
+    }
+    
+    for (; MI != miEnd; ++MI) {
+      DOUT << MIIndex << "\t" << *MI;
+
+      // Handle defs.
+      for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+        MachineOperand &MO = MI->getOperand(i);
+        // handle register defs - build intervals
+        if (MO.isRegister() && MO.getReg() && MO.isDef())
+          handleRegisterDef(MBB, MI, MIIndex, MO.getReg());
+      }
+      
+      MIIndex += InstrSlots::NUM;
+    }
+  }
+}
+
+LiveInterval LiveIntervals::createInterval(unsigned reg) {
+  float Weight = MRegisterInfo::isPhysicalRegister(reg) ?
+                       HUGE_VALF : 0.0F;
+  return LiveInterval(reg, Weight);
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..504b607
--- /dev/null
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,643 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass.  For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form.  This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function.  It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block).  If a physical
+// register is not register allocatable, it is not tracked.  This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/alloca.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+
+void LiveVariables::VarInfo::dump() const {
+  cerr << "Register Defined by: ";
+  if (DefInst) 
+    cerr << *DefInst;
+  else
+    cerr << "<null>\n";
+  cerr << "  Alive in blocks: ";
+  for (unsigned i = 0, e = AliveBlocks.size(); i != e; ++i)
+    if (AliveBlocks[i]) cerr << i << ", ";
+  cerr << "\n  Killed by:";
+  if (Kills.empty())
+    cerr << " No instructions.\n";
+  else {
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+      cerr << "\n    #" << i << ": " << *Kills[i];
+    cerr << "\n";
+  }
+}
+
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+  assert(MRegisterInfo::isVirtualRegister(RegIdx) &&
+         "getVarInfo: not a virtual register!");
+  RegIdx -= MRegisterInfo::FirstVirtualRegister;
+  if (RegIdx >= VirtRegInfo.size()) {
+    if (RegIdx >= 2*VirtRegInfo.size())
+      VirtRegInfo.resize(RegIdx*2);
+    else
+      VirtRegInfo.resize(2*VirtRegInfo.size());
+  }
+  VarInfo &VI = VirtRegInfo[RegIdx];
+  VI.AliveBlocks.resize(MF->getNumBlockIDs());
+  return VI;
+}
+
+bool LiveVariables::KillsRegister(MachineInstr *MI, unsigned Reg) const {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isKill()) {
+      if ((MO.getReg() == Reg) ||
+          (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+           MRegisterInfo::isPhysicalRegister(Reg) &&
+           RegInfo->isSubRegister(MO.getReg(), Reg)))
+        return true;
+    }
+  }
+  return false;
+}
+
+bool LiveVariables::RegisterDefIsDead(MachineInstr *MI, unsigned Reg) const {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDead()) {
+      if ((MO.getReg() == Reg) ||
+          (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+           MRegisterInfo::isPhysicalRegister(Reg) &&
+           RegInfo->isSubRegister(MO.getReg(), Reg)))
+        return true;
+    }
+  }
+  return false;
+}
+
+bool LiveVariables::ModifiesRegister(MachineInstr *MI, unsigned Reg) const {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
+      return true;
+  }
+  return false;
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+                                            MachineBasicBlock *MBB,
+                                    std::vector<MachineBasicBlock*> &WorkList) {
+  unsigned BBNum = MBB->getNumber();
+
+  // Check to see if this basic block is one of the killing blocks.  If so,
+  // remove it...
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    if (VRInfo.Kills[i]->getParent() == MBB) {
+      VRInfo.Kills.erase(VRInfo.Kills.begin()+i);  // Erase entry
+      break;
+    }
+
+  if (MBB == VRInfo.DefInst->getParent()) return;  // Terminate recursion
+
+  if (VRInfo.AliveBlocks[BBNum])
+    return;  // We already know the block is live
+
+  // Mark the variable known alive in this bb
+  VRInfo.AliveBlocks[BBNum] = true;
+
+  for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+         E = MBB->pred_rend(); PI != E; ++PI)
+    WorkList.push_back(*PI);
+}
+
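+/// MarkVirtRegAliveInBlock - Worklist-driven wrapper around the helper above:
+/// it marks the register alive in MBB and then in all predecessor blocks
+/// reached before the defining block, using an explicit worklist instead of
+/// recursion.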
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+                                            MachineBasicBlock *MBB) {
+  std::vector<MachineBasicBlock*> WorkList;
+  MarkVirtRegAliveInBlock(VRInfo, MBB, WorkList);
+  while (!WorkList.empty()) {
+    MachineBasicBlock *Pred = WorkList.back();
+    WorkList.pop_back();
+    MarkVirtRegAliveInBlock(VRInfo, Pred, WorkList);
+  }
+}
+
+
+void LiveVariables::HandleVirtRegUse(VarInfo &VRInfo, MachineBasicBlock *MBB,
+                                     MachineInstr *MI) {
+  assert(VRInfo.DefInst && "Register use before def!");
+
+  VRInfo.NumUses++;
+
+  // Check to see if this basic block is already a kill block...
+  if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+    // Yes, this register is killed in this basic block already.  Increase the
+    // live range by updating the kill instruction.
+    VRInfo.Kills.back() = MI;
+    return;
+  }
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+  assert(MBB != VRInfo.DefInst->getParent() &&
+         "Should have kill for defblock!");
+
+  // Add a new kill entry for this basic block.
+  // If this virtual register is already marked as alive in this basic block,
+  // that means it is alive in at least one of the successor blocks, so it is
+  // not a kill.
+  if (!VRInfo.AliveBlocks[MBB->getNumber()])
+    VRInfo.Kills.push_back(MI);
+
+  // Update all dominating blocks to mark them known live.
+  for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+         E = MBB->pred_end(); PI != E; ++PI)
+    MarkVirtRegAliveInBlock(VRInfo, *PI);
+}
+
+bool LiveVariables::addRegisterKilled(unsigned IncomingReg, MachineInstr *MI,
+                                      bool AddIfNotFound) {
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isUse()) {
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (Reg == IncomingReg) {
+        MO.setIsKill();
+        Found = true;
+        break;
+      } else if (MRegisterInfo::isPhysicalRegister(Reg) &&
+                 MRegisterInfo::isPhysicalRegister(IncomingReg) &&
+                 RegInfo->isSuperRegister(IncomingReg, Reg) &&
+                 MO.isKill())
+        // A super-register kill already exists.
+        return true;
+    }
+  }
+
+  // If not found, this means an alias of one of the operands is killed. Add a
+  // new implicit operand if required.
+  if (!Found && AddIfNotFound) {
+    MI->addRegOperand(IncomingReg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/);
+    return true;
+  }
+  return Found;
+}
+
+bool LiveVariables::addRegisterDead(unsigned IncomingReg, MachineInstr *MI,
+                                    bool AddIfNotFound) {
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef()) {
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (Reg == IncomingReg) {
+        MO.setIsDead();
+        Found = true;
+        break;
+      } else if (MRegisterInfo::isPhysicalRegister(Reg) &&
+                 MRegisterInfo::isPhysicalRegister(IncomingReg) &&
+                 RegInfo->isSuperRegister(IncomingReg, Reg) &&
+                 MO.isDead())
+        // There exists a super-register that's marked dead.
+        return true;
+    }
+  }
+
+  // If not found, this means an alias of one of the operands is dead. Add a
+  // new implicit operand.
+  if (!Found && AddIfNotFound) {
+    MI->addRegOperand(IncomingReg, true/*IsDef*/,true/*IsImp*/,false/*IsKill*/,
+                      true/*IsDead*/);
+    return true;
+  }
+  return Found;
+}
+
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+  // There is now a proper use; forget about the last partial use.
+  PhysRegPartUse[Reg] = NULL;
+
+  // Turn previous partial def's into read/mod/write.
+  for (unsigned i = 0, e = PhysRegPartDef[Reg].size(); i != e; ++i) {
+    MachineInstr *Def = PhysRegPartDef[Reg][i];
+    // First one is just a def. This means the use is reading some undef bits.
+    if (i != 0)
+      Def->addRegOperand(Reg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/);
+    Def->addRegOperand(Reg, true/*IsDef*/,true/*IsImp*/);
+  }
+  PhysRegPartDef[Reg].clear();
+
+  // There was an earlier def of a super-register. Add implicit def to that MI.
+  // A: EAX = ...
+  // B:     = AX
+  // Add implicit def to A.
+  if (PhysRegInfo[Reg] && !PhysRegUsed[Reg]) {
+    MachineInstr *Def = PhysRegInfo[Reg];
+    if (!Def->findRegisterDefOperand(Reg))
+      Def->addRegOperand(Reg, true/*IsDef*/,true/*IsImp*/);
+  }
+
+  PhysRegInfo[Reg] = MI;
+  PhysRegUsed[Reg] = true;
+
+  for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    PhysRegInfo[SubReg] = MI;
+    PhysRegUsed[SubReg] = true;
+  }
+
+  // Remember the partial uses.
+  for (const unsigned *SuperRegs = RegInfo->getSuperRegisters(Reg);
+       unsigned SuperReg = *SuperRegs; ++SuperRegs)
+    PhysRegPartUse[SuperReg] = MI;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *RefMI,
+                                      SmallSet<unsigned, 4> &SubKills) {
+  for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *LastRef = PhysRegInfo[SubReg];
+    if (LastRef != RefMI)
+      SubKills.insert(SubReg);
+    else if (!HandlePhysRegKill(SubReg, RefMI, SubKills))
+      SubKills.insert(SubReg);
+  }
+
+  if (*RegInfo->getImmediateSubRegisters(Reg) == 0) {
+    // No sub-registers, just check if reg is killed by RefMI.
+    if (PhysRegInfo[Reg] == RefMI)
+      return true;
+  } else if (SubKills.empty())
+    // None of the sub-registers are killed elsewhere...
+    return true;
+  return false;
+}
+
+void LiveVariables::addRegisterKills(unsigned Reg, MachineInstr *MI,
+                                     SmallSet<unsigned, 4> &SubKills) {
+  if (SubKills.count(Reg) == 0)
+    addRegisterKilled(Reg, MI, true);
+  else {
+    for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs)
+      addRegisterKills(SubReg, MI, SubKills);
+  }
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *RefMI) {
+  SmallSet<unsigned, 4> SubKills;
+  if (HandlePhysRegKill(Reg, RefMI, SubKills)) {
+    addRegisterKilled(Reg, RefMI);
+    return true;
+  } else {
+    // Some sub-registers are killed by another MI.
+    for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs)
+      addRegisterKills(SubReg, RefMI, SubKills);
+    return false;
+  }
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
+  // Does this kill a previous version of this register?
+  if (MachineInstr *LastRef = PhysRegInfo[Reg]) {
+    if (PhysRegUsed[Reg]) {
+      if (!HandlePhysRegKill(Reg, LastRef)) {
+        if (PhysRegPartUse[Reg])
+          addRegisterKilled(Reg, PhysRegPartUse[Reg], true);
+      }
+    } else if (PhysRegPartUse[Reg])
+      // Add implicit use / kill to last use of a sub-register.
+      addRegisterKilled(Reg, PhysRegPartUse[Reg], true);
+    else
+      addRegisterDead(Reg, LastRef);
+  }
+
+  for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    if (MachineInstr *LastRef = PhysRegInfo[SubReg]) {
+      if (PhysRegUsed[SubReg]) {
+        if (!HandlePhysRegKill(SubReg, LastRef)) {
+          if (PhysRegPartUse[SubReg])
+            addRegisterKilled(SubReg, PhysRegPartUse[SubReg], true);
+        }
+      } else if (PhysRegPartUse[SubReg])
+        // Add implicit use / kill to last use of a sub-register.
+        addRegisterKilled(SubReg, PhysRegPartUse[SubReg], true);
+      else
+        addRegisterDead(SubReg, LastRef);
+    }
+  }
+
+  if (MI) {
+    for (const unsigned *SuperRegs = RegInfo->getSuperRegisters(Reg);
+         unsigned SuperReg = *SuperRegs; ++SuperRegs) {
+      if (PhysRegInfo[SuperReg]) {
+        // The larger register is previously defined. Now a smaller part is
+        // being re-defined. Treat it as read/mod/write.
+        // EAX =
+        // AX  =        EAX<imp-use,kill>, EAX<imp-def>
+        MI->addRegOperand(SuperReg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/);
+        MI->addRegOperand(SuperReg, true/*IsDef*/,true/*IsImp*/);
+        PhysRegInfo[SuperReg] = MI;
+        PhysRegUsed[SuperReg] = false;
+        PhysRegPartUse[SuperReg] = NULL;
+      } else {
+        // Remember this partial def.
+        PhysRegPartDef[SuperReg].push_back(MI);
+      }
+    }
+
+    PhysRegInfo[Reg] = MI;
+    PhysRegUsed[Reg] = false;
+    PhysRegPartUse[Reg] = NULL;
+    for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      PhysRegInfo[SubReg] = MI;
+      PhysRegUsed[SubReg] = false;
+      PhysRegPartUse[SubReg] = NULL;
+    }
+  }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+  RegInfo = MF->getTarget().getRegisterInfo();
+  assert(RegInfo && "Target doesn't have register information?");
+
+  ReservedRegisters = RegInfo->getReservedRegs(mf);
+
+  unsigned NumRegs = RegInfo->getNumRegs();
+  PhysRegInfo = new MachineInstr*[NumRegs];
+  PhysRegUsed = new bool[NumRegs];
+  PhysRegPartUse = new MachineInstr*[NumRegs];
+  PhysRegPartDef = new SmallVector<MachineInstr*,4>[NumRegs];
+  PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+  std::fill(PhysRegInfo, PhysRegInfo + NumRegs, (MachineInstr*)0);
+  std::fill(PhysRegUsed, PhysRegUsed + NumRegs, false);
+  std::fill(PhysRegPartUse, PhysRegPartUse + NumRegs, (MachineInstr*)0);
+
+  /// Get some space for a respectable number of registers...
+  VirtRegInfo.resize(64);
+
+  analyzePHINodes(mf);
+
+  // Calculate live variable information in depth first order on the CFG of the
+  // function.  This guarantees that we will see the definition of a virtual
+  // register before its uses due to dominance properties of SSA (except for PHI
+  // nodes, which are treated as a special case).
+  //
+  MachineBasicBlock *Entry = MF->begin();
+  SmallPtrSet<MachineBasicBlock*,16> Visited;
+  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    MachineBasicBlock *MBB = *DFI;
+
+    // Mark live-in registers as live-in.
+    for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
+           EE = MBB->livein_end(); II != EE; ++II) {
+      assert(MRegisterInfo::isPhysicalRegister(*II) &&
+             "Cannot have a live-in virtual register!");
+      HandlePhysRegDef(*II, 0);
+    }
+
+    // Loop over all of the instructions, processing them.
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      MachineInstr *MI = I;
+
+      // Process all of the operands of the instruction...
+      unsigned NumOperandsToProcess = MI->getNumOperands();
+
+      // Unless it is a PHI node.  In this case, ONLY process the DEF, not any
+      // of the uses.  They will be handled in other basic blocks.
+      if (MI->getOpcode() == TargetInstrInfo::PHI)
+        NumOperandsToProcess = 1;
+
+      // Process all uses...
+      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.isRegister() && MO.isUse() && MO.getReg()) {
+          if (MRegisterInfo::isVirtualRegister(MO.getReg())){
+            HandleVirtRegUse(getVarInfo(MO.getReg()), MBB, MI);
+          } else if (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+                     !ReservedRegisters[MO.getReg()]) {
+            HandlePhysRegUse(MO.getReg(), MI);
+          }
+        }
+      }
+
+      // Process all defs...
+      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.isRegister() && MO.isDef() && MO.getReg()) {
+          if (MRegisterInfo::isVirtualRegister(MO.getReg())) {
+            VarInfo &VRInfo = getVarInfo(MO.getReg());
+
+            assert(VRInfo.DefInst == 0 && "Variable multiply defined!");
+            VRInfo.DefInst = MI;
+            // Defaults to dead
+            VRInfo.Kills.push_back(MI);
+          } else if (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+                     !ReservedRegisters[MO.getReg()]) {
+            HandlePhysRegDef(MO.getReg(), MI);
+          }
+        }
+      }
+    }
+
+    // Handle any virtual assignments from PHI nodes which might be at the
+    // bottom of this basic block.  We check all of our successor blocks to see
+    // if they have PHI nodes, and if so, we simulate an assignment at the end
+    // of the current block.
+    if (!PHIVarInfo[MBB->getNumber()].empty()) {
+      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+             E = VarInfoVec.end(); I != E; ++I) {
+        VarInfo& VRInfo = getVarInfo(*I);
+        assert(VRInfo.DefInst && "Register use before def (or no def)!");
+
+        // Only mark it alive in the block we are representing.
+        MarkVirtRegAliveInBlock(VRInfo, MBB);
+      }
+    }
+
+    // Finally, if the last instruction in the block is a return, make sure to
+    // mark it as using all of the live-out values in the function.
+    if (!MBB->empty() && TII.isReturn(MBB->back().getOpcode())) {
+      MachineInstr *Ret = &MBB->back();
+      for (MachineFunction::liveout_iterator I = MF->liveout_begin(),
+             E = MF->liveout_end(); I != E; ++I) {
+        assert(MRegisterInfo::isPhysicalRegister(*I) &&
+               "Cannot have a live-in virtual register!");
+        HandlePhysRegUse(*I, Ret);
+        // Add live-out registers as implicit uses.
+        if (Ret->findRegisterUseOperandIdx(*I) == -1)
+          Ret->addRegOperand(*I, false, true);
+      }
+    }
+
+    // Loop over PhysRegInfo, killing any registers that are available at the
+    // end of the basic block.  This also resets the PhysRegInfo map.
+    for (unsigned i = 0; i != NumRegs; ++i)
+      if (PhysRegInfo[i])
+        HandlePhysRegDef(i, 0);
+
+    // Clear some state between BBs; this is purely local information.
+    for (unsigned i = 0; i != NumRegs; ++i)
+      PhysRegPartDef[i].clear();
+    std::fill(PhysRegInfo, PhysRegInfo + NumRegs, (MachineInstr*)0);
+    std::fill(PhysRegUsed, PhysRegUsed + NumRegs, false);
+    std::fill(PhysRegPartUse, PhysRegPartUse + NumRegs, (MachineInstr*)0);
+  }
+
+  // Convert and transfer the dead / killed information we have gathered into
+  // VirtRegInfo onto MI's.
+  //
+  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+    for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j) {
+      if (VirtRegInfo[i].Kills[j] == VirtRegInfo[i].DefInst)
+        addRegisterDead(i + MRegisterInfo::FirstVirtualRegister,
+                        VirtRegInfo[i].Kills[j]);
+      else
+        addRegisterKilled(i + MRegisterInfo::FirstVirtualRegister,
+                          VirtRegInfo[i].Kills[j]);
+    }
+
+  // Check to make sure there are no unreachable blocks in the MC CFG for the
+  // function.  If there are, it is due to a bug in the instruction selector
+  // or some other part of the code generator.
+#ifndef NDEBUG
+  for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+  delete[] PhysRegInfo;
+  delete[] PhysRegUsed;
+  delete[] PhysRegPartUse;
+  delete[] PhysRegPartDef;
+  delete[] PHIVarInfo;
+
+  return false;
+}
+
+/// instructionChanged - When the address of an instruction changes, this
+/// method should be called so that live variable analysis can update its
+/// internal data structures.  This removes the records for OldMI,
+/// transferring them to the records for NewMI.
+void LiveVariables::instructionChanged(MachineInstr *OldMI,
+                                       MachineInstr *NewMI) {
+  // If the instruction defines any virtual registers, update the VarInfo,
+  // kill and dead information for the instruction.
+  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = OldMI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() &&
+        MRegisterInfo::isVirtualRegister(MO.getReg())) {
+      unsigned Reg = MO.getReg();
+      VarInfo &VI = getVarInfo(Reg);
+      if (MO.isDef()) {
+        if (MO.isDead()) {
+          MO.unsetIsDead();
+          addVirtualRegisterDead(Reg, NewMI);
+        }
+        // Update the defining instruction.
+        if (VI.DefInst == OldMI)
+          VI.DefInst = NewMI;
+      }
+      if (MO.isUse()) {
+        if (MO.isKill()) {
+          MO.unsetIsKill();
+          addVirtualRegisterKilled(Reg, NewMI);
+        }
+        // If this is a kill of the value, update the VI kills list.
+        if (VI.removeKill(OldMI))
+          VI.Kills.push_back(NewMI);   // Yes, there was a kill of it
+      }
+    }
+  }
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isKill()) {
+      MO.unsetIsKill();
+      unsigned Reg = MO.getReg();
+      if (MRegisterInfo::isVirtualRegister(Reg)) {
+        bool removed = getVarInfo(Reg).removeKill(MI);
+        assert(removed && "kill not in register's VarInfo?");
+      }
+    }
+  }
+}
+
+/// removeVirtualRegistersDead - Remove all of the dead registers for the
+/// specified instruction from the live variable information.
+void LiveVariables::removeVirtualRegistersDead(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDead()) {
+      MO.unsetIsDead();
+      unsigned Reg = MO.getReg();
+      if (MRegisterInfo::isVirtualRegister(Reg)) {
+        bool removed = getVarInfo(Reg).removeKill(MI);
+        assert(removed && "kill not in register's VarInfo?");
+      }
+    }
+  }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function.
+/// In particular, we want to map the variable information of a virtual
+/// register which is used in a PHI node. We map that to the BB the vreg is
+/// coming from.
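+/// PHI machine instructions list their incoming values as (value, predecessor
+/// MBB) operand pairs starting at operand 1, which is why the loop below
+/// steps by two and reads the predecessor block from operand i+1.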
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+  for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        PHIVarInfo[BBI->getOperand(i + 1).getMachineBasicBlock()->getNumber()].
+          push_back(BBI->getOperand(i).getReg());
+}
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp
new file mode 100644
index 0000000..36060e1
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.cpp
@@ -0,0 +1,945 @@
+//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent Mach-O writer.  This file writes
+// out the Mach-O file in the following order:
+//
+//  #1 FatHeader (universal-only)
+//  #2 FatArch (universal-only, 1 per universal arch)
+//  Per arch:
+//    #3 Header
+//    #4 Load Commands
+//    #5 Sections
+//    #6 Relocations
+//    #7 Symbols
+//    #8 Strings
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Streams.h"
+#include <algorithm>
+using namespace llvm;
+
+/// AddMachOWriter - Concrete function to add the Mach-O writer to the function
+/// pass manager.
+MachineCodeEmitter *llvm::AddMachOWriter(FunctionPassManager &FPM,
+                                         std::ostream &O,
+                                         TargetMachine &TM) {
+  MachOWriter *MOW = new MachOWriter(O, TM);
+  FPM.add(MOW);
+  return &MOW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+//                       MachOCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+  /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code 
+  /// for functions to the Mach-O file.
+  class MachOCodeEmitter : public MachineCodeEmitter {
+    MachOWriter &MOW;
+
+    /// Target machine description.
+    TargetMachine &TM;
+
+    /// is64Bit/isLittleEndian - This information is inferred from the target
+    /// machine directly, indicating what header values and flags to set.
+    bool is64Bit, isLittleEndian;
+
+    /// Relocations - These are the relocations that the function needs, as
+    /// emitted.
+    std::vector<MachineRelocation> Relocations;
+    
+    /// CPLocations - This is a map of constant pool indices to offsets from the
+    /// start of the section for that constant pool index.
+    std::vector<intptr_t> CPLocations;
+
+    /// CPSections - This is a map of constant pool indices to the MachOSection
+    /// containing the constant pool entry for that index.
+    std::vector<unsigned> CPSections;
+
+    /// JTLocations - This is a map of jump table indices to offsets from the
+    /// start of the section for that jump table index.
+    std::vector<intptr_t> JTLocations;
+
+    /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+    /// It is filled in by the StartMachineBasicBlock callback and queried by
+    /// the getMachineBasicBlockAddress callback.
+    std::vector<intptr_t> MBBLocations;
+    
+  public:
+    MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) {
+      is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+      isLittleEndian = TM.getTargetData()->isLittleEndian();
+    }
+
+    virtual void startFunction(MachineFunction &MF);
+    virtual bool finishFunction(MachineFunction &MF);
+
+    virtual void addRelocation(const MachineRelocation &MR) {
+      Relocations.push_back(MR);
+    }
+    
+    void emitConstantPool(MachineConstantPool *MCP);
+    void emitJumpTables(MachineJumpTableInfo *MJTI);
+    
+    virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const {
+      assert(CPLocations.size() > Index && "CP not emitted!");
+      return CPLocations[Index];
+    }
+    virtual intptr_t getJumpTableEntryAddress(unsigned Index) const {
+      assert(JTLocations.size() > Index && "JT not emitted!");
+      return JTLocations[Index];
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+      if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+        MBBLocations.resize((MBB->getNumber()+1)*2);
+      MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+    }
+
+    virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+      assert(MBBLocations.size() > (unsigned)MBB->getNumber() && 
+             MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+      return MBBLocations[MBB->getNumber()];
+    }
+
+    /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+    virtual void startFunctionStub(unsigned StubSize, unsigned Alignment = 1) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    virtual void *finishFunctionStub(const Function *F) {
+      assert(0 && "JIT specific function called!");
+      abort();
+      return 0;
+    }
+  };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void MachOCodeEmitter::startFunction(MachineFunction &MF) {
+  const TargetData *TD = TM.getTargetData();
+  const Function *F = MF.getFunction();
+
+  // Align the output buffer to the appropriate alignment, power of 2.
+  unsigned FnAlign = F->getAlignment();
+  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+  unsigned MaxAlign = std::max(FnAlign, TDAlign);
+  assert(!(MaxAlign & (MaxAlign-1)) && "Alignment is not a power of two!");
+  unsigned Align = Log2_32(MaxAlign);
+
+  // Get the Mach-O Section that this function belongs in.
+  MachOWriter::MachOSection *MOS = MOW.getTextSection();
+  
+  // FIXME: better memory management
+  MOS->SectionData.reserve(4096);
+  BufferBegin = &MOS->SectionData[0];
+  BufferEnd = BufferBegin + MOS->SectionData.capacity();
+
+  // Upgrade the section alignment if required.
+  if (MOS->align < Align) MOS->align = Align;
+
+  // Round the size up to the correct alignment for starting the new function.
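+  // For example, with Align == 4 (a 16-byte boundary) a size of 0x103 becomes
+  // 0x113 after the bump and is then masked down to 0x110.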
+  if ((MOS->size & ((1 << Align) - 1)) != 0) {
+    MOS->size += (1 << Align);
+    MOS->size &= ~((1 << Align) - 1);
+  }
+
+  // FIXME: Using MOS->size directly here instead of calculating it from the
+  // output buffer size (impossible because the code emitter deals only in raw
+  // bytes) forces us to manually synchronize size and write padding zero bytes
+  // to the output buffer for all non-text sections.  For text sections, we do
+  // not synchronize the output buffer, and we just blow up if anyone tries to
+  // write non-code to it.  An assert should probably be added to
+  // AddSymbolToSection to prevent calling it on the text section.
+  CurBufferPtr = BufferBegin + MOS->size;
+
+  // Clear per-function data structures.
+  CPLocations.clear();
+  CPSections.clear();
+  JTLocations.clear();
+  MBBLocations.clear();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
+  // Get the Mach-O Section that this function belongs in.
+  MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+  // Get a symbol for the function to add to the symbol table
+  // FIXME: it seems like we should call something like AddSymbolToSection
+  // in startFunction rather than changing the section size and symbol n_value
+  // here.
+  const GlobalValue *FuncV = MF.getFunction();
+  MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TM);
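+  // n_value is the function's start offset within the section; capture it
+  // before bumping the section size past the newly emitted code.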
+  FnSym.n_value = MOS->size;
+  MOS->size = CurBufferPtr - BufferBegin;
+  
+  // Emit constant pool to appropriate section(s)
+  emitConstantPool(MF.getConstantPool());
+
+  // Emit jump tables to appropriate section
+  emitJumpTables(MF.getJumpTableInfo());
+  
+  // If we have emitted any relocations to function-specific objects such as 
+  // basic blocks, constant pool entries, or jump tables, record their
+  // addresses now so that we can rewrite them with the correct addresses
+  // later.
+  for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = Relocations[i];
+    intptr_t Addr;
+
+    if (MR.isBasicBlock()) {
+      Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+      MR.setConstantVal(MOS->Index);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isJumpTableIndex()) {
+      Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+      MR.setConstantVal(MOW.getJumpTableSection()->Index);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isConstantPoolIndex()) {
+      Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+      MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isGlobalValue()) {
+      // FIXME: This should be a set or something that uniques
+      MOW.PendingGlobals.push_back(MR.getGlobalValue());
+    } else {
+      assert(0 && "Unhandled relocation type");
+    }
+    MOS->Relocations.push_back(MR);
+  }
+  Relocations.clear();
+  
+  // Finally, add it to the symtab.
+  MOW.SymbolTable.push_back(FnSym);
+  return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in, allocate space for it, and emit it to the 
+/// Section data buffer.
+void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // FIXME: handle PIC codegen
+  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+  assert(!isPIC && "PIC codegen not yet handled for mach-o jump tables!");
+
+  // Although there is no strict necessity that I am aware of, we will do what
+  // gcc for OS X does and put each constant pool entry in a section of constant
+  // objects of a certain size.  That means that float constants go in the
+  // literal4 section, and double objects go in literal8, etc.
+  //
+  // FIXME: revisit this decision if we ever do the "stick everything into one
+  // "giant object for PIC" optimization.
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    const Type *Ty = CP[i].getType();
+    unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+
+    MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
+    OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+    CPLocations.push_back(Sec->SectionData.size());
+    CPSections.push_back(Sec->Index);
+    
+    // FIXME: remove when we have unified size + output buffer
+    Sec->size += Size;
+
+    // Allocate space in the section for the global.
+    // FIXME: need alignment?
+    // FIXME: share between here and AddSymbolToSection?
+    for (unsigned j = 0; j < Size; ++j)
+      SecDataOut.outbyte(0);
+
+    MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
+                TM.getTargetData(), Sec->Relocations);
+  }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  // FIXME: handle PIC codegen
+  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+  assert(!isPIC && "PIC codegen not yet handled for mach-o jump tables!");
+
+  MachOWriter::MachOSection *Sec = MOW.getJumpTableSection();
+  unsigned TextSecIndex = MOW.getTextSection()->Index;
+  OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+  for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+    // For each jump table, record its offset from the start of the section,
+    // reserve space for the relocations to the MBBs, and add the relocations.
+    const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+    JTLocations.push_back(Sec->SectionData.size());
+    for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+      MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(),
+                                               MBBs[mi]));
+      MR.setResultPointer((void *)JTLocations[i]);
+      MR.setConstantVal(TextSecIndex);
+      Sec->Relocations.push_back(MR);
+      SecDataOut.outaddr(0);
+    }
+  }
+  // FIXME: remove when we have unified size + output buffer
+  Sec->size = Sec->SectionData.size();
+}
+
+//===----------------------------------------------------------------------===//
+//                          MachOWriter Implementation
+//===----------------------------------------------------------------------===//
+
+char MachOWriter::ID = 0;
+MachOWriter::MachOWriter(std::ostream &o, TargetMachine &tm) 
+  : MachineFunctionPass((intptr_t)&ID), O(o), TM(tm) {
+  is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+  isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+  // Create the machine code emitter object for this target.
+  MCE = new MachOCodeEmitter(*this);
+}
+
+MachOWriter::~MachOWriter() {
+  delete MCE;
+}
+
+void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
+  const Type *Ty = GV->getType()->getElementType();
+  unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+  unsigned Align = GV->getAlignment();
+  if (Align == 0)
+    Align = TM.getTargetData()->getPrefTypeAlignment(Ty);
+  
+  // Reserve space in the .bss section for this symbol while maintaining the
+  // desired section alignment, which must be at least as much as required by
+  // this symbol.
+  OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+  if (Align) {
+    uint64_t OrigSize = Sec->size;
+    Align = Log2_32(Align);
+    Sec->align = std::max(unsigned(Sec->align), Align);
+    Sec->size = (Sec->size + (1 << Align) - 1) & ~((1 << Align) - 1);
+    
+    // Add alignment padding to buffer as well.
+    // FIXME: remove when we have unified size + output buffer
+    unsigned AlignedSize = Sec->size - OrigSize;
+    for (unsigned i = 0; i < AlignedSize; ++i)
+      SecDataOut.outbyte(0);
+  }
+  // Globals without external linkage apparently do not go in the symbol table.
+  if (GV->getLinkage() != GlobalValue::InternalLinkage) {
+    MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM);
+    Sym.n_value = Sec->size;
+    SymbolTable.push_back(Sym);
+  }
+
+  // Record the offset of the symbol, and then allocate space for it.
+  // FIXME: remove when we have unified size + output buffer
+  Sec->size += Size;
+  
+  // Now that we know what section the GlobalVariable is going to be emitted
+  // into, update our mappings.
+  // FIXME: We may also need to update this when outputting non-GlobalVariable
+  // GlobalValues such as functions.
+  GVSection[GV] = Sec;
+  GVOffset[GV] = Sec->SectionData.size();
+  
+  // Allocate space in the section for the global.
+  for (unsigned i = 0; i < Size; ++i)
+    SecDataOut.outbyte(0);
+}
+
+void MachOWriter::EmitGlobal(GlobalVariable *GV) {
+  const Type *Ty = GV->getType()->getElementType();
+  unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+  bool NoInit = !GV->hasInitializer();
+  
+  // If this global has a zero initializer, it is part of the .bss or common
+  // section.
+  if (NoInit || GV->getInitializer()->isNullValue()) {
+    // If this global is part of the common block, add it now.  Variables are
+    // part of the common block if they are zero initialized and allowed to be
+    // merged with other symbols.
+    if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) {
+      MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM);
+      // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
+      // bytes of the symbol.
+      ExtOrCommonSym.n_value = Size;
+      SymbolTable.push_back(ExtOrCommonSym);
+      // Remember that we've seen this symbol
+      GVOffset[GV] = Size;
+      return;
+    }
+    // Otherwise, this symbol is part of the .bss section.
+    MachOSection *BSS = getBSSSection();
+    AddSymbolToSection(BSS, GV);
+    return;
+  }
+  
+  // Scalar read-only data goes in a literal section if the scalar is 4, 8, or
+  // 16 bytes, or a cstring.  Other read only data goes into a regular const
+  // section.  Read-write data goes in the data section.
+  MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) : 
+                                         getDataSection();
+  AddSymbolToSection(Sec, GV);
+  InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV],
+          TM.getTargetData(), Sec->Relocations);
+}
+
+
+bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
+  // Nothing to do here, this is all done through the MCE object.
+  return false;
+}
+
+bool MachOWriter::doInitialization(Module &M) {
+  // Set the magic value, now that we know the pointer size and endianness
+  Header.setMagic(isLittleEndian, is64Bit);
+
+  // Set the file type
+  // FIXME: this only works for object files, we do not support the creation
+  //        of dynamic libraries or executables at this time.
+  Header.filetype = MachOHeader::MH_OBJECT;
+
+  Mang = new Mangler(M);
+  return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the Mach-O file to 'O'.
+bool MachOWriter::doFinalization(Module &M) {
+  // FIXME: we don't handle debug info yet, we should probably do that.
+
+  // Okay, the .text section has been completed, build the .data, .bss, and
+  // "common" sections next.
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    EmitGlobal(I);
+  
+  // Emit the header and load commands.
+  EmitHeaderAndLoadCommands();
+
+  // Emit the various sections and their relocation info.
+  EmitSections();
+
+  // Write the symbol table and the string table to the end of the file.
+  O.write((char*)&SymT[0], SymT.size());
+  O.write((char*)&StrT[0], StrT.size());
+
+  // We are done with the abstract symbols.
+  SectionList.clear();
+  SymbolTable.clear();
+  DynamicSymbolTable.clear();
+
+  // Release the name mangler object.
+  delete Mang; Mang = 0;
+  return false;
+}
+
+void MachOWriter::EmitHeaderAndLoadCommands() {
+  // Step #0: Fill in the segment load command size, since we need it to figure
+  //          out the rest of the header fields
+  MachOSegment SEG("", is64Bit);
+  SEG.nsects  = SectionList.size();
+  SEG.cmdsize = SEG.cmdSize(is64Bit) + 
+                SEG.nsects * SectionList[0]->cmdSize(is64Bit);
+  
+  // Step #1: calculate the number of load commands.  We always have at least
+  //          one, for the LC_SEGMENT load command, plus two for the normal
+  //          and dynamic symbol tables, if there are any symbols.
+  Header.ncmds = SymbolTable.empty() ? 1 : 3;
+  
+  // Step #2: calculate the size of the load commands
+  Header.sizeofcmds = SEG.cmdsize;
+  if (!SymbolTable.empty())
+    Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize;
+    
+  // Step #3: write the header to the file
+  // Local alias to shorten the code that follows.
+  DataBuffer &FH = Header.HeaderData;
+  OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+  FHOut.outword(Header.magic);
+  FHOut.outword(TM.getMachOWriterInfo()->getCPUType());
+  FHOut.outword(TM.getMachOWriterInfo()->getCPUSubType());
+  FHOut.outword(Header.filetype);
+  FHOut.outword(Header.ncmds);
+  FHOut.outword(Header.sizeofcmds);
+  FHOut.outword(Header.flags);
+  if (is64Bit)
+    FHOut.outword(Header.reserved);
+  
+  // Step #4: Finish filling in the segment load command and write it out
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I)
+    SEG.filesize += (*I)->size;
+
+  SEG.vmsize = SEG.filesize;
+  SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds;
+  
+  FHOut.outword(SEG.cmd);
+  FHOut.outword(SEG.cmdsize);
+  FHOut.outstring(SEG.segname, 16);
+  FHOut.outaddr(SEG.vmaddr);
+  FHOut.outaddr(SEG.vmsize);
+  FHOut.outaddr(SEG.fileoff);
+  FHOut.outaddr(SEG.filesize);
+  FHOut.outword(SEG.maxprot);
+  FHOut.outword(SEG.initprot);
+  FHOut.outword(SEG.nsects);
+  FHOut.outword(SEG.flags);
+  
+  // Step #5: Finish filling in the fields of the MachOSections 
+  uint64_t currentAddr = 0;
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I) {
+    MachOSection *MOS = *I;
+    MOS->addr = currentAddr;
+    MOS->offset = currentAddr + SEG.fileoff;
+
+    // FIXME: do we need to do something with alignment here?
+    currentAddr += MOS->size;
+  }
+  
+  // Step #6: Emit the symbol table to temporary buffers, so that we know the
+  // size of the string table when we write the next load command.  This also
+  // sorts and assigns indices to each of the symbols, which is necessary for
+  // emitting relocations to externally-defined objects.
+  BufferSymbolAndStringTable();
+  
+  // Step #7: Calculate the number of relocations for each section and write out
+  // the section commands for each section
+  currentAddr += SEG.fileoff;
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I) {
+    MachOSection *MOS = *I;
+    // Convert the relocations to target-specific relocations, and fill in the
+    // relocation offset for this section.
+    CalculateRelocations(*MOS);
+    MOS->reloff = MOS->nreloc ? currentAddr : 0;
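+    // Each Mach-O relocation entry (plain or scattered) is 8 bytes on disk.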
+    currentAddr += MOS->nreloc * 8;
+    
+    // write the finalized section command to the output buffer
+    FHOut.outstring(MOS->sectname, 16);
+    FHOut.outstring(MOS->segname, 16);
+    FHOut.outaddr(MOS->addr);
+    FHOut.outaddr(MOS->size);
+    FHOut.outword(MOS->offset);
+    FHOut.outword(MOS->align);
+    FHOut.outword(MOS->reloff);
+    FHOut.outword(MOS->nreloc);
+    FHOut.outword(MOS->flags);
+    FHOut.outword(MOS->reserved1);
+    FHOut.outword(MOS->reserved2);
+    if (is64Bit)
+      FHOut.outword(MOS->reserved3);
+  }
+  
+  // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands
+  SymTab.symoff  = currentAddr;
+  SymTab.nsyms   = SymbolTable.size();
+  SymTab.stroff  = SymTab.symoff + SymT.size();
+  SymTab.strsize = StrT.size();
+  FHOut.outword(SymTab.cmd);
+  FHOut.outword(SymTab.cmdsize);
+  FHOut.outword(SymTab.symoff);
+  FHOut.outword(SymTab.nsyms);
+  FHOut.outword(SymTab.stroff);
+  FHOut.outword(SymTab.strsize);
+
+  // FIXME: set DySymTab fields appropriately
+  // We should probably just update these in BufferSymbolAndStringTable since
+  // that's where we're partitioning up the different kinds of symbols.
+  FHOut.outword(DySymTab.cmd);
+  FHOut.outword(DySymTab.cmdsize);
+  FHOut.outword(DySymTab.ilocalsym);
+  FHOut.outword(DySymTab.nlocalsym);
+  FHOut.outword(DySymTab.iextdefsym);
+  FHOut.outword(DySymTab.nextdefsym);
+  FHOut.outword(DySymTab.iundefsym);
+  FHOut.outword(DySymTab.nundefsym);
+  FHOut.outword(DySymTab.tocoff);
+  FHOut.outword(DySymTab.ntoc);
+  FHOut.outword(DySymTab.modtaboff);
+  FHOut.outword(DySymTab.nmodtab);
+  FHOut.outword(DySymTab.extrefsymoff);
+  FHOut.outword(DySymTab.nextrefsyms);
+  FHOut.outword(DySymTab.indirectsymoff);
+  FHOut.outword(DySymTab.nindirectsyms);
+  FHOut.outword(DySymTab.extreloff);
+  FHOut.outword(DySymTab.nextrel);
+  FHOut.outword(DySymTab.locreloff);
+  FHOut.outword(DySymTab.nlocrel);
+  
+  O.write((char*)&FH[0], FH.size());
+}
+
+/// EmitSections - Now that we have constructed the file header and load
+/// commands, emit the data for each section to the file.
+void MachOWriter::EmitSections() {
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I)
+    // Emit the contents of each section
+    O.write((char*)&(*I)->SectionData[0], (*I)->size);
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I)
+    // Emit the relocation entry data for each section.
+    O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
+}
+
+/// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+/// a local symbol rather than an external symbol.
+bool MachOWriter::PartitionByLocal(const MachOSym &Sym) {
+  return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
+}
+
+/// PartitionByDefined - Simple boolean predicate that returns true if Sym is
+/// defined in this module.
+bool MachOWriter::PartitionByDefined(const MachOSym &Sym) {
+  // FIXME: Do N_ABS or N_INDR count as defined?
+  return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
+}
+
+/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them
+/// each a string table index so that they appear in the correct order in the
+/// output file.
+void MachOWriter::BufferSymbolAndStringTable() {
+  // The order of the symbol table is:
+  // 1. local symbols
+  // 2. defined external symbols (sorted by name)
+  // 3. undefined external symbols (sorted by name)
+  
+  // Before sorting the symbols, check the PendingGlobals for any undefined
+  // globals that need to be put in the symbol table.
+  for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(),
+         E = PendingGlobals.end(); I != E; ++I) {
+    if (GVOffset[*I] == 0 && GVSection[*I] == 0) {
+      MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM);
+      SymbolTable.push_back(UndfSym);
+      GVOffset[*I] = -1;
+    }
+  }
+  
+  // Sort the symbols by name, so that when we partition the symbols by scope
+  // of definition, we won't have to sort by name within each partition.
+  std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp());
+
+  // Partition the symbol table entries so that all local symbols come before
+  // all symbols with external linkage. { 1 | 2 3 }
+  std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal);
+  
+  // Advance iterator to beginning of external symbols and partition so that
+  // all external symbols defined in this module come before all external
+  // symbols defined elsewhere. { 1 | 2 | 3 }
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I) {
+    if (!PartitionByLocal(*I)) {
+      std::partition(I, E, PartitionByDefined);
+      break;
+    }
+  }
+
+  // Calculate the starting index for each of the local, extern defined, and 
+  // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
+  // load command.
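+  // The i*sym fields are start indices into the sorted symbol table, so every
+  // entry in an earlier partition bumps the start index of the partitions
+  // that follow it.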
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I) {
+    if (PartitionByLocal(*I)) {
+      ++DySymTab.nlocalsym;
+      ++DySymTab.iextdefsym;
+      ++DySymTab.iundefsym;
+    } else if (PartitionByDefined(*I)) {
+      ++DySymTab.nextdefsym;
+      ++DySymTab.iundefsym;
+    } else {
+      ++DySymTab.nundefsym;
+    }
+  }
+  
+  // Write out a leading zero byte when emitting string table, for n_strx == 0
+  // which means an empty string.
+  OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
+  StrTOut.outbyte(0);
+
+  // The order of the string table is:
+  // 1. strings for external symbols
+  // 2. strings for local symbols
+  // Since this is the opposite order from the symbol table, which we have just
+  // sorted, we can walk the symbol table backwards to output the string table.
+  for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
+        E = SymbolTable.rend(); I != E; ++I) {
+    if (I->GVName == "") {
+      I->n_strx = 0;
+    } else {
+      I->n_strx = StrT.size();
+      StrTOut.outstring(I->GVName, I->GVName.length()+1);
+    }
+  }
+
+  OutputBuffer SymTOut(SymT, is64Bit, isLittleEndian);
+
+  unsigned index = 0;
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I, ++index) {
+    // Add the section base address to the section offset in the n_value field
+    // to calculate the full address.
+    // FIXME: handle symbols where the n_value field is not the address
+    GlobalValue *GV = const_cast<GlobalValue*>(I->GV);
+    if (GV && GVSection[GV])
+      I->n_value += GVSection[GV]->addr;
+    if (GV && (GVOffset[GV] == -1))
+      GVOffset[GV] = index;
+         
+    // Emit nlist to buffer
+    SymTOut.outword(I->n_strx);
+    SymTOut.outbyte(I->n_type);
+    SymTOut.outbyte(I->n_sect);
+    SymTOut.outhalf(I->n_desc);
+    SymTOut.outaddr(I->n_value);
+  }
+}
+
+/// CalculateRelocations - For each MachineRelocation in the current section,
+/// calculate the index of the section containing the object to be relocated,
+/// and the offset into that section.  From this information, create the
+/// appropriate target-specific MachORelocation type and add buffer it to be
+/// written out after we are finished writing out sections.
+void MachOWriter::CalculateRelocations(MachOSection &MOS) {
+  for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = MOS.Relocations[i];
+    unsigned TargetSection = MR.getConstantVal();
+    unsigned TargetAddr = 0;
+    unsigned TargetIndex = 0;
+
+    // This is a scattered relocation entry if it points to a global value with
+    // a non-zero offset.
+    bool Scattered = false;
+    bool Extern = false;
+
+    // Since we may not have seen the GlobalValue we were interested in yet at
+    // the time we emitted the relocation for it, fix it up now so that it
+    // points to the offset into the correct section.
+    if (MR.isGlobalValue()) {
+      GlobalValue *GV = MR.getGlobalValue();
+      MachOSection *MOSPtr = GVSection[GV];
+      intptr_t Offset = GVOffset[GV];
+      
+      // If we have never seen the global before, it must be to a symbol
+      // defined in another module (N_UNDF).
+      if (!MOSPtr) {
+        // FIXME: need to append stub suffix
+        Extern = true;
+        TargetAddr = 0;
+        TargetIndex = GVOffset[GV];
+      } else {
+        Scattered = TargetSection != 0;
+        TargetSection = MOSPtr->Index;
+      }
+      MR.setResultPointer((void*)Offset);
+    }
+    
+    // If the symbol is locally defined, pass in the address of the section and
+    // the section index to the code which will generate the target relocation.
+    if (!Extern) {
+      MachOSection &To = *SectionList[TargetSection - 1];
+      TargetAddr = To.addr;
+      TargetIndex = To.Index;
+    }
+
+    OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian);
+    OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian);
+    
+    MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex,
+                                      RelocOut, SecOut, Scattered, Extern);
+  }
+}
+
+// InitMem - Write the value of a Constant to the specified memory location,
+// converting it into bytes and relocations.
+void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
+                          const TargetData *TD, 
+                          std::vector<MachineRelocation> &MRs) {
+  typedef std::pair<const Constant*, intptr_t> CPair;
+  std::vector<CPair> WorkList;
+  
+  WorkList.push_back(CPair(C,(intptr_t)Addr + Offset));
+  
+  intptr_t ScatteredOffset = 0;
+  
+  while (!WorkList.empty()) {
+    const Constant *PC = WorkList.back().first;
+    intptr_t PA = WorkList.back().second;
+    WorkList.pop_back();
+    
+    if (isa<UndefValue>(PC)) {
+      continue;
+    } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) {
+      unsigned ElementSize = TD->getTypeSize(CP->getType()->getElementType());
+      for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+        WorkList.push_back(CPair(CP->getOperand(i), PA+i*ElementSize));
+    } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(PC)) {
+      //
+      // FIXME: Handle ConstantExpression.  See EE::getConstantValue()
+      //
+      switch (CE->getOpcode()) {
+      case Instruction::GetElementPtr: {
+        SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+        ScatteredOffset = TD->getIndexedOffset(CE->getOperand(0)->getType(),
+                                               &Indices[0], Indices.size());
+        WorkList.push_back(CPair(CE->getOperand(0), PA));
+        break;
+      }
+      case Instruction::Add:
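+        // Add of constants is not handled yet; fall through to the error.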
+      default:
+        cerr << "ConstantExpr not handled as global var init: " << *CE << "\n";
+        abort();
+        break;
+      }
+    } else if (PC->getType()->isFirstClassType()) {
+      unsigned char *ptr = (unsigned char *)PA;
+      switch (PC->getType()->getTypeID()) {
+      case Type::IntegerTyID: {
+        unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth();
+        uint64_t val = cast<ConstantInt>(PC)->getZExtValue();
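+        // Bytes are stored low-to-high out of 'val'; byte-swapping first on
+        // big-endian targets makes the emitted bytes land in target order.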
+        if (NumBits <= 8)
+          ptr[0] = val;
+        else if (NumBits <= 16) {
+          if (TD->isBigEndian())
+            val = ByteSwap_16(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+        } else if (NumBits <= 32) {
+          if (TD->isBigEndian())
+            val = ByteSwap_32(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+          ptr[2] = val >> 16;
+          ptr[3] = val >> 24;
+        } else if (NumBits <= 64) {
+          if (TD->isBigEndian())
+            val = ByteSwap_64(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+          ptr[2] = val >> 16;
+          ptr[3] = val >> 24;
+          ptr[4] = val >> 32;
+          ptr[5] = val >> 40;
+          ptr[6] = val >> 48;
+          ptr[7] = val >> 56;
+        } else {
+          assert(0 && "Not implemented: bit widths > 64");
+        }
+        break;
+      }
+      case Type::FloatTyID: {
+        uint64_t val = FloatToBits(cast<ConstantFP>(PC)->getValue());
+        if (TD->isBigEndian())
+          val = ByteSwap_32(val);
+        ptr[0] = val;
+        ptr[1] = val >> 8;
+        ptr[2] = val >> 16;
+        ptr[3] = val >> 24;
+        break;
+      }
+      case Type::DoubleTyID: {
+        uint64_t val = DoubleToBits(cast<ConstantFP>(PC)->getValue());
+        if (TD->isBigEndian())
+          val = ByteSwap_64(val);
+        ptr[0] = val;
+        ptr[1] = val >> 8;
+        ptr[2] = val >> 16;
+        ptr[3] = val >> 24;
+        ptr[4] = val >> 32;
+        ptr[5] = val >> 40;
+        ptr[6] = val >> 48;
+        ptr[7] = val >> 56;
+        break;
+      }
+      case Type::PointerTyID:
+        if (isa<ConstantPointerNull>(PC))
+          memset(ptr, 0, TD->getPointerSize());
+        else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) {
+          // FIXME: what about function stubs?
+          MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr, 
+                                                 MachineRelocation::VANILLA,
+                                                 const_cast<GlobalValue*>(GV),
+                                                 ScatteredOffset));
+          ScatteredOffset = 0;
+        } else
+          assert(0 && "Unknown constant pointer type!");
+        break;
+      default:
+        cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n";
+        abort();
+      }
+    } else if (isa<ConstantAggregateZero>(PC)) {
+      memset((void*)PA, 0, (size_t)TD->getTypeSize(PC->getType()));
+    } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(PC)) {
+      unsigned ElementSize = TD->getTypeSize(CPA->getType()->getElementType());
+      for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+        WorkList.push_back(CPair(CPA->getOperand(i), PA+i*ElementSize));
+    } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(PC)) {
+      const StructLayout *SL =
+        TD->getStructLayout(cast<StructType>(CPS->getType()));
+      for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+        WorkList.push_back(CPair(CPS->getOperand(i),
+                                 PA+SL->getElementOffset(i)));
+    } else {
+      cerr << "Bad Type: " << *PC->getType() << "\n";
+      assert(0 && "Unknown constant type to initialize memory with!");
+    }
+  }
+}
+
+MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+                   TargetMachine &TM) :
+  GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect),
+  n_desc(0), n_value(0) {
+
+  const TargetAsmInfo *TAI = TM.getTargetAsmInfo();  
+  
+  switch (GV->getLinkage()) {
+  default:
+    assert(0 && "Unexpected linkage type!");
+    break;
+  case GlobalValue::WeakLinkage:
+  case GlobalValue::LinkOnceLinkage:
+    assert(!isa<Function>(gv) && "Unexpected linkage type for Function!");
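+    // FALL THROUGH: weak/linkonce globals are otherwise treated as external.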
+  case GlobalValue::ExternalLinkage:
+    GVName = TAI->getGlobalPrefix() + name;
+    n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT;
+    break;
+  case GlobalValue::InternalLinkage:
+    GVName = TAI->getGlobalPrefix() + name;
+    break;
+  }
+}
diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h
new file mode 100644
index 0000000..6d88832
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.h
@@ -0,0 +1,627 @@
+//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHOWRITER_H
+#define MACHOWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachOWriterInfo.h"
+
+namespace llvm {
+  class GlobalVariable;
+  class Mangler;
+  class MachineCodeEmitter;
+  class MachOCodeEmitter;
+  class OutputBuffer;
+
+  /// MachOSym - This struct contains information about each symbol that is
+  /// added to the logical symbol table for the module.  This is eventually
+  /// turned into a real symbol table in the file.
+  struct MachOSym {
+    const GlobalValue *GV;    // The global value this corresponds to.
+    std::string GVName;       // The mangled name of the global value.
+    uint32_t    n_strx;       // index into the string table
+    uint8_t     n_type;       // type flag
+    uint8_t     n_sect;       // section number or NO_SECT
+    int16_t     n_desc;       // see <mach-o/stab.h>
+    uint64_t    n_value;      // value for this symbol (or stab offset)
+    
+    // Constants for the n_sect field
+    // see <mach-o/nlist.h>
+    enum { NO_SECT = 0 };   // symbol is not in any section
+
+    // Constants for the n_type field
+    // see <mach-o/nlist.h>
+    enum { N_UNDF  = 0x0,  // undefined, n_sect == NO_SECT
+           N_ABS   = 0x2,  // absolute, n_sect == NO_SECT
+           N_SECT  = 0xe,  // defined in section number n_sect
+           N_PBUD  = 0xc,  // prebound undefined (defined in a dylib)
+           N_INDR  = 0xa   // indirect
+    };
+    // The following bits are OR'd into the types above. For example, a type
+    // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
+    enum { N_EXT  = 0x01,   // external symbol bit
+           N_PEXT = 0x10    // private external symbol bit
+    };
+    
+    // Constants for the n_desc field
+    // see <mach-o/loader.h>
+    enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY          = 0,
+           REFERENCE_FLAG_UNDEFINED_LAZY              = 1,
+           REFERENCE_FLAG_DEFINED                     = 2,
+           REFERENCE_FLAG_PRIVATE_DEFINED             = 3,
+           REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY  = 4,
+           REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY      = 5
+    };
+    enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
+           N_WEAK_REF      = 0x0040, // symbol is weak referenced
+           N_WEAK_DEF      = 0x0080  // coalesced symbol is a weak definition
+    };
+    
+    MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+             TargetMachine &TM);
+  };
+      
+  /// MachOWriter - This class implements the common target-independent code for
+  /// writing Mach-O files.  Targets should derive a class from this to
+  /// parameterize the output format.
+  ///
+  class MachOWriter : public MachineFunctionPass {
+    friend class MachOCodeEmitter;
+  public:
+    static char ID;
+    MachineCodeEmitter &getMachineCodeEmitter() const {
+      return *(MachineCodeEmitter*)MCE;
+    }
+
+    MachOWriter(std::ostream &O, TargetMachine &TM);
+    virtual ~MachOWriter();
+
+    virtual const char *getPassName() const {
+      return "Mach-O Writer";
+    }
+
+    typedef std::vector<unsigned char> DataBuffer;
+  protected:
+    /// Output stream to send the resultant object file to.
+    ///
+    std::ostream &O;
+
+    /// Target machine description.
+    ///
+    TargetMachine &TM;
+
+    /// Mang - The object used to perform name mangling for this module.
+    ///
+    Mangler *Mang;
+    
+    /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+    /// code for functions to the .o file.
+    MachOCodeEmitter *MCE;
+
+    /// is64Bit/isLittleEndian - This information is inferred from the target
+    /// machine directly, indicating what header values and flags to set.
+    bool is64Bit, isLittleEndian;
+
+    /// doInitialization - Emit the file header and all of the global variables
+    /// for the module to the Mach-O file.
+    bool doInitialization(Module &M);
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    /// doFinalization - Now that the module has been completely processed, emit
+    /// the Mach-O file to 'O'.
+    bool doFinalization(Module &M);
+
+    /// MachOHeader - This struct contains the header information about a
+    /// specific architecture type/subtype pair that is emitted to the file.
+    struct MachOHeader {
+      uint32_t  magic;      // mach magic number identifier
+      uint32_t  filetype;   // type of file
+      uint32_t  ncmds;      // number of load commands
+      uint32_t  sizeofcmds; // the size of all the load commands
+      uint32_t  flags;      // flags
+      uint32_t  reserved;   // 64-bit only
+      
+      /// HeaderData - The actual data for the header which we are building
+      /// up for emission to the file.
+      DataBuffer HeaderData;
+
+      // Constants for the filetype field
+      // see <mach-o/loader.h> for additional info on the various types
+      enum { MH_OBJECT     = 1, // relocatable object file
+             MH_EXECUTE    = 2, // demand paged executable file
+             MH_FVMLIB     = 3, // fixed VM shared library file
+             MH_CORE       = 4, // core file
+             MH_PRELOAD    = 5, // preloaded executable file
+             MH_DYLIB      = 6, // dynamically bound shared library
+             MH_DYLINKER   = 7, // dynamic link editor
+             MH_BUNDLE     = 8, // dynamically bound bundle file
+             MH_DYLIB_STUB = 9, // shared library stub for static linking only
+             MH_DSYM       = 10 // companion file with only debug sections
+      };
+      
+      // Constants for the flags field
+      enum { MH_NOUNDEFS                = 1 << 0,
+                // the object file has no undefined references
+             MH_INCRLINK                = 1 << 1,
+                // the object file is the output of an incremental link against
+                // a base file and cannot be link edited again
+             MH_DYLDLINK                = 1 << 2,
+                // the object file is input for the dynamic linker and cannot be
+                // statically link edited again.
+             MH_BINDATLOAD              = 1 << 3,
+                // the object file's undefined references are bound by the
+                // dynamic linker when loaded.
+             MH_PREBOUND                = 1 << 4,
+                // the file has its dynamic undefined references prebound
+             MH_SPLIT_SEGS              = 1 << 5,
+                // the file has its read-only and read-write segments split
+                // see <mach/shared_memory_server.h>
+             MH_LAZY_INIT               = 1 << 6,
+                // the shared library init routine is to be run lazily via
+                // catching memory faults to its writable segments (obsolete)
+             MH_TWOLEVEL                = 1 << 7,
+                // the image is using two-level namespace bindings
+             MH_FORCE_FLAT              = 1 << 8,
+                // the executable is forcing all images to use flat namespace
+                // bindings.
+             MH_NOMULTIDEFS             = 1 << 9,
+                // this umbrella guarantees no multiple definitions of symbols
+                // in its sub-images so the two-level namespace hints can
+                // always be used.
+             MH_NOFIXPREBINDING         = 1 << 10,
+                // do not have dyld notify the prebinding agent about this
+                // executable.
+             MH_PREBINDABLE             = 1 << 11,
+                // the binary is not prebound but can have its prebinding
+                // redone.  only used when MH_PREBOUND is not set.
+             MH_ALLMODSBOUND            = 1 << 12,
+                // indicates that this binary binds to all two-level namespace
+                // modules of its dependent libraries.  Only used when
+                // MH_PREBINDABLE and MH_TWOLEVEL are both set.
+             MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
+                // safe to divide up the sections into sub-sections via symbols
+                // for dead code stripping.
+             MH_CANONICAL               = 1 << 14,
+                // the binary has been canonicalized via the unprebind operation
+             MH_WEAK_DEFINES            = 1 << 15,
+                // the final linked image contains external weak symbols
+             MH_BINDS_TO_WEAK           = 1 << 16,
+                // the final linked image uses weak symbols
+             MH_ALLOW_STACK_EXECUTION   = 1 << 17
+                // When this bit is set, all stacks in the task will be given
+                // stack execution privilege.  Only used in MH_EXECUTE filetype
+      };
+
+      MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
+                      reserved(0) { }
+      
+      /// cmdSize - This routine returns the size of the MachOSection as written
+      /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+      unsigned cmdSize(bool is64Bit) const {
+        if (is64Bit)
+          return 8 * sizeof(uint32_t);
+        else
+          return 7 * sizeof(uint32_t);
+      }
+
+      /// setMagic - This routine sets the appropriate value for the 'magic'
+      /// field based on pointer size and endianness.
+      void setMagic(bool isLittleEndian, bool is64Bit) {
+        if (isLittleEndian)
+          if (is64Bit) magic = 0xcffaedfe;
+          else         magic = 0xcefaedfe;
+        else
+          if (is64Bit) magic = 0xfeedfacf;
+          else         magic = 0xfeedface;
+      }
+    };
+    
+    /// Header - An instance of MachOHeader that we will update while we build
+    /// the file, and then emit during finalization.
+    MachOHeader Header;
+    
+    /// MachOSegment - This struct contains the necessary information to
+    /// emit the load commands for each section in the file.
+    struct MachOSegment {
+      uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
+      uint32_t    cmdsize;  // Total size of this struct and section commands
+      std::string segname;  // segment name
+      uint64_t    vmaddr;   // address of this segment
+      uint64_t    vmsize;   // size of this segment, may be larger than filesize
+      uint64_t    fileoff;  // offset in file
+      uint64_t    filesize; // amount to read from file
+      uint32_t    maxprot;  // maximum VM protection
+      uint32_t    initprot; // initial VM protection
+      uint32_t    nsects;   // number of sections in this segment
+      uint32_t    flags;    // flags
+      
+      // The following constants are getting pulled in by one of the
+      // system headers, which creates a neat clash with the enum.
+#if !defined(VM_PROT_NONE)
+#define VM_PROT_NONE    0x00
+#endif
+#if !defined(VM_PROT_READ)
+#define VM_PROT_READ    0x01
+#endif
+#if !defined(VM_PROT_WRITE)
+#define VM_PROT_WRITE   0x02
+#endif
+#if !defined(VM_PROT_EXECUTE)
+#define VM_PROT_EXECUTE 0x04
+#endif
+#if !defined(VM_PROT_ALL)
+#define VM_PROT_ALL     0x07
+#endif
+
+      // Constants for the vm protection fields
+      // see <mach-o/vm_prot.h>
+      enum { SEG_VM_PROT_NONE     = VM_PROT_NONE, 
+             SEG_VM_PROT_READ     = VM_PROT_READ, // read permission
+             SEG_VM_PROT_WRITE    = VM_PROT_WRITE, // write permission
+             SEG_VM_PROT_EXECUTE  = VM_PROT_EXECUTE,
+             SEG_VM_PROT_ALL      = VM_PROT_ALL
+      };
+      
+      // Constants for the cmd field
+      // see <mach-o/loader.h>
+      enum { LC_SEGMENT    = 0x01,  // segment of this file to be mapped
+             LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
+      };
+      
+      /// cmdSize - This routine returns the size of the MachOSection as written
+      /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+      unsigned cmdSize(bool is64Bit) const {
+        if (is64Bit)
+          return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
+        else
+          return 10 * sizeof(uint32_t) + 16;  // addresses only 32 bits
+      }
+
+      MachOSegment(const std::string &seg, bool is64Bit)
+        : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
+          vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
+          initprot(VM_PROT_ALL), nsects(0), flags(0) { }
+    };
+
+    /// MachOSection - This struct contains information about each section in a 
+    /// particular segment that is emitted to the file.  This is eventually
+    /// turned into the SectionCommand in the load command for a particular
+    /// segment.
+    struct MachOSection { 
+      std::string  sectname; // name of this section, 
+      std::string  segname;  // segment this section goes in
+      uint64_t  addr;        // memory address of this section
+      uint64_t  size;        // size in bytes of this section
+      uint32_t  offset;      // file offset of this section
+      uint32_t  align;       // section alignment (power of 2)
+      uint32_t  reloff;      // file offset of relocation entries
+      uint32_t  nreloc;      // number of relocation entries
+      uint32_t  flags;       // flags (section type and attributes)
+      uint32_t  reserved1;   // reserved (for offset or index)
+      uint32_t  reserved2;   // reserved (for count or sizeof)
+      uint32_t  reserved3;   // reserved (64 bit only)
+      
+      /// A unique number for this section, which will be used to match symbols
+      /// to the correct section.
+      uint32_t Index;
+      
+      /// SectionData - The actual data for this section which we are building
+      /// up for emission to the file.
+      DataBuffer SectionData;
+
+      /// RelocBuffer - A buffer to hold the mach-o relocations before we write
+      /// them out at the appropriate location in the file.
+      DataBuffer RelocBuffer;
+      
+      /// Relocations - The relocations that we have encountered so far in this 
+      /// section that we will need to convert to MachORelocation entries when
+      /// the file is written.
+      std::vector<MachineRelocation> Relocations;
+      
+      // Constants for the section types (low 8 bits of flags field)
+      // see <mach-o/loader.h>
+      enum { S_REGULAR = 0,
+                // regular section
+             S_ZEROFILL = 1,
+                // zero fill on demand section
+             S_CSTRING_LITERALS = 2,
+                // section with only literal C strings
+             S_4BYTE_LITERALS = 3,
+                // section with only 4 byte literals
+             S_8BYTE_LITERALS = 4,
+                // section with only 8 byte literals
+             S_LITERAL_POINTERS = 5, 
+                // section with only pointers to literals
+             S_NON_LAZY_SYMBOL_POINTERS = 6,
+                // section with only non-lazy symbol pointers
+             S_LAZY_SYMBOL_POINTERS = 7,
+                // section with only lazy symbol pointers
+             S_SYMBOL_STUBS = 8,
+                // section with only symbol stubs
+                // byte size of stub in the reserved2 field
+             S_MOD_INIT_FUNC_POINTERS = 9,
+                // section with only function pointers for initialization
+             S_MOD_TERM_FUNC_POINTERS = 10,
+                // section with only function pointers for termination
+             S_COALESCED = 11,
+                // section contains symbols that are coalesced
+             S_GB_ZEROFILL = 12,
+                // zero fill on demand section (that can be larger than 4GB)
+             S_INTERPOSING = 13,
+                // section with only pairs of function pointers for interposing
+             S_16BYTE_LITERALS = 14
+                // section with only 16 byte literals
+      };
+      
+      // Constants for the section flags (high 24 bits of flags field)
+      // see <mach-o/loader.h>
+      enum { S_ATTR_PURE_INSTRUCTIONS   = 1 << 31,
+                // section contains only true machine instructions
+             S_ATTR_NO_TOC              = 1 << 30,
+                // section contains coalesced symbols that are not to be in a 
+                // ranlib table of contents
+             S_ATTR_STRIP_STATIC_SYMS   = 1 << 29,
+                // ok to strip static symbols in this section in files with the
+                // MH_DYLDLINK flag
+             S_ATTR_NO_DEAD_STRIP       = 1 << 28,
+                // no dead stripping
+             S_ATTR_LIVE_SUPPORT        = 1 << 27,
+                // blocks are live if they reference live blocks
+             S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
+                // used with i386 code stubs written on by dyld
+             S_ATTR_DEBUG               = 1 << 25,
+                // a debug section
+             S_ATTR_SOME_INSTRUCTIONS   = 1 << 10,
+                // section contains some machine instructions
+             S_ATTR_EXT_RELOC           = 1 << 9,
+                // section has external relocation entries
+             S_ATTR_LOC_RELOC           = 1 << 8
+                // section has local relocation entries
+      };
+
+      /// cmdSize - This routine returns the size of the MachOSection as written
+      /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+      unsigned cmdSize(bool is64Bit) const {
+        if (is64Bit)
+          return 8 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32; // incl. reserved3
+        else
+          return 9 * sizeof(uint32_t) + 32;  // addresses only 32 bits
+      }
+
+      MachOSection(const std::string &seg, const std::string &sect)
+        : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
+          reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+          reserved3(0) { }
+    };
+
+  private:
+
+    /// SectionList - This is the list of sections that we have emitted to the
+    /// file.  Once the file has been completely built, the segment load command
+    /// SectionCommands are constructed from this info.
+    std::vector<MachOSection*> SectionList;
+
+    /// SectionLookup - This is a mapping from section name to SectionList entry
+    std::map<std::string, MachOSection*> SectionLookup;
+    
+    /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
+    /// to aid in emitting relocations.
+    std::map<GlobalValue*, MachOSection*> GVSection;
+
+    /// GVOffset - This is a mapping from a GlobalValue to an offset from the 
+    /// start of the section in which the GV resides, to aid in emitting
+    /// relocations.
+    std::map<GlobalValue*, intptr_t> GVOffset;
+
+    /// getSection - Return the section with the specified name, creating a new
+    /// section if one does not already exist.
+    MachOSection *getSection(const std::string &seg, const std::string &sect,
+                             unsigned Flags = 0) {
+      MachOSection *MOS = SectionLookup[seg+sect];
+      if (MOS) return MOS;
+
+      MOS = new MachOSection(seg, sect);
+      SectionList.push_back(MOS);
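+      // Section indices are 1-based (assigned after the push_back), which is
+      // why lookups elsewhere use SectionList[Index - 1].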
+      MOS->Index = SectionList.size();
+      MOS->flags = MachOSection::S_REGULAR | Flags;
+      SectionLookup[seg+sect] = MOS;
+      return MOS;
+    }
+    MachOSection *getTextSection(bool isCode = true) {
+      if (isCode)
+        return getSection("__TEXT", "__text", 
+                          MachOSection::S_ATTR_PURE_INSTRUCTIONS |
+                          MachOSection::S_ATTR_SOME_INSTRUCTIONS);
+      else
+        return getSection("__TEXT", "__text");
+    }
+    MachOSection *getBSSSection() {
+      return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
+    }
+    MachOSection *getDataSection() {
+      return getSection("__DATA", "__data");
+    }
+    MachOSection *getConstSection(Constant *C) {
+      const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+      if (CVA && CVA->isCString())
+        return getSection("__TEXT", "__cstring", 
+                          MachOSection::S_CSTRING_LITERALS);
+      
+      const Type *Ty = C->getType();
+      if (Ty->isPrimitiveType() || Ty->isInteger()) {
+        unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+        switch(Size) {
+        default: break; // Fall through to __TEXT,__const
+        case 4:
+          return getSection("__TEXT", "__literal4",
+                            MachOSection::S_4BYTE_LITERALS);
+        case 8:
+          return getSection("__TEXT", "__literal8",
+                            MachOSection::S_8BYTE_LITERALS);
+        case 16:
+          return getSection("__TEXT", "__literal16",
+                            MachOSection::S_16BYTE_LITERALS);
+        }
+      }
+      return getSection("__TEXT", "__const");
+    }
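+    // Illustration of the mapping above: a C string constant lands in
+    // __TEXT,__cstring; 4-, 8-, and 16-byte scalar constants land in
+    // __TEXT,__literal4/8/16 respectively; everything else (e.g. a struct
+    // constant) falls back to __TEXT,__const.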
+    MachOSection *getJumpTableSection() {
+      if (TM.getRelocationModel() == Reloc::PIC_)
+        return getTextSection(false);
+      else
+        return getSection("__TEXT", "__const");
+    }
+    
+    /// MachOSymTab - This struct contains the offsets and sizes of the symbol
+    /// table information, as written in the LC_SYMTAB load command.
+    struct MachOSymTab {
+      uint32_t cmd;     // LC_SYMTAB
+      uint32_t cmdsize; // sizeof( MachOSymTab )
+      uint32_t symoff;  // symbol table offset
+      uint32_t nsyms;   // number of symbol table entries
+      uint32_t stroff;  // string table offset
+      uint32_t strsize; // string table size in bytes
+
+      // Constants for the cmd field
+      // see <mach-o/loader.h>
+      enum { LC_SYMTAB = 0x02  // link-edit stab symbol table info
+      };
+      
+      MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
+        nsyms(0), stroff(0), strsize(0) { }
+    };
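+    // Sanity check on the cmdsize above: the LC_SYMTAB load command consists
+    // of the six 32-bit fields listed in this struct, i.e. 6 * 4 == 24 bytes
+    // on disk.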
+    
+    /// MachODySymTab - This struct contains the offsets and sizes of the
+    /// dynamic link-edit symbol table information, as written in the
+    /// LC_DYSYMTAB load command.
+    struct MachODySymTab {
+      uint32_t cmd;             // LC_DYSYMTAB
+      uint32_t cmdsize;         // sizeof( MachODySymTab )
+      uint32_t ilocalsym;       // index to local symbols
+      uint32_t nlocalsym;       // number of local symbols
+      uint32_t iextdefsym;      // index to externally defined symbols
+      uint32_t nextdefsym;      // number of externally defined symbols
+      uint32_t iundefsym;       // index to undefined symbols
+      uint32_t nundefsym;       // number of undefined symbols
+      uint32_t tocoff;          // file offset to table of contents
+      uint32_t ntoc;            // number of entries in table of contents
+      uint32_t modtaboff;       // file offset to module table
+      uint32_t nmodtab;         // number of module table entries
+      uint32_t extrefsymoff;    // offset to referenced symbol table
+      uint32_t nextrefsyms;     // number of referenced symbol table entries
+      uint32_t indirectsymoff;  // file offset to the indirect symbol table
+      uint32_t nindirectsyms;   // number of indirect symbol table entries
+      uint32_t extreloff;       // offset to external relocation entries
+      uint32_t nextrel;         // number of external relocation entries
+      uint32_t locreloff;       // offset to local relocation entries
+      uint32_t nlocrel;         // number of local relocation entries
+
+      // Constants for the cmd field
+      // see <mach-o/loader.h>
+      enum { LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
+      };
+      
+      MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+        ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+        iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+        nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+        nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
+    };
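+    // Likewise, the LC_DYSYMTAB load command consists of the twenty 32-bit
+    // fields above, so cmdsize is 20 * 4 == 80 bytes on disk.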
+    
+    /// SymTab - The "stab" style symbol table information
+    MachOSymTab   SymTab;     
+    /// DySymTab - symbol table info for the dynamic link editor
+    MachODySymTab DySymTab;
+
+    struct MachOSymCmp {
+      // FIXME: this does not appear to be sorting 'f' after 'F'
+      bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
+        return LHS.GVName < RHS.GVName;
+      }
+    };
+
+    /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+    /// a local symbol rather than an external symbol.
+    static bool PartitionByLocal(const MachOSym &Sym);
+
+    /// PartitionByDefined - Simple boolean predicate that returns true if Sym 
+    /// is defined in this module.
+    static bool PartitionByDefined(const MachOSym &Sym);
+
+  protected:
+  
+    /// SymbolTable - This is the list of symbols we have emitted to the file.
+    /// This actually gets rearranged before emission to the file (to put the
+    /// local symbols first in the list).
+    std::vector<MachOSym> SymbolTable;
+    
+    /// SymT - A buffer to hold the symbol table before we write it out at the
+    /// appropriate location in the file.
+    DataBuffer SymT;
+    
+    /// StrT - A buffer to hold the string table before we write it out at the
+    /// appropriate location in the file.
+    DataBuffer StrT;
+    
+    /// PendingGlobals - This is a list of externally defined symbols that we have
+    /// been asked to emit, but have not seen a reference to.  When a reference
+    /// is seen, the symbol will move from this list to the SymbolTable.
+    std::vector<GlobalValue*> PendingGlobals;
+    
+    /// DynamicSymbolTable - This is just a vector of indices into
+    /// SymbolTable to aid in emitting the DYSYMTAB load command.
+    std::vector<unsigned> DynamicSymbolTable;
+    
+    static void InitMem(const Constant *C, void *Addr, intptr_t Offset,
+                        const TargetData *TD, 
+                        std::vector<MachineRelocation> &MRs);
+
+  private:
+    void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV);
+    void EmitGlobal(GlobalVariable *GV);
+    void EmitHeaderAndLoadCommands();
+    void EmitSections();
+    void BufferSymbolAndStringTable();
+    void CalculateRelocations(MachOSection &MOS);
+
+    MachineRelocation GetJTRelocation(unsigned Offset,
+                                      MachineBasicBlock *MBB) const {
+      return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
+    }
+
+    /// GetTargetRelocation - Emits the target-specific relocation entries for
+    /// MR and returns the number of relocation entries written.
+    unsigned GetTargetRelocation(MachineRelocation &MR,
+                                 unsigned FromIdx,
+                                 unsigned ToAddr,
+                                 unsigned ToIndex,
+                                 OutputBuffer &RelocOut,
+                                 OutputBuffer &SecOut,
+                                 bool Scattered,
+                                 bool Extern) {
+      return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
+                                                          ToIndex, RelocOut,
+                                                          SecOut, Scattered,
+                                                          Extern);
+    }
+  };
+}
+
+#endif
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 0000000..ba428c5
--- /dev/null
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,287 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::~MachineBasicBlock() {
+  LeakDetector::removeGarbageObject(this);
+}
+
+std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) {
+  MBB.print(OS);
+  return OS;
+}
+
+// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
+// gets the next available unique MBB number. If it is removed from a
+// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) {
+  assert(N->Parent == 0 && "machine basic block already in a function");
+  N->Parent = Parent;
+  N->Number = Parent->addToMBBNumbering(N);
+  LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) {
+  assert(N->Parent != 0 && "machine basic block not in a function");
+  N->Parent->removeFromMBBNumbering(N->Number);
+  N->Number = -1;
+  N->Parent = 0;
+  LeakDetector::addGarbageObject(N);
+}
+
+
+MachineInstr* ilist_traits<MachineInstr>::createSentinel() {
+  MachineInstr* dummy = new MachineInstr();
+  LeakDetector::removeGarbageObject(dummy);
+  return dummy;
+}
+
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) {
+  assert(N->parent == 0 && "machine instruction already in a basic block");
+  N->parent = parent;
+  LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) {
+  assert(N->parent != 0 && "machine instruction not in a basic block");
+  N->parent = 0;
+  LeakDetector::addGarbageObject(N);
+}
+
+void ilist_traits<MachineInstr>::transferNodesFromList(
+  iplist<MachineInstr, ilist_traits<MachineInstr> >& fromList,
+  ilist_iterator<MachineInstr> first,
+  ilist_iterator<MachineInstr> last) {
+  if (parent != fromList.parent)
+    for (; first != last; ++first)
+      first->parent = parent;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+  const TargetInstrInfo& TII = *getParent()->getTarget().getInstrInfo();
+  iterator I = end();
+  while (I != begin() && TII.isTerminatorInstr((--I)->getOpcode()));
+  if (I != end() && !TII.isTerminatorInstr(I->getOpcode())) ++I;
+  return I;
+}
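+// For example, in a block that ends with a conditional branch followed by an
+// unconditional branch, this returns an iterator to the conditional branch
+// (the first terminator); if the block has no terminators, it returns end().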
+
+void MachineBasicBlock::dump() const {
+  print(*cerr.stream());
+}
+
+static inline void OutputReg(std::ostream &os, unsigned RegNo,
+                             const MRegisterInfo *MRI = 0) {
+  if (!RegNo || MRegisterInfo::isPhysicalRegister(RegNo)) {
+    if (MRI)
+      os << " %" << MRI->get(RegNo).Name;
+    else
+      os << " %mreg(" << RegNo << ")";
+  } else
+    os << " %reg" << RegNo;
+}
+
+void MachineBasicBlock::print(std::ostream &OS) const {
+  const MachineFunction *MF = getParent();
+  if (!MF) {
+    OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+       << " is null\n";
+    return;
+  }
+
+  const BasicBlock *LBB = getBasicBlock();
+  OS << "\n";
+  if (LBB) OS << LBB->getName() << ": ";
+  OS << (const void*)this
+     << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber();
+  if (isLandingPad()) OS << ", EH LANDING PAD";
+  OS << ":\n";
+
+  const MRegisterInfo *MRI = MF->getTarget().getRegisterInfo();  
+  if (livein_begin() != livein_end()) {
+    OS << "Live Ins:";
+    for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+      OutputReg(OS, *I, MRI);
+    OS << "\n";
+  }
+  // Print the preds of this block according to the CFG.
+  if (!pred_empty()) {
+    OS << "    Predecessors according to CFG:";
+    for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+      OS << " " << *PI << " (#" << (*PI)->getNumber() << ")";
+    OS << "\n";
+  }
+  
+  for (const_iterator I = begin(); I != end(); ++I) {
+    OS << "\t";
+    I->print(OS, &getParent()->getTarget());
+  }
+
+  // Print the successors of this block according to the CFG.
+  if (!succ_empty()) {
+    OS << "    Successors according to CFG:";
+    for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+      OS << " " << *SI << " (#" << (*SI)->getNumber() << ")";
+    OS << "\n";
+  }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+  livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+  assert(I != livein_end() && "Not a live in!");
+  LiveIns.erase(I);
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+  MachineFunction::BasicBlockListType &BBList =getParent()->getBasicBlockList();
+  getParent()->getBasicBlockList().splice(NewAfter, BBList, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+  MachineFunction::BasicBlockListType &BBList =getParent()->getBasicBlockList();
+  MachineFunction::iterator BBI = NewBefore;
+  getParent()->getBasicBlockList().splice(++BBI, BBList, this);
+}
+
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
+  Successors.push_back(succ);
+  succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+  succ->removePredecessor(this);
+  succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+  assert(I != Successors.end() && "Not a current successor!");
+  Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator MachineBasicBlock::removeSuccessor(succ_iterator I) {
+  assert(I != Successors.end() && "Not a current successor!");
+  (*I)->removePredecessor(this);
+  return(Successors.erase(I));
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+  Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+  std::vector<MachineBasicBlock *>::iterator I =
+    std::find(Predecessors.begin(), Predecessors.end(), pred);
+  assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+  Predecessors.erase(I);
+}
+
+bool MachineBasicBlock::isSuccessor(MachineBasicBlock *MBB) const {
+  std::vector<MachineBasicBlock *>::const_iterator I =
+    std::find(Successors.begin(), Successors.end(), MBB);
+  return I != Successors.end();
+}
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+                                               MachineBasicBlock *New) {
+  assert(Old != New && "Cannot replace self with self!");
+
+  MachineBasicBlock::iterator I = end();
+  while (I != begin()) {
+    --I;
+    if (!(I->getInstrDescriptor()->Flags & M_TERMINATOR_FLAG)) break;
+
+    // Scan the operands of this machine instruction, replacing any uses of Old
+    // with New.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      if (I->getOperand(i).isMachineBasicBlock() &&
+          I->getOperand(i).getMachineBasicBlock() == Old)
+        I->getOperand(i).setMachineBasicBlock(New);
+  }
+
+  // Update the successor information.  If New was already a successor, just
+  // remove the link to Old instead of creating another one.  PR 1444.
+  removeSuccessor(Old);
+  if (!isSuccessor(New))
+    addSuccessor(New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges to be
+/// inserted into the CFG.  If we have proven that MBB can only branch to DestA
+/// and DestB, remove any other MBB successors from the CFG.  DestA and DestB
+/// can be null.
+/// Besides DestA and DestB, retain any other edges leading to landing pads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are landing pads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+                                             MachineBasicBlock *DestB,
+                                             bool isCond) {
+  bool MadeChange = false;
+  bool AddedFallThrough = false;
+
+  MachineBasicBlock *FallThru = getNext();
+  
+  // If this block ends with a conditional branch that falls through to its
+  // successor, set DestB as the successor.
+  if (isCond) {
+    if (DestB == 0 && FallThru != getParent()->end()) {
+      DestB = FallThru;
+      AddedFallThrough = true;
+    }
+  } else {
+    // If this is an unconditional branch with no explicit dest, it must just be
+    // a fallthrough into DestB.
+    if (DestA == 0 && FallThru != getParent()->end()) {
+      DestA = FallThru;
+      AddedFallThrough = true;
+    }
+  }
+  
+  MachineBasicBlock::succ_iterator SI = succ_begin();
+  MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
+  while (SI != succ_end()) {
+    if (*SI == DestA && DestA == DestB) {
+      DestA = DestB = 0;
+      ++SI;
+    } else if (*SI == DestA) {
+      DestA = 0;
+      ++SI;
+    } else if (*SI == DestB) {
+      DestB = 0;
+      ++SI;
+    } else if ((*SI)->isLandingPad() && 
+               *SI!=OrigDestA && *SI!=OrigDestB) {
+      ++SI;
+    } else {
+      // Otherwise, this is a superfluous edge, remove it.
+      SI = removeSuccessor(SI);
+      MadeChange = true;
+    }
+  }
+  if (!AddedFallThrough) {
+    assert(DestA == 0 && DestB == 0 &&
+           "MachineCFG is missing edges!");
+  } else if (isCond) {
+    assert(DestA == 0 && "MachineCFG is missing edges!");
+  }
+  return MadeChange;
+}
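+
+// Example of the cleanup above: if a transformation rewrites this block so that
+// it now ends in a single unconditional branch to DestA, but the successor list
+// still records the target of a removed conditional branch, that stale edge is
+// deleted here; successor edges to landing pads are always preserved.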
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 0000000..c762ae5
--- /dev/null
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,483 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function.  This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+#include <sstream>
+using namespace llvm;
+
+static AnnotationID MF_AID(
+  AnnotationManager::getID("CodeGen::MachineCodeForFunction"));
+
+// Out of line virtual function to home classes.
+void MachineFunctionPass::virtfn() {}
+
+namespace {
+  struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass {
+    static char ID;
+
+    std::ostream *OS;
+    const std::string Banner;
+
+    Printer (std::ostream *_OS, const std::string &_Banner) 
+      : MachineFunctionPass((intptr_t)&ID), OS (_OS), Banner (_Banner) { }
+
+    const char *getPassName() const { return "MachineFunction Printer"; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF) {
+      (*OS) << Banner;
+      MF.print (*OS);
+      return false;
+    }
+  };
+  char Printer::ID = 0;
+}
+
+/// Returns a newly-created MachineFunction Printer pass. The default output
+/// stream is std::cerr; the default banner is empty.
+///
+FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS,
+                                                     const std::string &Banner){
+  return new Printer(OS, Banner);
+}
+
+namespace {
+  struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass {
+    static char ID;
+    Deleter() : MachineFunctionPass((intptr_t)&ID) {}
+
+    const char *getPassName() const { return "Machine Code Deleter"; }
+
+    bool runOnMachineFunction(MachineFunction &MF) {
+      // Delete the annotation from the function now.
+      MachineFunction::destruct(MF.getFunction());
+      return true;
+    }
+  };
+  char Deleter::ID = 0;
+}
+
+/// MachineCodeDeletion Pass - This pass deletes all of the machine code for
+/// the current function, which should happen after the function has been
+/// emitted to a .s file or to memory.
+FunctionPass *llvm::createMachineCodeDeleter() {
+  return new Deleter();
+}
+
+
+
+//===---------------------------------------------------------------------===//
+// MachineFunction implementation
+//===---------------------------------------------------------------------===//
+
+MachineBasicBlock* ilist_traits<MachineBasicBlock>::createSentinel() {
+  MachineBasicBlock* dummy = new MachineBasicBlock();
+  LeakDetector::removeGarbageObject(dummy);
+  return dummy;
+}
+
+void ilist_traits<MachineBasicBlock>::transferNodesFromList(
+  iplist<MachineBasicBlock, ilist_traits<MachineBasicBlock> >& toList,
+  ilist_iterator<MachineBasicBlock> first,
+  ilist_iterator<MachineBasicBlock> last) {
+  if (Parent != toList.Parent)
+    for (; first != last; ++first)
+      first->Parent = toList.Parent;
+}
+
+MachineFunction::MachineFunction(const Function *F,
+                                 const TargetMachine &TM)
+  : Annotation(MF_AID), Fn(F), Target(TM) {
+  SSARegMapping = new SSARegMap();
+  MFInfo = 0;
+  FrameInfo = new MachineFrameInfo();
+  ConstantPool = new MachineConstantPool(TM.getTargetData());
+  UsedPhysRegs.resize(TM.getRegisterInfo()->getNumRegs());
+  
+  // Set up jump table.
+  const TargetData &TD = *TM.getTargetData();
+  bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+  unsigned EntrySize = IsPic ? 4 : TD.getPointerSize();
+  unsigned Alignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
+                             : TD.getPointerABIAlignment();
+  JumpTableInfo = new MachineJumpTableInfo(EntrySize, Alignment);
+  
+  BasicBlocks.Parent = this;
+}
+
+MachineFunction::~MachineFunction() {
+  BasicBlocks.clear();
+  delete SSARegMapping;
+  delete MFInfo;
+  delete FrameInfo;
+  delete ConstantPool;
+  delete JumpTableInfo;
+}
+
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them.  This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function.  If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+  if (empty()) { MBBNumbering.clear(); return; }
+  MachineFunction::iterator MBBI, E = end();
+  if (MBB == 0)
+    MBBI = begin();
+  else
+    MBBI = MBB;
+  
+  // Figure out the block number this should have.
+  unsigned BlockNo = 0;
+  if (MBBI != begin())
+    BlockNo = prior(MBBI)->getNumber()+1;
+  
+  for (; MBBI != E; ++MBBI, ++BlockNo) {
+    if (MBBI->getNumber() != (int)BlockNo) {
+      // Remove use of the old number.
+      if (MBBI->getNumber() != -1) {
+        assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+               "MBB number mismatch!");
+        MBBNumbering[MBBI->getNumber()] = 0;
+      }
+      
+      // If BlockNo is already taken, set that block's number to -1.
+      if (MBBNumbering[BlockNo])
+        MBBNumbering[BlockNo]->setNumber(-1);
+
+      MBBNumbering[BlockNo] = MBBI;
+      MBBI->setNumber(BlockNo);
+    }
+  }    
+
+  // Okay, all the blocks are renumbered.  If we have compactified the block
+  // numbering, shrink MBBNumbering now.
+  assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+  MBBNumbering.resize(BlockNo);
+}
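+// For example, after erasing block #2 from a function whose blocks were
+// numbered 0 through 4, calling RenumberBlocks() renumbers the remaining
+// blocks 0 through 3 and shrinks MBBNumbering to match.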
+
+
+void MachineFunction::dump() const { print(*cerr.stream()); }
+
+void MachineFunction::print(std::ostream &OS) const {
+  OS << "# Machine code for " << Fn->getName () << "():\n";
+
+  // Print Frame Information
+  getFrameInfo()->print(*this, OS);
+  
+  // Print JumpTable Information
+  getJumpTableInfo()->print(OS);
+
+  // Print Constant Pool
+  getConstantPool()->print(OS);
+  
+  const MRegisterInfo *MRI = getTarget().getRegisterInfo();
+  
+  if (livein_begin() != livein_end()) {
+    OS << "Live Ins:";
+    for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) {
+      if (MRI)
+        OS << " " << MRI->getName(I->first);
+      else
+        OS << " Reg #" << I->first;
+      
+      if (I->second)
+        OS << " in VR#" << I->second << " ";
+    }
+    OS << "\n";
+  }
+  if (liveout_begin() != liveout_end()) {
+    OS << "Live Outs:";
+    for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
+      if (MRI)
+        OS << " " << MRI->getName(*I);
+      else
+        OS << " Reg #" << *I;
+    OS << "\n";
+  }
+  
+  for (const_iterator BB = begin(); BB != end(); ++BB)
+    BB->print(OS);
+
+  OS << "\n# End machine code for " << Fn->getName () << "().\n\n";
+}
+
+/// CFGOnly flag - This is used to control whether or not the CFG graph printer
+/// prints out the contents of basic blocks or not.  This is acceptable because
+/// this code is only really used for debugging purposes.
+///
+static bool CFGOnly = false;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+    static std::string getGraphName(const MachineFunction *F) {
+      return "CFG for '" + F->getFunction()->getName() + "' function";
+    }
+
+    static std::string getNodeLabel(const MachineBasicBlock *Node,
+                                    const MachineFunction *Graph) {
+      if (CFGOnly && Node->getBasicBlock() &&
+          !Node->getBasicBlock()->getName().empty())
+        return Node->getBasicBlock()->getName() + ":";
+
+      std::ostringstream Out;
+      if (CFGOnly) {
+        Out << Node->getNumber() << ':';
+        return Out.str();
+      }
+
+      Node->print(Out);
+
+      std::string OutStr = Out.str();
+      if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+      // Process string output to make it nicer...
+      for (unsigned i = 0; i != OutStr.length(); ++i)
+        if (OutStr[i] == '\n') {                            // Left justify
+          OutStr[i] = '\\';
+          OutStr.insert(OutStr.begin()+i+1, 'l');
+        }
+      return OutStr;
+    }
+  };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+  ViewGraph(this, "mf" + getFunction()->getName());
+#else
+  cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+  CFGOnly = true;
+  viewCFG();
+  CFGOnly = false;
+}
+
+// The next two methods are used to construct and to retrieve
+// the MachineCodeForFunction object for the given function.
+// construct() -- Allocates and initializes for a given function and target
+// get()       -- Returns a handle to the object.
+//                This should not be called before "construct()"
+//                for a given Function.
+//
+MachineFunction&
+MachineFunction::construct(const Function *Fn, const TargetMachine &Tar)
+{
+  assert(Fn->getAnnotation(MF_AID) == 0 &&
+         "Object already exists for this function!");
+  MachineFunction* mcInfo = new MachineFunction(Fn, Tar);
+  Fn->addAnnotation(mcInfo);
+  return *mcInfo;
+}
+
+void MachineFunction::destruct(const Function *Fn) {
+  bool Deleted = Fn->deleteAnnotation(MF_AID);
+  assert(Deleted && "Machine code did not exist for function!");
+}
+
+MachineFunction& MachineFunction::get(const Function *F)
+{
+  MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID);
+  assert(mc && "Call construct() method first to allocate the object");
+  return *mc;
+}
+
+void MachineFunction::clearSSARegMap() {
+  delete SSARegMapping;
+  SSARegMapping = 0;
+}
+
+//===----------------------------------------------------------------------===//
+//  MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{
+  int ValOffset = MF.getTarget().getFrameInfo()->getOffsetOfLocalArea();
+
+  for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+    const StackObject &SO = Objects[i];
+    OS << "  <fi #" << (int)(i-NumFixedObjects) << ">: ";
+    if (SO.Size == 0)
+      OS << "variable sized";
+    else
+      OS << "size is " << SO.Size << " byte" << (SO.Size != 1 ? "s," : ",");
+    OS << " alignment is " << SO.Alignment << " byte"
+       << (SO.Alignment != 1 ? "s," : ",");
+
+    if (i < NumFixedObjects)
+      OS << " fixed";
+    if (i < NumFixedObjects || SO.SPOffset != -1) {
+      int64_t Off = SO.SPOffset - ValOffset;
+      OS << " at location [SP";
+      if (Off > 0)
+        OS << "+" << Off;
+      else if (Off < 0)
+        OS << Off;
+      OS << "]";
+    }
+    OS << "\n";
+  }
+
+  if (HasVarSizedObjects)
+    OS << "  Stack frame contains variable sized objects\n";
+}
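+// A typical line produced by the loop above might look like (hypothetical
+// offsets):
+//   <fi #0>: size is 4 bytes, alignment is 4 bytes, at location [SP-8]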
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+  print(MF, *cerr.stream());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getJumpTableIndex - Create a new jump table entry in the jump table info
+/// or return an existing one.
+///
+unsigned MachineJumpTableInfo::getJumpTableIndex(
+                               const std::vector<MachineBasicBlock*> &DestBBs) {
+  assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
+    if (JumpTables[i].MBBs == DestBBs)
+      return i;
+  
+  JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+  return JumpTables.size()-1;
+}
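+// For example, two switch statements that lower to the same vector of
+// destination blocks share a single jump table index rather than emitting the
+// table twice.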
+
+
+void MachineJumpTableInfo::print(std::ostream &OS) const {
+  // FIXME: this is lame, maybe we could print out the MBB numbers or something
+  // like {1, 2, 4, 5, 3, 0}
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+    OS << "  <jt #" << i << "> has " << JumpTables[i].MBBs.size() 
+       << " entries\n";
+  }
+}
+
+void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
+
+
+//===----------------------------------------------------------------------===//
+//  MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].isMachineConstantPoolEntry())
+      delete Constants[i].Val.MachineCPVal;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one.  The caller must specify the required alignment for the
+/// object as a log2 value.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(Constant *C, 
+                                                   unsigned Alignment) {
+  assert(Alignment && "Alignment must be specified!");
+  if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+  
+  // Check to see if we already have this constant.
+  //
+  // FIXME, this could be made much more efficient for large constant pools.
+  unsigned AlignMask = (1 << Alignment)-1;
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].Val.ConstVal == C && (Constants[i].Offset & AlignMask)== 0)
+      return i;
+  
+  unsigned Offset = 0;
+  if (!Constants.empty()) {
+    Offset = Constants.back().getOffset();
+    Offset += TD->getTypeSize(Constants.back().getType());
+    Offset = (Offset+AlignMask)&~AlignMask;
+  }
+  
+  Constants.push_back(MachineConstantPoolEntry(C, Offset));
+  return Constants.size()-1;
+}
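+// Worked example of the placement above: with Alignment == 2 (i.e. 4-byte
+// alignment), AlignMask is 3; if the previous entry ends at offset 6, the new
+// entry is placed at (6 + 3) & ~3 == 8.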
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+                                                   unsigned Alignment) {
+  assert(Alignment && "Alignment must be specified!");
+  if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+  
+  // Check to see if we already have this constant.
+  //
+  // FIXME, this could be made much more efficient for large constant pools.
+  unsigned AlignMask = (1 << Alignment)-1;
+  int Idx = V->getExistingMachineCPValue(this, Alignment);
+  if (Idx != -1)
+    return (unsigned)Idx;
+  
+  unsigned Offset = 0;
+  if (!Constants.empty()) {
+    Offset = Constants.back().getOffset();
+    Offset += TD->getTypeSize(Constants.back().getType());
+    Offset = (Offset+AlignMask)&~AlignMask;
+  }
+  
+  Constants.push_back(MachineConstantPoolEntry(V, Offset));
+  return Constants.size()-1;
+}
+
+
+void MachineConstantPool::print(std::ostream &OS) const {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+    OS << "  <cp #" << i << "> is";
+    if (Constants[i].isMachineConstantPoolEntry())
+      Constants[i].Val.MachineCPVal->print(OS);
+    else
+      OS << *(Value*)Constants[i].Val.ConstVal;
+    OS << " , offset=" << Constants[i].getOffset();
+    OS << "\n";
+  }
+}
+
+void MachineConstantPool::dump() const { print(*cerr.stream()); }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 0000000..c5e2ba8
--- /dev/null
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,426 @@
+//===-- MachineInstr.cpp --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/Streams.h"
+#include <ostream>
+using namespace llvm;
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// TID NULL and no operands.
+MachineInstr::MachineInstr()
+  : TID(0), NumImplicitOps(0), parent(0) {
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs) {
+      MachineOperand Op;
+      Op.opType = MachineOperand::MO_Register;
+      Op.IsDef = true;
+      Op.IsImp = true;
+      Op.IsKill = false;
+      Op.IsDead = false;
+      Op.contents.RegNo = *ImpDefs;
+      Op.auxInfo.subReg = 0;
+      Operands.push_back(Op);
+    }
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses) {
+      MachineOperand Op;
+      Op.opType = MachineOperand::MO_Register;
+      Op.IsDef = false;
+      Op.IsImp = true;
+      Op.IsKill = false;
+      Op.IsDead = false;
+      Op.contents.RegNo = *ImpUses;
+      Op.auxInfo.subReg = 0;
+      Operands.push_back(Op);
+    }
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands.  It reserves space for the implicit operands plus the
+/// number of operands specified by the TargetInstrDescriptor (instructions
+/// with a variable number of operands may add more later).
+MachineInstr::MachineInstr(const TargetInstrDescriptor &tid)
+  : TID(&tid), NumImplicitOps(0), parent(0) {
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+      NumImplicitOps++;
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+      NumImplicitOps++;
+  Operands.reserve(NumImplicitOps + TID->numOperands);
+  addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor above, except that
+/// the MachineInstr is created and added to the end of the specified basic
+/// block.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB,
+                           const TargetInstrDescriptor &tid)
+  : TID(&tid), NumImplicitOps(0), parent(0) {
+  assert(MBB && "Cannot use inserting ctor with null basic block!");
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+      NumImplicitOps++;
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+      NumImplicitOps++;
+  Operands.reserve(NumImplicitOps + TID->numOperands);
+  addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+  MBB->push_back(this);  // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(const MachineInstr &MI) {
+  TID = MI.getInstrDescriptor();
+  NumImplicitOps = MI.NumImplicitOps;
+  Operands.reserve(MI.getNumOperands());
+
+  // Add operands
+  for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+    Operands.push_back(MI.getOperand(i));
+
+  // Set parent, next, and prev to null
+  parent = 0;
+  prev = 0;
+  next = 0;
+}
+
+
+MachineInstr::~MachineInstr() {
+  LeakDetector::removeGarbageObject(this);
+}
+
+/// getOpcode - Returns the opcode of this MachineInstr.
+///
+const int MachineInstr::getOpcode() const {
+  return TID->Opcode;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+  assert(getParent() && "Not embedded in a basic block!");
+  getParent()->remove(this);
+  return this;
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand.
+///
+bool MachineInstr::OperandsComplete() const {
+  unsigned short NumOperands = TID->numOperands;
+  if ((TID->Flags & M_VARIABLE_OPS) == 0 &&
+      getNumOperands()-NumImplicitOps >= NumOperands)
+    return true;  // Broken: we have all the operands of this instruction!
+  return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+  unsigned NumOperands = TID->numOperands;
+  if ((TID->Flags & M_VARIABLE_OPS) == 0)
+    return NumOperands;
+
+  for (unsigned e = getNumOperands(); NumOperands != e; ++NumOperands) {
+    const MachineOperand &MO = getOperand(NumOperands);
+    if (!MO.isRegister() || !MO.isImplicit())
+      NumOperands++;
+  }
+  return NumOperands;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+  if (getType() != Other.getType()) return false;
+  
+  switch (getType()) {
+  default: assert(0 && "Unrecognized operand type");
+  case MachineOperand::MO_Register:
+    return getReg() == Other.getReg() && isDef() == Other.isDef();
+  case MachineOperand::MO_Immediate:
+    return getImm() == Other.getImm();
+  case MachineOperand::MO_MachineBasicBlock:
+    return getMBB() == Other.getMBB();
+  case MachineOperand::MO_FrameIndex:
+    return getFrameIndex() == Other.getFrameIndex();
+  case MachineOperand::MO_ConstantPoolIndex:
+    return getConstantPoolIndex() == Other.getConstantPoolIndex() &&
+           getOffset() == Other.getOffset();
+  case MachineOperand::MO_JumpTableIndex:
+    return getJumpTableIndex() == Other.getJumpTableIndex();
+  case MachineOperand::MO_GlobalAddress:
+    return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+  case MachineOperand::MO_ExternalSymbol:
+    return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+           getOffset() == Other.getOffset();
+  }
+}
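+// Note, as the register case above shows, two register operands compare equal
+// when they name the same register and agree on def vs. use; kill, dead, and
+// implicit flags are not considered.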
+
+/// findRegisterUseOperandIdx() - Returns the index of the operand that is a use
+/// of the specified register, or -1 if it is not found.  If isKill is true, the
+/// search is further restricted to a use that kills the register.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill) const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+      if (!isKill || MO.isKill())
+        return i;
+  }
+  return -1;
+}
+  
+/// findRegisterDefOperand() - Returns the MachineOperand that is a def of
+/// the specified register, or NULL if it is not found.
+MachineOperand *MachineInstr::findRegisterDefOperand(unsigned Reg) {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
+      return &MO;
+  }
+  return NULL;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+  const TargetInstrDescriptor *TID = getInstrDescriptor();
+  if (TID->Flags & M_PREDICABLE) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if ((TID->OpInfo[i].Flags & M_PREDICATE_OPERAND))
+        return i;
+  }
+
+  return -1;
+}
+  
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+      continue;
+    for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+      MachineOperand &MOp = getOperand(j);
+      if (!MOp.isIdenticalTo(MO))
+        continue;
+      if (MO.isKill())
+        MOp.setIsKill();
+      else
+        MOp.setIsDead();
+      break;
+    }
+  }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+  const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+  if (TID->Flags & M_PREDICABLE) {
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      if ((TID->OpInfo[i].Flags & M_PREDICATE_OPERAND)) {
+        const MachineOperand &MO = MI->getOperand(i);
+        // Predicate operands must be the last operands.
+        if (MO.isReg())
+          addRegOperand(MO.getReg(), false);
+        else {
+          addImmOperand(MO.getImm());
+        }
+      }
+    }
+  }
+}
+
+void MachineInstr::dump() const {
+  cerr << "  " << *this;
+}
+
+static inline void OutputReg(std::ostream &os, unsigned RegNo,
+                             const MRegisterInfo *MRI = 0) {
+  if (!RegNo || MRegisterInfo::isPhysicalRegister(RegNo)) {
+    if (MRI)
+      os << "%" << MRI->get(RegNo).Name;
+    else
+      os << "%mreg(" << RegNo << ")";
+  } else
+    os << "%reg" << RegNo;
+}
+
+static void print(const MachineOperand &MO, std::ostream &OS,
+                  const TargetMachine *TM) {
+  const MRegisterInfo *MRI = 0;
+
+  if (TM) MRI = TM->getRegisterInfo();
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    OutputReg(OS, MO.getReg(), MRI);
+    break;
+  case MachineOperand::MO_Immediate:
+    OS << MO.getImmedValue();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    OS << "mbb<"
+       << ((Value*)MO.getMachineBasicBlock()->getBasicBlock())->getName()
+       << "," << (void*)MO.getMachineBasicBlock() << ">";
+    break;
+  case MachineOperand::MO_FrameIndex:
+    OS << "<fi#" << MO.getFrameIndex() << ">";
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    OS << "<cp#" << MO.getConstantPoolIndex() << ">";
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    OS << "<jt#" << MO.getJumpTableIndex() << ">";
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    OS << "<ga:" << ((Value*)MO.getGlobal())->getName();
+    if (MO.getOffset()) OS << "+" << MO.getOffset();
+    OS << ">";
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    OS << "<es:" << MO.getSymbolName();
+    if (MO.getOffset()) OS << "+" << MO.getOffset();
+    OS << ">";
+    break;
+  default:
+    assert(0 && "Unrecognized operand type");
+  }
+}
+
+void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const {
+  unsigned StartOp = 0;
+
+  // Specialize printing if op#0 is definition
+  if (getNumOperands() && getOperand(0).isReg() && getOperand(0).isDef()) {
+    ::print(getOperand(0), OS, TM);
+    if (getOperand(0).isDead())
+      OS << "<dead>";
+    OS << " = ";
+    ++StartOp;   // Don't print this operand again!
+  }
+
+  if (TID)
+    OS << TID->Name;
+
+  for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand& mop = getOperand(i);
+    if (i != StartOp)
+      OS << ",";
+    OS << " ";
+    ::print(mop, OS, TM);
+
+    if (mop.isReg()) {
+      if (mop.isDef() || mop.isKill() || mop.isDead() || mop.isImplicit()) {
+        OS << "<";
+        bool NeedComma = false;
+        if (mop.isImplicit()) {
+          OS << (mop.isDef() ? "imp-def" : "imp-use");
+          NeedComma = true;
+        } else if (mop.isDef()) {
+          OS << "def";
+          NeedComma = true;
+        }
+        if (mop.isKill() || mop.isDead()) {
+          if (NeedComma)
+            OS << ",";
+          if (mop.isKill())
+            OS << "kill";
+          if (mop.isDead())
+            OS << "dead";
+        }
+        OS << ">";
+      }
+    }
+  }
+
+  OS << "\n";
+}
+
+void MachineInstr::print(std::ostream &os) const {
+  // If the instruction is embedded into a basic block, we can find the target
+  // info for the instruction.
+  if (const MachineBasicBlock *MBB = getParent()) {
+    const MachineFunction *MF = MBB->getParent();
+    if (MF)
+      print(os, &MF->getTarget());
+    else
+      print(os, 0);
+    return;
+  }
+
+  // Otherwise, print it out in the "raw" format without symbolic register names
+  // and such.
+  os << getInstrDescriptor()->Name;
+
+  for (unsigned i = 0, N = getNumOperands(); i < N; i++) {
+    os << "\t" << getOperand(i);
+    if (getOperand(i).isReg() && getOperand(i).isDef())
+      os << "<d>";
+  }
+
+  os << "\n";
+}
+
+void MachineOperand::print(std::ostream &OS) const {
+  switch (getType()) {
+  case MO_Register:
+    OutputReg(OS, getReg());
+    break;
+  case MO_Immediate:
+    OS << (long)getImmedValue();
+    break;
+  case MO_MachineBasicBlock:
+    OS << "<mbb:"
+       << ((Value*)getMachineBasicBlock()->getBasicBlock())->getName()
+       << "@" << (void*)getMachineBasicBlock() << ">";
+    break;
+  case MO_FrameIndex:
+    OS << "<fi#" << getFrameIndex() << ">";
+    break;
+  case MO_ConstantPoolIndex:
+    OS << "<cp#" << getConstantPoolIndex() << ">";
+    break;
+  case MO_JumpTableIndex:
+    OS << "<jt#" << getJumpTableIndex() << ">";
+    break;
+  case MO_GlobalAddress:
+    OS << "<ga:" << ((Value*)getGlobal())->getName() << ">";
+    break;
+  case MO_ExternalSymbol:
+    OS << "<es:" << getSymbolName() << ">";
+    break;
+  default:
+    assert(0 && "Unrecognized operand type");
+    break;
+  }
+}
+
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 0000000..77fb643
--- /dev/null
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,1905 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the pass registration stuff necessary to use MachineModuleInfo.
+namespace {
+  RegisterPass<MachineModuleInfo> X("machinemoduleinfo", "Module Information");
+}
+char MachineModuleInfo::ID = 0;
+
+//===----------------------------------------------------------------------===//
+
+/// getGlobalVariablesUsing - Return all of the GlobalVariables which have the
+/// specified value in their initializer somewhere.
+static void
+getGlobalVariablesUsing(Value *V, std::vector<GlobalVariable*> &Result) {
+  // Scan through the value's users.
+  for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) {
+      // If the user is a GlobalVariable then add to result.
+      Result.push_back(GV);
+    } else if (Constant *C = dyn_cast<Constant>(*I)) {
+      // If the user is a constant, then scan its users as well.
+      getGlobalVariablesUsing(C, Result);
+    }
+  }
+}
+
+/// getGlobalVariablesUsing - Return all of the GlobalVariables that use the
+/// named GlobalVariable.
+static std::vector<GlobalVariable*>
+getGlobalVariablesUsing(Module &M, const std::string &RootName) {
+  std::vector<GlobalVariable*> Result;  // GlobalVariables matching criteria.
+  
+  std::vector<const Type*> FieldTypes;
+  FieldTypes.push_back(Type::Int32Ty);
+  FieldTypes.push_back(Type::Int32Ty);
+
+  // Get the GlobalVariable root.
+  GlobalVariable *UseRoot = M.getGlobalVariable(RootName,
+                                                StructType::get(FieldTypes));
+
+  // If present and linkonce then scan for users.
+  if (UseRoot && UseRoot->hasLinkOnceLinkage()) {
+    getGlobalVariablesUsing(UseRoot, Result);
+  }
+  
+  return Result;
+}
+  
+/// isStringValue - Return true if the given value can be coerced to a string.
+///
+static bool isStringValue(Value *V) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+    if (GV->hasInitializer() && isa<ConstantArray>(GV->getInitializer())) {
+      ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+      return Init->isString();
+    }
+  } else if (Constant *C = dyn_cast<Constant>(V)) {
+    if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+      return isStringValue(GV);
+    else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      if (CE->getOpcode() == Instruction::GetElementPtr) {
+        if (CE->getNumOperands() == 3 &&
+            cast<Constant>(CE->getOperand(1))->isNullValue() &&
+            isa<ConstantInt>(CE->getOperand(2))) {
+          return isStringValue(CE->getOperand(0));
+        }
+      }
+    }
+  }
+  return false;
+}
+
+/// getGlobalVariable - Return the GlobalVariable, either directly or behind a
+/// bitcast; otherwise return NULL.
+///
+static GlobalVariable *getGlobalVariable(Value *V) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+    return GV;
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() == Instruction::BitCast) {
+      return dyn_cast<GlobalVariable>(CE->getOperand(0));
+    }
+  }
+  return NULL;
+}
+
+/// isGlobalVariable - Return true if the given value can be coerced to a
+/// GlobalVariable.
+static bool isGlobalVariable(Value *V) {
+  if (isa<GlobalVariable>(V) || isa<ConstantPointerNull>(V)) {
+    return true;
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() == Instruction::BitCast) {
+      return isa<GlobalVariable>(CE->getOperand(0));
+    }
+  }
+  return false;
+}
+
+/// getUIntOperand - Return the ith operand if it is an unsigned integer.
+///
+static ConstantInt *getUIntOperand(GlobalVariable *GV, unsigned i) {
+  // Make sure the GlobalVariable has an initializer.
+  if (!GV->hasInitializer()) return NULL;
+  
+  // Get the initializer constant.
+  ConstantStruct *CI = dyn_cast<ConstantStruct>(GV->getInitializer());
+  if (!CI) return NULL;
+  
+  // Check that there are at least i + 1 operands.
+  unsigned N = CI->getNumOperands();
+  if (i >= N) return NULL;
+
+  // Check constant.
+  return dyn_cast<ConstantInt>(CI->getOperand(i));
+}
+
+//===----------------------------------------------------------------------===//
+
+/// ApplyToFields - Target the visitor to each field of the debug information
+/// descriptor.
+void DIVisitor::ApplyToFields(DebugInfoDesc *DD) {
+  DD->ApplyToFields(this);
+}
+
+//===----------------------------------------------------------------------===//
+/// DICountVisitor - This DIVisitor counts all of the fields in the supplied
+/// DebugInfoDesc.
+class DICountVisitor : public DIVisitor {
+private:
+  unsigned Count;                       // Running count of fields.
+  
+public:
+  DICountVisitor() : DIVisitor(), Count(0) {}
+  
+  // Accessors.
+  unsigned getCount() const { return Count; }
+  
+  /// Apply - Count each of the fields.
+  ///
+  virtual void Apply(int &Field)             { ++Count; }
+  virtual void Apply(unsigned &Field)        { ++Count; }
+  virtual void Apply(int64_t &Field)         { ++Count; }
+  virtual void Apply(uint64_t &Field)        { ++Count; }
+  virtual void Apply(bool &Field)            { ++Count; }
+  virtual void Apply(std::string &Field)     { ++Count; }
+  virtual void Apply(DebugInfoDesc *&Field)  { ++Count; }
+  virtual void Apply(GlobalVariable *&Field) { ++Count; }
+  virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+    ++Count;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIDeserializeVisitor - This DIVisitor deserializes all the fields in the
+/// supplied DebugInfoDesc.
+class DIDeserializeVisitor : public DIVisitor {
+private:
+  DIDeserializer &DR;                   // Active deserializer.
+  unsigned I;                           // Current operand index.
+  ConstantStruct *CI;                   // GlobalVariable constant initializer.
+
+public:
+  DIDeserializeVisitor(DIDeserializer &D, GlobalVariable *GV)
+  : DIVisitor()
+  , DR(D)
+  , I(0)
+  , CI(cast<ConstantStruct>(GV->getInitializer()))
+  {}
+  
+  /// Apply - Set the value of each of the fields.
+  ///
+  virtual void Apply(int &Field) {
+    Constant *C = CI->getOperand(I++);
+    Field = cast<ConstantInt>(C)->getSExtValue();
+  }
+  virtual void Apply(unsigned &Field) {
+    Constant *C = CI->getOperand(I++);
+    Field = cast<ConstantInt>(C)->getZExtValue();
+  }
+  virtual void Apply(int64_t &Field) {
+    Constant *C = CI->getOperand(I++);
+    Field = cast<ConstantInt>(C)->getSExtValue();
+  }
+  virtual void Apply(uint64_t &Field) {
+    Constant *C = CI->getOperand(I++);
+    Field = cast<ConstantInt>(C)->getZExtValue();
+  }
+  virtual void Apply(bool &Field) {
+    Constant *C = CI->getOperand(I++);
+    Field = cast<ConstantInt>(C)->getZExtValue();
+  }
+  virtual void Apply(std::string &Field) {
+    Constant *C = CI->getOperand(I++);
+    Field = C->getStringValue();
+  }
+  virtual void Apply(DebugInfoDesc *&Field) {
+    Constant *C = CI->getOperand(I++);
+    Field = DR.Deserialize(C);
+  }
+  virtual void Apply(GlobalVariable *&Field) {
+    Constant *C = CI->getOperand(I++);
+    Field = getGlobalVariable(C);
+  }
+  virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+    Field.resize(0);
+    Constant *C = CI->getOperand(I++);
+    GlobalVariable *GV = getGlobalVariable(C);
+    if (GV->hasInitializer()) {
+      if (ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer())) {
+        for (unsigned i = 0, N = CA->getNumOperands(); i < N; ++i) {
+          GlobalVariable *GVE = getGlobalVariable(CA->getOperand(i));
+          DebugInfoDesc *DE = DR.Deserialize(GVE);
+          Field.push_back(DE);
+        }
+      } else if (GV->getInitializer()->isNullValue()) {
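+        // A null (zeroinitializer) array leaves Field with its declared
+        // number of NULL descriptor entries.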
+        if (const ArrayType *T =
+            dyn_cast<ArrayType>(GV->getType()->getElementType())) {
+          Field.resize(T->getNumElements());
+        }
+      }
+    }
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DISerializeVisitor - This DIVisitor serializes all the fields in
+/// the supplied DebugInfoDesc.
+class DISerializeVisitor : public DIVisitor {
+private:
+  DISerializer &SR;                     // Active serializer.
+  std::vector<Constant*> &Elements;     // Element accumulator.
+  
+public:
+  DISerializeVisitor(DISerializer &S, std::vector<Constant*> &E)
+  : DIVisitor()
+  , SR(S)
+  , Elements(E)
+  {}
+  
+  /// Apply - Serialize each of the fields.
+  ///
+  virtual void Apply(int &Field) {
+    Elements.push_back(ConstantInt::get(Type::Int32Ty, int32_t(Field)));
+  }
+  virtual void Apply(unsigned &Field) {
+    Elements.push_back(ConstantInt::get(Type::Int32Ty, uint32_t(Field)));
+  }
+  virtual void Apply(int64_t &Field) {
+    Elements.push_back(ConstantInt::get(Type::Int64Ty, int64_t(Field)));
+  }
+  virtual void Apply(uint64_t &Field) {
+    Elements.push_back(ConstantInt::get(Type::Int64Ty, uint64_t(Field)));
+  }
+  virtual void Apply(bool &Field) {
+    Elements.push_back(ConstantInt::get(Type::Int1Ty, Field));
+  }
+  virtual void Apply(std::string &Field) {
+    Elements.push_back(SR.getString(Field));
+  }
+  virtual void Apply(DebugInfoDesc *&Field) {
+    GlobalVariable *GV = NULL;
+    
+    // If non-NULL then convert to global.
+    if (Field) GV = SR.Serialize(Field);
+    
+    // FIXME - At some point should use specific type.
+    const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+    
+    if (GV) {
+      // Set to pointer to global.
+      Elements.push_back(ConstantExpr::getBitCast(GV, EmptyTy));
+    } else {
+      // Use NULL.
+      Elements.push_back(ConstantPointerNull::get(EmptyTy));
+    }
+  }
+  virtual void Apply(GlobalVariable *&Field) {
+    const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+    if (Field) {
+      Elements.push_back(ConstantExpr::getBitCast(Field, EmptyTy));
+    } else {
+      Elements.push_back(ConstantPointerNull::get(EmptyTy));
+    }
+  }
+  virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+    const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+    unsigned N = Field.size();
+    ArrayType *AT = ArrayType::get(EmptyTy, N);
+    std::vector<Constant *> ArrayElements;
+
+    for (unsigned i = 0, N = Field.size(); i < N; ++i) {
+      if (DebugInfoDesc *Element = Field[i]) {
+        GlobalVariable *GVE = SR.Serialize(Element);
+        Constant *CE = ConstantExpr::getBitCast(GVE, EmptyTy);
+        ArrayElements.push_back(cast<Constant>(CE));
+      } else {
+        ArrayElements.push_back(ConstantPointerNull::get(EmptyTy));
+      }
+    }
+    
+    Constant *CA = ConstantArray::get(AT, ArrayElements);
+    GlobalVariable *CAGV = new GlobalVariable(AT, true,
+                                              GlobalValue::InternalLinkage,
+                                              CA, "llvm.dbg.array",
+                                              SR.getModule());
+    CAGV->setSection("llvm.metadata");
+    Constant *CAE = ConstantExpr::getBitCast(CAGV, EmptyTy);
+    Elements.push_back(CAE);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIGetTypesVisitor - This DIVisitor gathers all the field types in
+/// the supplied DebugInfoDesc.
+class DIGetTypesVisitor : public DIVisitor {
+private:
+  DISerializer &SR;                     // Active serializer.
+  std::vector<const Type*> &Fields;     // Type accumulator.
+  
+public:
+  DIGetTypesVisitor(DISerializer &S, std::vector<const Type*> &F)
+  : DIVisitor()
+  , SR(S)
+  , Fields(F)
+  {}
+  
+  /// Apply - Record the type of each of the fields.
+  ///
+  virtual void Apply(int &Field) {
+    Fields.push_back(Type::Int32Ty);
+  }
+  virtual void Apply(unsigned &Field) {
+    Fields.push_back(Type::Int32Ty);
+  }
+  virtual void Apply(int64_t &Field) {
+    Fields.push_back(Type::Int64Ty);
+  }
+  virtual void Apply(uint64_t &Field) {
+    Fields.push_back(Type::Int64Ty);
+  }
+  virtual void Apply(bool &Field) {
+    Fields.push_back(Type::Int1Ty);
+  }
+  virtual void Apply(std::string &Field) {
+    Fields.push_back(SR.getStrPtrType());
+  }
+  virtual void Apply(DebugInfoDesc *&Field) {
+    // FIXME - At some point should use specific type.
+    const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+    Fields.push_back(EmptyTy);
+  }
+  virtual void Apply(GlobalVariable *&Field) {
+    const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+    Fields.push_back(EmptyTy);
+  }
+  virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+    const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+    Fields.push_back(EmptyTy);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIVerifyVisitor - This DIVisitor verifies all the field types against
+/// a constant initializer.
+class DIVerifyVisitor : public DIVisitor {
+private:
+  DIVerifier &VR;                       // Active verifier.
+  bool IsValid;                         // Validity status.
+  unsigned I;                           // Current operand index.
+  ConstantStruct *CI;                   // GlobalVariable constant initializer.
+  
+public:
+  DIVerifyVisitor(DIVerifier &V, GlobalVariable *GV)
+  : DIVisitor()
+  , VR(V)
+  , IsValid(true)
+  , I(0)
+  , CI(cast<ConstantStruct>(GV->getInitializer()))
+  {
+  }
+  
+  // Accessors.
+  bool isValid() const { return IsValid; }
+  
+  /// Apply - Verify the type of each of the fields.
+  ///
+  virtual void Apply(int &Field) {
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid && isa<ConstantInt>(C);
+  }
+  virtual void Apply(unsigned &Field) {
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid && isa<ConstantInt>(C);
+  }
+  virtual void Apply(int64_t &Field) {
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid && isa<ConstantInt>(C);
+  }
+  virtual void Apply(uint64_t &Field) {
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid && isa<ConstantInt>(C);
+  }
+  virtual void Apply(bool &Field) {
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid && isa<ConstantInt>(C) && C->getType() == Type::Int1Ty;
+  }
+  virtual void Apply(std::string &Field) {
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid &&
+              (!C || isStringValue(C) || C->isNullValue());
+  }
+  virtual void Apply(DebugInfoDesc *&Field) {
+    // FIXME - Prepare the correct descriptor.
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid && isGlobalVariable(C);
+  }
+  virtual void Apply(GlobalVariable *&Field) {
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid && isGlobalVariable(C);
+  }
+  virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+    Constant *C = CI->getOperand(I++);
+    IsValid = IsValid && isGlobalVariable(C);
+    if (!IsValid) return;
+
+    GlobalVariable *GV = getGlobalVariable(C);
+    IsValid = IsValid && GV && GV->hasInitializer();
+    if (!IsValid) return;
+    
+    ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+    IsValid = IsValid && CA;
+    if (!IsValid) return;
+
+    for (unsigned i = 0, N = CA->getNumOperands(); IsValid && i < N; ++i) {
+      IsValid = IsValid && isGlobalVariable(CA->getOperand(i));
+      if (!IsValid) return;
+    
+      GlobalVariable *GVE = getGlobalVariable(CA->getOperand(i));
+      VR.Verify(GVE);
+    }
+  }
+};
+
+
+//===----------------------------------------------------------------------===//
+
+/// TagFromGlobal - Returns the tag number from a debug info descriptor
+/// GlobalVariable.  Returns DW_TAG_invalid if the operand is not an unsigned
+/// int.
+unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV) {
+  ConstantInt *C = getUIntOperand(GV, 0);
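+  // Operand 0 packs the debug version (the LLVMDebugVersionMask bits)
+  // together with the tag (the remaining bits).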
+  return C ? ((unsigned)C->getZExtValue() & ~LLVMDebugVersionMask) :
+             (unsigned)DW_TAG_invalid;
+}
+
+/// VersionFromGlobal - Returns the version number from a debug info
+/// descriptor GlobalVariable.  Returns DW_TAG_invalid if the operand is not
+/// an unsigned int.
+unsigned DebugInfoDesc::VersionFromGlobal(GlobalVariable *GV) {
+  ConstantInt *C = getUIntOperand(GV, 0);
+  return C ? ((unsigned)C->getZExtValue() & LLVMDebugVersionMask) :
+             (unsigned)DW_TAG_invalid;
+}
+
+/// DescFactory - Create an instance of debug info descriptor based on Tag.
+/// Return NULL if not a recognized Tag.
+DebugInfoDesc *DebugInfoDesc::DescFactory(unsigned Tag) {
+  switch (Tag) {
+  case DW_TAG_anchor:           return new AnchorDesc();
+  case DW_TAG_compile_unit:     return new CompileUnitDesc();
+  case DW_TAG_variable:         return new GlobalVariableDesc();
+  case DW_TAG_subprogram:       return new SubprogramDesc();
+  case DW_TAG_lexical_block:    return new BlockDesc();
+  case DW_TAG_base_type:        return new BasicTypeDesc();
+  case DW_TAG_typedef:
+  case DW_TAG_pointer_type:        
+  case DW_TAG_reference_type:
+  case DW_TAG_const_type:
+  case DW_TAG_volatile_type:        
+  case DW_TAG_restrict_type:
+  case DW_TAG_member:
+  case DW_TAG_inheritance:      return new DerivedTypeDesc(Tag);
+  case DW_TAG_array_type:
+  case DW_TAG_structure_type:
+  case DW_TAG_union_type:
+  case DW_TAG_enumeration_type:
+  case DW_TAG_vector_type:
+  case DW_TAG_subroutine_type:  return new CompositeTypeDesc(Tag);
+  case DW_TAG_subrange_type:    return new SubrangeDesc();
+  case DW_TAG_enumerator:       return new EnumeratorDesc();
+  case DW_TAG_return_variable:
+  case DW_TAG_arg_variable:
+  case DW_TAG_auto_variable:    return new VariableDesc(Tag);
+  default: break;
+  }
+  return NULL;
+}
+
+/// getLinkage - get linkage appropriate for this type of descriptor.
+///
+GlobalValue::LinkageTypes DebugInfoDesc::getLinkage() const {
+  return GlobalValue::InternalLinkage;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the descriptor.
+///
+void DebugInfoDesc::ApplyToFields(DIVisitor *Visitor) {
+  Visitor->Apply(Tag);
+}
+
+//===----------------------------------------------------------------------===//
+
+AnchorDesc::AnchorDesc()
+: DebugInfoDesc(DW_TAG_anchor)
+, AnchorTag(0)
+{}
+AnchorDesc::AnchorDesc(AnchoredDesc *D)
+: DebugInfoDesc(DW_TAG_anchor)
+, AnchorTag(D->getTag())
+{}
+
+// Implement isa/cast/dyncast.
+bool AnchorDesc::classof(const DebugInfoDesc *D) {
+  return D->getTag() == DW_TAG_anchor;
+}
+  
+/// getLinkage - get linkage appropriate for this type of descriptor.
+///
+GlobalValue::LinkageTypes AnchorDesc::getLinkage() const {
+  return GlobalValue::LinkOnceLinkage;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the AnchorDesc.
+///
+void AnchorDesc::ApplyToFields(DIVisitor *Visitor) {
+  DebugInfoDesc::ApplyToFields(Visitor);
+  
+  Visitor->Apply(AnchorTag);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+/// A global variable name needs to be defined for each debug descriptor that
+/// is anchored. NOTE: each global variable named here also needs to be added
+/// to the list of names left external in the internalizer.
+///   ExternalNames.insert("llvm.dbg.compile_units");
+///   ExternalNames.insert("llvm.dbg.global_variables");
+///   ExternalNames.insert("llvm.dbg.subprograms");
+const char *AnchorDesc::getDescString() const {
+  switch (AnchorTag) {
+  case DW_TAG_compile_unit: return CompileUnitDesc::AnchorString;
+  case DW_TAG_variable:     return GlobalVariableDesc::AnchorString;
+  case DW_TAG_subprogram:   return SubprogramDesc::AnchorString;
+  default: break;
+  }
+
+  assert(0 && "Tag does not have a case for anchor string");
+  return "";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *AnchorDesc::getTypeString() const {
+  return "llvm.dbg.anchor.type";
+}
+
+#ifndef NDEBUG
+void AnchorDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "AnchorTag(" << AnchorTag << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+AnchoredDesc::AnchoredDesc(unsigned T)
+: DebugInfoDesc(T)
+, Anchor(NULL)
+{}
+
+/// ApplyToFields - Target the visitor to the fields of the AnchoredDesc.
+///
+void AnchoredDesc::ApplyToFields(DIVisitor *Visitor) {
+  DebugInfoDesc::ApplyToFields(Visitor);
+
+  Visitor->Apply(Anchor);
+}
+
+//===----------------------------------------------------------------------===//
+
+CompileUnitDesc::CompileUnitDesc()
+: AnchoredDesc(DW_TAG_compile_unit)
+, Language(0)
+, FileName("")
+, Directory("")
+, Producer("")
+{}
+
+// Implement isa/cast/dyncast.
+bool CompileUnitDesc::classof(const DebugInfoDesc *D) {
+  return D->getTag() == DW_TAG_compile_unit;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the CompileUnitDesc.
+///
+void CompileUnitDesc::ApplyToFields(DIVisitor *Visitor) {
+  AnchoredDesc::ApplyToFields(Visitor);
+  
+  // Handle cases out of sync with compiler.
+  if (getVersion() == 0) {
+    unsigned DebugVersion;
+    Visitor->Apply(DebugVersion);
+  }
+
+  Visitor->Apply(Language);
+  Visitor->Apply(FileName);
+  Visitor->Apply(Directory);
+  Visitor->Apply(Producer);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *CompileUnitDesc::getDescString() const {
+  return "llvm.dbg.compile_unit";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *CompileUnitDesc::getTypeString() const {
+  return "llvm.dbg.compile_unit.type";
+}
+
+/// getAnchorString - Return a string used to label this descriptor's anchor.
+///
+const char *CompileUnitDesc::AnchorString = "llvm.dbg.compile_units";
+const char *CompileUnitDesc::getAnchorString() const {
+  return AnchorString;
+}
+
+#ifndef NDEBUG
+void CompileUnitDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Anchor(" << getAnchor() << "), "
+       << "Language(" << Language << "), "
+       << "FileName(\"" << FileName << "\"), "
+       << "Directory(\"" << Directory << "\"), "
+       << "Producer(\"" << Producer << "\")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+TypeDesc::TypeDesc(unsigned T)
+: DebugInfoDesc(T)
+, Context(NULL)
+, Name("")
+, File(NULL)
+, Line(0)
+, Size(0)
+, Align(0)
+, Offset(0)
+, Flags(0)
+{}
+
+/// ApplyToFields - Target the visitor to the fields of the TypeDesc.
+///
+void TypeDesc::ApplyToFields(DIVisitor *Visitor) {
+  DebugInfoDesc::ApplyToFields(Visitor);
+  
+  Visitor->Apply(Context);
+  Visitor->Apply(Name);
+  Visitor->Apply(File);
+  Visitor->Apply(Line);
+  Visitor->Apply(Size);
+  Visitor->Apply(Align);
+  Visitor->Apply(Offset);
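+  // The Flags field only exists in descriptors newer than LLVMDebugVersion4.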
+  if (getVersion() > LLVMDebugVersion4) Visitor->Apply(Flags);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *TypeDesc::getDescString() const {
+  return "llvm.dbg.type";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *TypeDesc::getTypeString() const {
+  return "llvm.dbg.type.type";
+}
+
+#ifndef NDEBUG
+void TypeDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Context(" << Context << "), "
+       << "Name(\"" << Name << "\"), "
+       << "File(" << File << "), "
+       << "Line(" << Line << "), "
+       << "Size(" << Size << "), "
+       << "Align(" << Align << "), "
+       << "Offset(" << Offset << "), "
+       << "Flags(" << Flags << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+BasicTypeDesc::BasicTypeDesc()
+: TypeDesc(DW_TAG_base_type)
+, Encoding(0)
+{}
+
+// Implement isa/cast/dyncast.
+bool BasicTypeDesc::classof(const DebugInfoDesc *D) {
+  return D->getTag() == DW_TAG_base_type;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the BasicTypeDesc.
+///
+void BasicTypeDesc::ApplyToFields(DIVisitor *Visitor) {
+  TypeDesc::ApplyToFields(Visitor);
+  
+  Visitor->Apply(Encoding);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *BasicTypeDesc::getDescString() const {
+  return "llvm.dbg.basictype";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *BasicTypeDesc::getTypeString() const {
+  return "llvm.dbg.basictype.type";
+}
+
+#ifndef NDEBUG
+void BasicTypeDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Context(" << getContext() << "), "
+       << "Name(\"" << getName() << "\"), "
+       << "Size(" << getSize() << "), "
+       << "Encoding(" << Encoding << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+DerivedTypeDesc::DerivedTypeDesc(unsigned T)
+: TypeDesc(T)
+, FromType(NULL)
+{}
+
+// Implement isa/cast/dyncast.
+bool DerivedTypeDesc::classof(const DebugInfoDesc *D) {
+  unsigned T =  D->getTag();
+  switch (T) {
+  case DW_TAG_typedef:
+  case DW_TAG_pointer_type:
+  case DW_TAG_reference_type:
+  case DW_TAG_const_type:
+  case DW_TAG_volatile_type:
+  case DW_TAG_restrict_type:
+  case DW_TAG_member:
+  case DW_TAG_inheritance:
+    return true;
+  default: break;
+  }
+  return false;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the DerivedTypeDesc.
+///
+void DerivedTypeDesc::ApplyToFields(DIVisitor *Visitor) {
+  TypeDesc::ApplyToFields(Visitor);
+  
+  Visitor->Apply(FromType);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *DerivedTypeDesc::getDescString() const {
+  return "llvm.dbg.derivedtype";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *DerivedTypeDesc::getTypeString() const {
+  return "llvm.dbg.derivedtype.type";
+}
+
+#ifndef NDEBUG
+void DerivedTypeDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Context(" << getContext() << "), "
+       << "Name(\"" << getName() << "\"), "
+       << "Size(" << getSize() << "), "
+       << "File(" << getFile() << "), "
+       << "Line(" << getLine() << "), "
+       << "FromType(" << FromType << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+CompositeTypeDesc::CompositeTypeDesc(unsigned T)
+: DerivedTypeDesc(T)
+, Elements()
+{}
+  
+// Implement isa/cast/dyncast.
+bool CompositeTypeDesc::classof(const DebugInfoDesc *D) {
+  unsigned T =  D->getTag();
+  switch (T) {
+  case DW_TAG_array_type:
+  case DW_TAG_structure_type:
+  case DW_TAG_union_type:
+  case DW_TAG_enumeration_type:
+  case DW_TAG_vector_type:
+  case DW_TAG_subroutine_type:
+    return true;
+  default: break;
+  }
+  return false;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the CompositeTypeDesc.
+///
+void CompositeTypeDesc::ApplyToFields(DIVisitor *Visitor) {
+  DerivedTypeDesc::ApplyToFields(Visitor);  
+
+  Visitor->Apply(Elements);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *CompositeTypeDesc::getDescString() const {
+  return "llvm.dbg.compositetype";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *CompositeTypeDesc::getTypeString() const {
+  return "llvm.dbg.compositetype.type";
+}
+
+#ifndef NDEBUG
+void CompositeTypeDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Context(" << getContext() << "), "
+       << "Name(\"" << getName() << "\"), "
+       << "Size(" << getSize() << "), "
+       << "File(" << getFile() << "), "
+       << "Line(" << getLine() << "), "
+       << "FromType(" << getFromType() << "), "
+       << "Elements.size(" << Elements.size() << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+SubrangeDesc::SubrangeDesc()
+: DebugInfoDesc(DW_TAG_subrange_type)
+, Lo(0)
+, Hi(0)
+{}
+
+// Implement isa/cast/dyncast.
+bool SubrangeDesc::classof(const DebugInfoDesc *D) {
+  return D->getTag() == DW_TAG_subrange_type;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the SubrangeDesc.
+///
+void SubrangeDesc::ApplyToFields(DIVisitor *Visitor) {
+  DebugInfoDesc::ApplyToFields(Visitor);
+
+  Visitor->Apply(Lo);
+  Visitor->Apply(Hi);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *SubrangeDesc::getDescString() const {
+  return "llvm.dbg.subrange";
+}
+  
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *SubrangeDesc::getTypeString() const {
+  return "llvm.dbg.subrange.type";
+}
+
+#ifndef NDEBUG
+void SubrangeDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Lo(" << Lo << "), "
+       << "Hi(" << Hi << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+EnumeratorDesc::EnumeratorDesc()
+: DebugInfoDesc(DW_TAG_enumerator)
+, Name("")
+, Value(0)
+{}
+
+// Implement isa/cast/dyncast.
+bool EnumeratorDesc::classof(const DebugInfoDesc *D) {
+  return D->getTag() == DW_TAG_enumerator;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the EnumeratorDesc.
+///
+void EnumeratorDesc::ApplyToFields(DIVisitor *Visitor) {
+  DebugInfoDesc::ApplyToFields(Visitor);
+
+  Visitor->Apply(Name);
+  Visitor->Apply(Value);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *EnumeratorDesc::getDescString() const {
+  return "llvm.dbg.enumerator";
+}
+  
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *EnumeratorDesc::getTypeString() const {
+  return "llvm.dbg.enumerator.type";
+}
+
+#ifndef NDEBUG
+void EnumeratorDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Name(" << Name << "), "
+       << "Value(" << Value << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+VariableDesc::VariableDesc(unsigned T)
+: DebugInfoDesc(T)
+, Context(NULL)
+, Name("")
+, File(NULL)
+, Line(0)
+, TyDesc(0)
+{}
+
+// Implement isa/cast/dyncast.
+bool VariableDesc::classof(const DebugInfoDesc *D) {
+  unsigned T =  D->getTag();
+  switch (T) {
+  case DW_TAG_auto_variable:
+  case DW_TAG_arg_variable:
+  case DW_TAG_return_variable:
+    return true;
+  default: break;
+  }
+  return false;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the VariableDesc.
+///
+void VariableDesc::ApplyToFields(DIVisitor *Visitor) {
+  DebugInfoDesc::ApplyToFields(Visitor);
+  
+  Visitor->Apply(Context);
+  Visitor->Apply(Name);
+  Visitor->Apply(File);
+  Visitor->Apply(Line);
+  Visitor->Apply(TyDesc);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *VariableDesc::getDescString() const {
+  return "llvm.dbg.variable";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *VariableDesc::getTypeString() const {
+  return "llvm.dbg.variable.type";
+}
+
+#ifndef NDEBUG
+void VariableDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Context(" << Context << "), "
+       << "Name(\"" << Name << "\"), "
+       << "File(" << File << "), "
+       << "Line(" << Line << "), "
+       << "TyDesc(" << TyDesc << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+GlobalDesc::GlobalDesc(unsigned T)
+: AnchoredDesc(T)
+, Context(0)
+, Name("")
+, FullName("")
+, LinkageName("")
+, File(NULL)
+, Line(0)
+, TyDesc(NULL)
+, IsStatic(false)
+, IsDefinition(false)
+{}
+
+/// ApplyToFields - Target the visitor to the fields of the global.
+///
+void GlobalDesc::ApplyToFields(DIVisitor *Visitor) {
+  AnchoredDesc::ApplyToFields(Visitor);
+
+  Visitor->Apply(Context);
+  Visitor->Apply(Name);
+  Visitor->Apply(FullName);
+  Visitor->Apply(LinkageName);
+  Visitor->Apply(File);
+  Visitor->Apply(Line);
+  Visitor->Apply(TyDesc);
+  Visitor->Apply(IsStatic);
+  Visitor->Apply(IsDefinition);
+}
+
+//===----------------------------------------------------------------------===//
+
+GlobalVariableDesc::GlobalVariableDesc()
+: GlobalDesc(DW_TAG_variable)
+, Global(NULL)
+{}
+
+// Implement isa/cast/dyncast.
+bool GlobalVariableDesc::classof(const DebugInfoDesc *D) {
+  return D->getTag() == DW_TAG_variable; 
+}
+
+/// ApplyToFields - Target the visitor to the fields of the GlobalVariableDesc.
+///
+void GlobalVariableDesc::ApplyToFields(DIVisitor *Visitor) {
+  GlobalDesc::ApplyToFields(Visitor);
+
+  Visitor->Apply(Global);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *GlobalVariableDesc::getDescString() const {
+  return "llvm.dbg.global_variable";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *GlobalVariableDesc::getTypeString() const {
+  return "llvm.dbg.global_variable.type";
+}
+
+/// getAnchorString - Return a string used to label this descriptor's anchor.
+///
+const char *GlobalVariableDesc::AnchorString = "llvm.dbg.global_variables";
+const char *GlobalVariableDesc::getAnchorString() const {
+  return AnchorString;
+}
+
+#ifndef NDEBUG
+void GlobalVariableDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Anchor(" << getAnchor() << "), "
+       << "Name(\"" << getName() << "\"), "
+       << "FullName(\"" << getFullName() << "\"), "
+       << "LinkageName(\"" << getLinkageName() << "\"), "
+       << "File(" << getFile() << "),"
+       << "Line(" << getLine() << "),"
+       << "Type(" << getType() << "), "
+       << "IsStatic(" << (isStatic() ? "true" : "false") << "), "
+       << "IsDefinition(" << (isDefinition() ? "true" : "false") << "), "
+       << "Global(" << Global << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+SubprogramDesc::SubprogramDesc()
+: GlobalDesc(DW_TAG_subprogram)
+{}
+
+// Implement isa/cast/dyncast.
+bool SubprogramDesc::classof(const DebugInfoDesc *D) {
+  return D->getTag() == DW_TAG_subprogram;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the
+/// SubprogramDesc.
+void SubprogramDesc::ApplyToFields(DIVisitor *Visitor) {
+  GlobalDesc::ApplyToFields(Visitor);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *SubprogramDesc::getDescString() const {
+  return "llvm.dbg.subprogram";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *SubprogramDesc::getTypeString() const {
+  return "llvm.dbg.subprogram.type";
+}
+
+/// getAnchorString - Return a string used to label this descriptor's anchor.
+///
+const char *SubprogramDesc::AnchorString = "llvm.dbg.subprograms";
+const char *SubprogramDesc::getAnchorString() const {
+  return AnchorString;
+}
+
+#ifndef NDEBUG
+void SubprogramDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "), "
+       << "Anchor(" << getAnchor() << "), "
+       << "Name(\"" << getName() << "\"), "
+       << "FullName(\"" << getFullName() << "\"), "
+       << "LinkageName(\"" << getLinkageName() << "\"), "
+       << "File(" << getFile() << "),"
+       << "Line(" << getLine() << "),"
+       << "Type(" << getType() << "), "
+       << "IsStatic(" << (isStatic() ? "true" : "false") << "), "
+       << "IsDefinition(" << (isDefinition() ? "true" : "false") << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+BlockDesc::BlockDesc()
+: DebugInfoDesc(DW_TAG_lexical_block)
+, Context(NULL)
+{}
+
+// Implement isa/cast/dyncast.
+bool BlockDesc::classof(const DebugInfoDesc *D) {
+  return D->getTag() == DW_TAG_lexical_block;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the BlockDesc.
+///
+void BlockDesc::ApplyToFields(DIVisitor *Visitor) {
+  DebugInfoDesc::ApplyToFields(Visitor);
+
+  Visitor->Apply(Context);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *BlockDesc::getDescString() const {
+  return "llvm.dbg.block";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *BlockDesc::getTypeString() const {
+  return "llvm.dbg.block.type";
+}
+
+#ifndef NDEBUG
+void BlockDesc::dump() {
+  cerr << getDescString() << " "
+       << "Version(" << getVersion() << "), "
+       << "Tag(" << getTag() << "),"
+       << "Context(" << Context << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+DebugInfoDesc *DIDeserializer::Deserialize(Value *V) {
+  return Deserialize(getGlobalVariable(V));
+}
+DebugInfoDesc *DIDeserializer::Deserialize(GlobalVariable *GV) {
+  // Handle NULL.
+  if (!GV) return NULL;
+
+  // Check to see if it has been already deserialized.
+  DebugInfoDesc *&Slot = GlobalDescs[GV];
+  if (Slot) return Slot;
+
+  // Get the Tag from the global.
+  unsigned Tag = DebugInfoDesc::TagFromGlobal(GV);
+  
+  // Create an empty instance of the correct sort.
+  Slot = DebugInfoDesc::DescFactory(Tag);
+  
+  // If not a user defined descriptor.
+  if (Slot) {
+    // Deserialize the fields.
+    DIDeserializeVisitor DRAM(*this, GV);
+    DRAM.ApplyToFields(Slot);
+  }
+  
+  return Slot;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// getStrPtrType - Return a "sbyte *" type.
+///
+const PointerType *DISerializer::getStrPtrType() {
+  // If not already defined.
+  if (!StrPtrTy) {
+    // Construct the pointer to signed bytes.
+    StrPtrTy = PointerType::get(Type::Int8Ty);
+  }
+  
+  return StrPtrTy;
+}
+
+/// getEmptyStructPtrType - Return a "{ }*" type.
+///
+const PointerType *DISerializer::getEmptyStructPtrType() {
+  // If not already defined.
+  if (!EmptyStructPtrTy) {
+    // Construct the empty structure type.
+    const StructType *EmptyStructTy =
+                                    StructType::get(std::vector<const Type*>());
+    // Construct the pointer to empty structure type.
+    EmptyStructPtrTy = PointerType::get(EmptyStructTy);
+  }
+  
+  return EmptyStructPtrTy;
+}
+
+/// getTagType - Return the type describing the specified descriptor (via tag.)
+///
+const StructType *DISerializer::getTagType(DebugInfoDesc *DD) {
+  // Attempt to get the previously defined type.
+  StructType *&Ty = TagTypes[DD->getTag()];
+  
+  // If not already defined.
+  if (!Ty) {
+    // Set up fields vector.
+    std::vector<const Type*> Fields;
+    // Get types of fields.
+    DIGetTypesVisitor GTAM(*this, Fields);
+    GTAM.ApplyToFields(DD);
+
+    // Construct structured type.
+    Ty = StructType::get(Fields);
+    
+    // Register type name with module.
+    M->addTypeName(DD->getTypeString(), Ty);
+  }
+  
+  return Ty;
+}
+
+/// getString - Construct the string as constant string global.
+///
+Constant *DISerializer::getString(const std::string &String) {
+  // Check string cache for previous edition.
+  Constant *&Slot = StringCache[String];
+  // Return Constant if previously defined.
+  if (Slot) return Slot;
+  // If empty string then use a sbyte* null instead.
+  if (String.empty()) {
+    Slot = ConstantPointerNull::get(getStrPtrType());
+  } else {
+    // Construct string as an llvm constant.
+    Constant *ConstStr = ConstantArray::get(String);
+    // Otherwise create and return a new string global.
+    GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true,
+                                               GlobalVariable::InternalLinkage,
+                                               ConstStr, ".str", M);
+    StrGV->setSection("llvm.metadata");
+    // Convert to generic string pointer.
+    Slot = ConstantExpr::getBitCast(StrGV, getStrPtrType());
+  }
+  return Slot;
+  
+}
+
+/// Serialize - Recursively cast the specified descriptor into a GlobalVariable
+/// so that it can be serialized to a .bc or .ll file.
+GlobalVariable *DISerializer::Serialize(DebugInfoDesc *DD) {
+  // Check if the DebugInfoDesc is already in the map.
+  GlobalVariable *&Slot = DescGlobals[DD];
+  
+  // See if DebugInfoDesc exists, if so return prior GlobalVariable.
+  if (Slot) return Slot;
+  
+  // Get the type associated with the Tag.
+  const StructType *Ty = getTagType(DD);
+
+  // Create the GlobalVariable early to prevent infinite recursion.
+  GlobalVariable *GV = new GlobalVariable(Ty, true, DD->getLinkage(),
+                                          NULL, DD->getDescString(), M);
+  GV->setSection("llvm.metadata");
+
+  // Insert new GlobalVariable in DescGlobals map.
+  Slot = GV;
+ 
+  // Set up elements vector
+  std::vector<Constant*> Elements;
+  // Add fields.
+  DISerializeVisitor SRAM(*this, Elements);
+  SRAM.ApplyToFields(DD);
+  
+  // Set the globals initializer.
+  GV->setInitializer(ConstantStruct::get(Ty, Elements));
+  
+  return GV;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Verify - Return true if the GlobalVariable appears to be a valid
+/// serialization of a DebugInfoDesc.
+bool DIVerifier::Verify(Value *V) {
+  return !V || Verify(getGlobalVariable(V));
+}
+bool DIVerifier::Verify(GlobalVariable *GV) {
+  // NULLs are valid.
+  if (!GV) return true;
+  
+  // Check prior validity.
+  unsigned &ValiditySlot = Validity[GV];
+  
+  // If visited before then use old state.
+  if (ValiditySlot) return ValiditySlot == Valid;
+  
+  // Assume validity for the time being (recursion.)
+  ValiditySlot = Valid;
+  
+  // Make sure the global is internal or link once (anchor.)
+  if (GV->getLinkage() != GlobalValue::InternalLinkage &&
+      GV->getLinkage() != GlobalValue::LinkOnceLinkage) {
+    ValiditySlot = Invalid;
+    return false;
+  }
+
+  // Get the Tag.
+  unsigned Tag = DebugInfoDesc::TagFromGlobal(GV);
+  
+  // Check for user defined descriptors.
+  if (Tag == DW_TAG_invalid) {
+    ValiditySlot = Valid;
+    return true;
+  }
+  
+  // Get the Version.
+  unsigned Version = DebugInfoDesc::VersionFromGlobal(GV);
+  
+  // Check for version mismatch.
+  if (Version != LLVMDebugVersion) {
+    ValiditySlot = Invalid;
+    return false;
+  }
+
+  // Construct an empty DebugInfoDesc.
+  DebugInfoDesc *DD = DebugInfoDesc::DescFactory(Tag);
+  
+  // Allow for user defined descriptors.
+  if (!DD) return true;
+  
+  // Get the initializer constant.
+  ConstantStruct *CI = cast<ConstantStruct>(GV->getInitializer());
+  
+  // Get the operand count.
+  unsigned N = CI->getNumOperands();
+  
+  // Get the field count.
+  unsigned &CountSlot = Counts[Tag];
+  if (!CountSlot) {
+    // Compute and cache the expected field count for this tag.
+    DICountVisitor CTAM;
+    CTAM.ApplyToFields(DD);
+    CountSlot = CTAM.getCount();
+  }
+  
+  // The field count must be at most equal to the operand count.
+  if (CountSlot >  N) {
+    delete DD;
+    ValiditySlot = Invalid;
+    return false;
+  }
+  
+  // Check each field for valid type.
+  DIVerifyVisitor VRAM(*this, GV);
+  VRAM.ApplyToFields(DD);
+  
+  // Release empty DebugInfoDesc.
+  delete DD;
+  
+  // If fields are not valid.
+  if (!VRAM.isValid()) {
+    ValiditySlot = Invalid;
+    return false;
+  }
+  
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+
+DebugScope::~DebugScope() {
+  for (unsigned i = 0, N = Scopes.size(); i < N; ++i) delete Scopes[i];
+  for (unsigned j = 0, M = Variables.size(); j < M; ++j) delete Variables[j];
+}
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass((intptr_t)&ID)
+, DR()
+, VR()
+, CompileUnits()
+, Directories()
+, SourceFiles()
+, Lines()
+, LabelIDList()
+, ScopeMap()
+, RootScope(NULL)
+, FrameMoves()
+, LandingPads()
+, Personalities()
+, CallsEHReturn(0)
+, CallsUnwindInit(0)
+{
+  // Always emit "no personality" info
+  Personalities.push_back(NULL);
+}
+MachineModuleInfo::~MachineModuleInfo() {
+
+}
+
+/// doInitialization - Initialize the state for a new module.
+///
+bool MachineModuleInfo::doInitialization() {
+  return false;
+}
+
+/// doFinalization - Tear down the state after completion of a module.
+///
+bool MachineModuleInfo::doFinalization() {
+  return false;
+}
+
+/// BeginFunction - Begin gathering function meta information.
+///
+void MachineModuleInfo::BeginFunction(MachineFunction *MF) {
+  // Coming soon.
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+  // Clean up scope information.
+  if (RootScope) {
+    delete RootScope;
+    ScopeMap.clear();
+    RootScope = NULL;
+  }
+  
+  // Clean up line info.
+  Lines.clear();
+
+  // Clean up frame info.
+  FrameMoves.clear();
+  
+  // Clean up exception info.
+  LandingPads.clear();
+  TypeInfos.clear();
+  FilterIds.clear();
+  FilterEnds.clear();
+  CallsEHReturn = 0;
+  CallsUnwindInit = 0;
+}
+
+/// getDescFor - Convert a Value to a debug information descriptor.
+///
+// FIXME - use new Value type when available.
+DebugInfoDesc *MachineModuleInfo::getDescFor(Value *V) {
+  return DR.Deserialize(V);
+}
+
+/// Verify - Verify that a Value is debug information descriptor.
+///
+bool MachineModuleInfo::Verify(Value *V) {
+  return VR.Verify(V);
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(Module &M) {
+  SetupCompileUnits(M);
+}
+
+/// needsFrameInfo - Returns true if we need to gather callee-saved register
+/// move info for the frame.
+bool MachineModuleInfo::needsFrameInfo() const {
+  return hasDebugInfo() || ExceptionHandling;
+}
+
+/// SetupCompileUnits - Set up the unique vector of compile units.
+///
+void MachineModuleInfo::SetupCompileUnits(Module &M) {
+  std::vector<CompileUnitDesc *>CU = getAnchoredDescriptors<CompileUnitDesc>(M);
+  
+  for (unsigned i = 0, N = CU.size(); i < N; i++) {
+    CompileUnits.insert(CU[i]);
+  }
+}
+
+/// getCompileUnits - Return a vector of debug compile units.
+///
+const UniqueVector<CompileUnitDesc *> MachineModuleInfo::getCompileUnits()const{
+  return CompileUnits;
+}
+
+/// getGlobalVariablesUsing - Return all of the GlobalVariables that use the
+/// named GlobalVariable.
+std::vector<GlobalVariable*>
+MachineModuleInfo::getGlobalVariablesUsing(Module &M,
+                                           const std::string &RootName) {
+  return ::getGlobalVariablesUsing(M, RootName);
+}
+
+/// RecordLabel - Records location information and associates it with a
+/// debug label.  Returns a unique label ID used to generate a label and 
+/// provide correspondence to the source line list.
+unsigned MachineModuleInfo::RecordLabel(unsigned Line, unsigned Column,
+                                       unsigned Source) {
+  unsigned ID = NextLabelID();
+  Lines.push_back(SourceLineInfo(Line, Column, Source, ID));
+  return ID;
+}
+
+/// RecordSource - Register a source file with debug info.  Returns a source
+/// ID.
+unsigned MachineModuleInfo::RecordSource(const std::string &Directory,
+                                         const std::string &Source) {
+  unsigned DirectoryID = Directories.insert(Directory);
+  return SourceFiles.insert(SourceFileInfo(DirectoryID, Source));
+}
+unsigned MachineModuleInfo::RecordSource(const CompileUnitDesc *CompileUnit) {
+  return RecordSource(CompileUnit->getDirectory(),
+                      CompileUnit->getFileName());
+}
+
+/// RecordRegionStart - Indicate the start of a region.
+///
+unsigned MachineModuleInfo::RecordRegionStart(Value *V) {
+  // FIXME - need to be able to handle split scopes because of bb cloning.
+  DebugInfoDesc *ScopeDesc = DR.Deserialize(V);
+  DebugScope *Scope = getOrCreateScope(ScopeDesc);
+  unsigned ID = NextLabelID();
+  if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
+  return ID;
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+///
+unsigned MachineModuleInfo::RecordRegionEnd(Value *V) {
+  // FIXME - need to be able to handle split scopes because of bb cloning.
+  DebugInfoDesc *ScopeDesc = DR.Deserialize(V);
+  DebugScope *Scope = getOrCreateScope(ScopeDesc);
+  unsigned ID = NextLabelID();
+  Scope->setEndLabelID(ID);
+  return ID;
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+///
+void MachineModuleInfo::RecordVariable(Value *V, unsigned FrameIndex) {
+  VariableDesc *VD = cast<VariableDesc>(DR.Deserialize(V));
+  DebugScope *Scope = getOrCreateScope(VD->getContext());
+  DebugVariable *DV = new DebugVariable(VD, FrameIndex);
+  Scope->AddVariable(DV);
+}
+
+/// getOrCreateScope - Returns the scope associated with the given descriptor.
+///
+DebugScope *MachineModuleInfo::getOrCreateScope(DebugInfoDesc *ScopeDesc) {
+  DebugScope *&Slot = ScopeMap[ScopeDesc];
+  if (!Slot) {
+    // FIXME - breaks down when the context is an inlined function.
+    DebugInfoDesc *ParentDesc = NULL;
+    if (BlockDesc *Block = dyn_cast<BlockDesc>(ScopeDesc)) {
+      ParentDesc = Block->getContext();
+    }
+    DebugScope *Parent = ParentDesc ? getOrCreateScope(ParentDesc) : NULL;
+    Slot = new DebugScope(Parent, ScopeDesc);
+    if (Parent) {
+      Parent->AddScope(Slot);
+    } else if (RootScope) {
+      // FIXME - Add inlined function scopes to the root so we can delete
+      // them later.  Long term, handle inlined functions properly.
+      RootScope->AddScope(Slot);
+    } else {
+      // First function is top level function.
+      RootScope = Slot;
+    }
+  }
+  return Slot;
+}
+
+//===-EH-------------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+    (MachineBasicBlock *LandingPad) {
+  unsigned N = LandingPads.size();
+  for (unsigned i = 0; i < N; ++i) {
+    LandingPadInfo &LP = LandingPads[i];
+    if (LP.LandingPadBlock == LandingPad)
+      return LP;
+  }
+  
+  LandingPads.push_back(LandingPadInfo(LandingPad));
+  return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+                                  unsigned BeginLabel, unsigned EndLabel) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.BeginLabels.push_back(BeginLabel);
+  LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+  unsigned LandingPadLabel = NextLabelID();
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.LandingPadLabel = LandingPadLabel;  
+  return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+                                       Function *Personality) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.Personality = Personality;
+
+  for (unsigned i = 0; i < Personalities.size(); ++i)
+    if (Personalities[i] == Personality)
+      return;
+  
+  Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+                                        std::vector<GlobalVariable *> &TyInfo) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  for (unsigned N = TyInfo.size(); N; --N)
+    LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+                                        std::vector<GlobalVariable *> &TyInfo) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  std::vector<unsigned> IdsInFilter (TyInfo.size());
+  for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+    IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+  LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads() {
+  for (unsigned i = 0; i != LandingPads.size(); ) {
+    LandingPadInfo &LandingPad = LandingPads[i];
+    LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel);
+
+    if (!LandingPad.LandingPadBlock)
+      // Must not have cleanups if no landing pad.
+      LandingPad.TypeIds.clear();
+
+    // Special case: we *should* emit LPs with a null LP MBB; this indicates
+    // the "rethrow" case.
+    if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+      LandingPads.erase(LandingPads.begin() + i);
+      continue;
+    }
+          
+    for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) {
+      unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]);
+      unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]);
+
+      if (!BeginLabel || !EndLabel) {
+        LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+        LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+        continue;
+      }
+
+      LandingPad.BeginLabels[j] = BeginLabel;
+      LandingPad.EndLabels[j] = EndLabel;
+      ++j;
+    }
+    
+    ++i;
+  }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo.  This is 
+/// function wide.
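+/// Type ids are 1-based; the first typeinfo recorded gets id 1.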
+unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) {
+  for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+    if (TypeInfos[i] == TI) return i + 1;
+
+  TypeInfos.push_back(TI);
+  return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos.  This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+  // If the new filter coincides with the tail of an existing filter, then
+  // re-use the existing filter.  Folding filters more than this requires
+  // re-ordering filters and/or their elements - probably not worth it.
+  for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+       E = FilterEnds.end(); I != E; ++I) {
+    unsigned i = *I, j = TyIds.size();
+
+    while (i && j)
+      if (FilterIds[--i] != TyIds[--j])
+        goto try_next;
+
+    if (!j)
+      // The new filter coincides with range [i, end) of the existing filter.
+      return -(1 + i);
+
+try_next:;
+  }
+
+  // Add the new filter.
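+  // Filter ids are negative: -(1 + offset of the filter's first entry in
+  // FilterIds).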
+  int FilterID = -(1 + FilterIds.size());
+  FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+  for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+    FilterIds.push_back(TyIds[I]);
+  FilterEnds.push_back(FilterIds.size());
+  FilterIds.push_back(0); // terminator
+  return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+Function *MachineModuleInfo::getPersonality() const {
+  // FIXME: Until PR1414 will be fixed, we're using 1 personality function per
+  // function
+  return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return unique index for current personality
+/// function. NULL personality function should always get zero index.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+  const Function* Personality = NULL;
+  
+  // Scan landing pads. If there is at least one non-NULL personality - use it.
+  for (unsigned i = 0; i != LandingPads.size(); ++i)
+    if (LandingPads[i].Personality) {
+      Personality = LandingPads[i].Personality;
+      break;
+    }
+  
+  for (unsigned i = 0; i < Personalities.size(); ++i) {
+    if (Personalities[i] == Personality)
+      return i;
+  }
+
+  // This should never happen
+  assert(0 && "Personality function should be set!");
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+/// DebugLabelFolding pass - This pass prunes out redundant labels.  This allows
+/// a debug info consumer to determine if the range of two labels is empty, by
+/// seeing if the labels map to the same reduced label.
+
+namespace llvm {
+
+struct DebugLabelFolder : public MachineFunctionPass {
+  static char ID;
+  DebugLabelFolder() : MachineFunctionPass((intptr_t)&ID) {}
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+  virtual const char *getPassName() const { return "Label Folder"; }
+};
+
+char DebugLabelFolder::ID = 0;
+
+bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
+  // Get machine module info.
+  MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>();
+  if (!MMI) return false;
+  // Get target instruction info.
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  if (!TII) return false;
+  
+  // Track if change is made.
+  bool MadeChange = false;
+  // No prior label to begin.
+  unsigned PriorLabel = 0;
+  
+  // Iterate through basic blocks.
+  for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+       BB != E; ++BB) {
+    // Iterate through instructions.
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+      // Is it a label.
+      if ((unsigned)I->getOpcode() == TargetInstrInfo::LABEL) {
+        // The label ID # is always operand #0, an immediate.
+        unsigned NextLabel = I->getOperand(0).getImm();
+        
+        // If there was an immediate prior label.
+        if (PriorLabel) {
+          // Remap the current label to prior label.
+          MMI->RemapLabel(NextLabel, PriorLabel);
+          // Delete the current label.
+          I = BB->erase(I);
+          // Indicate a change has been made.
+          MadeChange = true;
+          continue;
+        } else {
+          // Start a new round.
+          PriorLabel = NextLabel;
+        }
+      } else {
+        // No consecutive labels.
+        PriorLabel = 0;
+      }
+      
+      ++I;
+    }
+  }
+  
+  return MadeChange;
+}
+
+FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
+
+}
+
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..a7ba5bb
--- /dev/null
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+  Node->setNext(List);
+  List = Node;
+  if (Listener) Listener->NotifyAdd(Node->getName(),
+                                    Node->getCtor(),
+                                    Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+  for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+    if (*I == Node) {
+      if (Listener) Listener->NotifyRemove(Node->getName());
+      *I = (*I)->getNext();
+      break;
+    }
+  }
+}
diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile
new file mode 100644
index 0000000..7e5d8ee
--- /dev/null
+++ b/lib/CodeGen/Makefile
@@ -0,0 +1,22 @@
+##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===##
+# 
+#                     The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+# 
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMCodeGen
+PARALLEL_DIRS = SelectionDAG
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
+# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL
+# in this directory.  Disable -pedantic for this broken compiler.
+ifneq ($(HUGE_VAL_SANITY),yes)
+CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts))
+endif
+
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..fec9e2e
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,342 @@
+//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions.  This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+//STATISTIC(NumSimple, "Number of simple phis lowered");
+
+namespace {
+  struct VISIBILITY_HIDDEN PNE : public MachineFunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    PNE() : MachineFunctionPass((intptr_t)&ID) {}
+
+    bool runOnMachineFunction(MachineFunction &Fn) {
+      analyzePHINodes(Fn);
+
+      bool Changed = false;
+
+      // Eliminate PHI instructions by inserting copies into predecessor blocks.
+      for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+        Changed |= EliminatePHINodes(Fn, *I);
+
+      VRegPHIUseCount.clear();
+      return Changed;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<LiveVariables>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+    /// in predecessor basic blocks.
+    ///
+    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+    void LowerAtomicPHINode(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in this
+    /// function. In particular, for each virtual register used in a PHI node,
+    /// we count its PHI uses keyed by the basic block the value comes from.
+    /// This is used later to determine when the vreg is killed in that BB.
+    ///
+    void analyzePHINodes(const MachineFunction& Fn);
+
+    typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair;
+    typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
+
+    VRegPHIUse VRegPHIUseCount;
+  };
+
+  char PNE::ID = 0;
+  RegisterPass<PNE> X("phi-node-elimination",
+                      "Eliminate PHI nodes for register allocation");
+}
+
+const PassInfo *llvm::PHIEliminationID = X.getPassInfo();
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) {
+  if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
+    return false;   // Quick exit for basic blocks without PHIs.
+
+  // Get an iterator to the first instruction after the last PHI node (this may
+  // also be the end of the basic block).
+  MachineBasicBlock::iterator AfterPHIsIt = MBB.begin();
+  while (AfterPHIsIt != MBB.end() &&
+         AfterPHIsIt->getOpcode() == TargetInstrInfo::PHI)
+    ++AfterPHIsIt;    // Skip over all of the PHI nodes...
+
+  while (MBB.front().getOpcode() == TargetInstrInfo::PHI)
+    LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+  return true;
+}
+
+/// InstructionUsesRegister - Return true if the specified machine instr has a
+/// use of the specified register.
+static bool InstructionUsesRegister(MachineInstr *MI, unsigned SrcReg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+    if (MI->getOperand(i).isRegister() &&
+        MI->getOperand(i).getReg() == SrcReg &&
+        MI->getOperand(i).isUse())
+      return true;
+  return false;
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs.  This lowering method is always correct.
+void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator AfterPHIsIt) {
+  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+  MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+  unsigned DestReg = MPhi->getOperand(0).getReg();
+
+  // Create a new register for the incoming PHI arguments.
+  MachineFunction &MF = *MBB.getParent();
+  const TargetRegisterClass *RC = MF.getSSARegMap()->getRegClass(DestReg);
+  unsigned IncomingReg = MF.getSSARegMap()->createVirtualRegister(RC);
+
+  // Insert a register to register copy in the top of the current block (but
+  // after any remaining phi nodes) which copies the new incoming register
+  // into the phi node destination.
+  //
+  const MRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  RegInfo->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC);
+
+  // Update live variable information if there is any...
+  LiveVariables *LV = getAnalysisToUpdate<LiveVariables>();
+  if (LV) {
+    MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+    // Increment use count of the newly created virtual register.
+    LV->getVarInfo(IncomingReg).NumUses++;
+
+    // Add information to LiveVariables to know that the incoming value is
+    // killed.  Note that because the value is defined in several places (once
+    // for each incoming block), the "def" block and instruction fields for
+    // the VarInfo are not filled in.
+    //
+    LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+
+    // Since we are going to be deleting the PHI node, if it is the last use
+    // of any registers, or if the value itself is dead, we need to move this
+    // information over to the new copy we just inserted.
+    //
+    LV->removeVirtualRegistersKilled(MPhi);
+
+    // If the result is dead, update LV.
+    if (LV->RegisterDefIsDead(MPhi, DestReg)) {
+      LV->addVirtualRegisterDead(DestReg, PHICopy);
+      LV->removeVirtualRegistersDead(MPhi);
+    }
+    
+    // Realize that the destination register is defined by the PHI copy now, not
+    // the PHI itself.
+    LV->getVarInfo(DestReg).DefInst = PHICopy;
+  }
+
+  // Adjust the VRegPHIUseCount map to account for the removal of this PHI
+  // node.
+  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+    --VRegPHIUseCount[BBVRegPair(
+                        MPhi->getOperand(i + 1).getMachineBasicBlock(),
+                        MPhi->getOperand(i).getReg())];
+
+  // Now loop over all of the incoming arguments, changing them to copy into
+  // the IncomingReg register in the corresponding predecessor basic block.
+  //
+  std::set<MachineBasicBlock*> MBBsInsertedInto;
+  for (int i = MPhi->getNumOperands() - 1; i >= 2; i-=2) {
+    unsigned SrcReg = MPhi->getOperand(i-1).getReg();
+    assert(MRegisterInfo::isVirtualRegister(SrcReg) &&
+           "Machine PHI Operands must all be virtual registers!");
+
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source path of the PHI.
+    MachineBasicBlock &opBlock = *MPhi->getOperand(i).getMachineBasicBlock();
+
+    // Check to make sure we haven't already emitted the copy for this block.
+    // This can happen because PHI nodes may have multiple entries for the
+    // same basic block.
+    if (!MBBsInsertedInto.insert(&opBlock).second)
+      continue;  // If the copy has already been emitted, we're done.
+ 
+    // Get an iterator pointing to the first terminator in the block (or end()).
+    // This is the point where we can insert a copy if we'd like to.
+    MachineBasicBlock::iterator I = opBlock.getFirstTerminator();
+    
+    // Insert the copy.
+    RegInfo->copyRegToReg(opBlock, I, IncomingReg, SrcReg, RC);
+
+    // Now update live variable information if we have it.  Otherwise we're done
+    if (!LV) continue;
+    
+    // We want to be able to insert a kill of the register if this PHI
+    // (aka, the copy we just inserted) is the last use of the source
+    // value.  Live variable analysis conservatively handles this by
+    // saying that the value is live until the end of the block the PHI
+    // entry lives in.  If the value really is dead at the PHI copy, there
+    // will be no successor blocks which have the value live-in.
+    //
+    // Check to see if the copy is the last use, and if so, update the
+    // live variables information so that it knows the copy source
+    // instruction kills the incoming value.
+    //
+    LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg);
+
+    // Loop over all of the successors of the basic block, checking to see
+    // if the value is either live in the block, or if it is killed in the
+    // block.  Also check to see if this register is in use by another PHI
+    // node which has not yet been eliminated.  If so, it will be killed
+    // at an appropriate point later.
+    //
+
+    // Is it used by any PHI instructions in this block?
+    bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
+
+    std::vector<MachineBasicBlock*> OpSuccBlocks;
+    
+    // Otherwise, scan successors, including the BB the PHI node lives in.
+    for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+           E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) {
+      MachineBasicBlock *SuccMBB = *SI;
+
+      // Is it alive in this successor?
+      unsigned SuccIdx = SuccMBB->getNumber();
+      if (SuccIdx < InRegVI.AliveBlocks.size() &&
+          InRegVI.AliveBlocks[SuccIdx]) {
+        ValueIsLive = true;
+        break;
+      }
+
+      OpSuccBlocks.push_back(SuccMBB);
+    }
+
+    // Check to see if this value is live because there is a use in a successor
+    // that kills it.
+    if (!ValueIsLive) {
+      switch (OpSuccBlocks.size()) {
+      case 1: {
+        MachineBasicBlock *MBB = OpSuccBlocks[0];
+        for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+          if (InRegVI.Kills[i]->getParent() == MBB) {
+            ValueIsLive = true;
+            break;
+          }
+        break;
+      }
+      case 2: {
+        MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1];
+        for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+          if (InRegVI.Kills[i]->getParent() == MBB1 || 
+              InRegVI.Kills[i]->getParent() == MBB2) {
+            ValueIsLive = true;
+            break;
+          }
+        break;        
+      }
+      default:
+        std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+        for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+          if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+                                 InRegVI.Kills[i]->getParent())) {
+            ValueIsLive = true;
+            break;
+          }
+      }
+    }        
+
+    // Okay, if we now know that the value is not live out of the block,
+    // we can add a kill marker in this block saying that it kills the incoming
+    // value!
+    if (!ValueIsLive) {
+      // In our final twist, we have to decide which instruction kills the
+      // register.  In most cases this is the copy, however, the first 
+      // terminator instruction at the end of the block may also use the value.
+      // In this case, we should mark *it* as being the killing block, not the
+      // copy.
+      bool FirstTerminatorUsesValue = false;
+      if (I != opBlock.end()) {
+        FirstTerminatorUsesValue = InstructionUsesRegister(I, SrcReg);
+      
+        // Check that no other terminators use values.
+#ifndef NDEBUG
+        for (MachineBasicBlock::iterator TI = next(I); TI != opBlock.end();
+             ++TI) {
+          assert(!InstructionUsesRegister(TI, SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless"
+                 "they are the first terminator in a block!");
+        }
+#endif
+      }
+      
+      MachineBasicBlock::iterator KillInst;
+      if (!FirstTerminatorUsesValue) 
+        KillInst = prior(I);
+      else
+        KillInst = I;
+      
+      // Finally, mark it killed.
+      LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+      // This vreg no longer lives all of the way through opBlock.
+      unsigned opBlockNum = opBlock.getNumber();
+      if (opBlockNum < InRegVI.AliveBlocks.size())
+        InRegVI.AliveBlocks[opBlockNum] = false;
+    }
+  }
+    
+  // Really delete the PHI instruction now!
+  delete MPhi;
+  ++NumAtomic;
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, we want to count the number of uses of each virtual register
+/// used by a PHI node, keyed by the BB the vreg is coming from. This is used
+/// later to determine when the vreg is killed in the BB.
+///
+void PNE::analyzePHINodes(const MachineFunction& Fn) {
+  for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        ++VRegPHIUseCount[BBVRegPair(
+                            BBI->getOperand(i + 1).getMachineBasicBlock(),
+                            BBI->getOperand(i).getReg())];
+}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
new file mode 100644
index 0000000..87510e4
--- /dev/null
+++ b/lib/CodeGen/Passes.cpp
@@ -0,0 +1,56 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+namespace {
+  cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+          RegisterPassParser<RegisterRegAlloc> >
+  RegAlloc("regalloc",
+           cl::init(&createLinearScanRegisterAllocator),
+           cl::desc("Register allocator to use: (default = linearscan)")); 
+}
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator() {
+  RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+  
+  if (!Ctor) {
+    Ctor = RegAlloc;
+    RegisterRegAlloc::setDefault(RegAlloc);
+  }
+  
+  return Ctor();
+}
diff --git a/lib/CodeGen/PhysRegTracker.h b/lib/CodeGen/PhysRegTracker.h
new file mode 100644
index 0000000..f5a2402
--- /dev/null
+++ b/lib/CodeGen/PhysRegTracker.h
@@ -0,0 +1,73 @@
+//===-- llvm/CodeGen/PhysRegTracker.h - Physical Register Tracker -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a physical register tracker. The tracker
+// tracks physical register usage through addRegUse and
+// delRegUse. isRegAvail checks if a physical register is available or not,
+// taking register aliases into consideration.
+//
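+// Example usage (hypothetical 'MRI' and 'Reg' values):
+//
+//   PhysRegTracker PRT(*MRI);
+//   PRT.addRegUse(Reg);    // Reg and all of its aliases become unavailable.
+//   ...
+//   PRT.delRegUse(Reg);    // Reg and its aliases are free again.
+//   assert(PRT.isRegAvail(Reg));
+//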
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHYSREGTRACKER_H
+#define LLVM_CODEGEN_PHYSREGTRACKER_H
+
+#include "llvm/Target/MRegisterInfo.h"
+
+namespace llvm {
+
+    class PhysRegTracker {
+        const MRegisterInfo* mri_;
+        std::vector<unsigned> regUse_;
+
+    public:
+        PhysRegTracker(const MRegisterInfo& mri)
+            : mri_(&mri),
+              regUse_(mri_->getNumRegs(), 0) {
+        }
+
+        PhysRegTracker(const PhysRegTracker& rhs)
+            : mri_(rhs.mri_),
+              regUse_(rhs.regUse_) {
+        }
+
+        const PhysRegTracker& operator=(const PhysRegTracker& rhs) {
+            mri_ = rhs.mri_;
+            regUse_ = rhs.regUse_;
+            return *this;
+        }
+
+        void addRegUse(unsigned physReg) {
+            assert(MRegisterInfo::isPhysicalRegister(physReg) &&
+                   "should be physical register!");
+            ++regUse_[physReg];
+            for (const unsigned* as = mri_->getAliasSet(physReg); *as; ++as)
+                ++regUse_[*as];
+        }
+
+        void delRegUse(unsigned physReg) {
+            assert(MRegisterInfo::isPhysicalRegister(physReg) &&
+                   "should be physical register!");
+            assert(regUse_[physReg] != 0);
+            --regUse_[physReg];
+            for (const unsigned* as = mri_->getAliasSet(physReg); *as; ++as) {
+                assert(regUse_[*as] != 0);
+                --regUse_[*as];
+            }
+        }
+
+        bool isRegAvail(unsigned physReg) const {
+            assert(MRegisterInfo::isPhysicalRegister(physReg) &&
+                   "should be physical register!");
+            return regUse_[physReg] == 0;
+        }
+    };
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 0000000..06f47c7
--- /dev/null
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,81 @@
+//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Dale Johannesen and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
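+// A rough sketch of that loop (the pass below is currently a stub):
+//
+//   Ready = { instructions whose predecessors have all been scheduled }
+//   while Ready is not empty:
+//     take the highest priority instruction I from Ready
+//     if I has a structural hazard or an operand still in flight:
+//       stall (advance the current cycle)
+//     else:
+//       emit I and add any newly ready successors to Ready
+//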
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
+//#include "llvm/ADT/Statistic.h"
+//#include <climits>
+//#include <queue>
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+namespace {
+  bool NoPostRAScheduling;
+
+  // When this works it will be on by default.
+  cl::opt<bool, true>
+  DisablePostRAScheduler("disable-post-RA-scheduler",
+               cl::desc("Disable scheduling after register allocation"),
+               cl::location(NoPostRAScheduling),
+               cl::init(true));
+
+  class VISIBILITY_HIDDEN SchedulePostRATDList : public MachineFunctionPass {
+  public:
+    static char ID;
+    SchedulePostRATDList() : MachineFunctionPass((intptr_t)&ID) {}
+  private:
+    MachineFunction *MF;
+    const TargetMachine *TM;
+  public:
+    const char *getPassName() const {
+      return "Post RA top-down list latency scheduler (STUB)";
+    }
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+  };
+  char SchedulePostRATDList::ID = 0;
+}
+
+bool SchedulePostRATDList::runOnMachineFunction(MachineFunction &Fn) {
+  if (NoPostRAScheduling)
+    return true;
+
+  DOUT << "SchedulePostRATDList\n";
+  MF = &Fn;
+  TM = &MF->getTarget();
+
+  // Loop over all of the basic blocks
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB)
+    ;
+
+  return true;
+}
+  
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler() {
+  return new SchedulePostRATDList();
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..882c6a7
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,549 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation.  After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+using namespace llvm;
+
+namespace {
+  struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass {
+    static char ID;
+    PEI() : MachineFunctionPass((intptr_t)&ID) {}
+
+    const char *getPassName() const {
+      return "Prolog/Epilog Insertion & Frame Finalization";
+    }
+
+    /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+    /// frame indexes with appropriate references.
+    ///
+    bool runOnMachineFunction(MachineFunction &Fn) {
+      const MRegisterInfo *MRI = Fn.getTarget().getRegisterInfo();
+      RS = MRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+
+      // Get MachineModuleInfo so that we can track the construction of the
+      // frame.
+      if (MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>()) {
+        Fn.getFrameInfo()->setMachineModuleInfo(MMI);
+      }
+
+      // Allow the target machine to make some adjustments to the function
+      // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+      MRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+      // Scan the function for modified callee saved registers and insert spill
+      // code for any callee saved registers that are modified.  Also calculate
+      // the MaxCallFrameSize and HasCalls variables for the function's frame
+      // information and eliminate call frame pseudo instructions.
+      calculateCalleeSavedRegisters(Fn);
+
+      // Add the code to save and restore the callee saved registers
+      saveCalleeSavedRegisters(Fn);
+
+      // Allow the target machine to make final modifications to the function
+      // before the frame layout is finalized.
+      Fn.getTarget().getRegisterInfo()->processFunctionBeforeFrameFinalized(Fn);
+
+      // Calculate actual frame offsets for all of the abstract stack objects...
+      calculateFrameObjectOffsets(Fn);
+
+      // Add prolog and epilog code to the function.  This function is required
+      // to align the stack frame as necessary for any stack variables or
+      // called functions.  Because of this, calculateCalleeSavedRegisters
+      // must be called before this function in order to set the HasCalls
+      // and MaxCallFrameSize variables.
+      insertPrologEpilogCode(Fn);
+
+      // Replace all MO_FrameIndex operands with physical register references
+      // and actual offsets.
+      //
+      replaceFrameIndices(Fn);
+
+      delete RS;
+      return true;
+    }
+  
+  private:
+    RegScavenger *RS;
+
+    // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+    // stack frame indexes.
+    unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+    void calculateCalleeSavedRegisters(MachineFunction &Fn);
+    void saveCalleeSavedRegisters(MachineFunction &Fn);
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+    void replaceFrameIndices(MachineFunction &Fn);
+    void insertPrologEpilogCode(MachineFunction &Fn);
+  };
+  char PEI::ID = 0;
+}
+
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.  Also calculate the MaxCallFrameSize and HasCalls variables for
+/// the function's frame information and eliminate call frame pseudo
+/// instructions.
+///
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+  const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+
+  // Get the callee saved register list...
+  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+  // Get the function call frame set-up and tear-down instruction opcode
+  int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+
+  // These are used to keep track of the callee-save area. Initialize them.
+  MinCSFrameIndex = INT_MAX;
+  MaxCSFrameIndex = 0;
+
+  // Early exit for targets which have no callee saved registers and no call
+  // frame setup/destroy pseudo instructions.
+  if ((CSRegs == 0 || CSRegs[0] == 0) &&
+      FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+    return;
+
+  unsigned MaxCallFrameSize = 0;
+  bool HasCalls = false;
+
+  std::vector<MachineBasicBlock::iterator> FrameSDOps;
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+      if (I->getOpcode() == FrameSetupOpcode ||
+          I->getOpcode() == FrameDestroyOpcode) {
+        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+               " instructions should have a single immediate argument!");
+        unsigned Size = I->getOperand(0).getImmedValue();
+        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+        HasCalls = true;
+        FrameSDOps.push_back(I);
+      }
+
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  FFI->setHasCalls(HasCalls);
+  FFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+  for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
+    MachineBasicBlock::iterator I = FrameSDOps[i];
+    // If call frames are not being included as part of the stack frame,
+    // and there is no dynamic allocation (therefore referencing frame slots
+    // off sp), leave the pseudo ops alone. We'll eliminate them later.
+    if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+      RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+  }
+
+  // Now figure out which *callee saved* registers are modified by the current
+  // function, thus needing to be saved and restored in the prolog/epilog.
+  //
+  const TargetRegisterClass* const *CSRegClasses =
+    RegInfo->getCalleeSavedRegClasses(&Fn);
+  std::vector<CalleeSavedInfo> CSI;
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    if (Fn.isPhysRegUsed(Reg)) {
+        // If the reg is modified, save it!
+      CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+    } else {
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+           *AliasSet; ++AliasSet) {  // Check alias registers too.
+        if (Fn.isPhysRegUsed(*AliasSet)) {
+          CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+          break;
+        }
+      }
+    }
+  }
+
+  if (CSI.empty())
+    return;   // Early exit if no callee saved registers are modified!
+
+  unsigned NumFixedSpillSlots;
+  const std::pair<unsigned,int> *FixedSpillSlots =
+    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+  // Now that we know which registers need to be saved and restored, allocate
+  // stack slots for them.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RC = CSI[i].getRegClass();
+
+    // Check to see if this physreg must be spilled to a particular stack slot
+    // on this target.
+    const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
+    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+           FixedSlot->first != Reg)
+      ++FixedSlot;
+
+    int FrameIdx;
+    if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
+      // Nope, just spill it anywhere convenient.
+      unsigned Align = RC->getAlignment();
+      unsigned StackAlign = TFI->getStackAlignment();
+      // We may not be able to satisfy the desired alignment specification of
+      // the TargetRegisterClass if the stack alignment is smaller. Use the min.
+      Align = std::min(Align, StackAlign);
+      FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
+      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+    } else {
+      // Spill it to the stack where we must.
+      FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
+    }
+    CSI[i].setFrameIdx(FrameIdx);
+  }
+
+  FFI->setCalleeSavedInfo(CSI);
+}
+
+/// saveCalleeSavedRegisters -  Insert spill code for any callee saved registers
+/// that are modified in the function.
+///
+void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) {
+  // Get callee saved register information.
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+  
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty())
+    return;
+
+  const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+
+  // Now that we have a stack slot for each register to be saved, insert spill
+  // code into the entry block.
+  MachineBasicBlock *MBB = Fn.begin();
+  MachineBasicBlock::iterator I = MBB->begin();
+  if (!RegInfo->spillCalleeSavedRegisters(*MBB, I, CSI)) {
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      // Add the callee-saved register as live-in. It's killed at the spill.
+      MBB->addLiveIn(CSI[i].getReg());
+
+      // Insert the spill to the stack frame.
+      RegInfo->storeRegToStackSlot(*MBB, I, CSI[i].getReg(),
+                                   CSI[i].getFrameIdx(), CSI[i].getRegClass());
+    }
+  }
+
+  // Add code to restore the callee-save registers in each exiting block.
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  for (MachineFunction::iterator FI = Fn.begin(), E = Fn.end(); FI != E; ++FI)
+    // If last instruction is a return instruction, add an epilogue.
+    if (!FI->empty() && TII.isReturn(FI->back().getOpcode())) {
+      MBB = FI;
+      I = MBB->end(); --I;
+
+      // Skip over all terminator instructions, which are part of the return
+      // sequence.
+      MachineBasicBlock::iterator I2 = I;
+      while (I2 != MBB->begin() && TII.isTerminatorInstr((--I2)->getOpcode()))
+        I = I2;
+
+      bool AtStart = I == MBB->begin();
+      MachineBasicBlock::iterator BeforeI = I;
+      if (!AtStart)
+        --BeforeI;
+      
+      // Restore all registers immediately before the return and any terminators
+      // that precede it.
+      if (!RegInfo->restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+          RegInfo->loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+                                        CSI[i].getFrameIdx(),
+                                        CSI[i].getRegClass());
+          assert(I != MBB->begin() &&
+                 "loadRegFromStackSlot didn't insert any code!");
+          // Insert in reverse order.  loadRegFromStackSlot can insert multiple
+          // instructions.
+          if (AtStart)
+            I = MBB->begin();
+          else {
+            I = BeforeI;
+            ++I;
+          }
+        }
+      }
+    }
+}
+
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+
+  bool StackGrowsDown =
+    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+
+  // Loop over all of the stack objects, assigning sequential addresses...
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+  unsigned MaxAlign = 0;
+
+  // Start at the beginning of the local area.
+  // The Offset is the distance from the stack top in the direction
+  // of stack growth -- so it's always positive.
+  int64_t Offset = TFI.getOffsetOfLocalArea();
+  if (StackGrowsDown)
+    Offset = -Offset;
+  assert(Offset >= 0
+         && "Local area offset should be in direction of stack growth");
+
+  // If there are fixed sized objects that are preallocated in the local area,
+  // non-fixed objects can't be allocated right at the start of local area.
+  // We currently don't support filling in holes in between fixed sized objects,
+  // so we adjust 'Offset' to point to the end of the last fixed sized
+  // preallocated object.
+  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+    int64_t FixedOff;
+    if (StackGrowsDown) {
+      // The maximum distance from the stack pointer is at the lower address of
+      // the object -- which is given by the offset. For a down-growing stack
+      // the offset is negative, so we negate the offset to get the distance.
+      FixedOff = -FFI->getObjectOffset(i);
+    } else {
+      // The maximum distance from the start pointer is at the upper
+      // address of the object.
+      FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+    }
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+
+  // First assign frame offsets to stack objects that are used to spill
+  // callee saved registers.
+  if (StackGrowsDown) {
+    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+      Offset += FFI->getObjectSize(i);
+
+      unsigned Align = FFI->getObjectAlignment(i);
+      // If the alignment of this object is greater than that of the stack, then
+      // increase the stack alignment to match.
+      MaxAlign = std::max(MaxAlign, Align);
+      // Adjust to alignment boundary
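+      // (e.g. Offset = 13, Align = 8  ==>  (13+7)/8*8 = 16)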
+      Offset = (Offset+Align-1)/Align*Align;
+
+      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
+    }
+  } else {
+    for (unsigned i = MaxCSFrameIndex; i >= MinCSFrameIndex; --i) {
+      unsigned Align = FFI->getObjectAlignment(i);
+      // If the alignment of this object is greater than that of the stack, then
+      // increase the stack alignment to match.
+      MaxAlign = std::max(MaxAlign, Align);
+      // Adjust to alignment boundary
+      Offset = (Offset+Align-1)/Align*Align;
+
+      FFI->setObjectOffset(i, Offset);
+      Offset += FFI->getObjectSize(i);
+    }
+  }
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // frame pointer if a frame pointer is required.
+  const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  if (RS && RegInfo->hasFP(Fn)) {
+    int SFI = RS->getScavengingFrameIndex();
+    if (SFI >= 0) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+      if (StackGrowsDown)
+        Offset += FFI->getObjectSize(SFI);
+
+      unsigned Align = FFI->getObjectAlignment(SFI);
+      // Adjust to alignment boundary
+      Offset = (Offset+Align-1)/Align*Align;
+
+      if (StackGrowsDown) {
+        FFI->setObjectOffset(SFI, -Offset);        // Set the computed offset
+      } else {
+        FFI->setObjectOffset(SFI, Offset);
+        Offset += FFI->getObjectSize(SFI);
+      }
+    }
+  }
+
+  // Then assign frame offsets to stack objects that are not used to spill
+  // callee saved registers.
+  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+      continue;
+    if (RS && (int)i == RS->getScavengingFrameIndex())
+      continue;
+
+    // If the stack grows down, we need to add the size to find the lowest
+    // address of the object.
+    if (StackGrowsDown)
+      Offset += FFI->getObjectSize(i);
+
+    unsigned Align = FFI->getObjectAlignment(i);
+    // If the alignment of this object is greater than that of the stack, then
+    // increase the stack alignment to match.
+    MaxAlign = std::max(MaxAlign, Align);
+    // Adjust to alignment boundary
+    Offset = (Offset+Align-1)/Align*Align;
+
+    if (StackGrowsDown) {
+      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
+    } else {
+      FFI->setObjectOffset(i, Offset);
+      Offset += FFI->getObjectSize(i);
+    }
+  }
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // stack pointer.
+  if (RS && !RegInfo->hasFP(Fn)) {
+    int SFI = RS->getScavengingFrameIndex();
+    if (SFI >= 0) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+      if (StackGrowsDown)
+        Offset += FFI->getObjectSize(SFI);
+
+      unsigned Align = FFI->getObjectAlignment(SFI);
+      // Adjust to alignment boundary
+      Offset = (Offset+Align-1)/Align*Align;
+
+      if (StackGrowsDown) {
+        FFI->setObjectOffset(SFI, -Offset);        // Set the computed offset
+      } else {
+        FFI->setObjectOffset(SFI, Offset);
+        Offset += FFI->getObjectSize(SFI);
+      }
+    }
+  }
+
+  // Round up the size to a multiple of the alignment, but only if there are
+  // calls or alloca's in the function.  This ensures that any calls to
+  // subroutines have their stack frames suitably aligned.
+  if (!RegInfo->targetHandlesStackFrameRounding() &&
+      (FFI->hasCalls() || FFI->hasVarSizedObjects())) {
+    // If we have reserved argument space for call sites in the function
+    // immediately on entry to the current function, count it as part of the
+    // overall stack size.
+    if (RegInfo->hasReservedCallFrame(Fn))
+      Offset += FFI->getMaxCallFrameSize();
+
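+    // Round Offset up to the stack alignment; e.g. with a 16-byte aligned
+    // stack (AlignMask = 15), Offset = 100 becomes (100+15) & ~15 = 112.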
+    unsigned AlignMask = TFI.getStackAlignment() - 1;
+    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+  }
+
+  // Update frame info to pretend that this is part of the stack...
+  FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
+
+  // Remember the required stack alignment in case targets need it to perform
+  // dynamic stack alignment.
+  assert(FFI->getMaxAlignment() == MaxAlign &&
+         "Stack alignment calculation broken!");
+}
+
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+  // Add prologue to the function...
+  Fn.getTarget().getRegisterInfo()->emitPrologue(Fn);
+
+  // Add epilogue to restore the callee-save registers in each exiting block
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+    // If last instruction is a return instruction, add an epilogue
+    if (!I->empty() && TII.isReturn(I->back().getOpcode()))
+      Fn.getTarget().getRegisterInfo()->emitEpilogue(Fn, *I);
+  }
+}
+
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+  if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+  const TargetMachine &TM = Fn.getTarget();
+  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+  const MRegisterInfo &MRI = *TM.getRegisterInfo();
+  const TargetFrameInfo *TFI = TM.getFrameInfo();
+  bool StackGrowsDown =
+    TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+  int FrameSetupOpcode   = MRI.getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = MRI.getCallFrameDestroyOpcode();
+
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    int SPAdj = 0;  // SP offset due to call frame setup / destroy.
+    if (RS) RS->enterBasicBlock(BB);
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+      MachineInstr *MI = I;
+
+      // Remember how much SP has been adjusted to create the call frame.
+      if (I->getOpcode() == FrameSetupOpcode ||
+          I->getOpcode() == FrameDestroyOpcode) {
+        int Size = I->getOperand(0).getImmedValue();
+        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+          Size = -Size;
+        SPAdj += Size;
+        MachineBasicBlock::iterator PrevI = prior(I);
+        MRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+        // Visit the instructions created by eliminateCallFramePseudoInstr().
+        I = next(PrevI);
+        MI = NULL;
+      } else {
+        I++;
+        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+          if (MI->getOperand(i).isFrameIndex()) {
+            // If this instruction has a FrameIndex operand, we need to use that
+            // target machine register info object to eliminate it.
+            MRI.eliminateFrameIndex(MI, SPAdj, RS);
+
+            // Revisit the instruction in full.  Some instructions (e.g. inline
+            // asm instructions) can have multiple frame indices.
+            --I;
+            MI = 0;
+            break;
+          }
+      }
+      // Update register states.
+      if (RS && MI) RS->forward(MI);
+    }
+    assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+  }
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
new file mode 100644
index 0000000..4fff15c
--- /dev/null
+++ b/lib/CodeGen/README.txt
@@ -0,0 +1,145 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+        mul lr, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        ldr r4, [sp, #+52]
+        mla r4, r3, lr, r4
+
+can be:
+
+        mul lr, r4, lr
+        mov r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+        mul r4, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+It also increases the likelihood that the store may become dead.
+
+//===---------------------------------------------------------------------===//
+
+I think we should have a "hasSideEffects" flag (which is automatically set for
+stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able
+to remat any instruction that has no side effects, if it can handle it and if
+profitable.
+
+For now, I'd suggest having the remat stuff work like this:
+
+1. I need to spill/reload this thing.
+2. Check to see if it has side effects.
+3. Check to see if it is simple enough: e.g. it only has one register
+destination and no register input.
+4. If so, clone the instruction, do the xform, etc.
+
+Advantages of this are:
+
+1. the .td file describes the behavior of the instructions, not the way the
+   algorithm should work.
+2. as remat gets smarter in the future, we shouldn't have to be changing the .td
+   files.
+3. it is easier to explain what the flag means in the .td file, because you
+   don't have to pull in the explanation of how the current remat algo works.
+
+Some potential added complexities:
+
+1. Some instructions have to be glued to their predecessor or successor, e.g.
+   all of the PC relative instructions and condition code setting instructions.
+   We could mark them as hasSideEffects, but that's not quite right. PC relative
+   loads from constantpools can be remat'ed, for example, but it requires more
+   than just cloning the instruction. Some instructions can be remat'ed but
+   expand to more than one instruction; the allocator will have to make a
+   decision.
+
+2. As stated in 1, remat is not as simple as cloning in some cases. The target
+   will have to decide how to remat it. For example, an ARM 2-piece constant
+   generation instruction is remat'ed as a load from constantpool.
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+        ...
+        %reg1037 = ADDri %reg1039, 1
+        %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+    Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+    Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+        %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0>
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as a two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. register scavenger) to assign it a free register to allow
+reuse:
+	ldr r3, [sp, #+4]
+	add r3, r3, #3
+	ldr r2, [sp, #+8]
+	add r2, r2, #2
+	ldr r1, [sp, #+4]  <==
+	add r1, r1, #1
+	ldr r0, [sp, #+4]
+	add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts CSE out of loops. Sometimes this can have negative
+side-effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure; R1, R2, and R3 can be spilled. We need
+to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4  @ re-materialized
+load [i + R1]
+...
+R2 = X + 7 @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+//===---------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegAllocBigBlock.cpp b/lib/CodeGen/RegAllocBigBlock.cpp
new file mode 100644
index 0000000..c7f23f5
--- /dev/null
+++ b/lib/CodeGen/RegAllocBigBlock.cpp
@@ -0,0 +1,893 @@
+//===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RABigBlock class
+//
+//===----------------------------------------------------------------------===//
+
+// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+// allocator works on one basic block at a time, oblivious to others.
+// However, the algorithm used here is suited for long blocks of
+// instructions - registers are spilled by greedily choosing those holding
+// values that will not be needed for the longest amount of time. This works
+// particularly well for blocks with 10 or more times as many instructions
+// as machine registers, but can be used for general code.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: - automagically invoke linearscan for (groups of) small BBs?
+//       - break ties when picking regs? (probably not worth it in a
+//         JIT context)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumFolded, "Number of loads/stores folded into instructions");
+
+namespace {
+  static RegisterRegAlloc
+    bigBlockRegAlloc("bigblock", "  Big-block register allocator",
+                  createBigBlockRegisterAllocator);
+
+/// VRegKeyInfo - Defines magic values required to use VirtRegs as DenseMap
+/// keys.
+  struct VRegKeyInfo {
+    static inline unsigned getEmptyKey() { return -1U; }
+    static inline unsigned getTombstoneKey() { return -2U; }
+    static unsigned getHashValue(const unsigned &Key) { return Key; }
+  };
+
+
+/// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+/// allocator works on one basic block at a time, oblivious to others.
+/// However, the algorithm used here is suited for long blocks of
+/// instructions - registers are spilled by greedily choosing those holding
+/// values that will not be needed for the longest amount of time. This works
+/// particularly well for blocks with 10 or more times as many instructions
+/// as machine registers, but can be used for general code.
+///
+/// TODO: - automagically invoke linearscan for (groups of) small BBs?
+///       - break ties when picking regs? (probably not worth it in a
+///         JIT context)
+///
+  class VISIBILITY_HIDDEN RABigBlock : public MachineFunctionPass {
+  public:
+    static char ID;
+    RABigBlock() : MachineFunctionPass((intptr_t)&ID) {}
+  private:
+    /// TM - For getting at TargetMachine info 
+    ///
+    const TargetMachine *TM;
+    
+    /// MF - Our generic MachineFunction pointer
+    ///
+    MachineFunction *MF;
+    
+    /// RegInfo - For dealing with machine register info (aliases, folds
+    /// etc)
+    const MRegisterInfo *RegInfo;
+
+    /// LV - Our generic LiveVariables pointer
+    ///
+    LiveVariables *LV;
+
+    typedef SmallVector<unsigned, 2> VRegTimes;
+
+    /// VRegReadTable - maps VRegs in a BB to the set of times they are read
+    ///
+    DenseMap<unsigned, VRegTimes*, VRegKeyInfo> VRegReadTable;
+
+    /// VRegReadIdx - keeps track of the "current time" in terms of
+    /// positions in VRegReadTable
+    DenseMap<unsigned, unsigned , VRegKeyInfo> VRegReadIdx;
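+
+    // For illustration (hypothetical numbering): if vreg 1024 is read by the
+    // instructions at positions 3, 7 and 12 of a block, VRegReadTable maps
+    // 1024 to {3, 7, 12}, and VRegReadIdx records how far into that list
+    // allocation has already advanced.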
+
+    /// StackSlotForVirtReg - Maps virtual regs to the frame index where these
+    /// values are spilled.
+    IndexedMap<unsigned, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+    /// Virt2PhysRegMap - This map contains entries for each virtual register
+    /// that is currently available in a physical register.
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+    /// PhysRegsUsed - This array is effectively a map, containing entries for
+    /// each physical register that currently has a value (ie, it is in
+    /// Virt2PhysRegMap).  The value mapped to is the virtual register
+    /// corresponding to the physical register (the inverse of the
+    /// Virt2PhysRegMap), or 0.  The value is set to 0 if this register is pinned
+    /// because it is used by a future instruction, and to -2 if it is not
+    /// allocatable.  If the entry for a physical register is -1, then the
+    /// physical register is "not in the map".
+    ///
+    std::vector<int> PhysRegsUsed;
+
+    /// VirtRegModified - This bitset contains information about which virtual
+    /// registers need to be spilled back to memory when their registers are
+    /// scavenged.  If a virtual register has simply been rematerialized, there
+    /// is no reason to spill it to memory when we need the register back.
+    ///
+    std::vector<int> VirtRegModified;
+
+    /// MBBLastInsnTime - the number of the last instruction in MBB
+    ///
+    int MBBLastInsnTime;
+
+    /// MBBCurTime - the number of the instruction currently being processed
+    ///
+    int MBBCurTime;
+
+    unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+      return Virt2PhysRegMap[VirtReg];
+    }
+
+    unsigned &getVirt2StackSlot(unsigned VirtReg) {
+      return StackSlotForVirtReg[VirtReg];
+    }
+
+    /// markVirtRegModified - Lets us flip bits in the VirtRegModified bitset
+    ///
+    void markVirtRegModified(unsigned Reg, bool Val = true) {
+      assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      Reg -= MRegisterInfo::FirstVirtualRegister;
+      if (VirtRegModified.size() <= Reg)
+        VirtRegModified.resize(Reg+1);
+      VirtRegModified[Reg] = Val;
+    }
+    
+    /// isVirtRegModified - Lets us query the VirtRegModified bitset
+    ///
+    bool isVirtRegModified(unsigned Reg) const {
+      assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      assert(Reg - MRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+             && "Illegal virtual register!");
+      return VirtRegModified[Reg - MRegisterInfo::FirstVirtualRegister];
+    }
+
+  public:
+    /// getPassName - returns the BigBlock allocator's name
+    ///
+    virtual const char *getPassName() const {
+      return "BigBlock Register Allocator";
+    }
+
+    /// getAnalysisUsage - declares the required analyses
+    ///
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveVariables>();
+      AU.addRequiredID(PHIEliminationID);
+      AU.addRequiredID(TwoAddressInstructionPassID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// runOnMachineFunction - Register allocate the whole function
+    ///
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    ///
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+    /// FillVRegReadTable - Fill out the table of vreg read times given a BB
+    ///
+    void FillVRegReadTable(MachineBasicBlock &MBB);
+    
+    /// areRegsEqual - This method returns true if the specified registers are
+    /// related to each other.  To do this, it checks to see if they are equal
+    /// or if the first register is in the alias set of the second register.
+    ///
+    bool areRegsEqual(unsigned R1, unsigned R2) const {
+      if (R1 == R2) return true;
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+           *AliasSet; ++AliasSet) {
+        if (*AliasSet == R1) return true;
+      }
+      return false;
+    }
+
+    /// getStackSpaceFor - This returns the frame index of the specified virtual
+    /// register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// removePhysReg - This method marks the specified physical register as no
+    /// longer being in use.
+    ///
+    void removePhysReg(unsigned PhysReg);
+
+    /// spillVirtReg - This method spills the value specified by PhysReg into
+    /// the virtual register slot specified by VirtReg.  It then updates the RA
+    /// data structures to indicate the fact that PhysReg is now available.
+    ///
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                      unsigned VirtReg, unsigned PhysReg);
+
+    /// spillPhysReg - This method spills the specified physical register into
+    /// the virtual register slot associated with it.  If OnlyVirtRegs is set to
+    /// true, then the request is ignored if the physical register does not
+    /// contain a virtual register.
+    ///
+    void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                      unsigned PhysReg, bool OnlyVirtRegs = false);
+
+    /// assignVirtToPhysReg - This method updates local state so that we know
+    /// that PhysReg is the proper container for VirtReg now.  The physical
+    /// register must not be used for anything else when this is called.
+    ///
+    void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+    /// isPhysRegAvailable - Return true if the specified physical register is
+    /// free and available for use.  This also includes checking to see if
+    /// aliased registers are all free...
+    ///
+    bool isPhysRegAvailable(unsigned PhysReg) const;
+
+    /// getFreeReg - Look to see if there is a free register available in the
+    /// specified register class.  If not, return 0.
+    ///
+    unsigned getFreeReg(const TargetRegisterClass *RC);
+
+    /// chooseReg - Pick a physical register to hold the specified
+    /// virtual register by choosing the one which will be read furthest
+    /// in the future.
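+    /// For example (hypothetical read times): if the candidate registers
+    /// currently hold values next read at positions 5, 9 and 42, the register
+    /// whose value is read at position 42 is chosen, since that value is
+    /// needed furthest in the future.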
+    ///
+    unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                    unsigned VirtReg);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+    /// register use to refer to a physical register.  This method may do this
+    /// in one of several ways: if the register is available in a physical
+    /// register already, it uses that physical register.  If the value is not
+    /// in a physical register, and if there are physical registers available,
+    /// it loads it into a register.  If register pressure is high, and it is
+    /// possible, it tries to fold the load of the virtual register into the
+    /// instruction itself.  It avoids doing this if register pressure is low to
+    /// improve the chance that subsequent instructions can use the reloaded
+    /// value.  This method returns the modified instruction.
+    ///
+    MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                unsigned OpNum);
+
+  };
+  char RABigBlock::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RABigBlock::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the location VirtReg would belong...
+  int FrameIdx = getVirt2StackSlot(VirtReg);
+
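+  // Note: slot indices are stored biased by one, so a stored value of zero
+  // means "no stack slot has been allocated for this vreg yet".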
+  if (FrameIdx)
+    return FrameIdx - 1;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment());
+
+  // Assign the slot...
+  getVirt2StackSlot(VirtReg) = FrameIdx + 1;
+  return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RABigBlock::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;      // PhysReg no longer used
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg.  It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RABigBlock::spillVirtReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I,
+                           unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " Must not have appropriate kill for the register or use exists beyond"
+         " the intended one.");
+  DOUT << "  Spilling register " << RegInfo->getName(PhysReg)
+       << " containing %reg" << VirtReg;
+  if (!isVirtRegModified(VirtReg))
+    DOUT << " which has not been modified, so no store necessary!";
+
+  // Otherwise, there is a virtual register corresponding to this physical
+  // register.  We only need to spill it into its stack slot if it has been
+  // modified.
+  if (isVirtRegModified(VirtReg)) {
+    const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DOUT << " to stack slot #" << FrameIndex;
+    RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIndex, RC);
+    ++NumStores;   // Update statistics
+  }
+
+  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+
+  DOUT << "\n";
+  removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it.  If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                           unsigned PhysReg, bool OnlyVirtRegs) {
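+  // PhysRegsUsed encoding: -1 = free, -2 = not allocatable, 0 = reserved for a
+  // physical-register use, and any positive value is the virtual register it
+  // currently holds.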
+  if (PhysRegsUsed[PhysReg] != -1) {            // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non-allocatable reg used!");
+    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+  } else {
+    // If the selected register aliases any other registers, we must make
+    // sure that one of the aliases isn't alive.
+    for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+         *AliasSet; ++AliasSet)
+      if (PhysRegsUsed[*AliasSet] != -1 &&     // Spill aliased register.
+          PhysRegsUsed[*AliasSet] != -2)       // If allocatable.
+        if (PhysRegsUsed[*AliasSet])
+          spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+  }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now.  The physical
+/// register must not be used for anything else when this is called.
+///
+void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+  assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+  // Update information to note the fact that this register was just used, and
+  // it holds VirtReg.
+  PhysRegsUsed[PhysReg] = VirtReg;
+  getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use.  This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const {
+  if (PhysRegsUsed[PhysReg] != -1) return false;
+
+  // If the selected register aliases any other allocated registers, it is
+  // not free!
+  for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet)
+    if (PhysRegsUsed[*AliasSet] != -1) // Aliased register in use?
+      return false;                    // Can't use this reg then.
+  return true;
+}
+
+  
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class.  If not, return 0.
+///
+unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) {
+  // Get iterators defining the range of registers that are valid to allocate in
+  // this class, which also specifies the preferred allocation order.
+  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+  for (; RI != RE; ++RI)
+    if (isPhysRegAvailable(*RI)) {       // Is reg unused?
+      assert(*RI != 0 && "Cannot use register!");
+      return *RI; // Found an unused register!
+    }
+  return 0;
+}
+
+
+/// chooseReg - Pick a physical register to hold the specified
+/// virtual register by choosing the one whose value will be read
+/// furthest in the future.
+///
+unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I,
+                         unsigned VirtReg) {
+  const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+  // First check to see if we have a free register of the requested type...
+  unsigned PhysReg = getFreeReg(RC);
+
+  // If we didn't find an unused register, find the one which will be
+  // read at the most distant point in time.
+  if (PhysReg == 0) {
+    unsigned delay=0, longest_delay=0;
+    VRegTimes* ReadTimes;
+
+    unsigned curTime = MBBCurTime;
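+    // VRegReadTable maps each vreg to the (ascending) list of times at which
+    // it is read in this MBB; VRegReadIdx is a cursor into that list that only
+    // ever advances as curTime increases.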
+
+    // for all physical regs in the RC,
+    for(TargetRegisterClass::iterator pReg = RC->begin(); 
+                                      pReg != RC->end();  ++pReg) {
+      // how long until they're read?
+      if(PhysRegsUsed[*pReg]>0) { // only consider regs currently holding a vreg
+        ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]];
+        if(ReadTimes && !ReadTimes->empty()) {
+            unsigned& pt = VRegReadIdx[PhysRegsUsed[*pReg]];
+            while(pt < ReadTimes->size() && (*ReadTimes)[pt] < curTime) {
+                ++pt;
+            }
+
+            if(pt < ReadTimes->size())
+                delay = (*ReadTimes)[pt] - curTime;
+            else
+                delay = MBBLastInsnTime + 1 - curTime;
+        } else {
+            // This register is only defined, but never
+            // read in this MBB. Therefore the next read
+            // happens after the end of this MBB
+            delay = MBBLastInsnTime + 1 - curTime;
+        }
+
+        
+        if(delay > longest_delay) {
+          longest_delay = delay;
+          PhysReg = *pReg;
+        }
+      }
+    }
+
+    if(PhysReg == 0) { // ok, now we're desperate. We couldn't choose
+                       // a register to spill by looking through the
+                       // read timetable, so now we just spill the
+                       // first allocatable register we find.
+                       
+      // for all physical regs in the RC,
+      for(TargetRegisterClass::iterator pReg = RC->begin(); 
+                                        pReg != RC->end();  ++pReg) {
+        // if we find a register we can spill
+        if(PhysRegsUsed[*pReg]>=-1)
+          PhysReg = *pReg; // choose it to be spilled
+      }
+    }
+    
+    assert(PhysReg && "couldn't choose a register to spill :( ");
+    // TODO: assert that RC->contains(PhysReg) / handle aliased registers?
+
+    // since we needed to look in the table we need to spill this register.
+    spillPhysReg(MBB, I, PhysReg);
+  }
+
+  // assign the vreg to our chosen physical register
+  assignVirtToPhysReg(VirtReg, PhysReg);
+  return PhysReg; // and return it
+}
+
+
+/// reloadVirtReg - This method transforms an instruction with a virtual
+/// register use to one that references a physical register. It does this as
+/// follows:
+///
+///   1) If the register is already in a physical register, it uses it.
+///   2) Otherwise, if there is a free physical register, it uses that.
+///   3) Otherwise, it calls chooseReg() to get the physical register
+///      holding the most distantly needed value, generating a spill in
+///      the process.
+///
+/// This method returns the modified instruction.
+MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                     unsigned OpNum) {
+  unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+  // If the virtual register is already available in a physical register,
+  // just update the instruction and return.
+  if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+    MI->getOperand(OpNum).setReg(PR);
+    return MI;
+  }
+
+  // Otherwise, if we have free physical registers available to hold the
+  // value, use them.
+  const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+  unsigned PhysReg = getFreeReg(RC);
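+  // Grab the vreg's stack slot up front; it is needed both for folding the
+  // reload into the instruction and for the explicit load inserted below.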
+  int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+  if (PhysReg) {   // we have a free register, so use it.
+    assignVirtToPhysReg(VirtReg, PhysReg);
+  } else {  // no free registers available.
+    // try to fold the spill into the instruction
+    if(MachineInstr* FMI = RegInfo->foldMemoryOperand(MI, OpNum, FrameIndex)) {
+      ++NumFolded;
+      // Since we changed the address of MI, make sure to update live variables
+      // to know that the new instruction has the properties of the old one.
+      LV->instructionChanged(MI, FMI);
+      return MBB.insert(MBB.erase(MI), FMI);
+    }
+    
+    // determine which of the physical registers we'll kill off, since we
+    // couldn't fold.
+    PhysReg = chooseReg(MBB, MI, VirtReg);
+  }
+
+  // this virtual register is now unmodified (since we just reloaded it)
+  markVirtRegModified(VirtReg, false);
+
+  DOUT << "  Reloading %reg" << VirtReg << " into "
+       << RegInfo->getName(PhysReg) << "\n";
+
+  // Add move instruction(s)
+  RegInfo->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+  ++NumLoads;    // Update statistics
+
+  MF->setPhysRegUsed(PhysReg);
+  MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
+  return MI;
+}
+
+/// Fill out the vreg read timetable. Since ReadTime increases
+/// monotonically, the individual readtime sets will be sorted
+/// in ascending order.
+void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) {
+  // loop over each instruction
+  MachineBasicBlock::iterator MII;
+  unsigned ReadTime;
+  
+  for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) {
+    MachineInstr *MI = MII;
+    
+    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      // look for vreg reads..
+      if (MO.isRegister() && !MO.isDef() && MO.getReg() &&
+          MRegisterInfo::isVirtualRegister(MO.getReg())) {
+        // ..and add them to the read table.
+        VRegTimes* &Times = VRegReadTable[MO.getReg()];
+        if (!VRegReadTable[MO.getReg()]) {
+          Times = new VRegTimes;
+          VRegReadIdx[MO.getReg()] = 0;
+        }
+        Times->push_back(ReadTime);
+      }
+    }
+
+  }  
+
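+  // ReadTime now equals the number of instructions in the block; chooseReg
+  // uses MBBLastInsnTime + 1 as the read time of values never read again in
+  // this MBB.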
+  MBBLastInsnTime = ReadTime;
+
+  for(DenseMap<unsigned, VRegTimes*, VRegKeyInfo>::iterator Reads = VRegReadTable.begin();
+      Reads != VRegReadTable.end(); ++Reads) {
+      if(Reads->second) {
+          DOUT << "Reads[" << Reads->first << "]=" << Reads->second->size() << "\n";
+      }
+  }
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+        MO.isDef() && !MO.isDead())
+      return true;
+  }
+  return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+        !MO.isDef() && MO.isKill())
+      return true;
+  }
+  return false;
+}
+
+
+void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) {
+  // loop over each instruction
+  MachineBasicBlock::iterator MII = MBB.begin();
+  const TargetInstrInfo &TII = *TM->getInstrInfo();
+  
+  DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+        if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+  // If this is the first basic block in the machine function, add live-in
+  // registers as active.
+  if (&MBB == &*MF->begin()) {
+    for (MachineFunction::livein_iterator I = MF->livein_begin(),
+         E = MF->livein_end(); I != E; ++I) {
+      unsigned Reg = I->first;
+      MF->setPhysRegUsed(Reg);
+      PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+      for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+           *AliasSet; ++AliasSet) {
+        if (PhysRegsUsed[*AliasSet] != -2) {
+          PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+          MF->setPhysRegUsed(*AliasSet);
+        }
+      }
+    }    
+  }
+  
+  // Otherwise, sequentially allocate each instruction in the MBB.
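+  // MBBCurTime starts at -1 so the first instruction gets time 0, matching
+  // the numbering produced by FillVRegReadTable.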
+  MBBCurTime = -1;
+  while (MII != MBB.end()) {
+    MachineInstr *MI = MII++;
+    MBBCurTime++;
+    const TargetInstrDescriptor &TID = TII.get(MI->getOpcode());
+    DEBUG(DOUT << "\nTime=" << MBBCurTime << " Starting RegAlloc of: " << *MI;
+          DOUT << "  Regs have values: ";
+          for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i)
+            if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+               DOUT << "[" << RegInfo->getName(i)
+                    << ",%reg" << PhysRegsUsed[i] << "] ";
+          DOUT << "\n");
+
+    SmallVector<unsigned, 8> Kills;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isKill()) {
+        if (!MO.isImplicit())
+          Kills.push_back(MO.getReg());
+        else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+          // Implicit kills that merely reflect the read/mod/write of a larger
+          // register when one of its sub-registers is defined are ignored;
+          // any other implicit kill is a real kill.
+          Kills.push_back(MO.getReg());
+      }
+    }
+
+    // Get the used operands into registers.  This has the potential to spill
+    // incoming values if we are out of registers.  Note that we completely
+    // ignore physical register uses here.  We assume that if an explicit
+    // physical register is referenced by the instruction, that it is guaranteed
+    // to be live-in, or the input is badly hosed.
+    //
+    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      // here we are looking for only used operands (never def&use)
+      if (MO.isRegister() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+          MRegisterInfo::isVirtualRegister(MO.getReg()))
+        MI = reloadVirtReg(MBB, MI, i);
+    }
+
+    // If this instruction is the last user of this register, kill the
+    // value, freeing the register being used, so it doesn't need to be
+    // spilled to memory.
+    //
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+      unsigned VirtReg = Kills[i];
+      unsigned PhysReg = VirtReg;
+      if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+        // If the virtual register was never materialized into a register, it
+        // might not be in the map, but it won't hurt to zero it out anyway.
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      } else {
+        assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+               "Silently clearing a virtual register?");
+      }
+
+      if (PhysReg) {
+        DOUT << "  Last use of " << RegInfo->getName(PhysReg)
+             << "[%reg" << VirtReg <<"], removing it from live set\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            DOUT  << "  Last use of "
+                  << RegInfo->getName(*AliasSet)
+                  << "[%reg" << VirtReg <<"], removing it from live set\n";
+            removePhysReg(*AliasSet);
+          }
+        }
+      }
+    }
+
+    // Loop over all of the operands of the instruction, spilling registers that
+    // are defined, and marking explicit destinations in the PhysRegsUsed map.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+          MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        unsigned Reg = MO.getReg();
+        if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+        // These are extra physical register defs when a sub-register
+        // is defined (def of a sub-register is a read/mod/write of the
+        // larger registers). Ignore.
+        if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+        MF->setPhysRegUsed(Reg);
+        spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+        PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+            MF->setPhysRegUsed(*AliasSet);
+          }
+        }
+      }
+    }
+
+    // Loop over the implicit defs, spilling them as well.
+    if (TID.ImplicitDefs) {
+      for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+           *ImplicitDefs; ++ImplicitDefs) {
+        unsigned Reg = *ImplicitDefs;
+        if (PhysRegsUsed[Reg] != -2) {
+          spillPhysReg(MBB, MI, Reg, true);
+          PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+        }
+        MF->setPhysRegUsed(Reg);
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+            MF->setPhysRegUsed(*AliasSet);
+          }
+        }
+      }
+    }
+
+    SmallVector<unsigned, 8> DeadDefs;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDead())
+        DeadDefs.push_back(MO.getReg());
+    }
+
+    // Okay, we have allocated all of the source operands and spilled any values
+    // that would be destroyed by defs of this instruction.  Loop over the
+    // explicit defs and assign them to a register, spilling incoming values if
+    // we need to scavenge a register.
+    //
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDef() && MO.getReg() &&
+          MRegisterInfo::isVirtualRegister(MO.getReg())) {
+        unsigned DestVirtReg = MO.getReg();
+        unsigned DestPhysReg;
+
+        // If DestVirtReg already has a value, use it.
+        if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+          DestPhysReg = chooseReg(MBB, MI, DestVirtReg);
+        MF->setPhysRegUsed(DestPhysReg);
+        markVirtRegModified(DestVirtReg);
+        MI->getOperand(i).setReg(DestPhysReg);  // Assign the output register
+      }
+    }
+
+    // If this instruction defines any registers that are immediately dead,
+    // kill them now.
+    //
+    for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+      unsigned VirtReg = DeadDefs[i];
+      unsigned PhysReg = VirtReg;
+      if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        assert(PhysReg != 0);
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      }
+
+      if (PhysReg) {
+        DOUT  << "  Register " << RegInfo->getName(PhysReg)
+              << " [%reg" << VirtReg
+              << "] is never used, removing it from the live list\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            DOUT  << "  Register " << RegInfo->getName(*AliasSet)
+                  << " [%reg" << *AliasSet
+                  << "] is never used, removing it from the live list\n";
+            removePhysReg(*AliasSet);
+          }
+        }
+      }
+    }
+    
+    // Finally, if this is a noop copy instruction, zap it.
+    unsigned SrcReg, DstReg;
+    if (TII.isMoveInstr(*MI, SrcReg, DstReg) && SrcReg == DstReg) {
+      LV->removeVirtualRegistersKilled(MI);
+      LV->removeVirtualRegistersDead(MI);
+      MBB.erase(MI);
+    }
+  }
+
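+  // Insert the end-of-block spills before the first terminator so the stores
+  // are emitted before any branch instruction.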
+  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+  // Spill all physical registers holding virtual registers now.
+  for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i)
+    if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+      if (unsigned VirtReg = PhysRegsUsed[i])
+        spillVirtReg(MBB, MI, VirtReg, i);
+      else
+        removePhysReg(i);
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "Machine Function " << "\n";
+  MF = &Fn;
+  TM = &Fn.getTarget();
+  RegInfo = TM->getRegisterInfo();
+  LV = &getAnalysis<LiveVariables>();
+
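+  // Start with every physical register marked free (-1).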
+  PhysRegsUsed.assign(RegInfo->getNumRegs(), -1);
+  
+  // At various places we want to efficiently check to see whether a register
+  // is allocatable.  To handle this, we mark all unallocatable registers as
+  // being pinned down, permanently.
+  {
+    BitVector Allocable = RegInfo->getAllocatableSet(Fn);
+    for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+      if (!Allocable[i])
+        PhysRegsUsed[i] = -2;  // Mark the reg unallocable.
+  }
+
+  // initialize the virtual->physical register map to have a 'null'
+  // mapping for all virtual registers
+  Virt2PhysRegMap.grow(MF->getSSARegMap()->getLastVirtReg());
+  StackSlotForVirtReg.grow(MF->getSSARegMap()->getLastVirtReg());
+  VirtRegModified.resize(MF->getSSARegMap()->getLastVirtReg() - MRegisterInfo::FirstVirtualRegister + 1,0);
+
+  // Loop over all of the basic blocks, eliminating virtual register references
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    // fill out the read timetable 
+    FillVRegReadTable(*MBB);
+    // use it to allocate the BB
+    AllocateBasicBlock(*MBB);
+    // clear it
+    VRegReadTable.clear();
+  }
+  
+  StackSlotForVirtReg.clear();
+  PhysRegsUsed.clear();
+  VirtRegModified.clear();
+  Virt2PhysRegMap.clear();
+  return true;
+}
+
+FunctionPass *llvm::createBigBlockRegisterAllocator() {
+  return new RABigBlock();
+}
+
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
new file mode 100644
index 0000000..01d43fd
--- /dev/null
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -0,0 +1,828 @@
+//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a linear scan register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "PhysRegTracker.h"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <set>
+#include <queue>
+#include <memory>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(NumIters     , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "  linear scan register allocator",
+                   createLinearScanRegisterAllocator);
+
+namespace {
+  static unsigned numIterations = 0;
+  static unsigned numIntervals = 0;
+
+  struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass {
+    static char ID;
+    RALinScan() : MachineFunctionPass((intptr_t)&ID) {}
+
+    typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+    typedef std::vector<IntervalPtr> IntervalPtrs;
+  private:
+    /// RelatedRegClasses - This structure is built the first time a function is
+    /// compiled, and keeps track of which register classes have registers that
+    /// belong to multiple classes or have aliases that are in other classes.
+    EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+    std::map<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+    MachineFunction* mf_;
+    const TargetMachine* tm_;
+    const MRegisterInfo* mri_;
+    LiveIntervals* li_;
+
+    /// handled_ - Intervals are added to the handled_ set in the order of their
+    /// start value.  This is used for backtracking.
+    std::vector<LiveInterval*> handled_;
+
+    /// fixed_ - Intervals that correspond to machine registers.
+    ///
+    IntervalPtrs fixed_;
+
+    /// active_ - Intervals that are currently being processed, and which have a
+    /// live range active for the current point.
+    IntervalPtrs active_;
+
+    /// inactive_ - Intervals that are currently being processed, but which have
+    /// a hold at the current point.
+    IntervalPtrs inactive_;
+
+    typedef std::priority_queue<LiveInterval*,
+                                std::vector<LiveInterval*>,
+                                greater_ptr<LiveInterval> > IntervalHeap;
+    IntervalHeap unhandled_;
+    std::auto_ptr<PhysRegTracker> prt_;
+    std::auto_ptr<VirtRegMap> vrm_;
+    std::auto_ptr<Spiller> spiller_;
+
+  public:
+    virtual const char* getPassName() const {
+      return "Linear Scan Register Allocator";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveIntervals>();
+      AU.addRequiredID(SimpleRegisterCoalescingID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    /// runOnMachineFunction - register allocate the whole function
+    bool runOnMachineFunction(MachineFunction&);
+
+  private:
+    /// linearScan - the linear scan algorithm
+    void linearScan();
+
+    /// initIntervalSets - initialize the interval sets.
+    ///
+    void initIntervalSets();
+
+    /// processActiveIntervals - expire old intervals and move non-overlapping
+    /// ones to the inactive list.
+    void processActiveIntervals(unsigned CurPoint);
+
+    /// processInactiveIntervals - expire old intervals and move overlapping
+    /// ones to the active list.
+    void processInactiveIntervals(unsigned CurPoint);
+
+    /// assignRegOrStackSlotAtInterval - assign a register if one
+    /// is available, or spill.
+    void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+    ///
+    /// register handling helpers
+    ///
+
+    /// getFreePhysReg - return a free physical register for this virtual
+    /// register interval if we have one, otherwise return 0.
+    unsigned getFreePhysReg(LiveInterval* cur);
+
+    /// assignVirt2StackSlot - assigns this virtual register to a
+    /// stack slot. returns the stack slot
+    int assignVirt2StackSlot(unsigned virtReg);
+
+    void ComputeRelatedRegClasses();
+
+    template <typename ItTy>
+    void printIntervals(const char* const str, ItTy i, ItTy e) const {
+      if (str) DOUT << str << " intervals:\n";
+      for (; i != e; ++i) {
+        DOUT << "\t" << *i->first << " -> ";
+        unsigned reg = i->first->reg;
+        if (MRegisterInfo::isVirtualRegister(reg)) {
+          reg = vrm_->getPhys(reg);
+        }
+        DOUT << mri_->getName(reg) << '\n';
+      }
+    }
+  };
+  char RALinScan::ID = 0;
+}
+
+void RALinScan::ComputeRelatedRegClasses() {
+  const MRegisterInfo &MRI = *mri_;
+  
+  // First pass, add all reg classes to the union, and determine at least one
+  // reg class that each register is in.
+  bool HasAliases = false;
+  for (MRegisterInfo::regclass_iterator RCI = MRI.regclass_begin(),
+       E = MRI.regclass_end(); RCI != E; ++RCI) {
+    RelatedRegClasses.insert(*RCI);
+    for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+         I != E; ++I) {
+      HasAliases = HasAliases || *MRI.getAliasSet(*I) != 0;
+      
+      const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+      if (PRC) {
+        // Already processed this register.  Just make sure we know that
+        // multiple register classes share a register.
+        RelatedRegClasses.unionSets(PRC, *RCI);
+      } else {
+        PRC = *RCI;
+      }
+    }
+  }
+  
+  // Second pass, now that we know conservatively what register classes each reg
+  // belongs to, add info about aliases.  We don't need to do this for targets
+  // without register aliases.
+  if (HasAliases)
+    for (std::map<unsigned, const TargetRegisterClass*>::iterator
+         I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+         I != E; ++I)
+      for (const unsigned *AS = MRI.getAliasSet(I->first); *AS; ++AS)
+        RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+}
+
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  tm_ = &fn.getTarget();
+  mri_ = tm_->getRegisterInfo();
+  li_ = &getAnalysis<LiveIntervals>();
+
+  // If this is the first function compiled, compute the related reg classes.
+  if (RelatedRegClasses.empty())
+    ComputeRelatedRegClasses();
+  
+  if (!prt_.get()) prt_.reset(new PhysRegTracker(*mri_));
+  vrm_.reset(new VirtRegMap(*mf_));
+  if (!spiller_.get()) spiller_.reset(createSpiller());
+
+  initIntervalSets();
+
+  linearScan();
+
+  // Rewrite spill code and update the PhysRegsUsed set.
+  spiller_->runOnMachineFunction(*mf_, *vrm_);
+
+  vrm_.reset();  // Free the VirtRegMap
+
+
+  while (!unhandled_.empty()) unhandled_.pop();
+  fixed_.clear();
+  active_.clear();
+  inactive_.clear();
+  handled_.clear();
+
+  return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+  assert(unhandled_.empty() && fixed_.empty() &&
+         active_.empty() && inactive_.empty() &&
+         "interval sets should be empty on initialization");
+
+  for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+    if (MRegisterInfo::isPhysicalRegister(i->second.reg)) {
+      mf_->setPhysRegUsed(i->second.reg);
+      fixed_.push_back(std::make_pair(&i->second, i->second.begin()));
+    } else
+      unhandled_.push(&i->second);
+  }
+}
+
+void RALinScan::linearScan()
+{
+  // linear scan algorithm
+  DOUT << "********** LINEAR SCAN **********\n";
+  DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n';
+
+  // DEBUG(printIntervals("unhandled", unhandled_.begin(), unhandled_.end()));
+  DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end()));
+  DEBUG(printIntervals("active", active_.begin(), active_.end()));
+  DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end()));
+
+  while (!unhandled_.empty()) {
+    // pick the interval with the earliest start point
+    LiveInterval* cur = unhandled_.top();
+    unhandled_.pop();
+    ++numIterations;
+    DOUT << "\n*** CURRENT ***: " << *cur << '\n';
+
+    processActiveIntervals(cur->beginNumber());
+    processInactiveIntervals(cur->beginNumber());
+
+    assert(MRegisterInfo::isVirtualRegister(cur->reg) &&
+           "Can only allocate virtual registers!");
+
+    // Allocating a virtual register: try to find a free physical register,
+    // or spill an interval (possibly this one) in order to assign it one.
+    assignRegOrStackSlotAtInterval(cur);
+
+    DEBUG(printIntervals("active", active_.begin(), active_.end()));
+    DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end()));
+  }
+  numIntervals += li_->getNumIntervals();
+  NumIters += numIterations;
+
+  // expire any remaining active intervals
+  for (IntervalPtrs::reverse_iterator
+         i = active_.rbegin(); i != active_.rend(); ) {
+    unsigned reg = i->first->reg;
+    DOUT << "\tinterval " << *i->first << " expired\n";
+    assert(MRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+    reg = vrm_->getPhys(reg);
+    prt_->delRegUse(reg);
+    i = IntervalPtrs::reverse_iterator(active_.erase(i.base()-1));
+  }
+
+  // expire any remaining inactive intervals
+  for (IntervalPtrs::reverse_iterator
+         i = inactive_.rbegin(); i != inactive_.rend(); ) {
+    DOUT << "\tinterval " << *i->first << " expired\n";
+    i = IntervalPtrs::reverse_iterator(inactive_.erase(i.base()-1));
+  }
+
+  // A brute force way of adding live-ins to every BB.
+  MachineFunction::iterator MBB = mf_->begin();
+  ++MBB; // Skip entry MBB.
+  for (MachineFunction::iterator E = mf_->end(); MBB != E; ++MBB) {
+    unsigned StartIdx = li_->getMBBStartIdx(MBB->getNumber());
+    for (IntervalPtrs::iterator i = fixed_.begin(), e = fixed_.end();
+         i != e; ++i)
+      if (i->first->liveAt(StartIdx))
+        MBB->addLiveIn(i->first->reg);
+
+    for (unsigned i = 0, e = handled_.size(); i != e; ++i) { 
+      LiveInterval *HI = handled_[i];
+      unsigned Reg = HI->reg;
+      if (!vrm_->hasStackSlot(Reg) && HI->liveAt(StartIdx)) {
+        assert(MRegisterInfo::isVirtualRegister(Reg));
+        Reg = vrm_->getPhys(Reg);
+        MBB->addLiveIn(Reg);
+      }
+    }
+  }
+
+  DOUT << *vrm_;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(unsigned CurPoint)
+{
+  DOUT << "\tprocessing active intervals:\n";
+
+  for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+    LiveInterval *Interval = active_[i].first;
+    LiveInterval::iterator IntervalPos = active_[i].second;
+    unsigned reg = Interval->reg;
+
+    IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+    if (IntervalPos == Interval->end()) {     // Remove expired intervals.
+      DOUT << "\t\tinterval " << *Interval << " expired\n";
+      assert(MRegisterInfo::isVirtualRegister(reg) &&
+             "Can only allocate virtual registers!");
+      reg = vrm_->getPhys(reg);
+      prt_->delRegUse(reg);
+
+      // Pop off the end of the list.
+      active_[i] = active_.back();
+      active_.pop_back();
+      --i; --e;
+
+    } else if (IntervalPos->start > CurPoint) {
+      // Move inactive intervals to inactive list.
+      DOUT << "\t\tinterval " << *Interval << " inactive\n";
+      assert(MRegisterInfo::isVirtualRegister(reg) &&
+             "Can only allocate virtual registers!");
+      reg = vrm_->getPhys(reg);
+      prt_->delRegUse(reg);
+      // add to inactive.
+      inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+      // Pop off the end of the list.
+      active_[i] = active_.back();
+      active_.pop_back();
+      --i; --e;
+    } else {
+      // Otherwise, just update the iterator position.
+      active_[i].second = IntervalPos;
+    }
+  }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(unsigned CurPoint)
+{
+  DOUT << "\tprocessing inactive intervals:\n";
+
+  for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+    LiveInterval *Interval = inactive_[i].first;
+    LiveInterval::iterator IntervalPos = inactive_[i].second;
+    unsigned reg = Interval->reg;
+
+    IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+    if (IntervalPos == Interval->end()) {       // remove expired intervals.
+      DOUT << "\t\tinterval " << *Interval << " expired\n";
+
+      // Pop off the end of the list.
+      inactive_[i] = inactive_.back();
+      inactive_.pop_back();
+      --i; --e;
+    } else if (IntervalPos->start <= CurPoint) {
+      // move re-activated intervals in active list
+      DOUT << "\t\tinterval " << *Interval << " active\n";
+      assert(MRegisterInfo::isVirtualRegister(reg) &&
+             "Can only allocate virtual registers!");
+      reg = vrm_->getPhys(reg);
+      prt_->addRegUse(reg);
+      // add to active
+      active_.push_back(std::make_pair(Interval, IntervalPos));
+
+      // Pop off the end of the list.
+      inactive_[i] = inactive_.back();
+      inactive_.pop_back();
+      --i; --e;
+    } else {
+      // Otherwise, just update the iterator position.
+      inactive_[i].second = IntervalPos;
+    }
+  }
+}
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its aliases by adding the given weight.
+static void updateSpillWeights(std::vector<float> &Weights,
+                               unsigned reg, float weight,
+                               const MRegisterInfo *MRI) {
+  Weights[reg] += weight;
+  for (const unsigned* as = MRI->getAliasSet(reg); *as; ++as)
+    Weights[*as] += weight;
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+  for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+       I != E; ++I)
+    if (I->first == LI) return I;
+  return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){
+  for (unsigned i = 0, e = V.size(); i != e; ++i) {
+    RALinScan::IntervalPtr &IP = V[i];
+    LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+                                                IP.second, Point);
+    if (I != IP.first->begin()) --I;
+    IP.second = I;
+  }
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
+{
+  DOUT << "\tallocating current interval: ";
+
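+  // Snapshot the register tracker; the speculative conflict marking below is
+  // undone by restoring this copy before the final assignment.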
+  PhysRegTracker backupPrt = *prt_;
+
+  std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+  unsigned StartPosition = cur->beginNumber();
+  const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg);
+  const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+      
+  // for every interval in inactive we overlap with, mark the
+  // register as not free and update spill weights.
+  for (IntervalPtrs::const_iterator i = inactive_.begin(),
+         e = inactive_.end(); i != e; ++i) {
+    unsigned Reg = i->first->reg;
+    assert(MRegisterInfo::isVirtualRegister(Reg) &&
+           "Can only allocate virtual registers!");
+    const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(Reg);
+    // If this is not in a related reg class to the register we're allocating, 
+    // don't check it.
+    if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+        cur->overlapsFrom(*i->first, i->second-1)) {
+      Reg = vrm_->getPhys(Reg);
+      prt_->addRegUse(Reg);
+      SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+    }
+  }
+  
+  // Speculatively check to see if we can get a register right now.  If not,
+  // we know we won't be able to by adding more constraints.  If so, we can
+  // check to see if it is valid.  Doing an exhaustive search of the fixed_ list
+  // is very bad (it contains all callee clobbered registers for any functions
+  // with a call), so we want to avoid doing that if possible.
+  unsigned physReg = getFreePhysReg(cur);
+  if (physReg) {
+    // We got a register.  However, if it's in the fixed_ list, we might
+    // conflict with it.  Check to see if we conflict with it or any of its
+    // aliases.
+    std::set<unsigned> RegAliases;
+    for (const unsigned *AS = mri_->getAliasSet(physReg); *AS; ++AS)
+      RegAliases.insert(*AS);
+    
+    bool ConflictsWithFixed = false;
+    for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+      IntervalPtr &IP = fixed_[i];
+      if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+        // Okay, this reg is on the fixed list.  Check to see if we actually
+        // conflict.
+        LiveInterval *I = IP.first;
+        if (I->endNumber() > StartPosition) {
+          LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+          IP.second = II;
+          if (II != I->begin() && II->start > StartPosition)
+            --II;
+          if (cur->overlapsFrom(*I, II)) {
+            ConflictsWithFixed = true;
+            break;
+          }
+        }
+      }
+    }
+    
+    // Okay, the register picked by our speculative getFreePhysReg call turned
+    // out to be in use.  Actually add all of the conflicting fixed registers to
+    // prt so we can do an accurate query.
+    if (ConflictsWithFixed) {
+      // For every interval in fixed we overlap with, mark the register as not
+      // free and update spill weights.
+      for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+        IntervalPtr &IP = fixed_[i];
+        LiveInterval *I = IP.first;
+
+        const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+        if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&       
+            I->endNumber() > StartPosition) {
+          LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+          IP.second = II;
+          if (II != I->begin() && II->start > StartPosition)
+            --II;
+          if (cur->overlapsFrom(*I, II)) {
+            unsigned reg = I->reg;
+            prt_->addRegUse(reg);
+            SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+          }
+        }
+      }
+
+      // Using the newly updated prt_ object, which includes conflicts in the
+      // future, see if there are any registers available.
+      physReg = getFreePhysReg(cur);
+    }
+  }
+    
+  // Restore the physical register tracker, removing information about the
+  // future.
+  *prt_ = backupPrt;
+  
+  // if we find a free register, we are done: assign this virtual to
+  // the free physical register and add this interval to the active
+  // list.
+  if (physReg) {
+    DOUT <<  mri_->getName(physReg) << '\n';
+    vrm_->assignVirt2Phys(cur->reg, physReg);
+    prt_->addRegUse(physReg);
+    active_.push_back(std::make_pair(cur, cur->begin()));
+    handled_.push_back(cur);
+    return;
+  }
+  DOUT << "no free registers\n";
+
+  // Compile the spill weights into an array that is better for scanning.
+  std::vector<float> SpillWeights(mri_->getNumRegs(), 0.0);
+  for (std::vector<std::pair<unsigned, float> >::iterator
+       I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+    updateSpillWeights(SpillWeights, I->first, I->second, mri_);
+  
+  // for each interval in active, update spill weights.
+  for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+       i != e; ++i) {
+    unsigned reg = i->first->reg;
+    assert(MRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+    reg = vrm_->getPhys(reg);
+    updateSpillWeights(SpillWeights, reg, i->first->weight, mri_);
+  }
+ 
+  DOUT << "\tassigning stack slot at interval "<< *cur << ":\n";
+
+  // Find a register to spill.
+  float minWeight = HUGE_VALF;
+  unsigned minReg = cur->preference;  // Try the preferred register first.
+  
+  if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+           e = RC->allocation_order_end(*mf_); i != e; ++i) {
+      unsigned reg = *i;
+      if (minWeight > SpillWeights[reg]) {
+        minWeight = SpillWeights[reg];
+        minReg = reg;
+      }
+    }
+  
+  // If we didn't find a register that is spillable, try aliases?
+  if (!minReg) {
+    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+           e = RC->allocation_order_end(*mf_); i != e; ++i) {
+      unsigned reg = *i;
+      // No need to worry about whether the alias register is smaller than the
+      // registers in RC; we are going to spill all registers that alias it anyway.
+      for (const unsigned* as = mri_->getAliasSet(reg); *as; ++as) {
+        if (minWeight > SpillWeights[*as]) {
+          minWeight = SpillWeights[*as];
+          minReg = *as;
+        }
+      }
+    }
+
+    // All registers must have inf weight. Just grab one!
+    if (!minReg)
+      minReg = *RC->allocation_order_begin(*mf_);
+  }
+  
+  DOUT << "\t\tregister with min weight: "
+       << mri_->getName(minReg) << " (" << minWeight << ")\n";
+
+  // if the current has the minimum weight, we need to spill it and
+  // add any added intervals back to unhandled, and restart
+  // linearscan.
+  if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+    DOUT << "\t\t\tspilling(c): " << *cur << '\n';
+    // if the current interval is re-materializable, remember so and don't
+    // assign it a spill slot.
+    if (cur->remat)
+      vrm_->setVirtIsReMaterialized(cur->reg, cur->remat);
+    int slot = cur->remat ? vrm_->assignVirtReMatId(cur->reg)
+      : vrm_->assignVirt2StackSlot(cur->reg);
+    std::vector<LiveInterval*> added =
+      li_->addIntervalsForSpills(*cur, *vrm_, slot);
+    if (added.empty())
+      return;  // Early exit if all spills were folded.
+
+    // Merge added with unhandled.  Note that we know that
+    // addIntervalsForSpills returns intervals sorted by their starting
+    // point.
+    for (unsigned i = 0, e = added.size(); i != e; ++i)
+      unhandled_.push(added[i]);
+    return;
+  }
+
+  ++NumBacktracks;
+
+  // push the current interval back to unhandled since we are going
+  // to re-run at least this iteration. Since we didn't modify it, it
+  // should go back right at the front of the list.
+  unhandled_.push(cur);
+
+  // otherwise we spill all intervals aliasing the register with
+  // minimum weight, rollback to the interval with the earliest
+  // start point and let the linear scan algorithm run again
+  std::vector<LiveInterval*> added;
+  assert(MRegisterInfo::isPhysicalRegister(minReg) &&
+         "did not choose a register to spill?");
+  BitVector toSpill(mri_->getNumRegs());
+
+  // We are going to spill minReg and all its aliases.
+  toSpill[minReg] = true;
+  for (const unsigned* as = mri_->getAliasSet(minReg); *as; ++as)
+    toSpill[*as] = true;
+
+  // the earliest start of a spilled interval indicates up to where
+  // in handled we need to roll back
+  unsigned earliestStart = cur->beginNumber();
+
+  // set of spilled vregs (used later to rollback properly)
+  std::set<unsigned> spilled;
+
+  // spill live intervals of virtual regs mapped to the physical register we
+  // want to clear (and its aliases).  We only spill those that overlap with the
+  // current interval as the rest do not affect its allocation.  We also keep
+  // track of the earliest start of all spilled live intervals since this will
+  // mark our rollback point.
+  for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+    unsigned reg = i->first->reg;
+    if (//MRegisterInfo::isVirtualRegister(reg) &&
+        toSpill[vrm_->getPhys(reg)] &&
+        cur->overlapsFrom(*i->first, i->second)) {
+      DOUT << "\t\t\tspilling(a): " << *i->first << '\n';
+      earliestStart = std::min(earliestStart, i->first->beginNumber());
+      if (i->first->remat)
+        vrm_->setVirtIsReMaterialized(reg, i->first->remat);
+      int slot = i->first->remat ? vrm_->assignVirtReMatId(reg)
+        : vrm_->assignVirt2StackSlot(reg);
+      std::vector<LiveInterval*> newIs =
+        li_->addIntervalsForSpills(*i->first, *vrm_, slot);
+      std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+      spilled.insert(reg);
+    }
+  }
+  for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+    unsigned reg = i->first->reg;
+    if (//MRegisterInfo::isVirtualRegister(reg) &&
+        toSpill[vrm_->getPhys(reg)] &&
+        cur->overlapsFrom(*i->first, i->second-1)) {
+      DOUT << "\t\t\tspilling(i): " << *i->first << '\n';
+      earliestStart = std::min(earliestStart, i->first->beginNumber());
+      if (i->first->remat)
+        vrm_->setVirtIsReMaterialized(reg, i->first->remat);
+      int slot = i->first->remat ? vrm_->assignVirtReMatId(reg)
+        : vrm_->assignVirt2StackSlot(reg);
+      std::vector<LiveInterval*> newIs =
+        li_->addIntervalsForSpills(*i->first, *vrm_, slot);
+      std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+      spilled.insert(reg);
+    }
+  }
+
+  DOUT << "\t\trolling back to: " << earliestStart << '\n';
+
+  // Scan handled in reverse order up to the earliest start of a
+  // spilled live interval and undo each one, restoring the state of
+  // unhandled.
+  while (!handled_.empty()) {
+    LiveInterval* i = handled_.back();
+    // If this interval starts before t we are done.
+    if (i->beginNumber() < earliestStart)
+      break;
+    DOUT << "\t\t\tundo changes for: " << *i << '\n';
+    handled_.pop_back();
+
+    // When undoing a live interval allocation we must know if it is active or
+    // inactive to properly update the PhysRegTracker and the VirtRegMap.
+    IntervalPtrs::iterator it;
+    if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+      active_.erase(it);
+      assert(!MRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      prt_->delRegUse(vrm_->getPhys(i->reg));
+      vrm_->clearVirt(i->reg);
+    } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+      inactive_.erase(it);
+      assert(!MRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      vrm_->clearVirt(i->reg);
+    } else {
+      assert(MRegisterInfo::isVirtualRegister(i->reg) &&
+             "Can only allocate virtual registers!");
+      vrm_->clearVirt(i->reg);
+      unhandled_.push(i);
+    }
+  }
+
+  // Rewind the iterators in the active, inactive, and fixed lists back to the
+  // point we reverted to.
+  RevertVectorIteratorsTo(active_, earliestStart);
+  RevertVectorIteratorsTo(inactive_, earliestStart);
+  RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // scan the rest and undo each interval that expired after earliestStart and
+  // insert it in active (the next iteration of the algorithm will
+  // put it in inactive if required)
+  for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+    LiveInterval *HI = handled_[i];
+    if (!HI->expiredAt(earliestStart) &&
+        HI->expiredAt(cur->beginNumber())) {
+      DOUT << "\t\t\tundo changes for: " << *HI << '\n';
+      active_.push_back(std::make_pair(HI, HI->begin()));
+      assert(!MRegisterInfo::isPhysicalRegister(HI->reg));
+      prt_->addRegUse(vrm_->getPhys(HI->reg));
+    }
+  }
+
+  // merge added with unhandled
+  for (unsigned i = 0, e = added.size(); i != e; ++i)
+    unhandled_.push(added[i]);
+}
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+  std::vector<unsigned> inactiveCounts(mri_->getNumRegs(), 0);
+  unsigned MaxInactiveCount = 0;
+  
+  const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg);
+  const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+ 
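+  // Count how many inactive intervals of a related class are assigned to each
+  // physical register; preferring registers with a high count was found to
+  // slightly reduce register pressure (see the note further below).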
+  for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+       i != e; ++i) {
+    unsigned reg = i->first->reg;
+    assert(MRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+
+    // If this is not in a related reg class to the register we're allocating, 
+    // don't check it.
+    const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(reg);
+    if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+      reg = vrm_->getPhys(reg);
+      ++inactiveCounts[reg];
+      MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+    }
+  }
+
+  unsigned FreeReg = 0;
+  unsigned FreeRegInactiveCount = 0;
+
+  // If copy coalescer has assigned a "preferred" register, check if it's
+  // available first.
+  if (cur->preference)
+    if (prt_->isRegAvail(cur->preference)) {
+      DOUT << "\t\tassigned the preferred register: "
+           << mri_->getName(cur->preference) << "\n";
+      return cur->preference;
+    } else
+      DOUT << "\t\tunable to assign the preferred register: "
+           << mri_->getName(cur->preference) << "\n";
+
+  // Scan for the first available register.
+  TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
+  TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+  for (; I != E; ++I)
+    if (prt_->isRegAvail(*I)) {
+      FreeReg = *I;
+      FreeRegInactiveCount = inactiveCounts[FreeReg];
+      break;
+    }
+  
+  // If there are no free regs, or if this reg has the max inactive count,
+  // return this register.
+  if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) return FreeReg;
+  
+  // Continue scanning the registers, looking for the one with the highest
+  // inactive count.  Alkis found that this reduced register pressure very
+  // slightly on X86 (in rev 1.94 of this file), though this should probably be
+  // reevaluated now.
+  for (; I != E; ++I) {
+    unsigned Reg = *I;
+    if (prt_->isRegAvail(Reg) && FreeRegInactiveCount < inactiveCounts[Reg]) {
+      FreeReg = Reg;
+      FreeRegInactiveCount = inactiveCounts[Reg];
+      if (FreeRegInactiveCount == MaxInactiveCount)
+        break;    // We found the one with the max inactive count.
+    }
+  }
+  
+  return FreeReg;
+}
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+  return new RALinScan();
+}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
new file mode 100644
index 0000000..6454900
--- /dev/null
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -0,0 +1,830 @@
+//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumFolded, "Number of loads/stores folded into instructions");
+
+namespace {
+  static RegisterRegAlloc
+    localRegAlloc("local", "  local register allocator",
+                  createLocalRegisterAllocator);
+
+
+  class VISIBILITY_HIDDEN RALocal : public MachineFunctionPass {
+  public:
+    static char ID;
+    RALocal() : MachineFunctionPass((intptr_t)&ID) {}
+  private:
+    const TargetMachine *TM;
+    MachineFunction *MF;
+    const MRegisterInfo *RegInfo;
+    LiveVariables *LV;
+
+    // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+    // values are spilled.
+    std::map<unsigned, int> StackSlotForVirtReg;
+
+    // Virt2PhysRegMap - This map contains entries for each virtual register
+    // that is currently available in a physical register.
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+    unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+      return Virt2PhysRegMap[VirtReg];
+    }
+
+    // PhysRegsUsed - This array is effectively a map, containing entries for
+    // each physical register that currently has a value (ie, it is in
+    // Virt2PhysRegMap).  The value mapped to is the virtual register
+    // corresponding to the physical register (the inverse of the
+    // Virt2PhysRegMap), or 0.  The value is set to 0 if this register is pinned
+    // because it is used by a future instruction, and to -2 if it is not
+    // allocatable.  If the entry for a physical register is -1, then the
+    // physical register is "not in the map".
+    //
+    std::vector<int> PhysRegsUsed;
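+
+    // Illustrative helpers (hypothetical; not used elsewhere in this file)
+    // showing how the sentinel encoding described above would be read:
+    static bool isFreeEntry(int Entry)          { return Entry == -1; }
+    static bool isUnallocatableEntry(int Entry) { return Entry == -2; }
+    static bool isPinnedEntry(int Entry)        { return Entry == 0;  }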
+
+    // PhysRegsUseOrder - This contains a list of the physical registers that
+    // currently have a virtual register value in them.  This list provides an
+    // ordering of registers, imposing a reallocation order.  This list is only
+    // used if all registers are allocated and we have to spill one, in which
+    // case we spill the least recently used register.  Entries at the front of
+    // the list are the least recently used registers, entries at the back are
+    // the most recently used.
+    //
+    std::vector<unsigned> PhysRegsUseOrder;
+
+    // VirtRegModified - This bitset contains information about which virtual
+    // registers need to be spilled back to memory when their registers are
+    // scavenged.  If a virtual register has simply been rematerialized, there
+    // is no reason to spill it to memory when we need the register back.
+    //
+    std::vector<bool> VirtRegModified;
+
+    void markVirtRegModified(unsigned Reg, bool Val = true) {
+      assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      Reg -= MRegisterInfo::FirstVirtualRegister;
+      if (VirtRegModified.size() <= Reg) VirtRegModified.resize(Reg+1);
+      VirtRegModified[Reg] = Val;
+    }
+
+    bool isVirtRegModified(unsigned Reg) const {
+      assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      assert(Reg - MRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+             && "Illegal virtual register!");
+      return VirtRegModified[Reg - MRegisterInfo::FirstVirtualRegister];
+    }
+
+    void AddToPhysRegsUseOrder(unsigned Reg) {
+      std::vector<unsigned>::iterator It =
+        std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg);
+      if (It != PhysRegsUseOrder.end())
+        PhysRegsUseOrder.erase(It);
+      PhysRegsUseOrder.push_back(Reg);
+    }
+
+    void MarkPhysRegRecentlyUsed(unsigned Reg) {
+      if (PhysRegsUseOrder.empty() ||
+          PhysRegsUseOrder.back() == Reg) return;  // Already most recently used
+
+      for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i)
+        if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) {
+          unsigned RegMatch = PhysRegsUseOrder[i-1];       // remove from middle
+          PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
+          // Add it to the end of the list
+          PhysRegsUseOrder.push_back(RegMatch);
+          if (RegMatch == Reg)
+            return;    // Found an exact match, exit early
+        }
+    }
+
+  public:
+    virtual const char *getPassName() const {
+      return "Local Register Allocator";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveVariables>();
+      AU.addRequiredID(PHIEliminationID);
+      AU.addRequiredID(TwoAddressInstructionPassID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// runOnMachineFunction - Register allocate the whole function
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+
+    /// areRegsEqual - This method returns true if the specified registers are
+    /// related to each other.  To do this, it checks to see if they are equal
+    /// or if the first register is in the alias set of the second register.
+    ///
+    bool areRegsEqual(unsigned R1, unsigned R2) const {
+      if (R1 == R2) return true;
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+           *AliasSet; ++AliasSet) {
+        if (*AliasSet == R1) return true;
+      }
+      return false;
+    }
+
+    /// getStackSpaceFor - This returns the frame index of the specified virtual
+    /// register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// removePhysReg - This method marks the specified physical register as no
+    /// longer being in use.
+    ///
+    void removePhysReg(unsigned PhysReg);
+
+    /// spillVirtReg - This method spills the value specified by PhysReg into
+    /// the virtual register slot specified by VirtReg.  It then updates the RA
+    /// data structures to indicate the fact that PhysReg is now available.
+    ///
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                      unsigned VirtReg, unsigned PhysReg);
+
+    /// spillPhysReg - This method spills the specified physical register into
+    /// the virtual register slot associated with it.  If OnlyVirtRegs is set to
+    /// true, then the request is ignored if the physical register does not
+    /// contain a virtual register.
+    ///
+    void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                      unsigned PhysReg, bool OnlyVirtRegs = false);
+
+    /// assignVirtToPhysReg - This method updates local state so that we know
+    /// that PhysReg is the proper container for VirtReg now.  The physical
+    /// register must not be used for anything else when this is called.
+    ///
+    void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+    /// isPhysRegAvailable - Return true if the specified physical register is
+    /// free and available for use.  This also includes checking to see if
+    /// aliased registers are all free...
+    ///
+    bool isPhysRegAvailable(unsigned PhysReg) const;
+
+    /// getFreeReg - Look to see if there is a free register available in the
+    /// specified register class.  If not, return 0.
+    ///
+    unsigned getFreeReg(const TargetRegisterClass *RC);
+
+    /// getReg - Find a physical register to hold the specified virtual
+    /// register.  If all compatible physical registers are used, this method
+    /// spills the last used virtual register to the stack, and uses that
+    /// register.
+    ///
+    unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                    unsigned VirtReg);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+    /// register use to refer to a physical register.  This method may do this
+    /// in one of several ways: if the register is available in a physical
+    /// register already, it uses that physical register.  If the value is not
+    /// in a physical register, and if there are physical registers available,
+    /// it loads it into a register.  If register pressure is high, and it is
+    /// possible, it tries to fold the load of the virtual register into the
+    /// instruction itself.  It avoids doing this if register pressure is low to
+    /// improve the chance that subsequent instructions can use the reloaded
+    /// value.  This method returns the modified instruction.
+    ///
+    MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                unsigned OpNum);
+
+
+    void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+                       unsigned PhysReg);
+  };
+  char RALocal::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the location VirtReg would belong...
+  std::map<unsigned, int>::iterator I =StackSlotForVirtReg.lower_bound(VirtReg);
+
+  if (I != StackSlotForVirtReg.end() && I->first == VirtReg)
+    return I->second;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment());
+
+  // Assign the slot...
+  StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx));
+  return FrameIdx;
+}
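+
+// For example (illustrative values): if StackSlotForVirtReg already contains
+// {4 -> 0, 9 -> 1}, a query for virtual register 9 hits the lower_bound lookup
+// above and returns slot 1, while a query for 7 misses, creates a new stack
+// object, and inserts the {7 -> FrameIdx} pair using the lower_bound iterator
+// as a cheap insertion hint just before the {9 -> 1} entry.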
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RALocal::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;      // PhysReg no longer used
+
+  std::vector<unsigned>::iterator It =
+    std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
+  if (It != PhysRegsUseOrder.end())
+    PhysRegsUseOrder.erase(It);
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg.  It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RALocal::spillVirtReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I,
+                           unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " Must not have appropriate kill for the register or use exists beyond"
+         " the intended one.");
+  DOUT << "  Spilling register " << RegInfo->getName(PhysReg)
+       << " containing %reg" << VirtReg;
+  if (!isVirtRegModified(VirtReg))
+    DOUT << " which has not been modified, so no store necessary!";
+
+  // Otherwise, there is a virtual register corresponding to this physical
+  // register.  We only need to spill it into its stack slot if it has been
+  // modified.
+  if (isVirtRegModified(VirtReg)) {
+    const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DOUT << " to stack slot #" << FrameIndex;
+    RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIndex, RC);
+    ++NumStores;   // Update statistics
+  }
+
+  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+
+  DOUT << "\n";
+  removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it.  If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                           unsigned PhysReg, bool OnlyVirtRegs) {
+  if (PhysRegsUsed[PhysReg] != -1) {            // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+  } else {
+    // If the selected register aliases any other registers, we must make
+    // sure that one of the aliases isn't alive.
+    for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+         *AliasSet; ++AliasSet)
+      if (PhysRegsUsed[*AliasSet] != -1 &&     // Spill aliased register.
+          PhysRegsUsed[*AliasSet] != -2)       // If allocatable.
+          if (PhysRegsUsed[*AliasSet])
+            spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+  }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now.  The physical
+/// register must not be used for anything else when this is called.
+///
+void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+  assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+  // Update information to note the fact that this register was just used, and
+  // it holds VirtReg.
+  PhysRegsUsed[PhysReg] = VirtReg;
+  getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+  AddToPhysRegsUseOrder(PhysReg);   // New use of PhysReg
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use.  This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
+  if (PhysRegsUsed[PhysReg] != -1) return false;
+
+  // If the selected register aliases any other allocated registers, it is
+  // not free!
+  for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet)
+    if (PhysRegsUsed[*AliasSet] != -1) // Aliased register in use?
+      return false;                    // Can't use this reg then.
+  return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class.  If not, return 0.
+///
+unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
+  // Get iterators defining the range of registers that are valid to allocate in
+  // this class, which also specifies the preferred allocation order.
+  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+  for (; RI != RE; ++RI)
+    if (isPhysRegAvailable(*RI)) {       // Is reg unused?
+      assert(*RI != 0 && "Cannot use register!");
+      return *RI; // Found an unused register!
+    }
+  return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register.  If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+                         unsigned VirtReg) {
+  const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+
+  // First check to see if we have a free register of the requested type...
+  unsigned PhysReg = getFreeReg(RC);
+
+  // If we didn't find an unused register, scavenge one now!
+  if (PhysReg == 0) {
+    assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+
+    // Loop over all of the preallocated registers from the least recently used
+    // to the most recently used.  When we find one that is capable of holding
+    // our register, use it.
+    for (unsigned i = 0; PhysReg == 0; ++i) {
+      assert(i != PhysRegsUseOrder.size() &&
+             "Couldn't find a register of the appropriate class!");
+
+      unsigned R = PhysRegsUseOrder[i];
+
+      // We can only use this register if it holds a virtual register (ie, it
+      // can be spilled).  Do not use it if it is an explicitly allocated
+      // physical register!
+      assert(PhysRegsUsed[R] != -1 &&
+             "PhysReg in PhysRegsUseOrder, but is not allocated?");
+      if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+        // If the current register is compatible, use it.
+        if (RC->contains(R)) {
+          PhysReg = R;
+          break;
+        } else {
+          // If one of the registers aliased to the current register is
+          // compatible, use it.
+          for (const unsigned *AliasIt = RegInfo->getAliasSet(R);
+               *AliasIt; ++AliasIt) {
+            if (RC->contains(*AliasIt) &&
+                // If this is pinned down for some reason, don't use it.  For
+                // example, if CL is pinned, and we run across CH, don't use
+                // CH as justification for scavenging ECX (which will fail).
+                PhysRegsUsed[*AliasIt] != 0 &&
+                
+                // Make sure the register is allocatable.  Don't allocate SIL on
+                // x86-32.
+                PhysRegsUsed[*AliasIt] != -2) {
+              PhysReg = *AliasIt;    // Take an aliased register
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    assert(PhysReg && "Physical register not assigned!?!?");
+
+    // At this point PhysRegsUseOrder[i] is the least recently used register of
+    // a compatible register class.  Spill it to memory and reap its remains.
+    spillPhysReg(MBB, I, PhysReg);
+  }
+
+  // Now that we know which register we need to assign this to, do it now!
+  assignVirtToPhysReg(VirtReg, PhysReg);
+  return PhysReg;
+}
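+
+// A small illustrative trace (the register names are hypothetical): if
+// PhysRegsUseOrder is {R3, R7, R2}, ordered least to most recently used, and
+// R3 and R2 belong to the requested class while R7 does not, the scavenging
+// loop above stops at R3, spills it, and hands it to the new virtual register,
+// because R3 is the least recently used compatible register.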
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register.  This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register.  If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register.  If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself.  It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value.  This method returns the
+/// modified instruction.
+///
+MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                     unsigned OpNum) {
+  unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+  // If the virtual register is already available, just update the instruction
+  // and return.
+  if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+    MarkPhysRegRecentlyUsed(PR);          // Already have this value available!
+    MI->getOperand(OpNum).setReg(PR);  // Assign the input register
+    return MI;
+  }
+
+  // Otherwise, we need to fold it into the current instruction, or reload it.
+  // If we have registers available to hold the value, use them.
+  const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+  unsigned PhysReg = getFreeReg(RC);
+  int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+  if (PhysReg) {   // Register is available, allocate it!
+    assignVirtToPhysReg(VirtReg, PhysReg);
+  } else {         // No registers available.
+    // If we can fold this spill into this instruction, do so now.
+    if (MachineInstr* FMI = RegInfo->foldMemoryOperand(MI, OpNum, FrameIndex)){
+      ++NumFolded;
+      // Since we changed the address of MI, make sure to update live variables
+      // to know that the new instruction has the properties of the old one.
+      LV->instructionChanged(MI, FMI);
+      return MBB.insert(MBB.erase(MI), FMI);
+    }
+
+    // It looks like we can't fold this virtual register load into this
+    // instruction.  Force some poor hapless value out of the register file to
+    // make room for the new register, and reload it.
+    PhysReg = getReg(MBB, MI, VirtReg);
+  }
+
+  markVirtRegModified(VirtReg, false);   // Note that this reg was just reloaded
+
+  DOUT << "  Reloading %reg" << VirtReg << " into "
+       << RegInfo->getName(PhysReg) << "\n";
+
+  // Add move instruction(s)
+  RegInfo->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+  ++NumLoads;    // Update statistics
+
+  MF->setPhysRegUsed(PhysReg);
+  MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
+  return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+        MO.isDef() && !MO.isDead())
+      return true;
+  }
+  return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+        !MO.isDef() && MO.isKill())
+      return true;
+  }
+  return false;
+}
+
+void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
+  // loop over each instruction
+  MachineBasicBlock::iterator MII = MBB.begin();
+  const TargetInstrInfo &TII = *TM->getInstrInfo();
+  
+  DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+        if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+  // If this is the first basic block in the machine function, add live-in
+  // registers as active.
+  if (&MBB == &*MF->begin()) {
+    for (MachineFunction::livein_iterator I = MF->livein_begin(),
+         E = MF->livein_end(); I != E; ++I) {
+      unsigned Reg = I->first;
+      MF->setPhysRegUsed(Reg);
+      PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+      AddToPhysRegsUseOrder(Reg); 
+      for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+           *AliasSet; ++AliasSet) {
+        if (PhysRegsUsed[*AliasSet] != -2) {
+          AddToPhysRegsUseOrder(*AliasSet); 
+          PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+          MF->setPhysRegUsed(*AliasSet);
+        }
+      }
+    }    
+  }
+  
+  // Otherwise, sequentially allocate each instruction in the MBB.
+  while (MII != MBB.end()) {
+    MachineInstr *MI = MII++;
+    const TargetInstrDescriptor &TID = TII.get(MI->getOpcode());
+    DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI;
+          DOUT << "  Regs have values: ";
+          for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i)
+            if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+               DOUT << "[" << RegInfo->getName(i)
+                    << ",%reg" << PhysRegsUsed[i] << "] ";
+          DOUT << "\n");
+
+    // Loop over the implicit uses, marking them as recently used so that they
+    // don't get reallocated out from under the instruction.
+    if (TID.ImplicitUses) {
+      for (const unsigned *ImplicitUses = TID.ImplicitUses;
+           *ImplicitUses; ++ImplicitUses)
+        MarkPhysRegRecentlyUsed(*ImplicitUses);
+    }
+
+    SmallVector<unsigned, 8> Kills;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isKill()) {
+        if (!MO.isImplicit())
+          Kills.push_back(MO.getReg());
+        else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+          // Implicit kills that are part of a read/mod/write of a larger
+          // register (i.e. the extra kills added when a sub-register is
+          // defined) are ignored; all other implicit kills are recorded.
+          Kills.push_back(MO.getReg());
+      }
+    }
+
+    // Get the used operands into registers.  This has the potential to spill
+    // incoming values if we are out of registers.  Note that we completely
+    // ignore physical register uses here.  We assume that if an explicit
+    // physical register is referenced by the instruction, that it is guaranteed
+    // to be live-in, or the input is badly hosed.
+    //
+    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      // here we are looking for only used operands (never def&use)
+      if (MO.isRegister() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+          MRegisterInfo::isVirtualRegister(MO.getReg()))
+        MI = reloadVirtReg(MBB, MI, i);
+    }
+
+    // If this instruction is the last user of this register, kill the
+    // value, freeing the register being used, so it doesn't need to be
+    // spilled to memory.
+    //
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+      unsigned VirtReg = Kills[i];
+      unsigned PhysReg = VirtReg;
+      if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+        // If the virtual register was never materialized into a register, it
+        // might not be in the map, but it won't hurt to zero it out anyway.
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      } else {
+        assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+               "Silently clearing a virtual register?");
+      }
+
+      if (PhysReg) {
+        DOUT << "  Last use of " << RegInfo->getName(PhysReg)
+             << "[%reg" << VirtReg <<"], removing it from live set\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            DOUT  << "  Last use of "
+                  << RegInfo->getName(*AliasSet)
+                  << "[%reg" << VirtReg <<"], removing it from live set\n";
+            removePhysReg(*AliasSet);
+          }
+        }
+      }
+    }
+
+    // Loop over all of the operands of the instruction, spilling registers that
+    // are defined, and marking explicit destinations in the PhysRegsUsed map.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+          MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        unsigned Reg = MO.getReg();
+        if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+        // These are extra physical register defs when a sub-register
+        // is defined (def of a sub-register is a read/mod/write of the
+        // larger registers). Ignore.
+        if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+        MF->setPhysRegUsed(Reg);
+        spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+        PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+        AddToPhysRegsUseOrder(Reg); 
+
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            MF->setPhysRegUsed(*AliasSet);
+            PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+            AddToPhysRegsUseOrder(*AliasSet); 
+          }
+        }
+      }
+    }
+
+    // Loop over the implicit defs, spilling them as well.
+    if (TID.ImplicitDefs) {
+      for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+           *ImplicitDefs; ++ImplicitDefs) {
+        unsigned Reg = *ImplicitDefs;
+        if (PhysRegsUsed[Reg] != -2) {
+          spillPhysReg(MBB, MI, Reg, true);
+          AddToPhysRegsUseOrder(Reg); 
+          PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+        }
+        MF->setPhysRegUsed(Reg);
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            AddToPhysRegsUseOrder(*AliasSet); 
+            PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+            MF->setPhysRegUsed(*AliasSet);
+          }
+        }
+      }
+    }
+
+    SmallVector<unsigned, 8> DeadDefs;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDead())
+        DeadDefs.push_back(MO.getReg());
+    }
+
+    // Okay, we have allocated all of the source operands and spilled any values
+    // that would be destroyed by defs of this instruction.  Loop over the
+    // explicit defs and assign them to a register, spilling incoming values if
+    // we need to scavenge a register.
+    //
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDef() && MO.getReg() &&
+          MRegisterInfo::isVirtualRegister(MO.getReg())) {
+        unsigned DestVirtReg = MO.getReg();
+        unsigned DestPhysReg;
+
+        // If DestVirtReg already has a value, use it.
+        if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+          DestPhysReg = getReg(MBB, MI, DestVirtReg);
+        MF->setPhysRegUsed(DestPhysReg);
+        markVirtRegModified(DestVirtReg);
+        MI->getOperand(i).setReg(DestPhysReg);  // Assign the output register
+      }
+    }
+
+    // If this instruction defines any registers that are immediately dead,
+    // kill them now.
+    //
+    for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+      unsigned VirtReg = DeadDefs[i];
+      unsigned PhysReg = VirtReg;
+      if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        assert(PhysReg != 0);
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      }
+
+      if (PhysReg) {
+        DOUT  << "  Register " << RegInfo->getName(PhysReg)
+              << " [%reg" << VirtReg
+              << "] is never used, removing it frame live list\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            DOUT  << "  Register " << RegInfo->getName(*AliasSet)
+                  << " [%reg" << *AliasSet
+                  << "] is never used, removing it frame live list\n";
+            removePhysReg(*AliasSet);
+          }
+        }
+      }
+    }
+    
+    // Finally, if this is a noop copy instruction, zap it.
+    unsigned SrcReg, DstReg;
+    if (TII.isMoveInstr(*MI, SrcReg, DstReg) && SrcReg == DstReg) {
+      LV->removeVirtualRegistersKilled(MI);
+      LV->removeVirtualRegistersDead(MI);
+      MBB.erase(MI);
+    }
+  }
+
+  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+  // Spill all physical registers holding virtual registers now.
+  for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i)
+    if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+      if (unsigned VirtReg = PhysRegsUsed[i])
+        spillVirtReg(MBB, MI, VirtReg, i);
+      else
+        removePhysReg(i);
+
+#if 0
+  // This checking code is very expensive.
+  bool AllOk = true;
+  for (unsigned i = MRegisterInfo::FirstVirtualRegister,
+           e = MF->getSSARegMap()->getLastVirtReg(); i <= e; ++i)
+    if (unsigned PR = Virt2PhysRegMap[i]) {
+      cerr << "Register still mapped: " << i << " -> " << PR << "\n";
+      AllOk = false;
+    }
+  assert(AllOk && "Virtual registers still in phys regs?");
+#endif
+
+  // Clear any physical registers which appear live at the end of the basic
+  // block but which do not hold any virtual registers, e.g., the stack
+  // pointer.
+  PhysRegsUseOrder.clear();
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "Machine Function " << "\n";
+  MF = &Fn;
+  TM = &Fn.getTarget();
+  RegInfo = TM->getRegisterInfo();
+  LV = &getAnalysis<LiveVariables>();
+
+  PhysRegsUsed.assign(RegInfo->getNumRegs(), -1);
+  
+  // At various places we want to efficiently check to see whether a register
+  // is allocatable.  To handle this, we mark all unallocatable registers as
+  // being pinned down, permanently.
+  {
+    BitVector Allocable = RegInfo->getAllocatableSet(Fn);
+    for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+      if (!Allocable[i])
+        PhysRegsUsed[i] = -2;  // Mark the reg unallocable.
+  }
+
+  // initialize the virtual->physical register map to have a 'null'
+  // mapping for all virtual registers
+  Virt2PhysRegMap.grow(MF->getSSARegMap()->getLastVirtReg());
+
+  // Loop over all of the basic blocks, eliminating virtual register references
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB)
+    AllocateBasicBlock(*MBB);
+
+  StackSlotForVirtReg.clear();
+  PhysRegsUsed.clear();
+  VirtRegModified.clear();
+  Virt2PhysRegMap.clear();
+  return true;
+}
+
+FunctionPass *llvm::createLocalRegisterAllocator() {
+  return new RALocal();
+}
diff --git a/lib/CodeGen/RegAllocSimple.cpp b/lib/CodeGen/RegAllocSimple.cpp
new file mode 100644
index 0000000..f49dd4c
--- /dev/null
+++ b/lib/CodeGen/RegAllocSimple.cpp
@@ -0,0 +1,253 @@
+//===-- RegAllocSimple.cpp - A simple generic register allocator ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register allocator. *Very* simple: it
+// immediately spills every value right after it is computed, and reloads all used
+// operands from the spill area to temporary registers before each instruction.
+// It does not keep values in registers across instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+namespace {
+  static RegisterRegAlloc
+    simpleRegAlloc("simple", "  simple register allocator",
+                   createSimpleRegisterAllocator);
+
+  class VISIBILITY_HIDDEN RegAllocSimple : public MachineFunctionPass {
+  public:
+    static char ID;
+    RegAllocSimple() : MachineFunctionPass((intptr_t)&ID) {}
+  private:
+    MachineFunction *MF;
+    const TargetMachine *TM;
+    const MRegisterInfo *RegInfo;
+
+    // StackSlotForVirtReg - Maps SSA Regs => frame index on the stack where
+    // these values are spilled
+    std::map<unsigned, int> StackSlotForVirtReg;
+
+    // RegsUsed - Keep track of what registers are currently in use.  This is a
+    // bitset.
+    std::vector<bool> RegsUsed;
+
+    // RegClassIdx - Maps RegClass => which index we can take a register
+    // from. Since this is a simple register allocator, when we need a register
+    // of a certain class, we just take the next available one.
+    std::map<const TargetRegisterClass*, unsigned> RegClassIdx;
+
+  public:
+    virtual const char *getPassName() const {
+      return "Simple Register Allocator";
+    }
+
+    /// runOnMachineFunction - Register allocate the whole function
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(PHIEliminationID);           // Eliminate PHI nodes
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  private:
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+    /// getStackSpaceFor - This returns the offset of the specified virtual
+    /// register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// Given a virtual register, return a compatible physical register that is
+    /// currently unused.
+    ///
+    /// Side effect: marks that register as being used until manually cleared
+    ///
+    unsigned getFreeReg(unsigned virtualReg);
+
+    /// Moves value from memory into that register
+    unsigned reloadVirtReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I, unsigned VirtReg);
+
+    /// Saves reg value on the stack (maps virtual register to stack value)
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                      unsigned VirtReg, unsigned PhysReg);
+  };
+  char RegAllocSimple::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual
+/// register to be held on the stack.
+int RegAllocSimple::getStackSpaceFor(unsigned VirtReg,
+                                     const TargetRegisterClass *RC) {
+  // Find the location VirtReg would belong...
+  std::map<unsigned, int>::iterator I =
+    StackSlotForVirtReg.lower_bound(VirtReg);
+
+  if (I != StackSlotForVirtReg.end() && I->first == VirtReg)
+    return I->second;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment());
+
+  // Assign the slot...
+  StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx));
+
+  return FrameIdx;
+}
+
+unsigned RegAllocSimple::getFreeReg(unsigned virtualReg) {
+  const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(virtualReg);
+  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+  while (1) {
+    unsigned regIdx = RegClassIdx[RC]++;
+    assert(RI+regIdx != RE && "Not enough registers!");
+    unsigned PhysReg = *(RI+regIdx);
+
+    if (!RegsUsed[PhysReg]) {
+      MF->setPhysRegUsed(PhysReg);
+      return PhysReg;
+    }
+  }
+}
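+
+// For example (illustrative; the register names are hypothetical): with an
+// allocation order of {R0, R1, R2, ...} where R0 is already marked in RegsUsed,
+// the first request for this class returns R1 and leaves RegClassIdx pointing
+// at R2, so a later request within the same instruction continues from there.
+// RegClassIdx is cleared after each instruction (see AllocateBasicBlock), so
+// the scan restarts at R0 for the next instruction.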
+
+unsigned RegAllocSimple::reloadVirtReg(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned VirtReg) {
+  const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(VirtReg);
+  int FrameIdx = getStackSpaceFor(VirtReg, RC);
+  unsigned PhysReg = getFreeReg(VirtReg);
+
+  // Add move instruction(s)
+  ++NumLoads;
+  RegInfo->loadRegFromStackSlot(MBB, I, PhysReg, FrameIdx, RC);
+  return PhysReg;
+}
+
+void RegAllocSimple::spillVirtReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned VirtReg, unsigned PhysReg) {
+  const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(VirtReg);
+  int FrameIdx = getStackSpaceFor(VirtReg, RC);
+
+  // Add move instruction(s)
+  ++NumStores;
+  RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIdx, RC);
+}
+
+
+void RegAllocSimple::AllocateBasicBlock(MachineBasicBlock &MBB) {
+  // loop over each instruction
+  for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+    // Made to combat the incorrect allocation of r2 = add r1, r1
+    std::map<unsigned, unsigned> Virt2PhysRegMap;
+
+    RegsUsed.resize(RegInfo->getNumRegs());
+
+    // This is a preliminary pass that will invalidate any registers that are
+    // used by the instruction (including implicit uses).
+    unsigned Opcode = MI->getOpcode();
+    const TargetInstrDescriptor &Desc = TM->getInstrInfo()->get(Opcode);
+    const unsigned *Regs;
+    if (Desc.ImplicitUses) {
+      for (Regs = Desc.ImplicitUses; *Regs; ++Regs)
+        RegsUsed[*Regs] = true;
+    }
+
+    if (Desc.ImplicitDefs) {
+      for (Regs = Desc.ImplicitDefs; *Regs; ++Regs) {
+        RegsUsed[*Regs] = true;
+        MF->setPhysRegUsed(*Regs);
+      }
+    }
+
+    // Loop over uses, move from memory into registers.
+    for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+      MachineOperand &op = MI->getOperand(i);
+
+      if (op.isRegister() && op.getReg() &&
+          MRegisterInfo::isVirtualRegister(op.getReg())) {
+        unsigned virtualReg = (unsigned) op.getReg();
+        DOUT << "op: " << op << "\n";
+        DOUT << "\t inst[" << i << "]: ";
+        DEBUG(MI->print(*cerr.stream(), TM));
+
+        // make sure the same virtual register maps to the same physical
+        // register in any given instruction
+        unsigned physReg = Virt2PhysRegMap[virtualReg];
+        if (physReg == 0) {
+          if (op.isDef()) {
+            int TiedOp = MI->getInstrDescriptor()->findTiedToSrcOperand(i);
+            if (TiedOp == -1) {
+              physReg = getFreeReg(virtualReg);
+            } else {
+              // must be the same register number as the source operand it is
+              // tied to. This maps a = b + c into b = b + c, and saves b into
+              // a's spot.
+              assert(MI->getOperand(TiedOp).isRegister()  &&
+                     MI->getOperand(TiedOp).getReg() &&
+                     MI->getOperand(TiedOp).isUse() &&
+                     "Two address instruction invalid!");
+
+              physReg = MI->getOperand(TiedOp).getReg();
+            }
+            spillVirtReg(MBB, next(MI), virtualReg, physReg);
+          } else {
+            physReg = reloadVirtReg(MBB, MI, virtualReg);
+            Virt2PhysRegMap[virtualReg] = physReg;
+          }
+        }
+        MI->getOperand(i).setReg(physReg);
+        DOUT << "virt: " << virtualReg << ", phys: " << op.getReg() << "\n";
+      }
+    }
+    RegClassIdx.clear();
+    RegsUsed.clear();
+  }
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RegAllocSimple::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "Machine Function\n";
+  MF = &Fn;
+  TM = &MF->getTarget();
+  RegInfo = TM->getRegisterInfo();
+
+  // Loop over all of the basic blocks, eliminating virtual register references
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB)
+    AllocateBasicBlock(*MBB);
+
+  StackSlotForVirtReg.clear();
+  return true;
+}
+
+FunctionPass *llvm::createSimpleRegisterAllocator() {
+  return new RegAllocSimple();
+}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 0000000..ae40e58
--- /dev/null
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,292 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information such as which registers are unused at any point in a machine
+// basic block. It also provides a mechanism to make registers available by
+// evicting them to spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+  const MachineFunction &MF = *mbb->getParent();
+  const TargetMachine &TM = MF.getTarget();
+  TII = TM.getInstrInfo();
+  RegInfo = TM.getRegisterInfo();
+
+  assert((NumPhysRegs == 0 || NumPhysRegs == RegInfo->getNumRegs()) &&
+         "Target changed?");
+
+  if (!MBB) {
+    NumPhysRegs = RegInfo->getNumRegs();
+    RegsAvailable.resize(NumPhysRegs);
+
+    // Create reserved registers bitvector.
+    ReservedRegs = RegInfo->getReservedRegs(MF);
+
+    // Create callee-saved registers bitvector.
+    CalleeSavedRegs.resize(NumPhysRegs);
+    const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+    if (CSRegs != NULL)
+      for (unsigned i = 0; CSRegs[i]; ++i)
+        CalleeSavedRegs.set(CSRegs[i]);
+  }
+
+  MBB = mbb;
+  ScavengedReg = 0;
+  ScavengedRC = NULL;
+
+  // All registers started out unused.
+  RegsAvailable.set();
+
+  // Reserved registers are always used.
+  RegsAvailable ^= ReservedRegs;
+
+  // Live-in registers are in use.
+  if (!MBB->livein_empty())
+    for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+           E = MBB->livein_end(); I != E; ++I)
+      setUsed(*I);
+
+  Tracking = false;
+}
+
+void RegScavenger::restoreScavengedReg() {
+  if (!ScavengedReg)
+    return;
+
+  RegInfo->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg,
+                                ScavengingFrameIndex, ScavengedRC);
+  MachineBasicBlock::iterator II = prior(MBBI);
+  RegInfo->eliminateFrameIndex(II, 0, this);
+  setUsed(ScavengedReg);
+  ScavengedReg = 0;
+  ScavengedRC = NULL;
+}
+
+void RegScavenger::forward() {
+  // Move ptr forward.
+  if (!Tracking) {
+    MBBI = MBB->begin();
+    Tracking = true;
+  } else {
+    assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+    MBBI = next(MBBI);
+  }
+
+  MachineInstr *MI = MBBI;
+
+  // If we have reached a terminator instruction, restore the scavenged
+  // register (which must be live out).
+  if (TII->isTerminatorInstr(MI->getOpcode()))
+    restoreScavengedReg();
+
+  // Process uses first.
+  BitVector ChangedRegs(NumPhysRegs);
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+    if (!isUsed(Reg)) {
+      // Register has been scavenged. Restore it!
+      if (Reg != ScavengedReg)
+        assert(false && "Using an undefined register!");
+      else
+        restoreScavengedReg();
+    }
+    if (MO.isKill() && !isReserved(Reg))
+      ChangedRegs.set(Reg);
+  }
+  // Change states of all registers after all the uses are processed to guard
+  // against multiple uses.
+  setUnused(ChangedRegs);
+
+  // Process defs.
+  const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    // If it's dead upon def, then it is now free.
+    if (MO.isDead()) {
+      setUnused(Reg);
+      continue;
+    }
+    // Skip two-address destination operand.
+    if (TID->findTiedToSrcOperand(i) != -1) {
+      assert(isUsed(Reg) && "Using an undefined register!");
+      continue;
+    }
+    assert((isUnused(Reg) || isReserved(Reg)) &&
+           "Re-defining a live register!");
+    setUsed(Reg);
+  }
+}
+
+void RegScavenger::backward() {
+  assert(Tracking && "Not tracking states!");
+  assert(MBBI != MBB->begin() && "Already at start of basic block!");
+  // Move ptr backward.
+  MBBI = prior(MBBI);
+
+  MachineInstr *MI = MBBI;
+  // Process defs first.
+  const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    // Skip two-address destination operand.
+    if (TID->findTiedToSrcOperand(i) != -1)
+      continue;
+    unsigned Reg = MO.getReg();
+    assert(isUsed(Reg));
+    if (!isReserved(Reg))
+      setUnused(Reg);
+  }
+
+  // Process uses.
+  BitVector ChangedRegs(NumPhysRegs);
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+    assert(isUnused(Reg) || isReserved(Reg));
+    ChangedRegs.set(Reg);
+  }
+  setUsed(ChangedRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+  if (includeReserved)
+    used = ~RegsAvailable;
+  else
+    used = ~RegsAvailable & ~ReservedRegs;
+}
+
+/// CreateRegClassMask - Set the bits that represent the registers in the
+/// TargetRegisterClass.
+static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
+       ++I)
+    Mask.set(*I);
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+                                     const BitVector &Candidates) const {
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector RegsAvailableCopy(NumPhysRegs, false);
+  CreateRegClassMask(RegClass, RegsAvailableCopy);
+  RegsAvailableCopy &= RegsAvailable;
+
+  // Restrict the search to candidates.
+  RegsAvailableCopy &= Candidates;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+  int Reg = RegsAvailableCopy.find_first();
+  return (Reg == -1) ? 0 : Reg;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+                                     bool ExCalleeSaved) const {
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector RegsAvailableCopy(NumPhysRegs, false);
+  CreateRegClassMask(RegClass, RegsAvailableCopy);
+  RegsAvailableCopy &= RegsAvailable;
+
+  // If looking for a non-callee-saved register, mask off all the callee-saved
+  // registers.
+  if (ExCalleeSaved)
+    RegsAvailableCopy &= ~CalleeSavedRegs;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+  int Reg = RegsAvailableCopy.find_first();
+  return (Reg == -1) ? 0 : Reg;
+}
+
+/// calcDistanceToUse - Calculate the distance to the first use of the
+/// specified register.
+static unsigned calcDistanceToUse(MachineBasicBlock *MBB,
+                                  MachineBasicBlock::iterator I, unsigned Reg) {
+  unsigned Dist = 0;
+  I = next(I);
+  while (I != MBB->end()) {
+    Dist++;
+    if (I->findRegisterUseOperandIdx(Reg) != -1)
+      return Dist;
+    I = next(I);    
+  }
+  return Dist + 1;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+                                        MachineBasicBlock::iterator I,
+                                        int SPAdj) {
+  assert(ScavengingFrameIndex >= 0 &&
+         "Cannot scavenge a register without an emergency spill slot!");
+
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector Candidates(NumPhysRegs, false);
+  CreateRegClassMask(RC, Candidates);
+  Candidates ^= ReservedRegs;  // Do not include reserved registers.
+
+  // Exclude all the registers being used by the instruction.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = I->getOperand(i);
+    if (MO.isReg())
+      Candidates.reset(MO.getReg());
+  }
+
+  // Find the register whose first use is furthest away.
+  unsigned SReg = 0;
+  unsigned MaxDist = 0;
+  int Reg = Candidates.find_first();
+  while (Reg != -1) {
+    unsigned Dist = calcDistanceToUse(MBB, I, Reg);
+    if (Dist >= MaxDist) {
+      MaxDist = Dist;
+      SReg = Reg;
+    }
+    Reg = Candidates.find_next(Reg);
+  }
+
+  if (ScavengedReg != 0) {
+    // First restore previously scavenged register.
+    RegInfo->loadRegFromStackSlot(*MBB, I, ScavengedReg,
+                                  ScavengingFrameIndex, ScavengedRC);
+    MachineBasicBlock::iterator II = prior(I);
+    RegInfo->eliminateFrameIndex(II, SPAdj, this);
+  }
+
+  RegInfo->storeRegToStackSlot(*MBB, I, SReg, ScavengingFrameIndex, RC);
+  MachineBasicBlock::iterator II = prior(I);
+  RegInfo->eliminateFrameIndex(II, SPAdj, this);
+  ScavengedReg = SReg;
+  ScavengedRC = RC;
+
+  return SReg;
+}
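+
+// Typical use (an illustrative sketch; the register class name is
+// hypothetical): a target's frame index elimination can ask the scavenger for
+// a temporary like so:
+//
+//   RegScavenger RS;
+//   RS.enterBasicBlock(&MBB);
+//   // ... call RS.forward() in step with the instructions being rewritten ...
+//   unsigned Tmp = RS.scavengeRegister(&SomeIntRegClass, MI, /*SPAdj=*/0);
+//   // Tmp is now free to use; the evicted value has been stored to the
+//   // emergency spill slot and is reloaded automatically before its next use.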
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
new file mode 100644
index 0000000..defbe34
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -0,0 +1,102 @@
+//===-- llvm/CallingConvLower.cpp - Calling Conventions -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm,
+                 SmallVector<CCValAssign, 16> &locs)
+  : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+    MRI(*TM.getRegisterInfo()), Locs(locs) {
+  // No stack is used.
+  StackOffset = 0;
+  
+  UsedRegs.resize(MRI.getNumRegs());
+}
+
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+  UsedRegs[Reg/32] |= 1 << (Reg&31);
+  
+  if (const unsigned *RegAliases = MRI.getAliasSet(Reg))
+    for (; (Reg = *RegAliases); ++RegAliases)
+      UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
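+
+// Worked example for the bit arithmetic above: with Reg == 37 the bit lands in
+// word 37/32 == 1 at position 37&31 == 5, i.e. UsedRegs[1] |= 1 << 5.  Each
+// alias of Reg is folded into UsedRegs the same way.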
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) {
+  unsigned NumArgs = TheArgs->getNumValues()-1;
+  
+  for (unsigned i = 0; i != NumArgs; ++i) {
+    MVT::ValueType ArgVT = TheArgs->getValueType(i);
+    SDOperand FlagOp = TheArgs->getOperand(3+i);
+    unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+      cerr << "Formal argument #" << i << " has unhandled type "
+           << MVT::getValueTypeString(ArgVT) << "\n";
+      abort();
+    }
+  }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) {
+  // Determine which register each value should be copied into.
+  for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) {
+    MVT::ValueType VT = TheRet->getOperand(i*2+1).getValueType();
+    if (Fn(i, VT, VT, CCValAssign::Full,
+           cast<ConstantSDNode>(TheRet->getOperand(i*2+2))->getValue(), *this)){
+      cerr << "Return operand #" << i << " has unhandled type "
+           << MVT::getValueTypeString(VT) << "\n";
+      abort();
+    }
+  }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void CCState::AnalyzeCallOperands(SDNode *TheCall, CCAssignFn Fn) {
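+  // Skip the fixed leading operands of ISD::CALL; each argument then
+  // contributes a (value, flags) operand pair, hence the division by two.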
+  unsigned NumOps = (TheCall->getNumOperands() - 5) / 2;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType();
+    SDOperand FlagOp = TheCall->getOperand(5+2*i+1);
+    unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+      cerr << "Call operand #" << i << " has unhandled type "
+           << MVT::getValueTypeString(ArgVT) << "\n";
+      abort();
+    }
+  }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(SDNode *TheCall, CCAssignFn Fn) {
+  for (unsigned i = 0, e = TheCall->getNumValues() - 1; i != e; ++i) {
+    MVT::ValueType VT = TheCall->getValueType(i);
+    if (Fn(i, VT, VT, CCValAssign::Full, 0, *this)) {
+      cerr << "Call result #" << i << " has unhandled type "
+           << MVT::getValueTypeString(VT) << "\n";
+      abort();
+    }
+  }
+}
+
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000..22c6e6b
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,4749 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
+// both before and after the DAG is legalized.
+//
+// FIXME: Missing folds
+// sdiv, udiv, srem, urem (X, const) where X is an integer can be expanded into
+//  a sequence of multiplies, shifts, and adds.  This should be controlled by
+//  some kind of hint from the target that int div is expensive.
+// various folds of mulh[s,u] by constants such as -1, powers of 2, etc.
+//
+// FIXME: select C, pow2, pow2 -> something smart
+// FIXME: trunc(select X, Y, Z) -> select X, trunc(Y), trunc(Z)
+// FIXME: Dead stores -> nuke
+// FIXME: shr X, (and Y,31) -> shr X, Y   (TRICKY!)
+// FIXME: mul (x, const) -> shifts + adds
+// FIXME: undef values
+// FIXME: divide by zero is currently left unfolded.  do we want to turn this
+//        into an undef?
+// FIXME: select ne (select cc, 1, 0), 0, true, false -> select cc, true, false
+// 
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined   , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+
+namespace {
+#ifndef NDEBUG
+  static cl::opt<bool>
+    ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+                    cl::desc("Pop up a window to show dags before the first "
+                             "dag combine pass"));
+  static cl::opt<bool>
+    ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+                    cl::desc("Pop up a window to show dags before the second "
+                             "dag combine pass"));
+#else
+  static const bool ViewDAGCombine1 = false;
+  static const bool ViewDAGCombine2 = false;
+#endif
+  
+  static cl::opt<bool>
+    CombinerAA("combiner-alias-analysis", cl::Hidden,
+               cl::desc("Turn on alias analysis during testing"));
+
+  static cl::opt<bool>
+    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+               cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+  class VISIBILITY_HIDDEN DAGCombiner {
+    SelectionDAG &DAG;
+    TargetLowering &TLI;
+    bool AfterLegalize;
+
+    // Worklist of all of the nodes that need to be simplified.
+    std::vector<SDNode*> WorkList;
+
+    // AA - Used for DAG load/store alias analysis.
+    AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users of
+    /// the instruction to the worklist, because they might now be further
+    /// simplified.
+    ///
+    void AddUsersToWorkList(SDNode *N) {
+      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+           UI != UE; ++UI)
+        AddToWorkList(*UI);
+    }
+
+    /// removeFromWorkList - remove all instances of N from the worklist.
+    ///
+    void removeFromWorkList(SDNode *N) {
+      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+                     WorkList.end());
+    }
+    
+  public:
+    /// AddToWorkList - Add to the worklist, making sure its instance is at
+    /// the back (next to be processed).
+    void AddToWorkList(SDNode *N) {
+      removeFromWorkList(N);
+      WorkList.push_back(N);
+    }
+
+    SDOperand CombineTo(SDNode *N, const SDOperand *To, unsigned NumTo,
+                        bool AddTo = true) {
+      assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+      ++NodesCombined;
+      DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
+      DOUT << "\nWith: "; DEBUG(To[0].Val->dump(&DAG));
+      DOUT << " and " << NumTo-1 << " other values\n";
+      std::vector<SDNode*> NowDead;
+      DAG.ReplaceAllUsesWith(N, To, &NowDead);
+      
+      if (AddTo) {
+        // Push the new nodes and any users onto the worklist
+        for (unsigned i = 0, e = NumTo; i != e; ++i) {
+          AddToWorkList(To[i].Val);
+          AddUsersToWorkList(To[i].Val);
+        }
+      }
+      
+      // Nodes can be reintroduced into the worklist.  Make sure we do not
+      // process a node that has been replaced.
+      removeFromWorkList(N);
+      for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+        removeFromWorkList(NowDead[i]);
+      
+      // Finally, since the node is now dead, remove it from the graph.
+      DAG.DeleteNode(N);
+      return SDOperand(N, 0);
+    }
+    
+    SDOperand CombineTo(SDNode *N, SDOperand Res, bool AddTo = true) {
+      return CombineTo(N, &Res, 1, AddTo);
+    }
+    
+    SDOperand CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1,
+                        bool AddTo = true) {
+      SDOperand To[] = { Res0, Res1 };
+      return CombineTo(N, To, 2, AddTo);
+    }
+  private:    
+    
+    /// SimplifyDemandedBits - Check the specified integer node value to see if
+    /// it can be simplified or if things it uses can be simplified by bit
+    /// propagation.  If so, return true.
+    bool SimplifyDemandedBits(SDOperand Op) {
+      TargetLowering::TargetLoweringOpt TLO(DAG);
+      uint64_t KnownZero, KnownOne;
+      uint64_t Demanded = MVT::getIntVTBitMask(Op.getValueType());
+      if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+        return false;
+
+      // Revisit the node.
+      AddToWorkList(Op.Val);
+      
+      // Replace the old value with the new one.
+      ++NodesCombined;
+      DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.Val->dump(&DAG));
+      DOUT << "\nWith: "; DEBUG(TLO.New.Val->dump(&DAG));
+      DOUT << '\n';
+
+      std::vector<SDNode*> NowDead;
+      DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, NowDead);
+      
+      // Push the new node and any (possibly new) users onto the worklist.
+      AddToWorkList(TLO.New.Val);
+      AddUsersToWorkList(TLO.New.Val);
+      
+      // Nodes can end up on the worklist more than once.  Make sure we do
+      // not process a node that has been replaced.
+      for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+        removeFromWorkList(NowDead[i]);
+      
+      // Finally, if the node is now dead, remove it from the graph.  The node
+      // may not be dead if the replacement process recursively simplified to
+      // something else needing this node.
+      if (TLO.Old.Val->use_empty()) {
+        removeFromWorkList(TLO.Old.Val);
+        
+        // If the operands of this node are only used by the node, they will now
+        // be dead.  Make sure to visit them first to delete dead nodes early.
+        for (unsigned i = 0, e = TLO.Old.Val->getNumOperands(); i != e; ++i)
+          if (TLO.Old.Val->getOperand(i).Val->hasOneUse())
+            AddToWorkList(TLO.Old.Val->getOperand(i).Val);
+        
+        DAG.DeleteNode(TLO.Old.Val);
+      }
+      return true;
+    }
+
+    bool CombineToPreIndexedLoadStore(SDNode *N);
+    bool CombineToPostIndexedLoadStore(SDNode *N);
+    
+    
+    /// visit - call the node-specific routine that knows how to fold each
+    /// particular type of node.
+    SDOperand visit(SDNode *N);
+
+    // Visitation implementation - Implement dag node combining for different
+    // node types.  The semantics are as follows:
+    // Return Value:
+    //   SDOperand.Val == 0   - No change was made
+    //   SDOperand.Val == N   - N was replaced, is dead, and is already handled.
+    //   otherwise            - N should be replaced by the returned Operand.
+    //
+    SDOperand visitTokenFactor(SDNode *N);
+    SDOperand visitADD(SDNode *N);
+    SDOperand visitSUB(SDNode *N);
+    SDOperand visitADDC(SDNode *N);
+    SDOperand visitADDE(SDNode *N);
+    SDOperand visitMUL(SDNode *N);
+    SDOperand visitSDIV(SDNode *N);
+    SDOperand visitUDIV(SDNode *N);
+    SDOperand visitSREM(SDNode *N);
+    SDOperand visitUREM(SDNode *N);
+    SDOperand visitMULHU(SDNode *N);
+    SDOperand visitMULHS(SDNode *N);
+    SDOperand visitAND(SDNode *N);
+    SDOperand visitOR(SDNode *N);
+    SDOperand visitXOR(SDNode *N);
+    SDOperand SimplifyVBinOp(SDNode *N);
+    SDOperand visitSHL(SDNode *N);
+    SDOperand visitSRA(SDNode *N);
+    SDOperand visitSRL(SDNode *N);
+    SDOperand visitCTLZ(SDNode *N);
+    SDOperand visitCTTZ(SDNode *N);
+    SDOperand visitCTPOP(SDNode *N);
+    SDOperand visitSELECT(SDNode *N);
+    SDOperand visitSELECT_CC(SDNode *N);
+    SDOperand visitSETCC(SDNode *N);
+    SDOperand visitSIGN_EXTEND(SDNode *N);
+    SDOperand visitZERO_EXTEND(SDNode *N);
+    SDOperand visitANY_EXTEND(SDNode *N);
+    SDOperand visitSIGN_EXTEND_INREG(SDNode *N);
+    SDOperand visitTRUNCATE(SDNode *N);
+    SDOperand visitBIT_CONVERT(SDNode *N);
+    SDOperand visitFADD(SDNode *N);
+    SDOperand visitFSUB(SDNode *N);
+    SDOperand visitFMUL(SDNode *N);
+    SDOperand visitFDIV(SDNode *N);
+    SDOperand visitFREM(SDNode *N);
+    SDOperand visitFCOPYSIGN(SDNode *N);
+    SDOperand visitSINT_TO_FP(SDNode *N);
+    SDOperand visitUINT_TO_FP(SDNode *N);
+    SDOperand visitFP_TO_SINT(SDNode *N);
+    SDOperand visitFP_TO_UINT(SDNode *N);
+    SDOperand visitFP_ROUND(SDNode *N);
+    SDOperand visitFP_ROUND_INREG(SDNode *N);
+    SDOperand visitFP_EXTEND(SDNode *N);
+    SDOperand visitFNEG(SDNode *N);
+    SDOperand visitFABS(SDNode *N);
+    SDOperand visitBRCOND(SDNode *N);
+    SDOperand visitBR_CC(SDNode *N);
+    SDOperand visitLOAD(SDNode *N);
+    SDOperand visitSTORE(SDNode *N);
+    SDOperand visitINSERT_VECTOR_ELT(SDNode *N);
+    SDOperand visitBUILD_VECTOR(SDNode *N);
+    SDOperand visitCONCAT_VECTORS(SDNode *N);
+    SDOperand visitVECTOR_SHUFFLE(SDNode *N);
+
+    SDOperand XformToShuffleWithZero(SDNode *N);
+    SDOperand ReassociateOps(unsigned Opc, SDOperand LHS, SDOperand RHS);
+    
+    bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS);
+    SDOperand SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+    SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2);
+    SDOperand SimplifySelectCC(SDOperand N0, SDOperand N1, SDOperand N2, 
+                               SDOperand N3, ISD::CondCode CC, 
+                               bool NotExtCompare = false);
+    SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
+                            ISD::CondCode Cond, bool foldBooleans = true);
+    SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType);
+    SDOperand BuildSDIV(SDNode *N);
+    SDOperand BuildUDIV(SDNode *N);
+    SDNode *MatchRotate(SDOperand LHS, SDOperand RHS);
+    SDOperand ReduceLoadWidth(SDNode *N);
+    
+    /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+    /// looking for aliasing nodes and adding them to the Aliases vector.
+    void GatherAllAliases(SDNode *N, SDOperand OriginalChain,
+                          SmallVector<SDOperand, 8> &Aliases);
+
+    /// isAlias - Return true if there is any possibility that the two addresses
+    /// overlap.
+    bool isAlias(SDOperand Ptr1, int64_t Size1,
+                 const Value *SrcValue1, int SrcValueOffset1,
+                 SDOperand Ptr2, int64_t Size2,
+                 const Value *SrcValue2, int SrcValueOffset2);
+                 
+    /// FindAliasInfo - Extracts the relevant alias information from the memory
+    /// node.  Returns true if the operand was a load.
+    bool FindAliasInfo(SDNode *N,
+                       SDOperand &Ptr, int64_t &Size,
+                       const Value *&SrcValue, int &SrcValueOffset);
+                       
+    /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+    /// looking for a better chain (aliasing node).
+    SDOperand FindBetterChain(SDNode *N, SDOperand Chain);
+    
+public:
+    DAGCombiner(SelectionDAG &D, AliasAnalysis &A)
+      : DAG(D),
+        TLI(D.getTargetLoweringInfo()),
+        AfterLegalize(false),
+        AA(A) {}
+    
+    /// Run - runs the dag combiner on all nodes in the work list
+    void Run(bool RunningAfterLegalize); 
+  };
+}
+
+//===----------------------------------------------------------------------===//
+//  TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+  ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDOperand> &To) {
+  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size());
+}
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDOperand Res) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res);
+}
+
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, 2 if we
+/// can compute the negated form more cheaply than the expression itself, and
+/// 0 otherwise.
+static char isNegatibleForFree(SDOperand Op, unsigned Depth = 0) {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return 2;
+  
+  // Don't allow anything with multiple uses.
+  if (!Op.hasOneUse()) return 0;
+  
+  // Don't recurse exponentially.
+  if (Depth > 6) return 0;
+  
+  switch (Op.getOpcode()) {
+  default: return 0;
+  case ISD::ConstantFP:
+    return 1;
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    if (!UnsafeFPMath) return 0;
+    
+    // -(A+B) -> -A - B
+    if (char V = isNegatibleForFree(Op.getOperand(0), Depth+1))
+      return V;
+    // -(A+B) -> -B - A
+    return isNegatibleForFree(Op.getOperand(1), Depth+1);
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros. 
+    if (!UnsafeFPMath) return 0;
+    
+    // -(A-B) -> B-A
+    return 1;
+    
+  case ISD::FMUL:
+  case ISD::FDIV:
+    if (HonorSignDependentRoundingFPMath()) return 0;
+    
+    // -(X*Y) -> (-X * Y) or (X*-Y)
+    if (char V = isNegatibleForFree(Op.getOperand(0), Depth+1))
+      return V;
+      
+    return isNegatibleForFree(Op.getOperand(1), Depth+1);
+    
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FSIN:
+    return isNegatibleForFree(Op.getOperand(0), Depth+1);
+  }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returns non-zero, this function
+/// returns the newly negated expression.
+static SDOperand GetNegatedExpression(SDOperand Op, SelectionDAG &DAG,
+                                      unsigned Depth = 0) {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+  
+  // Don't allow anything with multiple uses.
+  assert(Op.hasOneUse() && "Unknown reuse!");
+  
+  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Unknown code");
+  case ISD::ConstantFP:
+    return DAG.getConstantFP(-cast<ConstantFPSDNode>(Op)->getValue(),
+                             Op.getValueType());
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    assert(UnsafeFPMath);
+    
+    // -(A+B) -> -A - B
+    if (isNegatibleForFree(Op.getOperand(0), Depth+1))
+      return DAG.getNode(ISD::FSUB, Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG, Depth+1),
+                         Op.getOperand(1));
+    // -(A+B) -> -B - A
+    return DAG.getNode(ISD::FSUB, Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(1), DAG, Depth+1),
+                       Op.getOperand(0));
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros. 
+    assert(UnsafeFPMath);
+
+    // -(0-B) -> B
+    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+      if (N0CFP->getValue() == 0.0)
+        return Op.getOperand(1);
+    
+    // -(A-B) -> B-A
+    return DAG.getNode(ISD::FSUB, Op.getValueType(), Op.getOperand(1),
+                       Op.getOperand(0));
+    
+  case ISD::FMUL:
+  case ISD::FDIV:
+    assert(!HonorSignDependentRoundingFPMath());
+    
+    // -(X*Y) -> -X * Y
+    if (isNegatibleForFree(Op.getOperand(0), Depth+1))
+      return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG, Depth+1),
+                         Op.getOperand(1));
+      
+    // -(X*Y) -> X * -Y
+    return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+                       Op.getOperand(0),
+                       GetNegatedExpression(Op.getOperand(1), DAG, Depth+1));
+    
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FSIN:
+    return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(0), DAG, Depth+1));
+  }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate 
+// nodes based on the type of node we are checking.  This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDOperand N, SDOperand &LHS, SDOperand &RHS,
+                              SDOperand &CC) {
+  if (N.getOpcode() == ISD::SETCC) {
+    LHS = N.getOperand(0);
+    RHS = N.getOperand(1);
+    CC  = N.getOperand(2);
+    return true;
+  }
+  if (N.getOpcode() == ISD::SELECT_CC && 
+      N.getOperand(2).getOpcode() == ISD::Constant &&
+      N.getOperand(3).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(N.getOperand(2))->getValue() == 1 &&
+      cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+    LHS = N.getOperand(0);
+    RHS = N.getOperand(1);
+    CC  = N.getOperand(4);
+    return true;
+  }
+  return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use.  If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDOperand N) {
+  SDOperand N0, N1, N2;
+  if (isSetCCEquivalent(N, N0, N1, N2) && N.Val->hasOneUse())
+    return true;
+  return false;
+}
+
+SDOperand DAGCombiner::ReassociateOps(unsigned Opc, SDOperand N0, SDOperand N1){
+  MVT::ValueType VT = N0.getValueType();
+  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
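+  // For example (add (add x, 4), 8): the two constants are combined into a
+  // single constant node, leaving one add of x and 12.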
+  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+    if (isa<ConstantSDNode>(N1)) {
+      SDOperand OpNode = DAG.getNode(Opc, VT, N0.getOperand(1), N1);
+      AddToWorkList(OpNode.Val);
+      return DAG.getNode(Opc, VT, OpNode, N0.getOperand(0));
+    } else if (N0.hasOneUse()) {
+      SDOperand OpNode = DAG.getNode(Opc, VT, N0.getOperand(0), N1);
+      AddToWorkList(OpNode.Val);
+      return DAG.getNode(Opc, VT, OpNode, N0.getOperand(1));
+    }
+  }
+  // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+    if (isa<ConstantSDNode>(N0)) {
+      SDOperand OpNode = DAG.getNode(Opc, VT, N1.getOperand(1), N0);
+      AddToWorkList(OpNode.Val);
+      return DAG.getNode(Opc, VT, OpNode, N1.getOperand(0));
+    } else if (N1.hasOneUse()) {
+      SDOperand OpNode = DAG.getNode(Opc, VT, N1.getOperand(0), N0);
+      AddToWorkList(OpNode.Val);
+      return DAG.getNode(Opc, VT, OpNode, N1.getOperand(1));
+    }
+  }
+  return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+//  Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(bool RunningAfterLegalize) {
+  // set the instance variable, so that the various visit routines may use it.
+  AfterLegalize = RunningAfterLegalize;
+
+  // Add all the dag nodes to the worklist.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I)
+    WorkList.push_back(I);
+  
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+  
+  // The root of the dag may dangle to deleted nodes until the dag combiner is
+  // done.  Set it to null to avoid confusion.
+  DAG.setRoot(SDOperand());
+  
+  /// DagCombineInfo - Expose the DAG combiner to the target combiner impls.
+  TargetLowering::DAGCombinerInfo 
+    DagCombineInfo(DAG, !RunningAfterLegalize, false, this);
+
+  // While the worklist isn't empty, inspect the node at the end of it and
+  // try to combine it.
+  while (!WorkList.empty()) {
+    SDNode *N = WorkList.back();
+    WorkList.pop_back();
+    
+    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
+    // N is deleted from the DAG, since they too may now be dead or may have a
+    // reduced number of uses, allowing other xforms.
+    if (N->use_empty() && N != &Dummy) {
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        AddToWorkList(N->getOperand(i).Val);
+      
+      DAG.DeleteNode(N);
+      continue;
+    }
+    
+    SDOperand RV = visit(N);
+    
+    // If nothing happened, try a target-specific DAG combine.
+    if (RV.Val == 0) {
+      assert(N->getOpcode() != ISD::DELETED_NODE &&
+             "Node was deleted but visit returned NULL!");
+      if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+          TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode()))
+        RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+    }
+    
+    if (RV.Val) {
+      ++NodesCombined;
+      // If we get back the same node we passed in, rather than a new node or
+      // zero, we know that the node must have defined multiple values and
+      // CombineTo was used.  Since CombineTo takes care of the worklist 
+      // mechanics for us, we have no work to do in this case.
+      if (RV.Val != N) {
+        assert(N->getOpcode() != ISD::DELETED_NODE &&
+               RV.Val->getOpcode() != ISD::DELETED_NODE &&
+               "Node was deleted but visit returned new node!");
+
+        DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
+        DOUT << "\nWith: "; DEBUG(RV.Val->dump(&DAG));
+        DOUT << '\n';
+        std::vector<SDNode*> NowDead;
+        if (N->getNumValues() == RV.Val->getNumValues())
+          DAG.ReplaceAllUsesWith(N, RV.Val, &NowDead);
+        else {
+          assert(N->getValueType(0) == RV.getValueType() && "Type mismatch");
+          SDOperand OpV = RV;
+          DAG.ReplaceAllUsesWith(N, &OpV, &NowDead);
+        }
+          
+        // Push the new node and any users onto the worklist
+        AddToWorkList(RV.Val);
+        AddUsersToWorkList(RV.Val);
+          
+        // Nodes can be reintroduced into the worklist.  Make sure we do not
+        // process a node that has been replaced.
+        removeFromWorkList(N);
+        for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+          removeFromWorkList(NowDead[i]);
+        
+        // Finally, since the node is now dead, remove it from the graph.
+        DAG.DeleteNode(N);
+      }
+    }
+  }
+  
+  // If the root changed (e.g. it was a dead load), update the root.
+  DAG.setRoot(Dummy.getValue());
+}
+
+SDOperand DAGCombiner::visit(SDNode *N) {
+  switch(N->getOpcode()) {
+  default: break;
+  case ISD::TokenFactor:        return visitTokenFactor(N);
+  case ISD::ADD:                return visitADD(N);
+  case ISD::SUB:                return visitSUB(N);
+  case ISD::ADDC:               return visitADDC(N);
+  case ISD::ADDE:               return visitADDE(N);
+  case ISD::MUL:                return visitMUL(N);
+  case ISD::SDIV:               return visitSDIV(N);
+  case ISD::UDIV:               return visitUDIV(N);
+  case ISD::SREM:               return visitSREM(N);
+  case ISD::UREM:               return visitUREM(N);
+  case ISD::MULHU:              return visitMULHU(N);
+  case ISD::MULHS:              return visitMULHS(N);
+  case ISD::AND:                return visitAND(N);
+  case ISD::OR:                 return visitOR(N);
+  case ISD::XOR:                return visitXOR(N);
+  case ISD::SHL:                return visitSHL(N);
+  case ISD::SRA:                return visitSRA(N);
+  case ISD::SRL:                return visitSRL(N);
+  case ISD::CTLZ:               return visitCTLZ(N);
+  case ISD::CTTZ:               return visitCTTZ(N);
+  case ISD::CTPOP:              return visitCTPOP(N);
+  case ISD::SELECT:             return visitSELECT(N);
+  case ISD::SELECT_CC:          return visitSELECT_CC(N);
+  case ISD::SETCC:              return visitSETCC(N);
+  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
+  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
+  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
+  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
+  case ISD::TRUNCATE:           return visitTRUNCATE(N);
+  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N);
+  case ISD::FADD:               return visitFADD(N);
+  case ISD::FSUB:               return visitFSUB(N);
+  case ISD::FMUL:               return visitFMUL(N);
+  case ISD::FDIV:               return visitFDIV(N);
+  case ISD::FREM:               return visitFREM(N);
+  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
+  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
+  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
+  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
+  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
+  case ISD::FP_ROUND:           return visitFP_ROUND(N);
+  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
+  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
+  case ISD::FNEG:               return visitFNEG(N);
+  case ISD::FABS:               return visitFABS(N);
+  case ISD::BRCOND:             return visitBRCOND(N);
+  case ISD::BR_CC:              return visitBR_CC(N);
+  case ISD::LOAD:               return visitLOAD(N);
+  case ISD::STORE:              return visitSTORE(N);
+  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
+  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
+  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
+  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
+  }
+  return SDOperand();
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDOperand getInputChainForNode(SDNode *N) {
+  if (unsigned NumOps = N->getNumOperands()) {
+    if (N->getOperand(0).getValueType() == MVT::Other)
+      return N->getOperand(0);
+    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+      return N->getOperand(NumOps-1);
+    for (unsigned i = 1; i < NumOps-1; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        return N->getOperand(i);
+  }
+  return SDOperand(0, 0);
+}
+
+SDOperand DAGCombiner::visitTokenFactor(SDNode *N) {
+  // If N has two operands, where one has an input chain equal to the other,
+  // the 'other' chain is redundant.
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).Val) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).Val) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+  
+  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
+  SmallVector<SDOperand, 8> Ops;    // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps; 
+  bool Changed = false;             // If we should replace this token factor.
+  
+  // Start out with this token factor.
+  TFs.push_back(N);
+  
+  // Iterate through token factors.  The TFs list grows as new token factors
+  // are encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    SDNode *TF = TFs[i];
+    
+    // Check each of the operands.
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDOperand Op = TF->getOperand(i);
+      
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+        
+      case ISD::TokenFactor:
+        if ((CombinerAA || Op.hasOneUse()) &&
+            std::find(TFs.begin(), TFs.end(), Op.Val) == TFs.end()) {
+          // Queue up for processing.
+          TFs.push_back(Op.Val);
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.Val);
+          Changed = true;
+          break;
+        }
+        // Fall thru
+        
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.Val))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  SDOperand Result;
+
+  // If we've changed things around, replace the token factor.
+  if (Changed) {
+    if (Ops.size() == 0) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0], Ops.size());
+    }
+    
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+  
+  return Result;
+}
+
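+// combineShlAddConstant - Given N0 = (shl (add x, c1), c2) where the inner add
+// has one use, distribute the shift so that (add N0, N1) can be rewritten as
+// (add (add (shl x, c2), (shl c1, c2)), N1), exposing further folds.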
+static
+SDOperand combineShlAddConstant(SDOperand N0, SDOperand N1, SelectionDAG &DAG) {
+  MVT::ValueType VT = N0.getValueType();
+  SDOperand N00 = N0.getOperand(0);
+  SDOperand N01 = N0.getOperand(1);
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+  if (N01C && N00.getOpcode() == ISD::ADD && N00.Val->hasOneUse() &&
+      isa<ConstantSDNode>(N00.getOperand(1))) {
+    N0 = DAG.getNode(ISD::ADD, VT,
+                     DAG.getNode(ISD::SHL, VT, N00.getOperand(0), N01),
+                     DAG.getNode(ISD::SHL, VT, N00.getOperand(1), N01));
+    return DAG.getNode(ISD::ADD, VT, N0, N1);
+  }
+  return SDOperand();
+}
+
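+// combineSelectAndUse - Fold (op x, (select cc, 0, c)) -> (select cc, x, (op x, c))
+// (and the select_cc analogue), where op is ADD or SUB, so the arithmetic is
+// only performed on the arm of the select that is not zero.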
+static
+SDOperand combineSelectAndUse(SDNode *N, SDOperand Slct, SDOperand OtherOp,
+                              SelectionDAG &DAG) {
+  MVT::ValueType VT = N->getValueType(0);
+  unsigned Opc = N->getOpcode();
+  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+  SDOperand LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+  SDOperand RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+  ISD::CondCode CC = ISD::SETCC_INVALID;
+  if (isSlctCC)
+    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+  else {
+    SDOperand CCOp = Slct.getOperand(0);
+    if (CCOp.getOpcode() == ISD::SETCC)
+      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+  }
+
+  bool DoXform = false;
+  bool InvCC = false;
+  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+          "Bad input!");
+  if (LHS.getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(LHS)->isNullValue())
+    DoXform = true;
+  else if (CC != ISD::SETCC_INVALID &&
+           RHS.getOpcode() == ISD::Constant &&
+           cast<ConstantSDNode>(RHS)->isNullValue()) {
+    std::swap(LHS, RHS);
+    bool isInt = MVT::isInteger(isSlctCC ? Slct.getOperand(0).getValueType()
+                                : Slct.getOperand(0).getOperand(0).getValueType());
+    CC = ISD::getSetCCInverse(CC, isInt);
+    DoXform = true;
+    InvCC = true;
+  }
+
+  if (DoXform) {
+    SDOperand Result = DAG.getNode(Opc, VT, OtherOp, RHS);
+    if (isSlctCC)
+      return DAG.getSelectCC(OtherOp, Result,
+                             Slct.getOperand(0), Slct.getOperand(1), CC);
+    SDOperand CCOp = Slct.getOperand(0);
+    if (InvCC)
+      CCOp = DAG.getSetCC(CCOp.getValueType(), CCOp.getOperand(0),
+                          CCOp.getOperand(1), CC);
+    return DAG.getNode(ISD::SELECT, VT, CCOp, OtherOp, Result);
+  }
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADD(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (add x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (add c1, c2) -> c1+c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::ADD, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADD, VT, N1, N0);
+  // fold (add x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold ((c1-A)+c2) -> (c1+c2)-A
+  if (N1C && N0.getOpcode() == ISD::SUB)
+    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+      return DAG.getNode(ISD::SUB, VT,
+                         DAG.getConstant(N1C->getValue()+N0C->getValue(), VT),
+                         N0.getOperand(1));
+  // reassociate add
+  SDOperand RADD = ReassociateOps(ISD::ADD, N0, N1);
+  if (RADD.Val != 0)
+    return RADD;
+  // fold ((0-A) + B) -> B-A
+  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, VT, N1, N0.getOperand(1));
+  // fold (A + (0-B)) -> A-B
+  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, VT, N0, N1.getOperand(1));
+  // fold (A+(B-A)) -> B
+  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+    return N1.getOperand(0);
+
+  if (!MVT::isVector(VT) && SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  // fold (a+b) -> (a|b) iff a and b share no bits.
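+  // For instance (add (and x, 0xF0), (and y, 0x0F)): the known-zero bits of
+  // the two operands cover each other, no carry can be generated, and the add
+  // behaves exactly like an or.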
+  if (MVT::isInteger(VT) && !MVT::isVector(VT)) {
+    uint64_t LHSZero, LHSOne;
+    uint64_t RHSZero, RHSOne;
+    uint64_t Mask = MVT::getIntVTBitMask(VT);
+    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+    if (LHSZero) {
+      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+      
+      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+        return DAG.getNode(ISD::OR, VT, N0, N1);
+    }
+  }
+
+  // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+  if (N0.getOpcode() == ISD::SHL && N0.Val->hasOneUse()) {
+    SDOperand Result = combineShlAddConstant(N0, N1, DAG);
+    if (Result.Val) return Result;
+  }
+  if (N1.getOpcode() == ISD::SHL && N1.Val->hasOneUse()) {
+    SDOperand Result = combineShlAddConstant(N1, N0, DAG);
+    if (Result.Val) return Result;
+  }
+
+  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
+  if (N0.getOpcode() == ISD::SELECT && N0.Val->hasOneUse()) {
+    SDOperand Result = combineSelectAndUse(N, N0, N1, DAG);
+    if (Result.Val) return Result;
+  }
+  if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) {
+    SDOperand Result = combineSelectAndUse(N, N1, N0, DAG);
+    if (Result.Val) return Result;
+  }
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADDC(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // If the flag result is dead, turn this into an ADD.
+  if (N->hasNUsesOfValue(0, 1))
+    return CombineTo(N, DAG.getNode(ISD::ADD, VT, N1, N0),
+                     DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+  
+  // canonicalize constant to RHS.
+  if (N0C && !N1C) {
+    SDOperand Ops[] = { N1, N0 };
+    return DAG.getNode(ISD::ADDC, N->getVTList(), Ops, 2);
+  }
+  
+  // fold (addc x, 0) -> x + no carry out
+  if (N1C && N1C->isNullValue())
+    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+  
+  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+  uint64_t LHSZero, LHSOne;
+  uint64_t RHSZero, RHSOne;
+  uint64_t Mask = MVT::getIntVTBitMask(VT);
+  DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+  if (LHSZero) {
+    DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+    
+    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+    if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+        (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+      return CombineTo(N, DAG.getNode(ISD::OR, VT, N0, N1),
+                       DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADDE(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand CarryIn = N->getOperand(2);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  //MVT::ValueType VT = N0.getValueType();
+  
+  // canonicalize constant to RHS
+  if (N0C && !N1C) {
+    SDOperand Ops[] = { N1, N0, CarryIn };
+    return DAG.getNode(ISD::ADDE, N->getVTList(), Ops, 3);
+  }
+  
+  // fold (adde x, y, false) -> (addc x, y)
+  if (CarryIn.getOpcode() == ISD::CARRY_FALSE) {
+    SDOperand Ops[] = { N1, N0 };
+    return DAG.getNode(ISD::ADDC, N->getVTList(), Ops, 2);
+  }
+  
+  return SDOperand();
+}
+
+
+
+SDOperand DAGCombiner::visitSUB(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (sub x, x) -> 0
+  if (N0 == N1)
+    return DAG.getConstant(0, N->getValueType(0));
+  // fold (sub c1, c2) -> c1-c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::SUB, VT, N0, N1);
+  // fold (sub x, c) -> (add x, -c)
+  if (N1C)
+    return DAG.getNode(ISD::ADD, VT, N0, DAG.getConstant(-N1C->getValue(), VT));
+  // fold (A+B)-A -> B
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+    return N0.getOperand(1);
+  // fold (A+B)-B -> A
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+    return N0.getOperand(0);
+  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
+  if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) {
+    SDOperand Result = combineSelectAndUse(N, N1, N0, DAG);
+    if (Result.Val) return Result;
+  }
+  // If either operand of a sub is undef, the result is undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMUL(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (mul x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (mul c1, c2) -> c1*c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::MUL, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::MUL, VT, N1, N0);
+  // fold (mul x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mul x, -1) -> 0-x
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
+  // fold (mul x, (1 << c)) -> x << c
+  if (N1C && isPowerOf2_64(N1C->getValue()))
+    return DAG.getNode(ISD::SHL, VT, N0,
+                       DAG.getConstant(Log2_64(N1C->getValue()),
+                                       TLI.getShiftAmountTy()));
+  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+  if (N1C && isPowerOf2_64(-N1C->getSignExtended())) {
+    // FIXME: If the input is something that is easily negated (e.g. a 
+    // single-use add), we should put the negate there.
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT),
+                       DAG.getNode(ISD::SHL, VT, N0,
+                            DAG.getConstant(Log2_64(-N1C->getSignExtended()),
+                                            TLI.getShiftAmountTy())));
+  }
+
+  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+  if (N1C && N0.getOpcode() == ISD::SHL && 
+      isa<ConstantSDNode>(N0.getOperand(1))) {
+    SDOperand C3 = DAG.getNode(ISD::SHL, VT, N1, N0.getOperand(1));
+    AddToWorkList(C3.Val);
+    return DAG.getNode(ISD::MUL, VT, N0.getOperand(0), C3);
+  }
+  
+  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+  // use.
+  {
+    SDOperand Sh(0,0), Y(0,0);
+    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
+    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+        N0.Val->hasOneUse()) {
+      Sh = N0; Y = N1;
+    } else if (N1.getOpcode() == ISD::SHL && 
+               isa<ConstantSDNode>(N1.getOperand(1)) && N1.Val->hasOneUse()) {
+      Sh = N1; Y = N0;
+    }
+    if (Sh.Val) {
+      SDOperand Mul = DAG.getNode(ISD::MUL, VT, Sh.getOperand(0), Y);
+      return DAG.getNode(ISD::SHL, VT, Mul, Sh.getOperand(1));
+    }
+  }
+  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+  if (N1C && N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse() && 
+      isa<ConstantSDNode>(N0.getOperand(1))) {
+    return DAG.getNode(ISD::ADD, VT, 
+                       DAG.getNode(ISD::MUL, VT, N0.getOperand(0), N1),
+                       DAG.getNode(ISD::MUL, VT, N0.getOperand(1), N1));
+  }
+  
+  // reassociate mul
+  SDOperand RMUL = ReassociateOps(ISD::MUL, N0, N1);
+  if (RMUL.Val != 0)
+    return RMUL;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSDIV(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (sdiv c1, c2) -> c1/c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.getNode(ISD::SDIV, VT, N0, N1);
+  // fold (sdiv X, 1) -> X
+  if (N1C && N1C->getSignExtended() == 1LL)
+    return N0;
+  // fold (sdiv X, -1) -> 0-X
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
+  uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
+  if (DAG.MaskedValueIsZero(N1, SignBit) &&
+      DAG.MaskedValueIsZero(N0, SignBit))
+    return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
+  // fold (sdiv X, pow2) -> simple ops after legalize
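+  // For a 32-bit X and a divisor of 4, for example, this emits
+  //   (X + ((X >>s 31) >>u 30)) >>s 2,
+  // i.e. it biases negative dividends by divisor-1 so the shift truncates
+  // towards zero.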
+  if (N1C && N1C->getValue() && !TLI.isIntDivCheap() &&
+      (isPowerOf2_64(N1C->getSignExtended()) || 
+       isPowerOf2_64(-N1C->getSignExtended()))) {
+    // If dividing by powers of two is cheap, then don't perform the following
+    // fold.
+    if (TLI.isPow2DivCheap())
+      return SDOperand();
+    int64_t pow2 = N1C->getSignExtended();
+    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+    unsigned lg2 = Log2_64(abs2);
+    // Splat the sign bit into the register
+    SDOperand SGN = DAG.getNode(ISD::SRA, VT, N0,
+                                DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                                TLI.getShiftAmountTy()));
+    AddToWorkList(SGN.Val);
+    // Add (N0 < 0) ? abs2 - 1 : 0;
+    SDOperand SRL = DAG.getNode(ISD::SRL, VT, SGN,
+                                DAG.getConstant(MVT::getSizeInBits(VT)-lg2,
+                                                TLI.getShiftAmountTy()));
+    SDOperand ADD = DAG.getNode(ISD::ADD, VT, N0, SRL);
+    AddToWorkList(SRL.Val);
+    AddToWorkList(ADD.Val);
+    // Divide by pow2.
+    SDOperand SRA = DAG.getNode(ISD::SRA, VT, ADD,
+                                DAG.getConstant(lg2, TLI.getShiftAmountTy()));
+    // If we're dividing by a positive value, we're done.  Otherwise, we must
+    // negate the result.
+    if (pow2 > 0)
+      return SRA;
+    AddToWorkList(SRA.Val);
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA);
+  }
+  // if integer divide is expensive and we satisfy the requirements, emit an
+  // alternate sequence.
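+  // BuildSDIV (defined later in this file) is expected to form a cheaper
+  // multiply/shift based equivalent of the divide when one exists.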
+  if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && 
+      !TLI.isIntDivCheap()) {
+    SDOperand Op = BuildSDIV(N);
+    if (Op.Val) return Op;
+  }
+
+  // undef / X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X / undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUDIV(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (udiv c1, c2) -> c1/c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.getNode(ISD::UDIV, VT, N0, N1);
+  // fold (udiv x, (1 << c)) -> x >>u c
+  if (N1C && isPowerOf2_64(N1C->getValue()))
+    return DAG.getNode(ISD::SRL, VT, N0, 
+                       DAG.getConstant(Log2_64(N1C->getValue()),
+                                       TLI.getShiftAmountTy()));
+  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+  if (N1.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+      if (isPowerOf2_64(SHC->getValue())) {
+        MVT::ValueType ADDVT = N1.getOperand(1).getValueType();
+        SDOperand Add = DAG.getNode(ISD::ADD, ADDVT, N1.getOperand(1),
+                                    DAG.getConstant(Log2_64(SHC->getValue()),
+                                                    ADDVT));
+        AddToWorkList(Add.Val);
+        return DAG.getNode(ISD::SRL, VT, N0, Add);
+      }
+    }
+  }
+  // fold (udiv x, c) -> alternate
+  if (N1C && N1C->getValue() && !TLI.isIntDivCheap()) {
+    SDOperand Op = BuildUDIV(N);
+    if (Op.Val) return Op;
+  }
+
+  // undef / X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X / undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSREM(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (srem c1, c2) -> c1%c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.getNode(ISD::SREM, VT, N0, N1);
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+  uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
+  if (DAG.MaskedValueIsZero(N1, SignBit) &&
+      DAG.MaskedValueIsZero(N0, SignBit))
+    return DAG.getNode(ISD::UREM, VT, N0, N1);
+  
+  // Unconditionally lower X%C -> X-X/C*C.  This allows the X/C logic to hack on
+  // the remainder operation.
+  if (N1C && !N1C->isNullValue()) {
+    SDOperand Div = DAG.getNode(ISD::SDIV, VT, N0, N1);
+    SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1);
+    SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul);
+    AddToWorkList(Div.Val);
+    AddToWorkList(Mul.Val);
+    return Sub;
+  }
+  
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUREM(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (urem c1, c2) -> c1%c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.getNode(ISD::UREM, VT, N0, N1);
+  // fold (urem x, pow2) -> (and x, pow2-1)
+  if (N1C && !N1C->isNullValue() && isPowerOf2_64(N1C->getValue()))
+    return DAG.getNode(ISD::AND, VT, N0, DAG.getConstant(N1C->getValue()-1,VT));
+  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+  if (N1.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+      if (isPowerOf2_64(SHC->getValue())) {
+        SDOperand Add = DAG.getNode(ISD::ADD, VT, N1,DAG.getConstant(~0ULL,VT));
+        AddToWorkList(Add.Val);
+        return DAG.getNode(ISD::AND, VT, N0, Add);
+      }
+    }
+  }
+  
+  // Unconditionally lower X%C -> X-X/C*C.  This allows the X/C logic to hack on
+  // the remainder operation.
+  if (N1C && !N1C->isNullValue()) {
+    SDOperand Div = DAG.getNode(ISD::UDIV, VT, N0, N1);
+    SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1);
+    SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul);
+    AddToWorkList(Div.Val);
+    AddToWorkList(Mul.Val);
+    return Sub;
+  }
+  
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMULHS(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (mulhs x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhs x, 1) -> (sra x, size(x)-1)
+  if (N1C && N1C->getValue() == 1)
+    return DAG.getNode(ISD::SRA, N0.getValueType(), N0, 
+                       DAG.getConstant(MVT::getSizeInBits(N0.getValueType())-1,
+                                       TLI.getShiftAmountTy()));
+  // fold (mulhs x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMULHU(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (mulhu x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhu x, 1) -> 0
+  if (N1C && N1C->getValue() == 1)
+    return DAG.getConstant(0, N0.getValueType());
+  // fold (mulhu x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  return SDOperand();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDOperand DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+  SDOperand N0 = N->getOperand(0), N1 = N->getOperand(1);
+  MVT::ValueType VT = N0.getValueType();
+  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+  
+  // For each of OP in AND/OR/XOR:
+  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+       N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
+      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+    SDOperand ORNode = DAG.getNode(N->getOpcode(), 
+                                   N0.getOperand(0).getValueType(),
+                                   N0.getOperand(0), N1.getOperand(0));
+    AddToWorkList(ORNode.Val);
+    return DAG.getNode(N0.getOpcode(), VT, ORNode);
+  }
+  
+  // For each of OP in SHL/SRL/SRA/AND...
+  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
+  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+      N0.getOperand(1) == N1.getOperand(1)) {
+    SDOperand ORNode = DAG.getNode(N->getOpcode(),
+                                   N0.getOperand(0).getValueType(),
+                                   N0.getOperand(0), N1.getOperand(0));
+    AddToWorkList(ORNode.Val);
+    return DAG.getNode(N0.getOpcode(), VT, ORNode, N0.getOperand(1));
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitAND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand LL, LR, RL, RR, CC0, CC1;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N1.getValueType();
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (and x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (and c1, c2) -> c1&c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::AND, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::AND, VT, N1, N0);
+  // fold (and x, -1) -> x
+  if (N1C && N1C->isAllOnesValue())
+    return N0;
+  // if (and x, c) is known to be zero, return 0
+  if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
+    return DAG.getConstant(0, VT);
+  // reassociate and
+  SDOperand RAND = ReassociateOps(ISD::AND, N0, N1);
+  if (RAND.Val != 0)
+    return RAND;
+  // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
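+  // (when the OR's constant sets every bit of the AND mask, those bits are
+  //  known one, so the AND simply produces its mask)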
+  if (N1C && N0.getOpcode() == ISD::OR)
+    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if ((ORI->getValue() & N1C->getValue()) == N1C->getValue())
+        return N1;
+  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    unsigned InMask = MVT::getIntVTBitMask(N0.getOperand(0).getValueType());
+    if (DAG.MaskedValueIsZero(N0.getOperand(0),
+                              ~N1C->getValue() & InMask)) {
+      SDOperand Zext = DAG.getNode(ISD::ZERO_EXTEND, N0.getValueType(),
+                                   N0.getOperand(0));
+      
+      // Replace uses of the AND with uses of the Zero extend node.
+      CombineTo(N, Zext);
+      
+      // We actually want to replace all uses of the any_extend with the
+      // zero_extend, to avoid duplicating things.  This will later cause this
+      // AND to be folded.
+      CombineTo(N0.Val, Zext);
+      return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+    
+    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+        MVT::isInteger(LL.getValueType())) {
+      // fold (X == 0) & (Y == 0) -> (X|Y == 0)
+      if (cast<ConstantSDNode>(LR)->getValue() == 0 && Op1 == ISD::SETEQ) {
+        SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.Val);
+        return DAG.getSetCC(VT, ORNode, LR, Op1);
+      }
+      // fold (X == -1) & (Y == -1) -> (X&Y == -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+        SDOperand ANDNode = DAG.getNode(ISD::AND, LR.getValueType(), LL, RL);
+        AddToWorkList(ANDNode.Val);
+        return DAG.getSetCC(VT, ANDNode, LR, Op1);
+      }
+      // fold (X >  -1) & (Y >  -1) -> (X|Y > -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+        SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.Val);
+        return DAG.getSetCC(VT, ORNode, LR, Op1);
+      }
+    }
+    // canonicalize equivalent to ll == rl
+    if (LL == RR && LR == RL) {
+      Op1 = ISD::getSetCCSwappedOperands(Op1);
+      std::swap(RL, RR);
+    }
+    if (LL == RL && LR == RR) {
+      bool isInteger = MVT::isInteger(LL.getValueType());
+      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+      if (Result != ISD::SETCC_INVALID)
+        return DAG.getSetCC(N0.getValueType(), LL, LR, Result);
+    }
+  }
+
+  // Simplify: and (op x...), (op y...)  -> (op (and x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.Val) return Tmp;
+  }
+  
+  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+  // fold (and (sra)) -> (and (srl)) when possible.
+  if (!MVT::isVector(VT) &&
+      SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  // fold (zext_inreg (extload x)) -> (zextload x)
+  if (ISD::isEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val)) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    // If we zero all the possible extended bits, then we can turn this into
+    // a zextload if we are running before legalize or the operation is legal.
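+    // (~0ULL << size(EVT) selects exactly the bits above EVT; if the AND mask
+    //  clears all of them, the AND is just a zero extension from EVT)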
+    if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
+        (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+      SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+                                         LN0->getBasePtr(), LN0->getSrcValue(),
+                                         LN0->getSrcValueOffset(), EVT,
+                                         LN0->isVolatile(), 
+                                         LN0->getAlignment());
+      AddToWorkList(N);
+      CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+      return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+  if (ISD::isSEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+      N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    // If we zero all the possible extended bits, then we can turn this into
+    // a zextload if we are running before legalize or the operation is legal.
+    if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
+        (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+      SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+                                         LN0->getBasePtr(), LN0->getSrcValue(),
+                                         LN0->getSrcValueOffset(), EVT,
+                                         LN0->isVolatile(), 
+                                         LN0->getAlignment());
+      AddToWorkList(N);
+      CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+      return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  
+  // fold (and (load x), 255) -> (zextload x, i8)
+  // fold (and (extload x, i16), 255) -> (zextload x, i8)
+  if (N1C && N0.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+        LN0->getAddressingMode() == ISD::UNINDEXED &&
+        N0.hasOneUse()) {
+      MVT::ValueType EVT, LoadedVT;
+      if (N1C->getValue() == 255)
+        EVT = MVT::i8;
+      else if (N1C->getValue() == 65535)
+        EVT = MVT::i16;
+      else if (N1C->getValue() == ~0U)
+        EVT = MVT::i32;
+      else
+        EVT = MVT::Other;
+    
+      LoadedVT = LN0->getLoadedVT();
+      if (EVT != MVT::Other && LoadedVT > EVT &&
+          (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+        MVT::ValueType PtrType = N0.getOperand(1).getValueType();
+        // For big endian targets, we need to add an offset to the pointer to
+        // load the correct bytes.  For little endian systems, we merely need to
+        // read fewer bytes from the same pointer.
+        unsigned PtrOff =
+          (MVT::getSizeInBits(LoadedVT) - MVT::getSizeInBits(EVT)) / 8;
+        SDOperand NewPtr = LN0->getBasePtr();
+        if (!TLI.isLittleEndian())
+          NewPtr = DAG.getNode(ISD::ADD, PtrType, NewPtr,
+                               DAG.getConstant(PtrOff, PtrType));
+        AddToWorkList(NewPtr.Val);
+        SDOperand Load =
+          DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), NewPtr,
+                         LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT,
+                         LN0->isVolatile(), LN0->getAlignment());
+        AddToWorkList(N);
+        CombineTo(N0.Val, Load, Load.getValue(1));
+        return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitOR(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand LL, LR, RL, RR, CC0, CC1;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N1.getValueType();
+  unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (or x, undef) -> -1
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(~0ULL, VT);
+  // fold (or c1, c2) -> c1|c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::OR, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::OR, VT, N1, N0);
+  // fold (or x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (or x, -1) -> -1
+  if (N1C && N1C->isAllOnesValue())
+    return N1;
+  // fold (or x, c) -> c iff (x & ~c) == 0
+  if (N1C && 
+      DAG.MaskedValueIsZero(N0,~N1C->getValue() & (~0ULL>>(64-OpSizeInBits))))
+    return N1;
+  // reassociate or
+  SDOperand ROR = ReassociateOps(ISD::OR, N0, N1);
+  if (ROR.Val != 0)
+    return ROR;
+  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+  if (N1C && N0.getOpcode() == ISD::AND && N0.Val->hasOneUse() &&
+             isa<ConstantSDNode>(N0.getOperand(1))) {
+    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+    return DAG.getNode(ISD::AND, VT, DAG.getNode(ISD::OR, VT, N0.getOperand(0),
+                                                 N1),
+                       DAG.getConstant(N1C->getValue() | C1->getValue(), VT));
+  }
+  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+    
+    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+        MVT::isInteger(LL.getValueType())) {
+      // fold (X != 0) | (Y != 0) -> (X|Y != 0)
+      // fold (X <  0) | (Y <  0) -> (X|Y < 0)
+      if (cast<ConstantSDNode>(LR)->getValue() == 0 && 
+          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+        SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.Val);
+        return DAG.getSetCC(VT, ORNode, LR, Op1);
+      }
+      // fold (X != -1) | (Y != -1) -> (X&Y != -1)
+      // fold (X >  -1) | (Y >  -1) -> (X&Y >  -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && 
+          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+        SDOperand ANDNode = DAG.getNode(ISD::AND, LR.getValueType(), LL, RL);
+        AddToWorkList(ANDNode.Val);
+        return DAG.getSetCC(VT, ANDNode, LR, Op1);
+      }
+    }
+    // canonicalize equivalent to ll == rl
+    if (LL == RR && LR == RL) {
+      Op1 = ISD::getSetCCSwappedOperands(Op1);
+      std::swap(RL, RR);
+    }
+    if (LL == RL && LR == RR) {
+      bool isInteger = MVT::isInteger(LL.getValueType());
+      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+      if (Result != ISD::SETCC_INVALID)
+        return DAG.getSetCC(N0.getValueType(), LL, LR, Result);
+    }
+  }
+  
+  // Simplify: or (op x...), (op y...)  -> (op (or x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.Val) return Tmp;
+  }
+  
+  // (X & C1) | (Y & C2)  -> (X|Y) & C3  if possible.
+  if (N0.getOpcode() == ISD::AND &&
+      N1.getOpcode() == ISD::AND &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      N1.getOperand(1).getOpcode() == ISD::Constant &&
+      // Don't increase # computations.
+      (N0.Val->hasOneUse() || N1.Val->hasOneUse())) {
+    // We can only do this xform if we know that bits from X that are set in C2
+    // but not in C1 are already zero.  Likewise for Y.
+    uint64_t LHSMask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    uint64_t RHSMask = cast<ConstantSDNode>(N1.getOperand(1))->getValue();
+    
+    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+      SDOperand X =DAG.getNode(ISD::OR, VT, N0.getOperand(0), N1.getOperand(0));
+      return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(LHSMask|RHSMask, VT));
+    }
+  }
+  
+  
+  // See if this is some rotate idiom.
+  if (SDNode *Rot = MatchRotate(N0, N1))
+    return SDOperand(Rot, 0);
+
+  return SDOperand();
+}
+
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDOperand Op, SDOperand &Shift, SDOperand &Mask) {
+  if (Op.getOpcode() == ISD::AND) {
+    if (isa<ConstantSDNode>(Op.getOperand(1))) {
+      Mask = Op.getOperand(1);
+      Op = Op.getOperand(0);
+    } else {
+      return false;
+    }
+  }
+  
+  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+    Shift = Op;
+    return true;
+  }
+  return false;  
+}
+
+
+// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDOperand LHS, SDOperand RHS) {
+  // Must be a legal type.  Expanded and promoted things won't work with rotates.
+  MVT::ValueType VT = LHS.getValueType();
+  if (!TLI.isTypeLegal(VT)) return 0;
+
+  // The target must have at least one rotate flavor.
+  bool HasROTL = TLI.isOperationLegal(ISD::ROTL, VT);
+  bool HasROTR = TLI.isOperationLegal(ISD::ROTR, VT);
+  if (!HasROTL && !HasROTR) return 0;
+  
+  // Match "(X shl/srl V1) & V2" where V2 may not be present.
+  SDOperand LHSShift;   // The shift.
+  SDOperand LHSMask;    // AND value if any.
+  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+    return 0; // Not part of a rotate.
+
+  SDOperand RHSShift;   // The shift.
+  SDOperand RHSMask;    // AND value if any.
+  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+    return 0; // Not part of a rotate.
+  
+  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+    return 0;   // Not shifting the same value.
+
+  if (LHSShift.getOpcode() == RHSShift.getOpcode())
+    return 0;   // Shifts must disagree.
+    
+  // Canonicalize shl to left side in a shl/srl pair.
+  if (RHSShift.getOpcode() == ISD::SHL) {
+    std::swap(LHS, RHS);
+    std::swap(LHSShift, RHSShift);
+    std::swap(LHSMask , RHSMask );
+  }
+
+  unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+  SDOperand LHSShiftArg = LHSShift.getOperand(0);
+  SDOperand LHSShiftAmt = LHSShift.getOperand(1);
+  SDOperand RHSShiftAmt = RHSShift.getOperand(1);
+
+  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
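+  // e.g. for i32:  (or (shl x, 8), (srl x, 24)) -> (rotl x, 8) or (rotr x, 24)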
+  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+      RHSShiftAmt.getOpcode() == ISD::Constant) {
+    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getValue();
+    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getValue();
+    if ((LShVal + RShVal) != OpSizeInBits)
+      return 0;
+
+    SDOperand Rot;
+    if (HasROTL)
+      Rot = DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt);
+    else
+      Rot = DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt);
+    
+    // If there is an AND of either shifted operand, apply it to the result.
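+    // The SRL half supplies the low LShVal bits of the rotate and the SHL
+    // half supplies the remaining high bits, so each AND mask only constrains
+    // the bits contributed by its own half.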
+    if (LHSMask.Val || RHSMask.Val) {
+      uint64_t Mask = MVT::getIntVTBitMask(VT);
+      
+      if (LHSMask.Val) {
+        uint64_t RHSBits = (1ULL << LShVal)-1;
+        Mask &= cast<ConstantSDNode>(LHSMask)->getValue() | RHSBits;
+      }
+      if (RHSMask.Val) {
+        uint64_t LHSBits = ~((1ULL << (OpSizeInBits-RShVal))-1);
+        Mask &= cast<ConstantSDNode>(RHSMask)->getValue() | LHSBits;
+      }
+        
+      Rot = DAG.getNode(ISD::AND, VT, Rot, DAG.getConstant(Mask, VT));
+    }
+    
+    return Rot.Val;
+  }
+  
+  // If there is a mask here, and we have a variable shift, we can't be sure
+  // that we're masking out the right stuff.
+  if (LHSMask.Val || RHSMask.Val)
+    return 0;
+  
+  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+    if (ConstantSDNode *SUBC = 
+          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+      if (SUBC->getValue() == OpSizeInBits) {
+        if (HasROTL)
+          return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+        else
+          return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+      }
+    }
+  }
+  
+  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
+  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
+  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+    if (ConstantSDNode *SUBC = 
+          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+      if (SUBC->getValue() == OpSizeInBits) {
+        if (HasROTL)
+          return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+        else
+          return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+      }
+    }
+  }
+
+  // Look for sign/zext/any-extended cases:
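+  // The shift amounts may have been promoted to a wider type; look through the
+  // extensions and match the same (sub 32, y) patterns on the inner values.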
+  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND) &&
+      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND)) {
+    SDOperand LExtOp0 = LHSShiftAmt.getOperand(0);
+    SDOperand RExtOp0 = RHSShiftAmt.getOperand(0);
+    if (RExtOp0.getOpcode() == ISD::SUB &&
+        RExtOp0.getOperand(1) == LExtOp0) {
+      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+      //   (rotl x, y)
+      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+      //   (rotr x, (sub 32, y))
+      if (ConstantSDNode *SUBC =
+            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+        if (SUBC->getValue() == OpSizeInBits) {
+          if (HasROTL)
+            return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+          else
+            return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+        }
+      }
+    } else if (LExtOp0.getOpcode() == ISD::SUB &&
+               RExtOp0 == LExtOp0.getOperand(1)) {
+      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
+      //   (rotr x, y)
+      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
+      //   (rotl x, (sub 32, y))
+      if (ConstantSDNode *SUBC =
+            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+        if (SUBC->getValue() == OpSizeInBits) {
+          if (HasROTL)
+            return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+          else
+            return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+        }
+      }
+    }
+  }
+  
+  return 0;
+}
+
+
+SDOperand DAGCombiner::visitXOR(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand LHS, RHS, CC;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (xor x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (xor c1, c2) -> c1^c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::XOR, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::XOR, VT, N1, N0);
+  // fold (xor x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // reassociate xor
+  SDOperand RXOR = ReassociateOps(ISD::XOR, N0, N1);
+  if (RXOR.Val != 0)
+    return RXOR;
+  // fold !(x cc y) -> (x !cc y)
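+  // (xor'ing a setcc-like boolean result with 1 is the same as inverting the
+  //  condition code)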
+  if (N1C && N1C->getValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+    bool isInt = MVT::isInteger(LHS.getValueType());
+    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                                               isInt);
+    if (N0.getOpcode() == ISD::SETCC)
+      return DAG.getSetCC(VT, LHS, RHS, NotCC);
+    if (N0.getOpcode() == ISD::SELECT_CC)
+      return DAG.getSelectCC(LHS, RHS, N0.getOperand(2),N0.getOperand(3),NotCC);
+    assert(0 && "Unhandled SetCC Equivalent!");
+    abort();
+  }
+  // fold !(x or y) -> (!x and !y) iff x or y are setcc
+  if (N1C && N1C->getValue() == 1 && VT == MVT::i1 &&
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDOperand LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, VT, LHS, N1);  // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, VT, RHS, N1);  // RHS = ~RHS
+      AddToWorkList(LHS.Val); AddToWorkList(RHS.Val);
+      return DAG.getNode(NewOpcode, VT, LHS, RHS);
+    }
+  }
+  // fold !(x or y) -> (!x and !y) iff x or y are constants
+  if (N1C && N1C->isAllOnesValue() && 
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDOperand LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, VT, LHS, N1);  // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, VT, RHS, N1);  // RHS = ~RHS
+      AddToWorkList(LHS.Val); AddToWorkList(RHS.Val);
+      return DAG.getNode(NewOpcode, VT, LHS, RHS);
+    }
+  }
+  // fold (xor (xor x, c1), c2) -> (xor x, c1^c2)
+  if (N1C && N0.getOpcode() == ISD::XOR) {
+    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N00C)
+      return DAG.getNode(ISD::XOR, VT, N0.getOperand(1),
+                         DAG.getConstant(N1C->getValue()^N00C->getValue(), VT));
+    if (N01C)
+      return DAG.getNode(ISD::XOR, VT, N0.getOperand(0),
+                         DAG.getConstant(N1C->getValue()^N01C->getValue(), VT));
+  }
+  // fold (xor x, x) -> 0
+  if (N0 == N1) {
+    if (!MVT::isVector(VT)) {
+      return DAG.getConstant(0, VT);
+    } else if (!AfterLegalize || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+      // Produce a vector of zeros.
+      SDOperand El = DAG.getConstant(0, MVT::getVectorElementType(VT));
+      std::vector<SDOperand> Ops(MVT::getVectorNumElements(VT), El);
+      return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+    }
+  }
+  
+  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.Val) return Tmp;
+  }
+  
+  // Simplify the expression using non-local knowledge.
+  if (!MVT::isVector(VT) &&
+      SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSHL(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+  
+  // fold (shl c1, c2) -> c1<<c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::SHL, VT, N0, N1);
+  // fold (shl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (shl x, c >= size(x)) -> undef
+  if (N1C && N1C->getValue() >= OpSizeInBits)
+    return DAG.getNode(ISD::UNDEF, VT);
+  // fold (shl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (shl x, c) is known to be zero, return 0
+  if (DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
+    return DAG.getConstant(0, VT);
+  if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  // fold (shl (shl x, c1), c2) -> 0 or (shl x, c1+c2)
+  if (N1C && N0.getOpcode() == ISD::SHL && 
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    uint64_t c2 = N1C->getValue();
+    if (c1 + c2 >= OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SHL, VT, N0.getOperand(0), 
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+  // fold (shl (srl x, c1), c2) -> (shl (and x, -1 << c1), c2-c1) or
+  //                               (srl (and x, -1 << c1), c1-c2)
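+  // e.g. for i32:  (shl (srl x, 4), 8) -> (shl (and x, 0xFFFFFFF0), 4)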
+  if (N1C && N0.getOpcode() == ISD::SRL && 
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    uint64_t c2 = N1C->getValue();
+    SDOperand Mask = DAG.getNode(ISD::AND, VT, N0.getOperand(0),
+                                 DAG.getConstant(~0ULL << c1, VT));
+    if (c2 > c1)
+      return DAG.getNode(ISD::SHL, VT, Mask, 
+                         DAG.getConstant(c2-c1, N1.getValueType()));
+    else
+      return DAG.getNode(ISD::SRL, VT, Mask, 
+                         DAG.getConstant(c1-c2, N1.getValueType()));
+  }
+  // fold (shl (sra x, c1), c1) -> (and x, -1 << c1)
+  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+    return DAG.getNode(ISD::AND, VT, N0.getOperand(0),
+                       DAG.getConstant(~0ULL << N1C->getValue(), VT));
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSRA(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // fold (sra c1, c2) -> c1>>c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::SRA, VT, N0, N1);
+  // fold (sra 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (sra -1, x) -> -1
+  if (N0C && N0C->isAllOnesValue())
+    return N0;
+  // fold (sra x, c >= size(x)) -> undef
+  if (N1C && N1C->getValue() >= MVT::getSizeInBits(VT))
+    return DAG.getNode(ISD::UNDEF, VT);
+  // fold (sra x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+  // sext_inreg.
+  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+    unsigned LowBits = MVT::getSizeInBits(VT) - (unsigned)N1C->getValue();
+    MVT::ValueType EVT;
+    switch (LowBits) {
+    default: EVT = MVT::Other; break;
+    case  1: EVT = MVT::i1;    break;
+    case  8: EVT = MVT::i8;    break;
+    case 16: EVT = MVT::i16;   break;
+    case 32: EVT = MVT::i32;   break;
+    }
+    if (EVT > MVT::Other && TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0.getOperand(0),
+                         DAG.getValueType(EVT));
+  }
+  
+  // fold (sra (sra x, c1), c2) -> (sra x, c1+c2)
+  if (N1C && N0.getOpcode() == ISD::SRA) {
+    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      unsigned Sum = N1C->getValue() + C1->getValue();
+      if (Sum >= MVT::getSizeInBits(VT)) Sum = MVT::getSizeInBits(VT)-1;
+      return DAG.getNode(ISD::SRA, VT, N0.getOperand(0),
+                         DAG.getConstant(Sum, N1C->getValueType(0)));
+    }
+  }
+  
+  // Simplify, based on bits shifted out of the LHS. 
+  if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  
+  // If the sign bit is known to be zero, switch this to a SRL.
+  if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT)))
+    return DAG.getNode(ISD::SRL, VT, N0, N1);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSRL(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+  
+  // fold (srl c1, c2) -> c1 >>u c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::SRL, VT, N0, N1);
+  // fold (srl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (srl x, c >= size(x)) -> undef
+  if (N1C && N1C->getValue() >= OpSizeInBits)
+    return DAG.getNode(ISD::UNDEF, VT);
+  // fold (srl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (srl x, c) is known to be zero, return 0
+  if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), ~0ULL >> (64-OpSizeInBits)))
+    return DAG.getConstant(0, VT);
+  
+  // fold (srl (srl x, c1), c2) -> 0 or (srl x, c1+c2)
+  if (N1C && N0.getOpcode() == ISD::SRL && 
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    uint64_t c2 = N1C->getValue();
+    if (c1 + c2 >= OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SRL, VT, N0.getOperand(0), 
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+  
+  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    // Shifting in all undef bits?
+    MVT::ValueType SmallVT = N0.getOperand(0).getValueType();
+    if (N1C->getValue() >= MVT::getSizeInBits(SmallVT))
+      return DAG.getNode(ISD::UNDEF, VT);
+
+    SDOperand SmallShift = DAG.getNode(ISD::SRL, SmallVT, N0.getOperand(0), N1);
+    AddToWorkList(SmallShift.Val);
+    return DAG.getNode(ISD::ANY_EXTEND, VT, SmallShift);
+  }
+  
+  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
+  // bit, which is unmodified by sra.
+  if (N1C && N1C->getValue()+1 == MVT::getSizeInBits(VT)) {
+    if (N0.getOpcode() == ISD::SRA)
+      return DAG.getNode(ISD::SRL, VT, N0.getOperand(0), N1);
+  }
+  
+  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
+  if (N1C && N0.getOpcode() == ISD::CTLZ && 
+      N1C->getValue() == Log2_32(MVT::getSizeInBits(VT))) {
+    uint64_t KnownZero, KnownOne, Mask = MVT::getIntVTBitMask(VT);
+    DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+    
+    // If any of the input bits are KnownOne, then the input couldn't be all
+    // zeros, thus the result of the srl will always be zero.
+    if (KnownOne) return DAG.getConstant(0, VT);
+    
+    // If all of the bits input to the ctlz node are known to be zero, then
+    // the result of the ctlz is "32" and the result of the shift is one.
+    uint64_t UnknownBits = ~KnownZero & Mask;
+    if (UnknownBits == 0) return DAG.getConstant(1, VT);
+    
+    // Otherwise, check to see if there is exactly one bit input to the ctlz.
+    if ((UnknownBits & (UnknownBits-1)) == 0) {
+      // Okay, we know that only the single bit specified by UnknownBits could
+      // be set on input to the CTLZ node.  If this bit is set, the SRL will
+      // return 0; if it is clear, it returns 1.  Change the CTLZ/SRL pair
+      // to an SRL,XOR pair, which is likely to simplify more.
+      unsigned ShAmt = CountTrailingZeros_64(UnknownBits);
+      SDOperand Op = N0.getOperand(0);
+      if (ShAmt) {
+        Op = DAG.getNode(ISD::SRL, VT, Op,
+                         DAG.getConstant(ShAmt, TLI.getShiftAmountTy()));
+        AddToWorkList(Op.Val);
+      }
+      return DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(1, VT));
+    }
+  }
+  
+  // fold operands of srl based on knowledge that the low bits are not
+  // demanded.
+  if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTLZ(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (ctlz c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTLZ, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTTZ(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (cttz c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTTZ, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTPOP(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (ctpop c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTPOP, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSELECT(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand N2 = N->getOperand(2);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold select C, X, X -> X
+  if (N1 == N2)
+    return N1;
+  // fold select true, X, Y -> X
+  if (N0C && !N0C->isNullValue())
+    return N1;
+  // fold select false, X, Y -> Y
+  if (N0C && N0C->isNullValue())
+    return N2;
+  // fold select C, 1, X -> C | X
+  if (MVT::i1 == VT && N1C && N1C->getValue() == 1)
+    return DAG.getNode(ISD::OR, VT, N0, N2);
+  // fold select C, 0, X -> ~C & X
+  // FIXME: this should check for C type == X type, not i1?
+  if (MVT::i1 == VT && N1C && N1C->isNullValue()) {
+    SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT));
+    AddToWorkList(XORNode.Val);
+    return DAG.getNode(ISD::AND, VT, XORNode, N2);
+  }
+  // fold select C, X, 1 -> ~C | X
+  if (MVT::i1 == VT && N2C && N2C->getValue() == 1) {
+    SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT));
+    AddToWorkList(XORNode.Val);
+    return DAG.getNode(ISD::OR, VT, XORNode, N1);
+  }
+  // fold select C, X, 0 -> C & X
+  // FIXME: this should check for C type == X type, not i1?
+  if (MVT::i1 == VT && N2C && N2C->isNullValue())
+    return DAG.getNode(ISD::AND, VT, N0, N1);
+  // fold  X ? X : Y --> X ? 1 : Y --> X | Y
+  if (MVT::i1 == VT && N0 == N1)
+    return DAG.getNode(ISD::OR, VT, N0, N2);
+  // fold X ? Y : X --> X ? Y : 0 --> X & Y
+  if (MVT::i1 == VT && N0 == N2)
+    return DAG.getNode(ISD::AND, VT, N0, N1);
+  
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N1, N2))
+    return SDOperand(N, 0);  // Don't revisit N.
+  
+  // fold selects based on a setcc into other things, such as min/max/abs
+  if (N0.getOpcode() == ISD::SETCC)
+    // FIXME:
+    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+    // having to say they don't support SELECT_CC on every type the DAG knows
+    // about, since there is no way to mark an opcode illegal at all value types
+    if (TLI.isOperationLegal(ISD::SELECT_CC, MVT::Other))
+      return DAG.getNode(ISD::SELECT_CC, VT, N0.getOperand(0), N0.getOperand(1),
+                         N1, N2, N0.getOperand(2));
+    else
+      return SimplifySelect(N0, N1, N2);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSELECT_CC(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand N2 = N->getOperand(2);
+  SDOperand N3 = N->getOperand(3);
+  SDOperand N4 = N->getOperand(4);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+  
+  // fold select_cc lhs, rhs, x, x, cc -> x
+  if (N2 == N3)
+    return N2;
+  
+  // Determine if the condition we're dealing with is constant
+  SDOperand SCC = SimplifySetCC(TLI.getSetCCResultTy(), N0, N1, CC, false);
+  if (SCC.Val) AddToWorkList(SCC.Val);
+
+  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.Val)) {
+    if (SCCC->getValue())
+      return N2;    // cond always true -> true val
+    else
+      return N3;    // cond always false -> false val
+  }
+  
+  // Fold to a simpler select_cc
+  if (SCC.Val && SCC.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::SELECT_CC, N2.getValueType(), 
+                       SCC.getOperand(0), SCC.getOperand(1), N2, N3, 
+                       SCC.getOperand(2));
+  
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N2, N3))
+    return SDOperand(N, 0);  // Don't revisit N.
+  
+  // fold select_cc into other things, such as min/max/abs
+  return SimplifySelectCC(N0, N1, N2, N3, CC);
+}
+
+SDOperand DAGCombiner::visitSETCC(SDNode *N) {
+  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+                       cast<CondCodeSDNode>(N->getOperand(2))->get());
+}
+
+SDOperand DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (sext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::SIGN_EXTEND, VT, N0);
+  
+  // fold (sext (sext x)) -> (sext x)
+  // fold (sext (aext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::SIGN_EXTEND, VT, N0.getOperand(0));
+  
+  // fold (sext (truncate (load x))) -> (sext (smaller load x))
+  // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+    if (NarrowLoad.Val) {
+      if (NarrowLoad.Val != N0.Val)
+        CombineTo(N0.Val, NarrowLoad);
+      return DAG.getNode(ISD::SIGN_EXTEND, VT, NarrowLoad);
+    }
+  }
+
+  // See if the value being truncated is already sign extended.  If so, just
+  // eliminate the trunc/sext pair.
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand Op = N0.getOperand(0);
+    unsigned OpBits   = MVT::getSizeInBits(Op.getValueType());
+    unsigned MidBits  = MVT::getSizeInBits(N0.getValueType());
+    unsigned DestBits = MVT::getSizeInBits(VT);
+    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+    
+    if (OpBits == DestBits) {
+      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
+      // bits, the trunc/sext pair is a no-op and we can just return Op.
+      if (NumSignBits > DestBits-MidBits)
+        return Op;
+    } else if (OpBits < DestBits) {
+      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
+      // bits, just sext from i32.
+      if (NumSignBits > OpBits-MidBits)
+        return DAG.getNode(ISD::SIGN_EXTEND, VT, Op);
+    } else {
+      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
+      // bits, just truncate to i32.
+      if (NumSignBits > OpBits-MidBits)
+        return DAG.getNode(ISD::TRUNCATE, VT, Op);
+    }
+    
+    // fold (sext (truncate x)) -> (sextinreg x).
+    if (!AfterLegalize || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+                                               N0.getValueType())) {
+      if (Op.getValueType() < VT)
+        Op = DAG.getNode(ISD::ANY_EXTEND, VT, Op);
+      else if (Op.getValueType() > VT)
+        Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, Op,
+                         DAG.getValueType(N0.getValueType()));
+    }
+  }
+  
+  // fold (sext (load x)) -> (sext (truncate (sextload x)))
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      (!AfterLegalize||TLI.isLoadXLegal(ISD::SEXTLOAD, N0.getValueType()))){
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(), 
+                                       LN0->isVolatile(),
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+  if ((ISD::isSEXTLoad(N0.Val) || ISD::isEXTLoad(N0.Val)) &&
+      ISD::isUNINDEXEDLoad(N0.Val) && N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    if (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT)) {
+      SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+                                         LN0->getBasePtr(), LN0->getSrcValue(),
+                                         LN0->getSrcValueOffset(), EVT,
+                                         LN0->isVolatile(), 
+                                         LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+                ExtLoad.getValue(1));
+      return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  
+  // sext(setcc x,y,cc) -> select_cc x, y, -1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDOperand SCC = 
+      SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.Val) return SCC;
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (zext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::ZERO_EXTEND, VT, N0);
+  // fold (zext (zext x)) -> (zext x)
+  // fold (zext (aext x)) -> (zext x)
+  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::ZERO_EXTEND, VT, N0.getOperand(0));
+
+  // fold (zext (truncate (load x))) -> (zext (smaller load x))
+  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+    if (NarrowLoad.Val) {
+      if (NarrowLoad.Val != N0.Val)
+        CombineTo(N0.Val, NarrowLoad);
+      return DAG.getNode(ISD::ZERO_EXTEND, VT, NarrowLoad);
+    }
+  }
+
+  // fold (zext (truncate x)) -> (and x, mask)
+  if (N0.getOpcode() == ISD::TRUNCATE &&
+      (!AfterLegalize || TLI.isOperationLegal(ISD::AND, VT))) {
+    SDOperand Op = N0.getOperand(0);
+    if (Op.getValueType() < VT) {
+      Op = DAG.getNode(ISD::ANY_EXTEND, VT, Op);
+    } else if (Op.getValueType() > VT) {
+      Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+    }
+    return DAG.getZeroExtendInReg(Op, N0.getValueType());
+  }
+  
+  // fold (zext (and (trunc x), cst)) -> (and x, cst).
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    SDOperand X = N0.getOperand(0).getOperand(0);
+    if (X.getValueType() < VT) {
+      X = DAG.getNode(ISD::ANY_EXTEND, VT, X);
+    } else if (X.getValueType() > VT) {
+      X = DAG.getNode(ISD::TRUNCATE, VT, X);
+    }
+    uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(Mask, VT));
+  }
+  
+  // fold (zext (load x)) -> (zext (truncate (zextload x)))
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      (!AfterLegalize||TLI.isLoadXLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+  if ((ISD::isZEXTLoad(N0.Val) || ISD::isEXTLoad(N0.Val)) &&
+      ISD::isUNINDEXEDLoad(N0.Val) && N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  
+  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDOperand SCC = 
+      SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.Val) return SCC;
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitANY_EXTEND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (aext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::ANY_EXTEND, VT, N0);
+  // fold (aext (aext x)) -> (aext x)
+  // fold (aext (zext x)) -> (zext x)
+  // fold (aext (sext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
+      N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND)
+    return DAG.getNode(N0.getOpcode(), VT, N0.getOperand(0));
+  
+  // fold (aext (truncate (load x))) -> (aext (smaller load x))
+  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+    if (NarrowLoad.Val) {
+      if (NarrowLoad.Val != N0.Val)
+        CombineTo(N0.Val, NarrowLoad);
+      return DAG.getNode(ISD::ANY_EXTEND, VT, NarrowLoad);
+    }
+  }
+
+  // fold (aext (truncate x))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand TruncOp = N0.getOperand(0);
+    if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == aext size.
+    if (TruncOp.getValueType() > VT)
+      return DAG.getNode(ISD::TRUNCATE, VT, TruncOp);
+    return DAG.getNode(ISD::ANY_EXTEND, VT, TruncOp);
+  }
+  
+  // fold (aext (and (trunc x), cst)) -> (and x, cst).
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    SDOperand X = N0.getOperand(0).getOperand(0);
+    if (X.getValueType() < VT) {
+      X = DAG.getNode(ISD::ANY_EXTEND, VT, X);
+    } else if (X.getValueType() > VT) {
+      X = DAG.getNode(ISD::TRUNCATE, VT, X);
+    }
+    uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(Mask, VT));
+  }
+  
+  // fold (aext (load x)) -> (aext (truncate (extload x)))
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      (!AfterLegalize||TLI.isLoadXLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  
+  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
+  if (N0.getOpcode() == ISD::LOAD &&
+      !ISD::isNON_EXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+      N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    SDOperand ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
+                                       LN0->getChain(), LN0->getBasePtr(),
+                                       LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  
+  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDOperand SCC = 
+      SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.Val)
+      return SCC;
+  }
+  
+  return SDOperand();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, where N is a multiple of the
+/// number of bits in the narrower type, transform it into a narrower load
+/// from the original address plus N/8 bytes.  If the result is to be
+/// extended, also fold the extension to form an extending load.
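+/// For example, on a little-endian target:
+///   (i32 (truncate (srl (i64 load p), 32)))  ->  (i32 load p+4)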
+SDOperand DAGCombiner::ReduceLoadWidth(SDNode *N) {
+  unsigned Opc = N->getOpcode();
+  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  MVT::ValueType EVT = N->getValueType(0);
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // extended to VT.
+  if (Opc == ISD::SIGN_EXTEND_INREG) {
+    ExtType = ISD::SEXTLOAD;
+    EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+    if (AfterLegalize && !TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))
+      return SDOperand();
+  }
+
+  unsigned EVTBits = MVT::getSizeInBits(EVT);
+  unsigned ShAmt = 0;
+  bool CombineSRL =  false;
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      ShAmt = N01->getValue();
+      // Is the shift amount a multiple of the size of the narrower type (EVT)?
+      if ((ShAmt & (EVTBits-1)) == 0) {
+        N0 = N0.getOperand(0);
+        if (MVT::getSizeInBits(N0.getValueType()) <= EVTBits)
+          return SDOperand();
+        CombineSRL = true;
+      }
+    }
+  }
+
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      // Do not allow folding to i1 here.  i1 is implicitly stored in memory in
+      // zero extended form: by shrinking the load, we lose track of the fact
+      // that it is already zero extended.
+      // FIXME: This should be reevaluated.
+      VT != MVT::i1) {
+    assert(MVT::getSizeInBits(N0.getValueType()) > EVTBits &&
+           "Cannot truncate to larger type!");
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType PtrType = N0.getOperand(1).getValueType();
+    // For big endian targets, we need to adjust the offset to the pointer to
+    // load the correct bytes.
+    if (!TLI.isLittleEndian())
+      ShAmt = MVT::getSizeInBits(N0.getValueType()) - ShAmt - EVTBits;
+    uint64_t PtrOff =  ShAmt / 8;
+    SDOperand NewPtr = DAG.getNode(ISD::ADD, PtrType, LN0->getBasePtr(),
+                                   DAG.getConstant(PtrOff, PtrType));
+    AddToWorkList(NewPtr.Val);
+    SDOperand Load = (ExtType == ISD::NON_EXTLOAD)
+      ? DAG.getLoad(VT, LN0->getChain(), NewPtr,
+                    LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                    LN0->isVolatile(), LN0->getAlignment())
+      : DAG.getExtLoad(ExtType, VT, LN0->getChain(), NewPtr,
+                       LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT,
+                       LN0->isVolatile(), LN0->getAlignment());
+    AddToWorkList(N);
+    if (CombineSRL) {
+      std::vector<SDNode*> NowDead;
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), NowDead);
+      CombineTo(N->getOperand(0).Val, Load);
+    } else
+      CombineTo(N0.Val, Load, Load.getValue(1));
+    if (ShAmt) {
+      if (Opc == ISD::SIGN_EXTEND_INREG)
+        return DAG.getNode(Opc, VT, Load, N->getOperand(1));
+      else
+        return DAG.getNode(Opc, VT, Load);
+    }
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  return SDOperand();
+}
+
+
+SDOperand DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  MVT::ValueType VT = N->getValueType(0);
+  MVT::ValueType EVT = cast<VTSDNode>(N1)->getVT();
+  unsigned EVTBits = MVT::getSizeInBits(EVT);
+  
+  // fold (sext_in_reg c1) -> c1
+  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0, N1);
+  
+  // If the input is already sign extended, just drop the extension.
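+  // (with at least VT-EVT+1 sign bits, bits [EVT-1 .. VT-1] are all copies of
+  //  the sign bit, so the sext_in_reg changes nothing)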
+  if (DAG.ComputeNumSignBits(N0) >= MVT::getSizeInBits(VT)-EVTBits+1)
+    return N0;
+  
+  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      EVT < cast<VTSDNode>(N0.getOperand(1))->getVT()) {
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0.getOperand(0), N1);
+  }
+
+  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+  if (DAG.MaskedValueIsZero(N0, 1ULL << (EVTBits-1)))
+    return DAG.getZeroExtendInReg(N0, EVT);
+  
+  // fold operands of sext_in_reg based on knowledge that the top bits are not
+  // demanded.
+  if (SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  // fold (sext_in_reg (load x)) -> (smaller sextload x)
+  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+  SDOperand NarrowLoad = ReduceLoadWidth(N);
+  if (NarrowLoad.Val)
+    return NarrowLoad;
+
+  // fold (sext_in_reg (srl X, 24), i8) -> sra X, 24
+  // fold (sext_in_reg (srl X, 23), i8) -> sra X, 23 iff possible.
+  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+  if (N0.getOpcode() == ISD::SRL) {
+    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if (ShAmt->getValue()+EVTBits <= MVT::getSizeInBits(VT)) {
+        // We can turn this into an SRA iff the input to the SRL is already sign
+        // extended enough.
+        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+        if (MVT::getSizeInBits(VT)-(ShAmt->getValue()+EVTBits) < InSignBits)
+          return DAG.getNode(ISD::SRA, VT, N0.getOperand(0), N0.getOperand(1));
+      }
+  }
+
+  // fold (sext_inreg (extload x)) -> (sextload x)
+  if (ISD::isEXTLoad(N0.Val) && 
+      ISD::isUNINDEXEDLoad(N0.Val) &&
+      EVT == cast<LoadSDNode>(N0)->getLoadedVT() &&
+      (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+  if (ISD::isZEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+      N0.hasOneUse() &&
+      EVT == cast<LoadSDNode>(N0)->getLoadedVT() &&
+      (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // noop truncate
+  if (N0.getValueType() == N->getValueType(0))
+    return N0;
+  // fold (truncate c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::TRUNCATE, VT, N0);
+  // fold (truncate (truncate x)) -> (truncate x)
+  if (N0.getOpcode() == ISD::TRUNCATE)
+    return DAG.getNode(ISD::TRUNCATE, VT, N0.getOperand(0));
+  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+      N0.getOpcode() == ISD::ANY_EXTEND) {
+    if (N0.getOperand(0).getValueType() < VT)
+      // if the source is smaller than the dest, we still need an extend
+      return DAG.getNode(N0.getOpcode(), VT, N0.getOperand(0));
+    else if (N0.getOperand(0).getValueType() > VT)
+      // if the source is larger than the dest, than we just need the truncate
+      return DAG.getNode(ISD::TRUNCATE, VT, N0.getOperand(0));
+    else
+      // if the source and dest are the same type, we can drop both the extend
+      // and the truncate
+      return N0.getOperand(0);
+  }
+
+  // fold (truncate (load x)) -> (smaller load x)
+  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+  return ReduceLoadWidth(N);
+}
+
+SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+  // Only do this before legalize, since afterward the target may be depending
+  // on the bitconvert.
+  // First check to see if this is all constant.
+  if (!AfterLegalize &&
+      N0.getOpcode() == ISD::BUILD_VECTOR && N0.Val->hasOneUse() &&
+      MVT::isVector(VT)) {
+    bool isSimple = true;
+    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+          N0.getOperand(i).getOpcode() != ISD::Constant &&
+          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+        isSimple = false; 
+        break;
+      }
+        
+    MVT::ValueType DestEltVT = MVT::getVectorElementType(N->getValueType(0));
+    assert(!MVT::isVector(DestEltVT) &&
+           "Element type of vector ValueType must not be vector!");
+    if (isSimple) {
+      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.Val, DestEltVT);
+    }
+  }
+  
+  // If the input is a constant, let getNode() fold it.
+  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+    SDOperand Res = DAG.getNode(ISD::BIT_CONVERT, VT, N0);
+    if (Res.Val != N) return Res;
+  }
+  
+  if (N0.getOpcode() == ISD::BIT_CONVERT)  // conv(conv(x,t1),t2) -> conv(x,t2)
+    return DAG.getNode(ISD::BIT_CONVERT, VT, N0.getOperand(0));
+
+  // fold (conv (load x)) -> (load (conv*)x)
+  // If the resultant load doesn't need a higher alignment than the original!
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      ISD::isUNINDEXEDLoad(N0.Val) &&
+      TLI.isOperationLegal(ISD::LOAD, VT)) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    unsigned Align = TLI.getTargetMachine().getTargetData()->
+      getABITypeAlignment(MVT::getTypeForValueType(VT));
+    unsigned OrigAlign = LN0->getAlignment();
+    if (Align <= OrigAlign) {
+      SDOperand Load = DAG.getLoad(VT, LN0->getChain(), LN0->getBasePtr(),
+                                   LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                                   LN0->isVolatile(), Align);
+      AddToWorkList(N);
+      CombineTo(N0.Val, DAG.getNode(ISD::BIT_CONVERT, N0.getValueType(), Load),
+                Load.getValue(1));
+      return Load;
+    }
+  }
+  
+  return SDOperand();
+}
+
+/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the 
+/// destination element value type.
+SDOperand DAGCombiner::
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT::ValueType DstEltVT) {
+  MVT::ValueType SrcEltVT = BV->getOperand(0).getValueType();
+  
+  // If this is already the right type, we're done.
+  if (SrcEltVT == DstEltVT) return SDOperand(BV, 0);
+  
+  unsigned SrcBitSize = MVT::getSizeInBits(SrcEltVT);
+  unsigned DstBitSize = MVT::getSizeInBits(DstEltVT);
+  
+  // If this is a conversion of N elements of one type to N elements of another
+  // type, convert each element.  This handles FP<->INT cases.
+  if (SrcBitSize == DstBitSize) {
+    SmallVector<SDOperand, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, DstEltVT, BV->getOperand(i)));
+      AddToWorkList(Ops.back().Val);
+    }
+    MVT::ValueType VT =
+      MVT::getVectorType(DstEltVT,
+                         MVT::getVectorNumElements(BV->getValueType(0)));
+    return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+  }
+  
+  // Otherwise, we're growing or shrinking the elements.  To avoid having to
+  // handle annoying details of growing/shrinking FP values, we convert them to
+  // int first.
+  if (MVT::isFloatingPoint(SrcEltVT)) {
+    // Convert the input float vector to an integer vector whose elements are
+    // the same size.
+    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+    MVT::ValueType IntVT = SrcEltVT == MVT::f32 ? MVT::i32 : MVT::i64;
+    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).Val;
+    SrcEltVT = IntVT;
+  }
+  
+  // Now we know the input is an integer vector.  If the output is a FP type,
+  // convert to integer first, then to FP of the right size.
+  if (MVT::isFloatingPoint(DstEltVT)) {
+    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+    MVT::ValueType TmpVT = DstEltVT == MVT::f32 ? MVT::i32 : MVT::i64;
+    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).Val;
+    
+    // Next, convert to FP elements of the same size.
+    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+  }
+  
+  // Okay, we know the src/dst types are both integers of differing types.
+  // Handle growing first.
+  assert(MVT::isInteger(SrcEltVT) && MVT::isInteger(DstEltVT));
+  if (SrcBitSize < DstBitSize) {
+    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+    
+    SmallVector<SDOperand, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+         i += NumInputsPerOutput) {
+      bool isLE = TLI.isLittleEndian();
+      uint64_t NewBits = 0;
+      bool EltIsUndef = true;
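+      // Build the wide element high bits first; on little-endian targets the
+      // first narrow element belongs in the low bits, so walk the operands in
+      // reverse order.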
+      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+        // Shift the previously computed bits over.
+        NewBits <<= SrcBitSize;
+        SDOperand Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+        if (Op.getOpcode() == ISD::UNDEF) continue;
+        EltIsUndef = false;
+        
+        NewBits |= cast<ConstantSDNode>(Op)->getValue();
+      }
+      
+      if (EltIsUndef)
+        Ops.push_back(DAG.getNode(ISD::UNDEF, DstEltVT));
+      else
+        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+    }
+
+    MVT::ValueType VT = MVT::getVectorType(DstEltVT,
+                                           Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+  }
+  
+  // Finally, this must be the case where we are shrinking elements: each input
+  // turns into multiple outputs.
+  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+  SmallVector<SDOperand, 8> Ops;
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+        Ops.push_back(DAG.getNode(ISD::UNDEF, DstEltVT));
+      continue;
+    }
+    uint64_t OpVal = cast<ConstantSDNode>(BV->getOperand(i))->getValue();
+
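+    // Peel DstBitSize bits off the low end of the constant for each output
+    // element.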
+    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+      unsigned ThisVal = OpVal & ((1ULL << DstBitSize)-1);
+      OpVal >>= DstBitSize;
+      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+    }
+
+    // For big endian targets, swap the order of the pieces of each element.
+    if (!TLI.isLittleEndian())
+      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+  }
+  MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+}
+
+
+
+SDOperand DAGCombiner::visitFADD(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (fadd c1, c2) -> c1+c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FADD, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, VT, N1, N0);
+  // fold (A + (-B)) -> A-B
+  if (isNegatibleForFree(N1) == 2)
+    return DAG.getNode(ISD::FSUB, VT, N0, GetNegatedExpression(N1, DAG));
+  // fold ((-A) + B) -> B-A
+  if (isNegatibleForFree(N0) == 2)
+    return DAG.getNode(ISD::FSUB, VT, N1, GetNegatedExpression(N0, DAG));
+  
+  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+      N0.Val->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FADD, VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FADD, VT, N0.getOperand(1), N1));
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFSUB(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (fsub c1, c2) -> c1-c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FSUB, VT, N0, N1);
+  // fold (0-B) -> -B
+  if (UnsafeFPMath && N0CFP && N0CFP->getValue() == 0.0) {
+    if (isNegatibleForFree(N1))
+      return GetNegatedExpression(N1, DAG);
+    return DAG.getNode(ISD::FNEG, VT, N1);
+  }
+  // fold (A-(-B)) -> A+B
+  if (isNegatibleForFree(N1))
+    return DAG.getNode(ISD::FADD, VT, N0, GetNegatedExpression(N1, DAG));
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFMUL(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (fmul c1, c2) -> c1*c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FMUL, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FMUL, VT, N1, N0);
+  // fold (fmul X, 2.0) -> (fadd X, X)
+  if (N1CFP && N1CFP->isExactlyValue(+2.0))
+    return DAG.getNode(ISD::FADD, VT, N0, N0);
+  // fold (fmul X, -1.0) -> (fneg X)
+  if (N1CFP && N1CFP->isExactlyValue(-1.0))
+    return DAG.getNode(ISD::FNEG, VT, N0);
+  
+  // -X * -Y -> X*Y
+  if (char LHSNeg = isNegatibleForFree(N0)) {
+    if (char RHSNeg = isNegatibleForFree(N1)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FMUL, VT, GetNegatedExpression(N0, DAG),
+                           GetNegatedExpression(N1, DAG));
+    }
+  }
+  
+  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+      N0.Val->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FMUL, VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FMUL, VT, N0.getOperand(1), N1));
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFDIV(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (fdiv c1, c2) -> c1/c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FDIV, VT, N0, N1);
+  
+  
+  // -X / -Y -> X/Y
+  if (char LHSNeg = isNegatibleForFree(N0)) {
+    if (char RHSNeg = isNegatibleForFree(N1)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FDIV, VT, GetNegatedExpression(N0, DAG),
+                           GetNegatedExpression(N1, DAG));
+    }
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFREM(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (frem c1, c2) -> fmod(c1,c2)
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FREM, VT, N0, N1);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+
+  if (N0CFP && N1CFP)  // Constant fold
+    return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1);
+  
+  if (N1CFP) {
+    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
+    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+    union {
+      double d;
+      int64_t i;
+    } u;
+    u.d = N1CFP->getValue();
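+    // The IEEE sign bit is the top bit of the integer pattern, so a
+    // non-negative i64 view means the constant is positive (or +0.0).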
+    if (u.i >= 0)
+      return DAG.getNode(ISD::FABS, VT, N0);
+    else
+      return DAG.getNode(ISD::FNEG, VT, DAG.getNode(ISD::FABS, VT, N0));
+  }
+  
+  // copysign(fabs(x), y) -> copysign(x, y)
+  // copysign(fneg(x), y) -> copysign(x, y)
+  // copysign(copysign(x,z), y) -> copysign(x, y)
+  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+      N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, VT, N0.getOperand(0), N1);
+
+  // copysign(x, abs(y)) -> abs(x)
+  if (N1.getOpcode() == ISD::FABS)
+    return DAG.getNode(ISD::FABS, VT, N0);
+  
+  // copysign(x, copysign(y,z)) -> copysign(x, z)
+  if (N1.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1.getOperand(1));
+  
+  // copysign(x, fp_extend(y)) -> copysign(x, y)
+  // copysign(x, fp_round(y)) -> copysign(x, y)
+  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+    return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1.getOperand(0));
+  
+  return SDOperand();
+}
+
+
+
+SDOperand DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (sint_to_fp c1) -> c1fp
+  if (N0C)
+    return DAG.getNode(ISD::SINT_TO_FP, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (uint_to_fp c1) -> c1fp
+  if (N0C)
+    return DAG.getNode(ISD::UINT_TO_FP, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fp_to_sint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_SINT, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fp_to_uint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_UINT, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_ROUND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fp_round c1fp) -> c1fp
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_ROUND, VT, N0);
+  
+  // fold (fp_round (fp_extend x)) -> x
+  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+    return N0.getOperand(0);
+  
+  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.Val->hasOneUse()) {
+    SDOperand Tmp = DAG.getNode(ISD::FP_ROUND, VT, N0.getOperand(0));
+    AddToWorkList(Tmp.Val);
+    return DAG.getNode(ISD::FCOPYSIGN, VT, Tmp, N0.getOperand(1));
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  MVT::ValueType EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  
+  // fold (fp_round_inreg c1fp) -> c1fp
+  if (N0CFP) {
+    SDOperand Round = DAG.getConstantFP(N0CFP->getValue(), EVT);
+    return DAG.getNode(ISD::FP_EXTEND, VT, Round);
+  }
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_EXTEND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fp_extend c1fp) -> c1fp
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_EXTEND, VT, N0);
+  
+  // fold (fpext (load x)) -> (fpext (fpround (extload x)))
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      (!AfterLegalize||TLI.isLoadXLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::FP_ROUND, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFNEG(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+
+  if (isNegatibleForFree(N0))
+    return GetNegatedExpression(N0, DAG);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFABS(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fabs c1) -> fabs(c1)
+  if (N0CFP)
+    return DAG.getNode(ISD::FABS, VT, N0);
+  // fold (fabs (fabs x)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FABS)
+    return N->getOperand(0);
+  // fold (fabs (fneg x)) -> (fabs x)
+  // fold (fabs (fcopysign x, y)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FABS, VT, N0.getOperand(0));
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitBRCOND(SDNode *N) {
+  SDOperand Chain = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand N2 = N->getOperand(2);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  
+  // never taken branch, fold to chain
+  if (N1C && N1C->isNullValue())
+    return Chain;
+  // unconditional branch
+  if (N1C && N1C->getValue() == 1)
+    return DAG.getNode(ISD::BR, MVT::Other, Chain, N2);
+  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+  // on the target.
+  if (N1.getOpcode() == ISD::SETCC && 
+      TLI.isOperationLegal(ISD::BR_CC, MVT::Other)) {
+    return DAG.getNode(ISD::BR_CC, MVT::Other, Chain, N1.getOperand(2),
+                       N1.getOperand(0), N1.getOperand(1), N2);
+  }
+  return SDOperand();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDOperand DAGCombiner::visitBR_CC(SDNode *N) {
+  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+  SDOperand CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+  
+  // Use SimplifySetCC to simplify SETCC's.
+  SDOperand Simp = SimplifySetCC(MVT::i1, CondLHS, CondRHS, CC->get(), false);
+  if (Simp.Val) AddToWorkList(Simp.Val);
+
+  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.Val);
+
+  // fold br_cc true, dest -> br dest (unconditional branch)
+  if (SCCC && SCCC->getValue())
+    return DAG.getNode(ISD::BR, MVT::Other, N->getOperand(0),
+                       N->getOperand(4));
+  // fold br_cc false, dest -> unconditional fall through
+  if (SCCC && SCCC->isNullValue())
+    return N->getOperand(0);
+
+  // fold to a simpler setcc
+  if (Simp.Val && Simp.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::BR_CC, MVT::Other, N->getOperand(0), 
+                       Simp.getOperand(2), Simp.getOperand(0),
+                       Simp.getOperand(1), N->getOperand(4));
+  return SDOperand();
+}
+
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+  if (!AfterLegalize)
+    return false;
+
+  bool isLoad = true;
+  SDOperand Ptr;
+  MVT::ValueType VT;
+  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
+    if (LD->getAddressingMode() != ISD::UNINDEXED)
+      return false;
+    VT = LD->getLoadedVT();
+    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
+    if (ST->getAddressingMode() != ISD::UNINDEXED)
+      return false;
+    VT = ST->getStoredVT();
+    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else
+    return false;
+
+  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+  // out.  There is no reason to make this a preinc/predec.
+  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+      Ptr.Val->hasOneUse())
+    return false;
+
+  // Ask the target to do addressing mode selection.
+  SDOperand BasePtr;
+  SDOperand Offset;
+  ISD::MemIndexedMode AM = ISD::UNINDEXED;
+  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+    return false;
+  // Don't create an indexed load / store with zero offset.
+  if (isa<ConstantSDNode>(Offset) &&
+      cast<ConstantSDNode>(Offset)->getValue() == 0)
+    return false;
+  
+  // Try turning it into a pre-indexed load / store except when:
+  // 1) The new base ptr is a frame index.
+  // 2) If N is a store and the new base ptr is either the same as or is a
+  //    predecessor of the value being stored.
+  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+  //    that would create a cycle.
+  // 4) All uses are load / store ops that use it as old base ptr.
+
+  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
+  // (plus the implicit offset) to a register to preinc anyway.
+  if (isa<FrameIndexSDNode>(BasePtr))
+    return false;
+  
+  // Check #2.
+  if (!isLoad) {
+    SDOperand Val = cast<StoreSDNode>(N)->getValue();
+    if (Val == BasePtr || BasePtr.Val->isPredecessor(Val.Val))
+      return false;
+  }
+
+  // Now check for #3 and #4.
+  bool RealUse = false;
+  for (SDNode::use_iterator I = Ptr.Val->use_begin(),
+         E = Ptr.Val->use_end(); I != E; ++I) {
+    SDNode *Use = *I;
+    if (Use == N)
+      continue;
+    if (Use->isPredecessor(N))
+      return false;
+
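+    // A use that is merely another load / store address is not a 'real' use;
+    // if all other uses are such addresses, pre-indexing this node would not
+    // eliminate the add / sub (check #4).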
+    if (!((Use->getOpcode() == ISD::LOAD &&
+           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+          (Use->getOpcode() == ISD::STORE &&
+           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+      RealUse = true;
+  }
+  if (!RealUse)
+    return false;
+
+  SDOperand Result;
+  if (isLoad)
+    Result = DAG.getIndexedLoad(SDOperand(N,0), BasePtr, Offset, AM);
+  else
+    Result = DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM);
+  ++PreIndexedNodes;
+  ++NodesCombined;
+  DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
+  DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG));
+  DOUT << '\n';
+  std::vector<SDNode*> NowDead;
+  if (isLoad) {
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0),
+                                  NowDead);
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2),
+                                  NowDead);
+  } else {
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1),
+                                  NowDead);
+  }
+
+  // Nodes can end up on the worklist more than once.  Make sure we do
+  // not process a node that has been replaced.
+  for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+    removeFromWorkList(NowDead[i]);
+  // Finally, since the node is now dead, remove it from the graph.
+  DAG.DeleteNode(N);
+
+  // Replace the uses of Ptr with uses of the updated base value.
+  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+                                NowDead);
+  removeFromWorkList(Ptr.Val);
+  for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+    removeFromWorkList(NowDead[i]);
+  DAG.DeleteNode(Ptr.Val);
+
+  return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// After the transformation, the add / subtract is effectively folded into
+/// the new indexed load / store and all of its other uses are redirected to
+/// the new load / store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+  if (!AfterLegalize)
+    return false;
+
+  bool isLoad = true;
+  SDOperand Ptr;
+  MVT::ValueType VT;
+  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
+    if (LD->getAddressingMode() != ISD::UNINDEXED)
+      return false;
+    VT = LD->getLoadedVT();
+    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
+    if (ST->getAddressingMode() != ISD::UNINDEXED)
+      return false;
+    VT = ST->getStoredVT();
+    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else
+    return false;
+
+  if (Ptr.Val->hasOneUse())
+    return false;
+  
+  for (SDNode::use_iterator I = Ptr.Val->use_begin(),
+         E = Ptr.Val->use_end(); I != E; ++I) {
+    SDNode *Op = *I;
+    if (Op == N ||
+        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+      continue;
+
+    SDOperand BasePtr;
+    SDOperand Offset;
+    ISD::MemIndexedMode AM = ISD::UNINDEXED;
+    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      if (Ptr == Offset)
+        std::swap(BasePtr, Offset);
+      if (Ptr != BasePtr)
+        continue;
+      // Don't create an indexed load / store with zero offset.
+      if (isa<ConstantSDNode>(Offset) &&
+          cast<ConstantSDNode>(Offset)->getValue() == 0)
+        continue;
+
+      // Try turning it into a post-indexed load / store except when
+      // 1) All uses are load / store ops that use it as base ptr.
+      // 2) Op must be independent of N, i.e. Op is neither a predecessor
+      //    nor a successor of N. Otherwise, if Op is folded that would
+      //    create a cycle.
+
+      // Check for #1.
+      bool TryNext = false;
+      for (SDNode::use_iterator II = BasePtr.Val->use_begin(),
+             EE = BasePtr.Val->use_end(); II != EE; ++II) {
+        SDNode *Use = *II;
+        if (Use == Ptr.Val)
+          continue;
+
+        // If all the uses are load / store addresses, then don't do the
+        // transformation.
+        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+          bool RealUse = false;
+          for (SDNode::use_iterator III = Use->use_begin(),
+                 EEE = Use->use_end(); III != EEE; ++III) {
+            SDNode *UseUse = *III;
+            if (!((UseUse->getOpcode() == ISD::LOAD &&
+                   cast<LoadSDNode>(UseUse)->getBasePtr().Val == Use) ||
+                  (UseUse->getOpcode() == ISD::STORE &&
+                   cast<StoreSDNode>(UseUse)->getBasePtr().Val == Use)))
+              RealUse = true;
+          }
+
+          if (!RealUse) {
+            TryNext = true;
+            break;
+          }
+        }
+      }
+      if (TryNext)
+        continue;
+
+      // Check for #2
+      if (!Op->isPredecessor(N) && !N->isPredecessor(Op)) {
+        SDOperand Result = isLoad
+          ? DAG.getIndexedLoad(SDOperand(N,0), BasePtr, Offset, AM)
+          : DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM);
+        ++PostIndexedNodes;
+        ++NodesCombined;
+        DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
+        DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG));
+        DOUT << '\n';
+        std::vector<SDNode*> NowDead;
+        if (isLoad) {
+          DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0),
+                                        NowDead);
+          DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2),
+                                        NowDead);
+        } else {
+          DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1),
+                                        NowDead);
+        }
+
+        // Nodes can end up on the worklist more than once.  Make sure we do
+        // not process a node that has been replaced.
+        for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+          removeFromWorkList(NowDead[i]);
+        // Finally, since the node is now dead, remove it from the graph.
+        DAG.DeleteNode(N);
+
+        // Replace the uses of Use with uses of the updated base value.
+        DAG.ReplaceAllUsesOfValueWith(SDOperand(Op, 0),
+                                      Result.getValue(isLoad ? 1 : 0),
+                                      NowDead);
+        removeFromWorkList(Op);
+        for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+          removeFromWorkList(NowDead[i]);
+        DAG.DeleteNode(Op);
+
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+SDOperand DAGCombiner::visitLOAD(SDNode *N) {
+  LoadSDNode *LD  = cast<LoadSDNode>(N);
+  SDOperand Chain = LD->getChain();
+  SDOperand Ptr   = LD->getBasePtr();
+
+  // If load is not volatile and there are no uses of the loaded value (and
+  // the updated indexed value in case of indexed loads), change uses of the
+  // chain value into uses of the chain input (i.e. delete the dead load).
+  if (!LD->isVolatile()) {
+    if (N->getValueType(1) == MVT::Other) {
+      // Unindexed loads.
+      if (N->hasNUsesOfValue(0, 0))
+        return CombineTo(N, DAG.getNode(ISD::UNDEF, N->getValueType(0)), Chain);
+    } else {
+      // Indexed loads.
+      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+        SDOperand Undef0 = DAG.getNode(ISD::UNDEF, N->getValueType(0));
+        SDOperand Undef1 = DAG.getNode(ISD::UNDEF, N->getValueType(1));
+        SDOperand To[] = { Undef0, Undef1, Chain };
+        return CombineTo(N, To, 3);
+      }
+    }
+  }
+  
+  // If this load is directly stored, replace the load value with the stored
+  // value.
+  // TODO: Handle store large -> read small portion.
+  // TODO: Handle TRUNCSTORE/LOADEXT
+  if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+    if (ISD::isNON_TRUNCStore(Chain.Val)) {
+      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+      if (PrevST->getBasePtr() == Ptr &&
+          PrevST->getValue().getValueType() == N->getValueType(0))
+        return CombineTo(N, Chain.getOperand(1), Chain);
+    }
+  }
+    
+  if (CombinerAA) {
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDOperand BetterChain = FindBetterChain(N, Chain);
+    
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      SDOperand ReplLoad;
+
+      // Replace the chain to avoid dependency.
+      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+        ReplLoad = DAG.getLoad(N->getValueType(0), BetterChain, Ptr,
+                              LD->getSrcValue(), LD->getSrcValueOffset(),
+                              LD->isVolatile(), LD->getAlignment());
+      } else {
+        ReplLoad = DAG.getExtLoad(LD->getExtensionType(),
+                                  LD->getValueType(0),
+                                  BetterChain, Ptr, LD->getSrcValue(),
+                                  LD->getSrcValueOffset(),
+                                  LD->getLoadedVT(),
+                                  LD->isVolatile(), 
+                                  LD->getAlignment());
+      }
+
+      // Create token factor to keep old chain connected.
+      SDOperand Token = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                                    Chain, ReplLoad.getValue(1));
+      
+      // Replace uses with load result and token factor. Don't add users
+      // to work list.
+      return CombineTo(N, ReplLoad.getValue(0), Token, false);
+    }
+  }
+
+  // Try transforming N to an indexed load.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDOperand(N, 0);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSTORE(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDOperand Chain = ST->getChain();
+  SDOperand Value = ST->getValue();
+  SDOperand Ptr   = ST->getBasePtr();
+  
+  // If this is a store of a bit convert, store the input value if the
+  // resultant store does not need a higher alignment than the original.
+  if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+      ST->getAddressingMode() == ISD::UNINDEXED) {
+    unsigned Align = ST->getAlignment();
+    MVT::ValueType SVT = Value.getOperand(0).getValueType();
+    unsigned OrigAlign = TLI.getTargetMachine().getTargetData()->
+      getABITypeAlignment(MVT::getTypeForValueType(SVT));
+    if (Align <= OrigAlign && TLI.isOperationLegal(ISD::STORE, SVT))
+      return DAG.getStore(Chain, Value.getOperand(0), Ptr, ST->getSrcValue(),
+                          ST->getSrcValueOffset(), ST->isVolatile(), Align);
+  }
+  
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+    if (Value.getOpcode() != ISD::TargetConstantFP) {
+      SDOperand Tmp;
+      switch (CFP->getValueType(0)) {
+      default: assert(0 && "Unknown FP type");
+      case MVT::f32:
+        if (!AfterLegalize || TLI.isTypeLegal(MVT::i32)) {
+          Tmp = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32);
+          return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(),
+                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              ST->getAlignment());
+        }
+        break;
+      case MVT::f64:
+        if (!AfterLegalize || TLI.isTypeLegal(MVT::i64)) {
+          Tmp = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64);
+          return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(),
+                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              ST->getAlignment());
+        } else if (TLI.isTypeLegal(MVT::i32)) {
+          // Many FP stores are not made apparent until after legalize, e.g. for
+          // argument passing.  Since this is so common, custom legalize the
+          // 64-bit integer store into two 32-bit stores.
+          uint64_t Val = DoubleToBits(CFP->getValue());
+          SDOperand Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+          SDOperand Hi = DAG.getConstant(Val >> 32, MVT::i32);
+          if (!TLI.isLittleEndian()) std::swap(Lo, Hi);
+
+          int SVOffset = ST->getSrcValueOffset();
+          unsigned Alignment = ST->getAlignment();
+          bool isVolatile = ST->isVolatile();
+
+          SDOperand St0 = DAG.getStore(Chain, Lo, Ptr, ST->getSrcValue(),
+                                       ST->getSrcValueOffset(),
+                                       isVolatile, ST->getAlignment());
+          Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+                            DAG.getConstant(4, Ptr.getValueType()));
+          SVOffset += 4;
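+          // The high half is stored 4 bytes past the original address, so it
+          // is only known to be 4-byte aligned.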
+          if (Alignment > 4)
+            Alignment = 4;
+          SDOperand St1 = DAG.getStore(Chain, Hi, Ptr, ST->getSrcValue(),
+                                       SVOffset, isVolatile, Alignment);
+          return DAG.getNode(ISD::TokenFactor, MVT::Other, St0, St1);
+        }
+        break;
+      }
+    }
+  }
+
+  if (CombinerAA) { 
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDOperand BetterChain = FindBetterChain(N, Chain);
+    
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      // Replace the chain to avoid dependency.
+      SDOperand ReplStore;
+      if (ST->isTruncatingStore()) {
+        ReplStore = DAG.getTruncStore(BetterChain, Value, Ptr,
+          ST->getSrcValue(), ST->getSrcValueOffset(), ST->getStoredVT(),
+          ST->isVolatile(), ST->getAlignment());
+      } else {
+        ReplStore = DAG.getStore(BetterChain, Value, Ptr,
+          ST->getSrcValue(), ST->getSrcValueOffset(),
+          ST->isVolatile(), ST->getAlignment());
+      }
+      
+      // Create token to keep both nodes around.
+      SDOperand Token =
+        DAG.getNode(ISD::TokenFactor, MVT::Other, Chain, ReplStore);
+        
+      // Don't add users to work list.
+      return CombineTo(N, Token, false);
+    }
+  }
+  
+  // Try transforming N to an indexed store.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDOperand(N, 0);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+  SDOperand InVec = N->getOperand(0);
+  SDOperand InVal = N->getOperand(1);
+  SDOperand EltNo = N->getOperand(2);
+  
+  // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+  // vector with the inserted element.
+  if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+    unsigned Elt = cast<ConstantSDNode>(EltNo)->getValue();
+    SmallVector<SDOperand, 8> Ops(InVec.Val->op_begin(), InVec.Val->op_end());
+    if (Elt < Ops.size())
+      Ops[Elt] = InVal;
+    return DAG.getNode(ISD::BUILD_VECTOR, InVec.getValueType(),
+                       &Ops[0], Ops.size());
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  unsigned NumInScalars = N->getNumOperands();
+  MVT::ValueType VT = N->getValueType(0);
+  unsigned NumElts = MVT::getVectorNumElements(VT);
+  MVT::ValueType EltType = MVT::getVectorElementType(VT);
+
+  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+  // at most two distinct vectors, turn this into a shuffle node.
+  SDOperand VecIn1, VecIn2;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    // Ignore undef inputs.
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    
+    // If this input is something other than a EXTRACT_VECTOR_ELT with a
+    // constant index, bail out.
+    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+      VecIn1 = VecIn2 = SDOperand(0, 0);
+      break;
+    }
+    
+    // If the input vector type disagrees with the result of the build_vector,
+    // we can't make a shuffle.
+    SDOperand ExtractedFromVec = N->getOperand(i).getOperand(0);
+    if (ExtractedFromVec.getValueType() != VT) {
+      VecIn1 = VecIn2 = SDOperand(0, 0);
+      break;
+    }
+    
+    // Otherwise, remember this.  We allow up to two distinct input vectors.
+    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+      continue;
+    
+    if (VecIn1.Val == 0) {
+      VecIn1 = ExtractedFromVec;
+    } else if (VecIn2.Val == 0) {
+      VecIn2 = ExtractedFromVec;
+    } else {
+      // Too many inputs.
+      VecIn1 = VecIn2 = SDOperand(0, 0);
+      break;
+    }
+  }
+  
+  // If everything is good, we can make a shuffle operation.
+  if (VecIn1.Val) {
+    SmallVector<SDOperand, 8> BuildVecIndices;
+    for (unsigned i = 0; i != NumInScalars; ++i) {
+      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+        BuildVecIndices.push_back(DAG.getNode(ISD::UNDEF, TLI.getPointerTy()));
+        continue;
+      }
+      
+      SDOperand Extract = N->getOperand(i);
+      
+      // If extracting from the first vector, just use the index directly.
+      if (Extract.getOperand(0) == VecIn1) {
+        BuildVecIndices.push_back(Extract.getOperand(1));
+        continue;
+      }
+
+      // Otherwise, use InIdx + VecSize
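+      // Mask entries >= NumInScalars select elements from the second input
+      // vector.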
+      unsigned Idx = cast<ConstantSDNode>(Extract.getOperand(1))->getValue();
+      BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars,
+                                                TLI.getPointerTy()));
+    }
+    
+    // Add count and size info.
+    MVT::ValueType BuildVecVT =
+      MVT::getVectorType(TLI.getPointerTy(), NumElts);
+    
+    // Return the new VECTOR_SHUFFLE node.
+    SDOperand Ops[5];
+    Ops[0] = VecIn1;
+    if (VecIn2.Val) {
+      Ops[1] = VecIn2;
+    } else {
+      // Use an undef build_vector as input for the second operand.
+      std::vector<SDOperand> UnOps(NumInScalars,
+                                   DAG.getNode(ISD::UNDEF, 
+                                               EltType));
+      Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, VT,
+                           &UnOps[0], UnOps.size());
+      AddToWorkList(Ops[1].Val);
+    }
+    Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, BuildVecVT,
+                         &BuildVecIndices[0], BuildVecIndices.size());
+    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Ops, 3);
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+  // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR vector
+  // inputs come from at most two distinct vectors, turn this into a shuffle
+  // node.
+
+  // If we only have one input vector, we don't need to do any concatenation.
+  if (N->getNumOperands() == 1) {
+    return N->getOperand(0);
+  }
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  SDOperand ShufMask = N->getOperand(2);
+  unsigned NumElts = ShufMask.getNumOperands();
+
+  // If the shuffle mask is an identity operation on the LHS, return the LHS.
+  bool isIdentity = true;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
+        cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() != i) {
+      isIdentity = false;
+      break;
+    }
+  }
+  if (isIdentity) return N->getOperand(0);
+
+  // If the shuffle mask is an identity operation on the RHS, return the RHS.
+  isIdentity = true;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
+        cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() != i+NumElts) {
+      isIdentity = false;
+      break;
+    }
+  }
+  if (isIdentity) return N->getOperand(1);
+
+  // Check if the shuffle is a unary shuffle, i.e. one of the vectors is not
+  // needed at all.
+  bool isUnary = true;
+  bool isSplat = true;
+  int VecNum = -1;
+  unsigned BaseIdx = 0;
+  for (unsigned i = 0; i != NumElts; ++i)
+    if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) {
+      unsigned Idx = cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue();
+      int V = (Idx < NumElts) ? 0 : 1;
+      if (VecNum == -1) {
+        VecNum = V;
+        BaseIdx = Idx;
+      } else {
+        if (BaseIdx != Idx)
+          isSplat = false;
+        if (VecNum != V) {
+          isUnary = false;
+          break;
+        }
+      }
+    }
+
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  // Normalize unary shuffle so the RHS is undef.
+  if (isUnary && VecNum == 1)
+    std::swap(N0, N1);
+
+  // If it is a splat, check if the argument vector is a build_vector with
+  // all scalar elements the same.
+  if (isSplat) {
+    SDNode *V = N0.Val;
+
+    // If this is a bit convert that changes the element type of the vector but
+    // not the number of vector elements, look through it.  Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+    if (V->getOpcode() == ISD::BIT_CONVERT) {
+      SDOperand ConvInput = V->getOperand(0);
+      if (MVT::getVectorNumElements(ConvInput.getValueType()) == NumElts)
+        V = ConvInput.Val;
+    }
+
+    if (V->getOpcode() == ISD::BUILD_VECTOR) {
+      unsigned NumElems = V->getNumOperands();
+      if (NumElems > BaseIdx) {
+        SDOperand Base;
+        bool AllSame = true;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+            Base = V->getOperand(i);
+            break;
+          }
+        }
+        // Splat of <u, u, u, u>, return <u, u, u, u>
+        if (!Base.Val)
+          return N0;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i).getOpcode() != ISD::UNDEF &&
+              V->getOperand(i) != Base) {
+            AllSame = false;
+            break;
+          }
+        }
+        // Splat of <x, x, x, x>, return <x, x, x, x>
+        if (AllSame)
+          return N0;
+      }
+    }
+  }
+
+  // If it is a unary or the LHS and the RHS are the same node, turn the RHS
+  // into an undef.
+  if (isUnary || N0 == N1) {
+    // Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the
+    // first operand.
+    SmallVector<SDOperand, 8> MappedOps;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF ||
+          cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() < NumElts) {
+        MappedOps.push_back(ShufMask.getOperand(i));
+      } else {
+        unsigned NewIdx = 
+          cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() - NumElts;
+        MappedOps.push_back(DAG.getConstant(NewIdx, MVT::i32));
+      }
+    }
+    ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMask.getValueType(),
+                           &MappedOps[0], MappedOps.size());
+    AddToWorkList(ShufMask.Val);
+    return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getValueType(0),
+                       N0,
+                       DAG.getNode(ISD::UNDEF, N->getValueType(0)),
+                       ShufMask);
+  }
+ 
+  return SDOperand();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
+/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+///      vector_shuffle V, Zero, <0, 4, 2, 4>
+SDOperand DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  SDOperand LHS = N->getOperand(0);
+  SDOperand RHS = N->getOperand(1);
+  if (N->getOpcode() == ISD::AND) {
+    if (RHS.getOpcode() == ISD::BIT_CONVERT)
+      RHS = RHS.getOperand(0);
+    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+      std::vector<SDOperand> IdxOps;
+      unsigned NumOps = RHS.getNumOperands();
+      unsigned NumElts = NumOps;
+      MVT::ValueType EVT = MVT::getVectorElementType(RHS.getValueType());
+      for (unsigned i = 0; i != NumElts; ++i) {
+        SDOperand Elt = RHS.getOperand(i);
+        if (!isa<ConstantSDNode>(Elt))
+          return SDOperand();
+        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+          IdxOps.push_back(DAG.getConstant(i, EVT));
+        else if (cast<ConstantSDNode>(Elt)->isNullValue())
+          IdxOps.push_back(DAG.getConstant(NumElts, EVT));
+        else
+          return SDOperand();
+      }
+
+      // Let's see if the target supports this vector_shuffle.
+      if (!TLI.isVectorClearMaskLegal(IdxOps, EVT, DAG))
+        return SDOperand();
+
+      // Return the new VECTOR_SHUFFLE node.
+      MVT::ValueType VT = MVT::getVectorType(EVT, NumElts);
+      std::vector<SDOperand> Ops;
+      LHS = DAG.getNode(ISD::BIT_CONVERT, VT, LHS);
+      Ops.push_back(LHS);
+      AddToWorkList(LHS.Val);
+      std::vector<SDOperand> ZeroOps(NumElts, DAG.getConstant(0, EVT));
+      Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, VT,
+                                &ZeroOps[0], ZeroOps.size()));
+      Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, VT,
+                                &IdxOps[0], IdxOps.size()));
+      SDOperand Result = DAG.getNode(ISD::VECTOR_SHUFFLE, VT,
+                                     &Ops[0], Ops.size());
+      if (VT != LHS.getValueType()) {
+        Result = DAG.getNode(ISD::BIT_CONVERT, LHS.getValueType(), Result);
+      }
+      return Result;
+    }
+  }
+  return SDOperand();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDOperand DAGCombiner::SimplifyVBinOp(SDNode *N) {
+  // After legalize, the target may be depending on adds and other
+  // binary ops to provide legal ways to construct constants or other
+  // things. Simplifying them may result in a loss of legality.
+  if (AfterLegalize) return SDOperand();
+
+  MVT::ValueType VT = N->getValueType(0);
+  assert(MVT::isVector(VT) && "SimplifyVBinOp only works on vectors!");
+
+  MVT::ValueType EltType = MVT::getVectorElementType(VT);
+  SDOperand LHS = N->getOperand(0);
+  SDOperand RHS = N->getOperand(1);
+  SDOperand Shuffle = XformToShuffleWithZero(N);
+  if (Shuffle.Val) return Shuffle;
+
+  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+  // this operation.
+  if (LHS.getOpcode() == ISD::BUILD_VECTOR && 
+      RHS.getOpcode() == ISD::BUILD_VECTOR) {
+    SmallVector<SDOperand, 8> Ops;
+    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+      SDOperand LHSOp = LHS.getOperand(i);
+      SDOperand RHSOp = RHS.getOperand(i);
+      // If these two elements can't be folded, bail out.
+      if ((LHSOp.getOpcode() != ISD::UNDEF &&
+           LHSOp.getOpcode() != ISD::Constant &&
+           LHSOp.getOpcode() != ISD::ConstantFP) ||
+          (RHSOp.getOpcode() != ISD::UNDEF &&
+           RHSOp.getOpcode() != ISD::Constant &&
+           RHSOp.getOpcode() != ISD::ConstantFP))
+        break;
+      // Can't fold divide by zero.
+      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+          N->getOpcode() == ISD::FDIV) {
+        if ((RHSOp.getOpcode() == ISD::Constant &&
+             cast<ConstantSDNode>(RHSOp.Val)->isNullValue()) ||
+            (RHSOp.getOpcode() == ISD::ConstantFP &&
+             !cast<ConstantFPSDNode>(RHSOp.Val)->getValue()))
+          break;
+      }
+      Ops.push_back(DAG.getNode(N->getOpcode(), EltType, LHSOp, RHSOp));
+      AddToWorkList(Ops.back().Val);
+      assert((Ops.back().getOpcode() == ISD::UNDEF ||
+              Ops.back().getOpcode() == ISD::Constant ||
+              Ops.back().getOpcode() == ISD::ConstantFP) &&
+             "Scalar binop didn't fold!");
+    }
+    
+    if (Ops.size() == LHS.getNumOperands()) {
+      MVT::ValueType VT = LHS.getValueType();
+      return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+    }
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2){
+  assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
+  
+  SDOperand SCC = SimplifySelectCC(N0.getOperand(0), N0.getOperand(1), N1, N2,
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());
+  // If we got a simplified select_cc node back from SimplifySelectCC, then
+  // break it down into a new SETCC node, and a new SELECT node, and then return
+  // the SELECT node, since we were called with a SELECT node.
+  if (SCC.Val) {
+    // Check to see if we got a select_cc back (to turn into setcc/select).
+    // Otherwise, just return whatever node we got back, like fabs.
+    if (SCC.getOpcode() == ISD::SELECT_CC) {
+      SDOperand SETCC = DAG.getNode(ISD::SETCC, N0.getValueType(),
+                                    SCC.getOperand(0), SCC.getOperand(1), 
+                                    SCC.getOperand(4));
+      AddToWorkList(SETCC.Val);
+      return DAG.getNode(ISD::SELECT, SCC.getValueType(), SCC.getOperand(2),
+                         SCC.getOperand(3), SETCC);
+    }
+    return SCC;
+  }
+  return SDOperand();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select.  Callers of this should assume that TheSelect is deleted if this
+/// returns true.  As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+///
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDOperand LHS, 
+                                    SDOperand RHS) {
+  
+  // If this is a select from two identical things, try to pull the operation
+  // through the select.
+  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
+    // If this is a load and the token chain is identical, replace the select
+    // of two loads with a load through a select of the address to load from.
+    // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+    // constants have been dropped into the constant pool.
+    if (LHS.getOpcode() == ISD::LOAD &&
+        // Token chains must be identical.
+        LHS.getOperand(0) == RHS.getOperand(0)) {
+      LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+      LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+      // If this is an EXTLOAD, the VT's must match.
+      if (LLD->getLoadedVT() == RLD->getLoadedVT()) {
+        // FIXME: this conflates two src values, discarding one.  This is not
+        // the right thing to do, but nothing uses srcvalues now.  When they do,
+        // turn SrcValue into a list of locations.
+        SDOperand Addr;
+        if (TheSelect->getOpcode() == ISD::SELECT) {
+          // Check that the condition doesn't reach either load.  If so, folding
+          // this will induce a cycle into the DAG.
+          if (!LLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+              !RLD->isPredecessor(TheSelect->getOperand(0).Val)) {
+            Addr = DAG.getNode(ISD::SELECT, LLD->getBasePtr().getValueType(),
+                               TheSelect->getOperand(0), LLD->getBasePtr(),
+                               RLD->getBasePtr());
+          }
+        } else {
+          // Check that the condition doesn't reach either load.  If so, folding
+          // this will induce a cycle into the DAG.
+          if (!LLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+              !RLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+              !LLD->isPredecessor(TheSelect->getOperand(1).Val) &&
+              !RLD->isPredecessor(TheSelect->getOperand(1).Val)) {
+            Addr = DAG.getNode(ISD::SELECT_CC, LLD->getBasePtr().getValueType(),
+                             TheSelect->getOperand(0),
+                             TheSelect->getOperand(1), 
+                             LLD->getBasePtr(), RLD->getBasePtr(),
+                             TheSelect->getOperand(4));
+          }
+        }
+        
+        if (Addr.Val) {
+          SDOperand Load;
+          if (LLD->getExtensionType() == ISD::NON_EXTLOAD)
+            Load = DAG.getLoad(TheSelect->getValueType(0), LLD->getChain(),
+                               Addr,LLD->getSrcValue(), 
+                               LLD->getSrcValueOffset(),
+                               LLD->isVolatile(), 
+                               LLD->getAlignment());
+          else {
+            Load = DAG.getExtLoad(LLD->getExtensionType(),
+                                  TheSelect->getValueType(0),
+                                  LLD->getChain(), Addr, LLD->getSrcValue(),
+                                  LLD->getSrcValueOffset(),
+                                  LLD->getLoadedVT(),
+                                  LLD->isVolatile(), 
+                                  LLD->getAlignment());
+          }
+          // Users of the select now use the result of the load.
+          CombineTo(TheSelect, Load);
+        
+          // Users of the old loads now use the new load's chain.  We know the
+          // old-load value is dead now.
+          CombineTo(LHS.Val, Load.getValue(0), Load.getValue(1));
+          CombineTo(RHS.Val, Load.getValue(0), Load.getValue(1));
+          return true;
+        }
+      }
+    }
+  }
+  
+  return false;
+}
+
+SDOperand DAGCombiner::SimplifySelectCC(SDOperand N0, SDOperand N1, 
+                                        SDOperand N2, SDOperand N3,
+                                        ISD::CondCode CC, bool NotExtCompare) {
+  
+  MVT::ValueType VT = N2.getValueType();
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.Val);
+
+  // Determine if the condition we're dealing with is constant
+  SDOperand SCC = SimplifySetCC(TLI.getSetCCResultTy(), N0, N1, CC, false);
+  if (SCC.Val) AddToWorkList(SCC.Val);
+  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.Val);
+
+  // fold select_cc true, x, y -> x
+  if (SCCC && SCCC->getValue())
+    return N2;
+  // fold select_cc false, x, y -> y
+  if (SCCC && SCCC->getValue() == 0)
+    return N3;
+  
+  // Check to see if we can simplify the select into an fabs node
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+    // Allow either -0.0 or 0.0
+    if (CFP->getValue() == 0.0) {
+      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+          N2 == N3.getOperand(0))
+        return DAG.getNode(ISD::FABS, VT, N0);
+      
+      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+          N2.getOperand(0) == N3)
+        return DAG.getNode(ISD::FABS, VT, N3);
+    }
+  }
+  
+  // Check to see if we can perform the "gzip trick", transforming
+  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
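+  // e.g. with i32 operands, (X < 0) ? A : 0 becomes (sra X, 31) & A: the
+  // arithmetic shift produces all ones when X is negative and zero otherwise,
+  // so the AND yields either A or 0.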
+  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+      MVT::isInteger(N0.getValueType()) && 
+      MVT::isInteger(N2.getValueType()) && 
+      (N1C->isNullValue() ||                    // (a < 0) ? b : 0
+       (N1C->getValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
+    MVT::ValueType XType = N0.getValueType();
+    MVT::ValueType AType = N2.getValueType();
+    if (XType >= AType) {
+      // "and (sra X, size(X)-1), A" -> "and (srl X, C2), A" iff A is a
+      // single-bit constant.
+      if (N2C && ((N2C->getValue() & (N2C->getValue()-1)) == 0)) {
+        unsigned ShCtV = Log2_64(N2C->getValue());
+        ShCtV = MVT::getSizeInBits(XType)-ShCtV-1;
+        SDOperand ShCt = DAG.getConstant(ShCtV, TLI.getShiftAmountTy());
+        SDOperand Shift = DAG.getNode(ISD::SRL, XType, N0, ShCt);
+        AddToWorkList(Shift.Val);
+        if (XType > AType) {
+          Shift = DAG.getNode(ISD::TRUNCATE, AType, Shift);
+          AddToWorkList(Shift.Val);
+        }
+        return DAG.getNode(ISD::AND, AType, Shift, N2);
+      }
+      SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+                                    DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                    TLI.getShiftAmountTy()));
+      AddToWorkList(Shift.Val);
+      if (XType > AType) {
+        Shift = DAG.getNode(ISD::TRUNCATE, AType, Shift);
+        AddToWorkList(Shift.Val);
+      }
+      return DAG.getNode(ISD::AND, AType, Shift, N2);
+    }
+  }
+  
+  // fold select C, 16, 0 -> shl C, 4
+  if (N2C && N3C && N3C->isNullValue() && isPowerOf2_64(N2C->getValue()) &&
+      TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) {
+    
+    // If the caller doesn't want us to simplify this into a zext of a compare,
+    // don't do it.
+    if (NotExtCompare && N2C->getValue() == 1)
+      return SDOperand();
+    
+    // Get a SetCC of the condition
+    // FIXME: Should probably make sure that setcc is legal if we ever have a
+    // target where it isn't.
+    SDOperand Temp, SCC;
+    // cast from setcc result type to select result type
+    if (AfterLegalize) {
+      SCC  = DAG.getSetCC(TLI.getSetCCResultTy(), N0, N1, CC);
+      if (N2.getValueType() < SCC.getValueType())
+        Temp = DAG.getZeroExtendInReg(SCC, N2.getValueType());
+      else
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getValueType(), SCC);
+    } else {
+      SCC  = DAG.getSetCC(MVT::i1, N0, N1, CC);
+      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getValueType(), SCC);
+    }
+    AddToWorkList(SCC.Val);
+    AddToWorkList(Temp.Val);
+    
+    if (N2C->getValue() == 1)
+      return Temp;
+    // shl setcc result by log2 n2c
+    return DAG.getNode(ISD::SHL, N2.getValueType(), Temp,
+                       DAG.getConstant(Log2_64(N2C->getValue()),
+                                       TLI.getShiftAmountTy()));
+  }
+    
+  // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+  // otherwise, go ahead with the folds.
+  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getValue() == 1ULL)) {
+    MVT::ValueType XType = N0.getValueType();
+    if (TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultTy())) {
+      SDOperand Res = DAG.getSetCC(TLI.getSetCCResultTy(), N0, N1, CC);
+      if (Res.getValueType() != VT)
+        Res = DAG.getNode(ISD::ZERO_EXTEND, VT, Res);
+      return Res;
+    }
+    
+    // seteq X, 0 -> srl (ctlz X, log2(size(X)))
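+    // (This relies on ctlz(0) being defined as the bit width: for i32,
+    // ctlz(0) == 32 so srl by 5 yields 1, while any nonzero X has ctlz < 32
+    // and yields 0.)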
+    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && 
+        TLI.isOperationLegal(ISD::CTLZ, XType)) {
+      SDOperand Ctlz = DAG.getNode(ISD::CTLZ, XType, N0);
+      return DAG.getNode(ISD::SRL, XType, Ctlz, 
+                         DAG.getConstant(Log2_32(MVT::getSizeInBits(XType)),
+                                         TLI.getShiftAmountTy()));
+    }
+    // setgt X, 0 -> srl (and (-X, ~X), size(X)-1)
+    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { 
+      SDOperand NegN0 = DAG.getNode(ISD::SUB, XType, DAG.getConstant(0, XType),
+                                    N0);
+      SDOperand NotN0 = DAG.getNode(ISD::XOR, XType, N0, 
+                                    DAG.getConstant(~0ULL, XType));
+      return DAG.getNode(ISD::SRL, XType, 
+                         DAG.getNode(ISD::AND, XType, NegN0, NotN0),
+                         DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                         TLI.getShiftAmountTy()));
+    }
+    // setgt X, -1 -> xor (srl (X, size(X)-1), 1)
+    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+      SDOperand Sign = DAG.getNode(ISD::SRL, XType, N0,
+                                   DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                   TLI.getShiftAmountTy()));
+      return DAG.getNode(ISD::XOR, XType, Sign, DAG.getConstant(1, XType));
+    }
+  }
+  
+  // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
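+  // e.g. for i32: Y = sra(X, 31) is 0 when X >= 0 and -1 when X < 0, so
+  // (X + Y) ^ Y is X when X >= 0 and (X - 1) ^ -1 == -X when X < 0.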
+  if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+      N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+      N2.getOperand(0) == N1 && MVT::isInteger(N0.getValueType())) {
+    MVT::ValueType XType = N0.getValueType();
+    SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+                                  DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                  TLI.getShiftAmountTy()));
+    SDOperand Add = DAG.getNode(ISD::ADD, XType, N0, Shift);
+    AddToWorkList(Shift.Val);
+    AddToWorkList(Add.Val);
+    return DAG.getNode(ISD::XOR, XType, Add, Shift);
+  }
+  // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+      N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+    if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+      MVT::ValueType XType = N0.getValueType();
+      if (SubC->isNullValue() && MVT::isInteger(XType)) {
+        SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+                                    DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                      TLI.getShiftAmountTy()));
+        SDOperand Add = DAG.getNode(ISD::ADD, XType, N0, Shift);
+        AddToWorkList(Shift.Val);
+        AddToWorkList(Add.Val);
+        return DAG.getNode(ISD::XOR, XType, Add, Shift);
+      }
+    }
+  }
+  
+  return SDOperand();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDOperand DAGCombiner::SimplifySetCC(MVT::ValueType VT, SDOperand N0,
+                                     SDOperand N1, ISD::CondCode Cond,
+                                     bool foldBooleans) {
+  TargetLowering::DAGCombinerInfo 
+    DagCombineInfo(DAG, !AfterLegalize, false, this);
+  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
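+/// For example, a signed divide by a constant such as 7 can be replaced by a
+/// multiply-high with a precomputed "magic" constant followed by shifts and a
+/// sign correction, avoiding a hardware divide; TLI.BuildSDIV emits that
+/// sequence.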
+SDOperand DAGCombiner::BuildSDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDOperand S = TLI.BuildSDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildUDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDOperand S = TLI.BuildUDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// FindBaseOffset - Return true if base is known not to alias with anything
+/// but itself.  Provides base object and offset as results.
+static bool FindBaseOffset(SDOperand Ptr, SDOperand &Base, int64_t &Offset) {
+  // Assume it is a primitive operation.
+  Base = Ptr; Offset = 0;
+  
+  // If it's adding a simple constant, integrate the offset.
+  if (Base.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+      Base = Base.getOperand(0);
+      Offset += C->getValue();
+    }
+  }
+  
+  // If it's any of the following then it can't alias with anything but itself.
+  return isa<FrameIndexSDNode>(Base) ||
+         isa<ConstantPoolSDNode>(Base) ||
+         isa<GlobalAddressSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDOperand Ptr1, int64_t Size1,
+                          const Value *SrcValue1, int SrcValueOffset1,
+                          SDOperand Ptr2, int64_t Size2,
+                          const Value *SrcValue2, int SrcValueOffset2)
+{
+  // If they are the same then they must be aliases.
+  if (Ptr1 == Ptr2) return true;
+  
+  // Gather base node and offset information.
+  SDOperand Base1, Base2;
+  int64_t Offset1, Offset2;
+  bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+  bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+  
+  // If they have the same base address then...
+  if (Base1 == Base2) {
+    // Check to see if the addresses overlap.
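+    // The byte ranges [Offset1, Offset1+Size1) and [Offset2, Offset2+Size2)
+    // are disjoint only if one of them ends at or before the other begins.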
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+  }
+  
+  // If we know both bases then they can't alias.
+  if (KnownBase1 && KnownBase2) return false;
+
+  if (CombinerGlobalAA) {
+    // Use alias analysis information.
+    int Overlap1 = Size1 + SrcValueOffset1 + Offset1;
+    int Overlap2 = Size2 + SrcValueOffset2 + Offset2;
+    AliasAnalysis::AliasResult AAResult = 
+                             AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+    if (AAResult == AliasAnalysis::NoAlias)
+      return false;
+  }
+
+  // Otherwise we have to assume they alias.
+  return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node.  Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+                        SDOperand &Ptr, int64_t &Size,
+                        const Value *&SrcValue, int &SrcValueOffset) {
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    Ptr = LD->getBasePtr();
+    Size = MVT::getSizeInBits(LD->getLoadedVT()) >> 3;
+    SrcValue = LD->getSrcValue();
+    SrcValueOffset = LD->getSrcValueOffset();
+    return true;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    Ptr = ST->getBasePtr();
+    Size = MVT::getSizeInBits(ST->getStoredVT()) >> 3;
+    SrcValue = ST->getSrcValue();
+    SrcValueOffset = ST->getSrcValueOffset();
+  } else {
+    assert(0 && "FindAliasInfo expected a memory operand");
+  }
+  
+  return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDOperand OriginalChain,
+                                   SmallVector<SDOperand, 8> &Aliases) {
+  SmallVector<SDOperand, 8> Chains;     // List of chains to visit.
+  std::set<SDNode *> Visited;           // Visited node set.
+  
+  // Get alias information for node.
+  SDOperand Ptr;
+  int64_t Size;
+  const Value *SrcValue;
+  int SrcValueOffset;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+
+  // Starting off.
+  Chains.push_back(OriginalChain);
+  
+  // Look at each chain and determine if it is an alias.  If so, add it to the
+  // aliases list.  If not, then continue up the chain looking for the next
+  // candidate.  
+  while (!Chains.empty()) {
+    SDOperand Chain = Chains.back();
+    Chains.pop_back();
+    
+    // Don't bother if we've been here before.
+    if (Visited.find(Chain.Val) != Visited.end()) continue;
+    Visited.insert(Chain.Val);
+  
+    switch (Chain.getOpcode()) {
+    case ISD::EntryToken:
+      // Entry token is ideal chain operand, but handled in FindBetterChain.
+      break;
+      
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for Chain.
+      SDOperand OpPtr;
+      int64_t OpSize;
+      const Value *OpSrcValue;
+      int OpSrcValueOffset;
+      bool IsOpLoad = FindAliasInfo(Chain.Val, OpPtr, OpSize,
+                                    OpSrcValue, OpSrcValueOffset);
+      
+      // If chain is alias then stop here.
+      if (!(IsLoad && IsOpLoad) &&
+          isAlias(Ptr, Size, SrcValue, SrcValueOffset,
+                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+        Aliases.push_back(Chain);
+      } else {
+        // Look further up the chain.
+        Chains.push_back(Chain.getOperand(0));      
+        // Clean up old chain.
+        AddToWorkList(Chain.Val);
+      }
+      break;
+    }
+    
+    case ISD::TokenFactor:
+      // We have to check each of the operands of the token factor, so we queue
+      // them up.  Adding the operands to the queue (stack) in reverse order
+      // maintains the original order and increases the likelihood that getNode
+      // will find a matching token factor (CSE).
+      for (unsigned n = Chain.getNumOperands(); n;)
+        Chains.push_back(Chain.getOperand(--n));
+      // Eliminate the token factor if we can.
+      AddToWorkList(Chain.Val);
+      break;
+      
+    default:
+      // For all other instructions we will just have to take what we can get.
+      Aliases.push_back(Chain);
+      break;
+    }
+  }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node).
+SDOperand DAGCombiner::FindBetterChain(SDNode *N, SDOperand OldChain) {
+  SmallVector<SDOperand, 8> Aliases;  // Ops for replacing token factor.
+  
+  // Accumulate all the aliases to this node.
+  GatherAllAliases(N, OldChain, Aliases);
+  
+  if (Aliases.size() == 0) {
+    // If no operands then chain to entry token.
+    return DAG.getEntryNode();
+  } else if (Aliases.size() == 1) {
+    // If a single operand then chain to it.  We don't need to revisit it.
+    return Aliases[0];
+  }
+
+  // Construct a custom tailored token factor.
+  SDOperand NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                                   &Aliases[0], Aliases.size());
+
+  // Make sure the old chain gets cleaned up.
+  if (NewChain != OldChain) AddToWorkList(OldChain.Val);
+  
+  return NewChain;
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(bool RunningAfterLegalize, AliasAnalysis &AA) {
+  if (!RunningAfterLegalize && ViewDAGCombine1)
+    viewGraph();
+  if (RunningAfterLegalize && ViewDAGCombine2)
+    viewGraph();
+  // Run the DAG combiner over the entire DAG.
+  DAGCombiner(*this, AA).Run(RunningAfterLegalize);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000..5bcee35
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,5755 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+                 cl::desc("Pop up a window to show dags before legalize"));
+#else
+static const bool ViewLegalizeDAGs = 0;
+#endif
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it.  This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing.  For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and branch instructions into brcc's.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+
+  // Libcall insertion helpers.
+  
+  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// legalized.  We use this to ensure that calls are properly serialized
+  /// against each other, including inserted libcalls.
+  SDOperand LastCALLSEQ_END;
+  
+  /// IsLegalizingCall - This member is used *only* for purposes of providing
+  /// helpful assertions that a libcall isn't created while another call is 
+  /// being legalized (which could lead to non-serialized call sequences).
+  bool IsLegalizingCall;
+  
+  enum LegalizeAction {
+    Legal,      // The target natively supports this operation.
+    Promote,    // This operation should be executed in a larger type.
+    Expand      // Try to expand this to other ops, otherwise use a libcall.
+  };
+  
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// value type, where the two bits correspond to the LegalizeAction enum.
+  /// This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// LegalizedNodes - For nodes that are of legal width, and that have more
+  /// than one use, this map indicates what regularized operand to use.  This
+  /// allows us to avoid legalizing the same thing more than once.
+  DenseMap<SDOperand, SDOperand> LegalizedNodes;
+
+  /// PromotedNodes - For nodes that are below legal width, and that have more
+  /// than one use, this map indicates what promoted value to use.  This allows
+  /// us to avoid promoting the same thing more than once.
+  DenseMap<SDOperand, SDOperand> PromotedNodes;
+
+  /// ExpandedNodes - For nodes that need to be expanded this map indicates
+  /// which operands are the expanded version of the input.  This allows
+  /// us to avoid expanding the same node more than once.
+  DenseMap<SDOperand, std::pair<SDOperand, SDOperand> > ExpandedNodes;
+
+  /// SplitNodes - For vector nodes that need to be split, this map indicates
+  /// which operands are the split version of the input.  This allows us
+  /// to avoid splitting the same node more than once.
+  std::map<SDOperand, std::pair<SDOperand, SDOperand> > SplitNodes;
+  
+  /// ScalarizedNodes - For nodes that need to be converted from vector types to
+  /// scalar types, this contains the mapping of ones we have already
+  /// processed to the result.
+  std::map<SDOperand, SDOperand> ScalarizedNodes;
+  
+  void AddLegalizedOperand(SDOperand From, SDOperand To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+  void AddPromotedOperand(SDOperand From, SDOperand To) {
+    bool isNew = PromotedNodes.insert(std::make_pair(From, To));
+    assert(isNew && "Got into the map somehow?");
+    // If someone requests legalization of the new node, return itself.
+    LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+public:
+
+  SelectionDAGLegalize(SelectionDAG &DAG);
+
+  /// getTypeAction - Return how we should legalize values of this type, either
+  /// it is already legal or we need to expand it into multiple registers of
+  /// smaller integer type, or we need to promote it to a larger type.
+  LegalizeAction getTypeAction(MVT::ValueType VT) const {
+    return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+  }
+
+  /// isTypeLegal - Return true if this type is legal on this target.
+  ///
+  bool isTypeLegal(MVT::ValueType VT) const {
+    return getTypeAction(VT) == Legal;
+  }
+
+  void LegalizeDAG();
+
+private:
+  /// HandleOp - Legalize, Promote, or Expand the specified operand as
+  /// appropriate for its type.
+  void HandleOp(SDOperand Op);
+    
+  /// LegalizeOp - We know that the specified value has a legal type.
+  /// Recursively ensure that the operands have legal types, then return the
+  /// result.
+  SDOperand LegalizeOp(SDOperand O);
+  
+  /// PromoteOp - Given an operation that produces a value in an invalid type,
+  /// promote it to compute the value into a larger type.  The produced value
+  /// will have the correct bits for the low portion of the register, but no
+  /// guarantee is made about the top bits: it may be zero, sign-extended, or
+  /// garbage.
+  SDOperand PromoteOp(SDOperand O);
+
+  /// ExpandOp - Expand the specified SDOperand into its two component pieces
+  /// Lo&Hi.  Note that the Op MUST be an expanded type.  As a result of this,
+  /// the LegalizeNodes map is filled in for any results that are not expanded,
+  /// the ExpandedNodes map is filled in for any results that are expanded, and
+  /// the Lo/Hi values are returned.   This applies to integer types and Vector
+  /// types.
+  void ExpandOp(SDOperand O, SDOperand &Lo, SDOperand &Hi);
+
+  /// SplitVectorOp - Given an operand of vector type, break it down into
+  /// two smaller values.
+  void SplitVectorOp(SDOperand O, SDOperand &Lo, SDOperand &Hi);
+  
+  /// ScalarizeVectorOp - Given an operand of single-element vector type
+  /// (e.g. v1f32), convert it into the equivalent operation that returns a
+  /// scalar (e.g. f32) value.
+  SDOperand ScalarizeVectorOp(SDOperand O);
+  
+  /// isShuffleLegal - Return true if a vector shuffle is legal with the
+  /// specified mask and type.  Targets can specify exactly which masks they
+  /// support and the code generator is tasked with not creating illegal masks.
+  ///
+  /// Note that this will also return true for shuffles that are promoted to a
+  /// different type.
+  ///
+  /// If this is a legal shuffle, this method returns the (possibly promoted)
+  /// build_vector Mask.  If it's not a legal shuffle, it returns null.
+  SDNode *isShuffleLegal(MVT::ValueType VT, SDOperand Mask) const;
+  
+  bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                    SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+  void LegalizeSetCCOperands(SDOperand &LHS, SDOperand &RHS, SDOperand &CC);
+    
+  SDOperand CreateStackTemporary(MVT::ValueType VT);
+
+  SDOperand ExpandLibCall(const char *Name, SDNode *Node, bool isSigned,
+                          SDOperand &Hi);
+  SDOperand ExpandIntToFP(bool isSigned, MVT::ValueType DestTy,
+                          SDOperand Source);
+
+  SDOperand ExpandBIT_CONVERT(MVT::ValueType DestVT, SDOperand SrcOp);
+  SDOperand ExpandBUILD_VECTOR(SDNode *Node);
+  SDOperand ExpandSCALAR_TO_VECTOR(SDNode *Node);
+  SDOperand ExpandLegalINT_TO_FP(bool isSigned,
+                                 SDOperand LegalOp,
+                                 MVT::ValueType DestVT);
+  SDOperand PromoteLegalINT_TO_FP(SDOperand LegalOp, MVT::ValueType DestVT,
+                                  bool isSigned);
+  SDOperand PromoteLegalFP_TO_INT(SDOperand LegalOp, MVT::ValueType DestVT,
+                                  bool isSigned);
+
+  SDOperand ExpandBSWAP(SDOperand Op);
+  SDOperand ExpandBitCount(unsigned Opc, SDOperand Op);
+  bool ExpandShift(unsigned Opc, SDOperand Op, SDOperand Amt,
+                   SDOperand &Lo, SDOperand &Hi);
+  void ExpandShiftParts(unsigned NodeOp, SDOperand Op, SDOperand Amt,
+                        SDOperand &Lo, SDOperand &Hi);
+
+  SDOperand ExpandEXTRACT_SUBVECTOR(SDOperand Op);
+  SDOperand ExpandEXTRACT_VECTOR_ELT(SDOperand Op);
+  
+  SDOperand getIntPtrConstant(uint64_t Val) {
+    return DAG.getConstant(Val, TLI.getPointerTy());
+  }
+};
+}
+
+/// isShuffleLegal - Return true if a vector shuffle is legal with the
+/// specified mask and type.  Targets can specify exactly which masks they
+/// support and the code generator is tasked with not creating illegal masks.
+///
+/// Note that this will also return true for shuffles that are promoted to a
+/// different type.
+SDNode *SelectionDAGLegalize::isShuffleLegal(MVT::ValueType VT, 
+                                             SDOperand Mask) const {
+  switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) {
+  default: return 0;
+  case TargetLowering::Legal:
+  case TargetLowering::Custom:
+    break;
+  case TargetLowering::Promote: {
+    // If this is promoted to a different type, convert the shuffle mask and
+    // ask if it is legal in the promoted type!
+    MVT::ValueType NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT);
+
+    // If we changed # elements, change the shuffle mask.
+    unsigned NumEltsGrowth =
+      MVT::getVectorNumElements(NVT) / MVT::getVectorNumElements(VT);
+    assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+    if (NumEltsGrowth > 1) {
+      // Renumber the elements.
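+      // e.g. if the element count doubles, original index i expands to the
+      // two new indices 2*i and 2*i+1; undef elements stay undef.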
+      SmallVector<SDOperand, 8> Ops;
+      for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) {
+        SDOperand InOp = Mask.getOperand(i);
+        for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+          if (InOp.getOpcode() == ISD::UNDEF)
+            Ops.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+          else {
+            unsigned InEltNo = cast<ConstantSDNode>(InOp)->getValue();
+            Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, MVT::i32));
+          }
+        }
+      }
+      Mask = DAG.getNode(ISD::BUILD_VECTOR, NVT, &Ops[0], Ops.size());
+    }
+    VT = NVT;
+    break;
+  }
+  }
+  return TLI.isShuffleMaskLegal(Mask, VT) ? Mask.Val : 0;
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
+  : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+    ValueTypeActions(TLI.getValueTypeActions()) {
+  assert(MVT::LAST_VALUETYPE <= 32 &&
+         "Too many value types for ValueTypeActions to hold!");
+}
+
+/// ComputeTopDownOrdering - Compute a top-down ordering of the dag, where Order
+/// contains all of a node's operands before it contains the node.
+static void ComputeTopDownOrdering(SelectionDAG &DAG,
+                                   SmallVector<SDNode*, 64> &Order) {
+
+  DenseMap<SDNode*, unsigned> Visited;
+  std::vector<SDNode*> Worklist;
+  Worklist.reserve(128);
+  
+  // Compute ordering from all of the leaves in the graphs, those (like the
+  // entry node) that have no operands.
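+  // Visited[N] counts how many of N's operands have been emitted so far; a
+  // node is appended to Order only when that count reaches getNumOperands().
+  // Leaves are seeded with (unsigned)-1 so their first pop wraps the count to
+  // 0, matching their zero operand count.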
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    if (I->getNumOperands() == 0) {
+      Visited[I] = 0 - 1U;
+      Worklist.push_back(I);
+    }
+  }
+  
+  while (!Worklist.empty()) {
+    SDNode *N = Worklist.back();
+    Worklist.pop_back();
+    
+    if (++Visited[N] != N->getNumOperands())
+      continue;  // Haven't visited all operands yet
+    
+    Order.push_back(N);
+
+    // Now that N has been emitted, push its users; they will be appended to
+    // the order once all of their operands have been emitted.
+    for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+         UI != E; ++UI)
+      Worklist.push_back(*UI);
+  }
+
+  assert(Order.size() == Visited.size() &&
+         Order.size() == 
+         (unsigned)std::distance(DAG.allnodes_begin(), DAG.allnodes_end()) &&
+         "Error: DAG is cyclic!");
+}
+
+
+void SelectionDAGLegalize::LegalizeDAG() {
+  LastCALLSEQ_END = DAG.getEntryNode();
+  IsLegalizingCall = false;
+  
+  // The legalize process is inherently a bottom-up recursive process (users
+  // legalize their uses before themselves).  Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph.  In
+  // practice however, this causes us to run out of stack space on large basic
+  // blocks.  To avoid this problem, compute an ordering of the nodes where each
+  // node is only legalized after all of its operands are legalized.
+  SmallVector<SDNode*, 64> Order;
+  ComputeTopDownOrdering(DAG, Order);
+  
+  for (unsigned i = 0, e = Order.size(); i != e; ++i)
+    HandleOp(SDOperand(Order[i], 0));
+
+  // Finally, it's possible the root changed.  Get the new root.
+  SDOperand OldRoot = DAG.getRoot();
+  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+  DAG.setRoot(LegalizedNodes[OldRoot]);
+
+  ExpandedNodes.clear();
+  LegalizedNodes.clear();
+  PromotedNodes.clear();
+  SplitNodes.clear();
+  ScalarizedNodes.clear();
+
+  // Remove dead nodes now.
+  DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+  if (Node->getOpcode() == ISD::CALLSEQ_END)
+    return Node;
+  if (Node->use_empty())
+    return 0;   // No CallSeqEnd
+  
+  // The chain is usually at the end.
+  SDOperand TheChain(Node, Node->getNumValues()-1);
+  if (TheChain.getValueType() != MVT::Other) {
+    // Sometimes it's at the beginning.
+    TheChain = SDOperand(Node, 0);
+    if (TheChain.getValueType() != MVT::Other) {
+      // Otherwise, hunt for it.
+      for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+        if (Node->getValueType(i) == MVT::Other) {
+          TheChain = SDOperand(Node, i);
+          break;
+        }
+          
+      // Otherwise, we walked into a node without a chain.  
+      if (TheChain.getValueType() != MVT::Other)
+        return 0;
+    }
+  }
+  
+  for (SDNode::use_iterator UI = Node->use_begin(),
+       E = Node->use_end(); UI != E; ++UI) {
+    
+    // Make sure to only follow users of our token chain.
+    SDNode *User = *UI;
+    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+      if (User->getOperand(i) == TheChain)
+        if (SDNode *Result = FindCallEndFromCallStart(User))
+          return Result;
+  }
+  return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call 
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+  assert(Node && "Didn't find callseq_start for a call??");
+  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+  
+  assert(Node->getOperand(0).getValueType() == MVT::Other &&
+         "Node doesn't have a token chain argument!");
+  return FindCallStartFromCallEnd(Node->getOperand(0).Val);
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the operands of N, looking
+/// to see if any of them can reach Dest.  If none of N's operands can get to
+/// Dest, legalize them, legalize N itself, and return false; otherwise,
+/// return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo.  This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                     SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+  if (N == Dest) return true;  // N certainly leads to Dest :)
+  
+  // If we've already processed this node and it does lead to Dest, there is no
+  // need to reprocess it.
+  if (NodesLeadingTo.count(N)) return true;
+  
+  // If the first result of this node has already been legalized, then it
+  // cannot reach Dest.
+  switch (getTypeAction(N->getValueType(0))) {
+  case Legal: 
+    if (LegalizedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  case Promote:
+    if (PromotedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  case Expand:
+    if (ExpandedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  }
+  
+  // Okay, this node has not already been legalized.  Check and legalize all
+  // operands.  If none lead to Dest, then we can legalize this node.
+  bool OperandsLeadToDest = false;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    OperandsLeadToDest |=     // If an operand leads to Dest, so do we.
+      LegalizeAllNodesNotLeadingTo(N->getOperand(i).Val, Dest, NodesLeadingTo);
+
+  if (OperandsLeadToDest) {
+    NodesLeadingTo.insert(N);
+    return true;
+  }
+
+  // Okay, this node looks safe, legalize it and return false.
+  HandleOp(SDOperand(N, 0));
+  return false;
+}
+
+/// HandleOp - Legalize, Promote, or Expand the specified operand as
+/// appropriate for its type.
+void SelectionDAGLegalize::HandleOp(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  switch (getTypeAction(VT)) {
+  default: assert(0 && "Bad type action!");
+  case Legal:   (void)LegalizeOp(Op); break;
+  case Promote: (void)PromoteOp(Op); break;
+  case Expand:
+    if (!MVT::isVector(VT)) {
+      // If this is an illegal scalar, expand it into its two component
+      // pieces.
+      SDOperand X, Y;
+      ExpandOp(Op, X, Y);
+    } else if (MVT::getVectorNumElements(VT) == 1) {
+      // If this is an illegal single element vector, convert it to a
+      // scalar operation.
+      (void)ScalarizeVectorOp(Op);
+    } else {
+      // Otherwise, this is an illegal multiple element vector.
+      // Split it in half and legalize both parts.
+      SDOperand X, Y;
+      SplitVectorOp(Op, X, Y);
+    }
+    break;
+  }
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDOperand ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+                                  SelectionDAG &DAG, TargetLowering &TLI) {
+  bool Extend = false;
+
+  // If a FP immediate is precise when represented as a float and if the
+  // target can do an extending load from float to double, we put it into
+  // the constant pool as a float, even if it is statically typed as a
+  // double.
+  MVT::ValueType VT = CFP->getValueType(0);
+  bool isDouble = VT == MVT::f64;
+  ConstantFP *LLVMC = ConstantFP::get(isDouble ? Type::DoubleTy :
+                                      Type::FloatTy, CFP->getValue());
+  if (!UseCP) {
+    double Val = LLVMC->getValue();
+    return isDouble
+      ? DAG.getConstant(DoubleToBits(Val), MVT::i64)
+      : DAG.getConstant(FloatToBits(Val), MVT::i32);
+  }
+
+  if (isDouble && CFP->isExactlyValue((float)CFP->getValue()) &&
+      // Only do this if the target has a native EXTLOAD instruction from f32.
+      TLI.isLoadXLegal(ISD::EXTLOAD, MVT::f32)) {
+    LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC,Type::FloatTy));
+    VT = MVT::f32;
+    Extend = true;
+  }
+
+  SDOperand CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+  if (Extend) {
+    return DAG.getExtLoad(ISD::EXTLOAD, MVT::f64, DAG.getEntryNode(),
+                          CPIdx, NULL, 0, MVT::f32);
+  } else {
+    return DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+  }
+}
+
+
+/// ExpandFCOPYSIGNToBitwiseOps - Expands fcopysign to a series of bitwise
+/// operations.
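+/// i.e. copysign(X, Y) is computed on the integer bit patterns as
+/// (bits(X) & ~SignMask) | (sign bit of Y), with Y's sign bit shifted or
+/// sign-extended when the two operands have different widths.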
+static
+SDOperand ExpandFCOPYSIGNToBitwiseOps(SDNode *Node, MVT::ValueType NVT,
+                                      SelectionDAG &DAG, TargetLowering &TLI) {
+  MVT::ValueType VT = Node->getValueType(0);
+  MVT::ValueType SrcVT = Node->getOperand(1).getValueType();
+  assert((SrcVT == MVT::f32 || SrcVT == MVT::f64) &&
+         "fcopysign expansion only supported for f32 and f64");
+  MVT::ValueType SrcNVT = (SrcVT == MVT::f64) ? MVT::i64 : MVT::i32;
+
+  // First get the sign bit of second operand.
+  SDOperand Mask1 = (SrcVT == MVT::f64)
+    ? DAG.getConstantFP(BitsToDouble(1ULL << 63), SrcVT)
+    : DAG.getConstantFP(BitsToFloat(1U << 31), SrcVT);
+  Mask1 = DAG.getNode(ISD::BIT_CONVERT, SrcNVT, Mask1);
+  SDOperand SignBit= DAG.getNode(ISD::BIT_CONVERT, SrcNVT, Node->getOperand(1));
+  SignBit = DAG.getNode(ISD::AND, SrcNVT, SignBit, Mask1);
+  // Shift right or sign-extend it if the two operands have different types.
+  int SizeDiff = MVT::getSizeInBits(SrcNVT) - MVT::getSizeInBits(NVT);
+  if (SizeDiff > 0) {
+    SignBit = DAG.getNode(ISD::SRL, SrcNVT, SignBit,
+                          DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+    SignBit = DAG.getNode(ISD::TRUNCATE, NVT, SignBit);
+  } else if (SizeDiff < 0)
+    SignBit = DAG.getNode(ISD::SIGN_EXTEND, NVT, SignBit);
+
+  // Clear the sign bit of first operand.
+  SDOperand Mask2 = (VT == MVT::f64)
+    ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), VT)
+    : DAG.getConstantFP(BitsToFloat(~(1U << 31)), VT);
+  Mask2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask2);
+  SDOperand Result = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+  Result = DAG.getNode(ISD::AND, NVT, Result, Mask2);
+
+  // Or the value with the sign bit.
+  Result = DAG.getNode(ISD::OR, NVT, Result, SignBit);
+  return Result;
+}
+
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal.  Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
+  assert(isTypeLegal(Op.getValueType()) &&
+         "Caller should expand or promote operands that are not legal!");
+  SDNode *Node = Op.Val;
+
+  // If this operation defines any values that cannot be represented in a
+  // register on this target, make sure to expand or promote them.
+  if (Node->getNumValues() > 1) {
+    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+      if (getTypeAction(Node->getValueType(i)) != Legal) {
+        HandleOp(Op.getValue(i));
+        assert(LegalizedNodes.count(Op) &&
+               "Handling didn't add legal operands!");
+        return LegalizedNodes[Op];
+      }
+  }
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  DenseMap<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op);
+  if (I != LegalizedNodes.end()) return I->second;
+
+  SDOperand Tmp1, Tmp2, Tmp3, Tmp4;
+  SDOperand Result = Op;
+  bool isCustom = false;
+  
+  switch (Node->getOpcode()) {
+  case ISD::FrameIndex:
+  case ISD::EntryToken:
+  case ISD::Register:
+  case ISD::BasicBlock:
+  case ISD::TargetFrameIndex:
+  case ISD::TargetJumpTable:
+  case ISD::TargetConstant:
+  case ISD::TargetConstantFP:
+  case ISD::TargetConstantPool:
+  case ISD::TargetGlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::TargetExternalSymbol:
+  case ISD::VALUETYPE:
+  case ISD::SRCVALUE:
+  case ISD::STRING:
+  case ISD::CONDCODE:
+    // Primitives must all be legal.
+    assert(TLI.isOperationLegal(Node->getOpcode(), Node->getValueType(0)) &&
+           "This must be legal!");
+    break;
+  default:
+    if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+      // If this is a target node, legalize it by legalizing the operands then
+      // passing it through.
+      SmallVector<SDOperand, 8> Ops;
+      for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+        Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+      Result = DAG.UpdateNodeOperands(Result.getValue(0), &Ops[0], Ops.size());
+
+      for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+        AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+      return Result.getValue(Op.ResNo);
+    }
+    // Otherwise this is an unhandled builtin node; report it and abort.
+#ifndef NDEBUG
+    cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to legalize this operator!");
+    abort();
+  case ISD::GLOBAL_OFFSET_TABLE:
+  case ISD::GlobalAddress:
+  case ISD::GlobalTLSAddress:
+  case ISD::ExternalSymbol:
+  case ISD::ConstantPool:
+  case ISD::JumpTable: // Nothing to do.
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Op, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      // FALLTHROUGH if the target doesn't want to lower this op after all.
+    case TargetLowering::Legal:
+      break;
+    }
+    break;
+  case ISD::FRAMEADDR:
+  case ISD::RETURNADDR:
+  case ISD::FRAME_TO_ARGS_OFFSET:
+    // The only option for these nodes is to custom lower them.  If the target
+    // does not custom lower them, then return zero.
+    Tmp1 = TLI.LowerOperation(Op, DAG);
+    if (Tmp1.Val) 
+      Result = Tmp1;
+    else
+      Result = DAG.getConstant(0, TLI.getPointerTy());
+    break;
+  case ISD::EXCEPTIONADDR: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+        unsigned Reg = TLI.getExceptionAddressRegister();
+        Result = DAG.getCopyFromReg(Tmp1, Reg, VT).getValue(Op.ResNo);
+      }
+      break;
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal: {
+      SDOperand Ops[] = { DAG.getConstant(0, VT), Tmp1 };
+      Result = DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+                           Ops, 2).getValue(Op.ResNo);
+      break;
+    }
+    }
+    }
+    break;
+  case ISD::EHSELECTION: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+        unsigned Reg = TLI.getExceptionSelectorRegister();
+        Result = DAG.getCopyFromReg(Tmp2, Reg, VT).getValue(Op.ResNo);
+      }
+      break;
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal: {
+      SDOperand Ops[] = { DAG.getConstant(0, VT), Tmp2 };
+      Result = DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+                           Ops, 2).getValue(Op.ResNo);
+      break;
+    }
+    }
+    }
+    break;
+  case ISD::EH_RETURN: {
+    MVT::ValueType VT = Node->getValueType(0);
+    // The only "good" option for this node is to custom lower it.
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported at all!");
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal:
+      // The target does not know how to lower this; lower it to a no-op.
+      Result = LegalizeOp(Node->getOperand(0));
+      break;
+    }
+    }
+    break;
+  case ISD::AssertSext:
+  case ISD::AssertZext:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::MERGE_VALUES:
+    // Legalize eliminates MERGE_VALUES nodes.
+    Result = Node->getOperand(Op.ResNo);
+    break;
+  case ISD::CopyFromReg:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Result = Op.getValue(0);
+    if (Node->getNumValues() == 2) {
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    } else {
+      assert(Node->getNumValues() == 3 && "Invalid copyfromreg!");
+      if (Node->getNumOperands() == 3) {
+        Tmp2 = LegalizeOp(Node->getOperand(2));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1),Tmp2);
+      } else {
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+      }
+      AddLegalizedOperand(Op.getValue(2), Result.getValue(2));
+    }
+    // Since CopyFromReg produces two values, make sure to remember that we
+    // legalized both of them.
+    AddLegalizedOperand(Op.getValue(0), Result);
+    AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  case ISD::UNDEF: {
+    MVT::ValueType VT = Op.getValueType();
+    switch (TLI.getOperationAction(ISD::UNDEF, VT)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand:
+      if (MVT::isInteger(VT))
+        Result = DAG.getConstant(0, VT);
+      else if (MVT::isFloatingPoint(VT))
+        Result = DAG.getConstantFP(0, VT);
+      else
+        assert(0 && "Unknown value type!");
+      break;
+    case TargetLowering::Legal:
+      break;
+    }
+    break;
+  }
+    
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID: {
+    SmallVector<SDOperand, 8> Ops;
+    for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+      Ops.push_back(LegalizeOp(Node->getOperand(i)));
+    Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+    
+    // Allow the target to custom lower its intrinsics if it wants to.
+    if (TLI.getOperationAction(Node->getOpcode(), MVT::Other) == 
+        TargetLowering::Custom) {
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) Result = Tmp3;
+    }
+
+    if (Result.Val->getNumValues() == 1) break;
+
+    // Must have return value and chain result.
+    assert(Result.Val->getNumValues() == 2 &&
+           "Cannot return more than two values!");
+
+    // Since loads produce two values, make sure to remember that we 
+    // legalized both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  }    
+
+  case ISD::LOCATION:
+    assert(Node->getNumOperands() == 5 && "Invalid LOCATION node!");
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the input chain.
+    
+    switch (TLI.getOperationAction(ISD::LOCATION, MVT::Other)) {
+    case TargetLowering::Promote:
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+      MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+      bool useDEBUG_LOC = TLI.isOperationLegal(ISD::DEBUG_LOC, MVT::Other);
+      bool useLABEL = TLI.isOperationLegal(ISD::LABEL, MVT::Other);
+      
+      if (MMI && (useDEBUG_LOC || useLABEL)) {
+        const std::string &FName =
+          cast<StringSDNode>(Node->getOperand(3))->getValue();
+        const std::string &DirName = 
+          cast<StringSDNode>(Node->getOperand(4))->getValue();
+        unsigned SrcFile = MMI->RecordSource(DirName, FName);
+
+        SmallVector<SDOperand, 8> Ops;
+        Ops.push_back(Tmp1);  // chain
+        SDOperand LineOp = Node->getOperand(1);
+        SDOperand ColOp = Node->getOperand(2);
+        
+        if (useDEBUG_LOC) {
+          Ops.push_back(LineOp);  // line #
+          Ops.push_back(ColOp);  // col #
+          Ops.push_back(DAG.getConstant(SrcFile, MVT::i32));  // source file id
+          Result = DAG.getNode(ISD::DEBUG_LOC, MVT::Other, &Ops[0], Ops.size());
+        } else {
+          unsigned Line = cast<ConstantSDNode>(LineOp)->getValue();
+          unsigned Col = cast<ConstantSDNode>(ColOp)->getValue();
+          unsigned ID = MMI->RecordLabel(Line, Col, SrcFile);
+          Ops.push_back(DAG.getConstant(ID, MVT::i32));
+          Result = DAG.getNode(ISD::LABEL, MVT::Other,&Ops[0],Ops.size());
+        }
+      } else {
+        Result = Tmp1;  // chain
+      }
+      break;
+    }
+    case TargetLowering::Legal:
+      if (Tmp1 != Node->getOperand(0) ||
+          getTypeAction(Node->getOperand(1).getValueType()) == Promote) {
+        SmallVector<SDOperand, 8> Ops;
+        Ops.push_back(Tmp1);
+        if (getTypeAction(Node->getOperand(1).getValueType()) == Legal) {
+          Ops.push_back(Node->getOperand(1));  // line # must be legal.
+          Ops.push_back(Node->getOperand(2));  // col # must be legal.
+        } else {
+          // Otherwise promote them.
+          Ops.push_back(PromoteOp(Node->getOperand(1)));
+          Ops.push_back(PromoteOp(Node->getOperand(2)));
+        }
+        Ops.push_back(Node->getOperand(3));  // filename must be legal.
+        Ops.push_back(Node->getOperand(4));  // working dir # must be legal.
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+      break;
+    }
+    break;
+    
+  case ISD::DEBUG_LOC:
+    assert(Node->getNumOperands() == 4 && "Invalid DEBUG_LOC node!");
+    switch (TLI.getOperationAction(ISD::DEBUG_LOC, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+      Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the line #.
+      Tmp3 = LegalizeOp(Node->getOperand(2));  // Legalize the col #.
+      Tmp4 = LegalizeOp(Node->getOperand(3));  // Legalize the source file id.
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4);
+      break;
+    }
+    break;    
+
+  case ISD::LABEL:
+    assert(Node->getNumOperands() == 2 && "Invalid LABEL node!");
+    switch (TLI.getOperationAction(ISD::LABEL, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+      Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the label id.
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+      break;
+    case TargetLowering::Expand:
+      Result = LegalizeOp(Node->getOperand(0));
+      break;
+    }
+    break;
+
+  case ISD::Constant:
+    // We know we don't need to expand constants here, constants only have one
+    // value and we check that it is fine above.
+
+    // FIXME: Maybe we should handle things like targets that don't support full
+    // 32-bit immediates?
+    break;
+  case ISD::ConstantFP: {
+    // Spill FP immediates to the constant pool if the target cannot directly
+    // codegen them.  Targets often have some immediate values that can be
+    // efficiently generated into an FP register without a load.  We explicitly
+    // leave these constants as ConstantFP nodes for the target to deal with.
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+
+    // Check to see if this FP immediate is already legal.
+    bool isLegal = false;
+    for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+           E = TLI.legal_fpimm_end(); I != E; ++I)
+      if (CFP->isExactlyValue(*I)) {
+        isLegal = true;
+        break;
+      }
+
+    // If this is a legal constant, turn it into a TargetConstantFP node.
+    if (isLegal) {
+      Result = DAG.getTargetConstantFP(CFP->getValue(), CFP->getValueType(0));
+      break;
+    }
+
+    switch (TLI.getOperationAction(ISD::ConstantFP, CFP->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = ExpandConstantFP(CFP, true, DAG, TLI);
+    }
+    break;
+  }
+  case ISD::TokenFactor:
+    if (Node->getNumOperands() == 2) {
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+      Tmp2 = LegalizeOp(Node->getOperand(1));
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    } else if (Node->getNumOperands() == 3) {
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+      Tmp2 = LegalizeOp(Node->getOperand(1));
+      Tmp3 = LegalizeOp(Node->getOperand(2));
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+    } else {
+      SmallVector<SDOperand, 8> Ops;
+      // Legalize the operands.
+      for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+        Ops.push_back(LegalizeOp(Node->getOperand(i)));
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+    }
+    break;
+    
+  case ISD::FORMAL_ARGUMENTS:
+  case ISD::CALL:
+    // The only option for this is to custom lower it.
+    Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG);
+    assert(Tmp3.Val && "Target didn't custom lower this node!");
+    assert(Tmp3.Val->getNumValues() == Result.Val->getNumValues() &&
+           "Lowering call/formal_arguments produced unexpected # results!");
+    
+    // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to
+    // remember that we legalized all of them, so they don't get relegalized.
+    for (unsigned i = 0, e = Tmp3.Val->getNumValues(); i != e; ++i) {
+      Tmp1 = LegalizeOp(Tmp3.getValue(i));
+      if (Op.ResNo == i)
+        Tmp2 = Tmp1;
+      AddLegalizedOperand(SDOperand(Node, i), Tmp1);
+    }
+    return Tmp2;
+        
+  case ISD::BUILD_VECTOR:
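+    // Give the target a chance to custom lower BUILD_VECTOR; otherwise fall
+    // back to ExpandBUILD_VECTOR.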
+    switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = ExpandBUILD_VECTOR(Result.Val);
+      break;
+    }
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // InVec
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // InVal
+    Tmp3 = LegalizeOp(Node->getOperand(2));  // InEltNo
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+    
+    switch (TLI.getOperationAction(ISD::INSERT_VECTOR_ELT,
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand: {
+      // If the insert index is a constant, codegen this as a scalar_to_vector,
+      // then a shuffle that inserts it into the right position in the vector.
+      if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Tmp3)) {
+        SDOperand ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, 
+                                      Tmp1.getValueType(), Tmp2);
+        
+        unsigned NumElts = MVT::getVectorNumElements(Tmp1.getValueType());
+        MVT::ValueType ShufMaskVT = MVT::getIntVectorWithNumElements(NumElts);
+        MVT::ValueType ShufMaskEltVT = MVT::getVectorElementType(ShufMaskVT);
+        
+        // We generate a shuffle of InVec and ScVec, so the shuffle mask should
+        // be 0,1,2,3,4,5... with the appropriate element replaced with elt 0 of
+        // the RHS.
+        SmallVector<SDOperand, 8> ShufOps;
+        for (unsigned i = 0; i != NumElts; ++i) {
+          if (i != InsertPos->getValue())
+            ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT));
+          else
+            ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT));
+        }
+        SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMaskVT,
+                                         &ShufOps[0], ShufOps.size());
+        
+        Result = DAG.getNode(ISD::VECTOR_SHUFFLE, Tmp1.getValueType(),
+                             Tmp1, ScVec, ShufMask);
+        Result = LegalizeOp(Result);
+        break;
+      }
+      
+      // If the target doesn't support this, we have to spill the input vector
+      // to a temporary stack slot, update the element, then reload it.  This
+      // is badness.  We could also load the value into a vector register
+      // (either with a "move to register" or an "extload into register"
+      // instruction), then permute it into place, if the idx is a constant and
+      // is supported by the target.
+      MVT::ValueType VT    = Tmp1.getValueType();
+      MVT::ValueType EltVT = Tmp2.getValueType();
+      MVT::ValueType IdxVT = Tmp3.getValueType();
+      MVT::ValueType PtrVT = TLI.getPointerTy();
+      SDOperand StackPtr = CreateStackTemporary(VT);
+      // Store the vector.
+      SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Tmp1, StackPtr, NULL, 0);
+
+      // Truncate or zero extend offset to target pointer type.
+      unsigned CastOpc = (IdxVT > PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+      Tmp3 = DAG.getNode(CastOpc, PtrVT, Tmp3);
+      // Add the offset to the index.
+      unsigned EltSize = MVT::getSizeInBits(EltVT)/8;
+      Tmp3 = DAG.getNode(ISD::MUL, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
+      SDOperand StackPtr2 = DAG.getNode(ISD::ADD, IdxVT, Tmp3, StackPtr);
+      // Store the scalar value.
+      Ch = DAG.getStore(Ch, Tmp2, StackPtr2, NULL, 0);
+      // Load the updated vector.
+      Result = DAG.getLoad(VT, Ch, StackPtr, NULL, 0);
+      break;
+    }
+    }
+    break;
+  case ISD::SCALAR_TO_VECTOR:
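+    // If the scalar operand's type is not legal, expand the whole node before
+    // consulting the target's preference for SCALAR_TO_VECTOR.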
+    if (!TLI.isTypeLegal(Node->getOperand(0).getValueType())) {
+      Result = LegalizeOp(ExpandSCALAR_TO_VECTOR(Node));
+      break;
+    }
+    
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // InVal
+    Result = DAG.UpdateNodeOperands(Result, Tmp1);
+    switch (TLI.getOperationAction(ISD::SCALAR_TO_VECTOR,
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = LegalizeOp(ExpandSCALAR_TO_VECTOR(Node));
+      break;
+    }
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // Legalize the input vectors,
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // but not the shuffle mask.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+
+    // Allow targets to custom lower the SHUFFLEs they support.
+    switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE,Result.getValueType())) {
+    default: assert(0 && "Unknown operation action!");
+    case TargetLowering::Legal:
+      assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) &&
+             "vector shuffle should not be created if not legal!");
+      break;
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand: {
+      MVT::ValueType VT = Node->getValueType(0);
+      MVT::ValueType EltVT = MVT::getVectorElementType(VT);
+      MVT::ValueType PtrVT = TLI.getPointerTy();
+      SDOperand Mask = Node->getOperand(2);
+      unsigned NumElems = Mask.getNumOperands();
+      SmallVector<SDOperand,8> Ops;
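+      // Lower the shuffle to a series of EXTRACT_VECTOR_ELTs from the two
+      // already-legalized inputs, then rebuild the result with BUILD_VECTOR.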
+      for (unsigned i = 0; i != NumElems; ++i) {
+        SDOperand Arg = Mask.getOperand(i);
+        if (Arg.getOpcode() == ISD::UNDEF) {
+          Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+        } else {
+          assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+          unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+          if (Idx < NumElems)
+            Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1,
+                                      DAG.getConstant(Idx, PtrVT)));
+          else
+            Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2,
+                                      DAG.getConstant(Idx - NumElems, PtrVT)));
+        }
+      }
+      Result = DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+      break;
+    }
+    case TargetLowering::Promote: {
+      // Change base type to a different vector type.
+      MVT::ValueType OVT = Node->getValueType(0);
+      MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+
+      // Cast the two input vectors.
+      Tmp1 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp1);
+      Tmp2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp2);
+      
+      // Convert the shuffle mask to the right # elements.
+      Tmp3 = SDOperand(isShuffleLegal(OVT, Node->getOperand(2)), 0);
+      assert(Tmp3.Val && "Shuffle not legal?");
+      Result = DAG.getNode(ISD::VECTOR_SHUFFLE, NVT, Tmp1, Tmp2, Tmp3);
+      Result = DAG.getNode(ISD::BIT_CONVERT, OVT, Result);
+      break;
+    }
+    }
+    break;
+  
+  case ISD::EXTRACT_VECTOR_ELT:
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    Result = ExpandEXTRACT_VECTOR_ELT(Result);
+    break;
+
+  case ISD::EXTRACT_SUBVECTOR: 
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    Result = ExpandEXTRACT_SUBVECTOR(Result);
+    break;
+    
+  case ISD::CALLSEQ_START: {
+    SDNode *CallEnd = FindCallEndFromCallStart(Node);
+    
+    // Recursively legalize all of the inputs of the call end that do not lead
+    // to this call start.  This ensures that any libcalls that need to be
+    // inserted are inserted *before* the CALLSEQ_START.
+    {
+      SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+      for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+        LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).Val, Node,
+                                     NodesLeadingTo);
+    }
+
+    // Now that we legalized all of the inputs (which may have inserted
+    // libcalls) create the new CALLSEQ_START node.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    // Merge in the last call to ensure that this call starts after the last
+    // call ended.
+    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+      Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+      Tmp1 = LegalizeOp(Tmp1);
+    }
+      
+    // Do not try to legalize the target-specific arguments (#1+).
+    if (Tmp1 != Node->getOperand(0)) {
+      SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+      Ops[0] = Tmp1;
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+    }
+    
+    // Remember that the CALLSEQ_START is legalized.
+    AddLegalizedOperand(Op.getValue(0), Result);
+    if (Node->getNumValues() == 2)    // If this has a flag result, remember it.
+      AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+    
+    // Now that the callseq_start and all of the non-call nodes above this call
+    // sequence have been legalized, legalize the call itself.  During this 
+    // process, no libcalls can/will be inserted, guaranteeing that no calls
+    // can overlap.
+    assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+    SDOperand InCallSEQ = LastCALLSEQ_END;
+    // Note that we are selecting this call!
+    LastCALLSEQ_END = SDOperand(CallEnd, 0);
+    IsLegalizingCall = true;
+    
+    // Legalize the call, starting from the CALLSEQ_END.
+    LegalizeOp(LastCALLSEQ_END);
+    assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+    return Result;
+  }
+  case ISD::CALLSEQ_END:
+    // If the CALLSEQ_START node hasn't been legalized first, legalize it.  This
+    // will cause this node to be legalized as well, and will handle libcalls
+    // correctly.
+    if (LastCALLSEQ_END.Val != Node) {
+      LegalizeOp(SDOperand(FindCallStartFromCallEnd(Node), 0));
+      DenseMap<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op);
+      assert(I != LegalizedNodes.end() &&
+             "Legalizing the call start should have legalized this node!");
+      return I->second;
+    }
+    
+    // Otherwise, the call start has been legalized and everything is going 
+    // according to plan.  Just legalize ourselves normally here.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Do not try to legalize the target-specific arguments (#1+), except for
+    // an optional flag input.
+    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+      if (Tmp1 != Node->getOperand(0)) {
+        SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+    } else {
+      Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+      if (Tmp1 != Node->getOperand(0) ||
+          Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+        SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Ops.back() = Tmp2;
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+    }
+    assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+    // This finishes up call legalization.
+    IsLegalizingCall = false;
+    
+    // If the CALLSEQ_END node has a flag, remember that we legalized it.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    if (Node->getNumValues() == 2)
+      AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  case ISD::DYNAMIC_STACKALLOC: {
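+    // Legalize the chain, size, and alignment operands, then let the target
+    // custom lower the allocation or expand it using the stack pointer.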
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the size.
+    Tmp3 = LegalizeOp(Node->getOperand(2));  // Legalize the alignment.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+
+    Tmp1 = Result.getValue(0);
+    Tmp2 = Result.getValue(1);
+    switch (TLI.getOperationAction(Node->getOpcode(),
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
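+      // Default expansion: subtract the size from the stack pointer and copy
+      // the new value back into the stack pointer register.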
+      unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+      assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+             " not tell us which reg is the stack pointer!");
+      SDOperand Chain = Tmp1.getOperand(0);
+      SDOperand Size  = Tmp2.getOperand(1);
+      SDOperand SP = DAG.getCopyFromReg(Chain, SPReg, Node->getValueType(0));
+      Tmp1 = DAG.getNode(ISD::SUB, Node->getValueType(0), SP, Size);    // Value
+      Tmp2 = DAG.getCopyToReg(SP.getValue(1), SPReg, Tmp1);      // Output chain
+      Tmp1 = LegalizeOp(Tmp1);
+      Tmp2 = LegalizeOp(Tmp2);
+      break;
+    }
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Tmp1, DAG);
+      if (Tmp3.Val) {
+        Tmp1 = LegalizeOp(Tmp3);
+        Tmp2 = LegalizeOp(Tmp3.getValue(1));
+      }
+      break;
+    case TargetLowering::Legal:
+      break;
+    }
+    // Since this op produces two values, make sure to remember that we
+    // legalized both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+    AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+    return Op.ResNo ? Tmp2 : Tmp1;
+  }
+  case ISD::INLINEASM: {
+    SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+    bool Changed = false;
+    // Legalize all of the operands of the inline asm, in case they are nodes
+    // that need to be expanded or something.  Note we skip the asm string and
+    // all of the TargetConstant flags.
+    SDOperand Op = LegalizeOp(Ops[0]);
+    Changed = Op != Ops[0];
+    Ops[0] = Op;
+
+    bool HasInFlag = Ops.back().getValueType() == MVT::Flag;
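+    // Each operand group begins with a TargetConstant flag word; its value,
+    // shifted right by 3, gives the number of operands in the group.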
+    for (unsigned i = 2, e = Ops.size()-HasInFlag; i < e; ) {
+      unsigned NumVals = cast<ConstantSDNode>(Ops[i])->getValue() >> 3;
+      for (++i; NumVals; ++i, --NumVals) {
+        SDOperand Op = LegalizeOp(Ops[i]);
+        if (Op != Ops[i]) {
+          Changed = true;
+          Ops[i] = Op;
+        }
+      }
+    }
+
+    if (HasInFlag) {
+      Op = LegalizeOp(Ops.back());
+      Changed |= Op != Ops.back();
+      Ops.back() = Op;
+    }
+    
+    if (Changed)
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      
+    // INLINEASM returns a chain and a flag; make sure to add both to the map.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  }
+  case ISD::BR:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+    
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::BRIND:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+    
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+    default: assert(0 && "Indirect target must be legal type (pointer)!");
+    case Legal:
+      Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the indirect target.
+      break;
+    }
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    break;
+  case ISD::BR_JT:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the jumptable node.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+
+    switch (TLI.getOperationAction(ISD::BR_JT, MVT::Other)) {  
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Expand: {
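+      // Expand BR_JT into an explicit load of the jump table entry followed by
+      // an indirect branch, adding the PIC relocation base when needed.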
+      SDOperand Chain = Result.getOperand(0);
+      SDOperand Table = Result.getOperand(1);
+      SDOperand Index = Result.getOperand(2);
+
+      MVT::ValueType PTy = TLI.getPointerTy();
+      MachineFunction &MF = DAG.getMachineFunction();
+      unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize();
+      Index= DAG.getNode(ISD::MUL, PTy, Index, DAG.getConstant(EntrySize, PTy));
+      SDOperand Addr = DAG.getNode(ISD::ADD, PTy, Index, Table);
+      
+      SDOperand LD;
+      switch (EntrySize) {
+      default: assert(0 && "Size of jump table not supported yet."); break;
+      case 4: LD = DAG.getLoad(MVT::i32, Chain, Addr, NULL, 0); break;
+      case 8: LD = DAG.getLoad(MVT::i64, Chain, Addr, NULL, 0); break;
+      }
+
+      if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+        // For PIC, the sequence is:
+        // BRIND(load(Jumptable + index) + RelocBase)
+        // RelocBase is the JumpTable on PPC and X86, GOT on Alpha
+        SDOperand Reloc;
+        if (TLI.usesGlobalOffsetTable())
+          Reloc = DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, PTy);
+        else
+          Reloc = Table;
+        Addr = (PTy != MVT::i32) ? DAG.getNode(ISD::SIGN_EXTEND, PTy, LD) : LD;
+        Addr = DAG.getNode(ISD::ADD, PTy, Addr, Reloc);
+        Result = DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), Addr);
+      } else {
+        Result = DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), LD);
+      }
+    }
+    }
+    break;
+  case ISD::BRCOND:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+    case Expand: assert(0 && "It's impossible to expand bools");
+    case Legal:
+      Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the condition.
+      break;
+    case Promote:
+      Tmp2 = PromoteOp(Node->getOperand(1));  // Promote the condition.
+      
+      // The top bits of the promoted condition are not necessarily zero; ensure
+      // that the value is properly zero extended.
+      if (!DAG.MaskedValueIsZero(Tmp2, 
+                                 MVT::getIntVTBitMask(Tmp2.getValueType())^1))
+        Tmp2 = DAG.getZeroExtendInReg(Tmp2, MVT::i1);
+      break;
+    }
+
+    // Basic block destination (Op#2) is always legal.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+      
+    switch (TLI.getOperationAction(ISD::BRCOND, MVT::Other)) {  
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Expand:
+      // Expand brcond's setcc into its constituent parts and create a BR_CC
+      // Node.
+      if (Tmp2.getOpcode() == ISD::SETCC) {
+        Result = DAG.getNode(ISD::BR_CC, MVT::Other, Tmp1, Tmp2.getOperand(2),
+                             Tmp2.getOperand(0), Tmp2.getOperand(1),
+                             Node->getOperand(2));
+      } else {
+        Result = DAG.getNode(ISD::BR_CC, MVT::Other, Tmp1, 
+                             DAG.getCondCode(ISD::SETNE), Tmp2,
+                             DAG.getConstant(0, Tmp2.getValueType()),
+                             Node->getOperand(2));
+      }
+      break;
+    }
+    break;
+  case ISD::BR_CC:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    Tmp2 = Node->getOperand(2);              // LHS 
+    Tmp3 = Node->getOperand(3);              // RHS
+    Tmp4 = Node->getOperand(1);              // CC
+
+    LegalizeSetCCOperands(Tmp2, Tmp3, Tmp4);
+    LastCALLSEQ_END = DAG.getEntryNode();
+
+    // If we didn't get both a LHS and RHS back from LegalizeSetCCOperands,
+    // the LHS is a legal SETCC itself.  In this case, compare the SETCC result
+    // against zero to decide whether to take the branch.
+    if (Tmp3.Val == 0) {
+      Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+      Tmp4 = DAG.getCondCode(ISD::SETNE);
+    }
+    
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp4, Tmp2, Tmp3, 
+                                    Node->getOperand(4));
+      
+    switch (TLI.getOperationAction(ISD::BR_CC, Tmp3.getValueType())) {
+    default: assert(0 && "Unexpected action for BR_CC!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp4 = TLI.LowerOperation(Result, DAG);
+      if (Tmp4.Val) Result = Tmp4;
+      break;
+    }
+    break;
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    Tmp1 = LegalizeOp(LD->getChain());   // Legalize the chain.
+    Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+    ISD::LoadExtType ExtType = LD->getExtensionType();
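+    // Non-extending loads are handled here; extending loads are promoted,
+    // custom lowered, or expanded in the else branch below.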
+    if (ExtType == ISD::NON_EXTLOAD) {
+      MVT::ValueType VT = Node->getValueType(0);
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+      Tmp3 = Result.getValue(0);
+      Tmp4 = Result.getValue(1);
+    
+      switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal: break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+        if (Tmp1.Val) {
+          Tmp3 = LegalizeOp(Tmp1);
+          Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        }
+        break;
+      case TargetLowering::Promote: {
+        // Only promote a load of vector type to another vector type.
+        assert(MVT::isVector(VT) && "Cannot promote this load!");
+        // Change base type to a different vector type.
+        MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+        Tmp1 = DAG.getLoad(NVT, Tmp1, Tmp2, LD->getSrcValue(),
+                           LD->getSrcValueOffset(),
+                           LD->isVolatile(), LD->getAlignment());
+        Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, VT, Tmp1));
+        Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        break;
+      }
+      }
+      // Since loads produce two values, make sure to remember that we 
+      // legalized both of them.
+      AddLegalizedOperand(SDOperand(Node, 0), Tmp3);
+      AddLegalizedOperand(SDOperand(Node, 1), Tmp4);
+      return Op.ResNo ? Tmp4 : Tmp3;
+    } else {
+      MVT::ValueType SrcVT = LD->getLoadedVT();
+      switch (TLI.getLoadXAction(ExtType, SrcVT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Promote:
+        assert(SrcVT == MVT::i1 &&
+               "Can only promote extending LOAD from i1 -> i8!");
+        Result = DAG.getExtLoad(ExtType, Node->getValueType(0), Tmp1, Tmp2,
+                                LD->getSrcValue(), LD->getSrcValueOffset(),
+                                MVT::i8, LD->isVolatile(), LD->getAlignment());
+        Tmp1 = Result.getValue(0);
+        Tmp2 = Result.getValue(1);
+        break;
+      case TargetLowering::Custom:
+        isCustom = true;
+        // FALLTHROUGH
+      case TargetLowering::Legal:
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+        Tmp1 = Result.getValue(0);
+        Tmp2 = Result.getValue(1);
+      
+        if (isCustom) {
+          Tmp3 = TLI.LowerOperation(Result, DAG);
+          if (Tmp3.Val) {
+            Tmp1 = LegalizeOp(Tmp3);
+            Tmp2 = LegalizeOp(Tmp3.getValue(1));
+          }
+        }
+        break;
+      case TargetLowering::Expand:
+        // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+        if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) {
+          SDOperand Load = DAG.getLoad(SrcVT, Tmp1, Tmp2, LD->getSrcValue(),
+                                       LD->getSrcValueOffset(),
+                                       LD->isVolatile(), LD->getAlignment());
+          Result = DAG.getNode(ISD::FP_EXTEND, Node->getValueType(0), Load);
+          Tmp1 = LegalizeOp(Result);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Load.getValue(1));
+          break;
+        }
+        assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!");
+        // Turn the unsupported load into an EXTLOAD followed by an explicit
+        // zero/sign extend inreg.
+        Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0),
+                                Tmp1, Tmp2, LD->getSrcValue(),
+                                LD->getSrcValueOffset(), SrcVT,
+                                LD->isVolatile(), LD->getAlignment());
+        SDOperand ValRes;
+        if (ExtType == ISD::SEXTLOAD)
+          ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+                               Result, DAG.getValueType(SrcVT));
+        else
+          ValRes = DAG.getZeroExtendInReg(Result, SrcVT);
+        Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+        Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes.
+        break;
+      }
+      // Since loads produce two values, make sure to remember that we legalized
+      // both of them.
+      AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+      AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+      return Op.ResNo ? Tmp2 : Tmp1;
+    }
+  }
+  case ISD::EXTRACT_ELEMENT: {
+    MVT::ValueType OpTy = Node->getOperand(0).getValueType();
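+    // EXTRACT_ELEMENT selects the low half (operand 1 == 0) or the high half
+    // (operand 1 == 1) of the input value.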
+    switch (getTypeAction(OpTy)) {
+    default: assert(0 && "EXTRACT_ELEMENT action for type unimplemented!");
+    case Legal:
+      if (cast<ConstantSDNode>(Node->getOperand(1))->getValue()) {
+        // 1 -> Hi
+        Result = DAG.getNode(ISD::SRL, OpTy, Node->getOperand(0),
+                             DAG.getConstant(MVT::getSizeInBits(OpTy)/2, 
+                                             TLI.getShiftAmountTy()));
+        Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Result);
+      } else {
+        // 0 -> Lo
+        Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), 
+                             Node->getOperand(0));
+      }
+      break;
+    case Expand:
+      // Get both the low and high parts.
+      ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+      if (cast<ConstantSDNode>(Node->getOperand(1))->getValue())
+        Result = Tmp2;  // 1 -> Hi
+      else
+        Result = Tmp1;  // 0 -> Lo
+      break;
+    }
+    break;
+  }
+
+  case ISD::CopyToReg:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    assert(isTypeLegal(Node->getOperand(2).getValueType()) &&
+           "Register type must be legal!");
+    // Legalize the incoming value (must be a legal type).
+    Tmp2 = LegalizeOp(Node->getOperand(2));
+    if (Node->getNumValues() == 1) {
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1), Tmp2);
+    } else {
+      assert(Node->getNumValues() == 2 && "Unknown CopyToReg");
+      if (Node->getNumOperands() == 4) {
+        Tmp3 = LegalizeOp(Node->getOperand(3));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1), Tmp2,
+                                        Tmp3);
+      } else {
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1),Tmp2);
+      }
+      
+      // Since this produces two values, make sure to remember that we legalized
+      // both of them.
+      AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+      AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+      return Result;
+    }
+    break;
+
+  case ISD::RET:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    // Ensure that libcalls are emitted before a return.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+      
+    switch (Node->getNumOperands()) {
+    case 3:  // ret val
+      Tmp2 = Node->getOperand(1);
+      Tmp3 = Node->getOperand(2);  // Signedness
+      switch (getTypeAction(Tmp2.getValueType())) {
+      case Legal:
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, LegalizeOp(Tmp2), Tmp3);
+        break;
+      case Expand:
+        if (!MVT::isVector(Tmp2.getValueType())) {
+          SDOperand Lo, Hi;
+          ExpandOp(Tmp2, Lo, Hi);
+
+          // Big endian systems want the hi reg first.
+          if (!TLI.isLittleEndian())
+            std::swap(Lo, Hi);
+          
+          if (Hi.Val)
+            Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3, Hi,Tmp3);
+          else
+            Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3);
+          Result = LegalizeOp(Result);
+        } else {
+          SDNode *InVal = Tmp2.Val;
+          unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+          MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+          
+          // Figure out if there is a simple type corresponding to this Vector
+          // type.  If so, convert to the vector type.
+          MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+          if (TLI.isTypeLegal(TVT)) {
+            // Turn this into a return of the vector type.
+            Tmp2 = LegalizeOp(Tmp2);
+            Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+          } else if (NumElems == 1) {
+            // Turn this into a return of the scalar type.
+            Tmp2 = ScalarizeVectorOp(Tmp2);
+            Tmp2 = LegalizeOp(Tmp2);
+            Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+            
+            // FIXME: Returns of gcc generic vectors smaller than a legal type
+            // should be returned in integer registers!
+            
+            // The scalarized value type may not be legal, e.g. it might require
+            // promotion or expansion.  Relegalize the return.
+            Result = LegalizeOp(Result);
+          } else {
+            // FIXME: Returns of gcc generic vectors larger than a legal vector
+            // type should be returned by reference!
+            SDOperand Lo, Hi;
+            SplitVectorOp(Tmp2, Lo, Hi);
+            Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3, Hi,Tmp3);
+            Result = LegalizeOp(Result);
+          }
+        }
+        break;
+      case Promote:
+        Tmp2 = PromoteOp(Node->getOperand(1));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+        Result = LegalizeOp(Result);
+        break;
+      }
+      break;
+    case 1:  // ret void
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    default: { // ret <values>
+      SmallVector<SDOperand, 8> NewValues;
+      NewValues.push_back(Tmp1);
+      for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2)
+        switch (getTypeAction(Node->getOperand(i).getValueType())) {
+        case Legal:
+          NewValues.push_back(LegalizeOp(Node->getOperand(i)));
+          NewValues.push_back(Node->getOperand(i+1));
+          break;
+        case Expand: {
+          SDOperand Lo, Hi;
+          assert(!MVT::isExtendedVT(Node->getOperand(i).getValueType()) &&
+                 "FIXME: TODO: implement returning non-legal vector types!");
+          ExpandOp(Node->getOperand(i), Lo, Hi);
+          NewValues.push_back(Lo);
+          NewValues.push_back(Node->getOperand(i+1));
+          if (Hi.Val) {
+            NewValues.push_back(Hi);
+            NewValues.push_back(Node->getOperand(i+1));
+          }
+          break;
+        }
+        case Promote:
+          assert(0 && "Can't promote multiple return value yet!");
+        }
+          
+      if (NewValues.size() == Node->getNumOperands())
+        Result = DAG.UpdateNodeOperands(Result, &NewValues[0],NewValues.size());
+      else
+        Result = DAG.getNode(ISD::RET, MVT::Other,
+                             &NewValues[0], NewValues.size());
+      break;
+    }
+    }
+
+    if (Result.getOpcode() == ISD::RET) {
+      switch (TLI.getOperationAction(Result.getOpcode(), MVT::Other)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal: break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+        break;
+      }
+    }
+    break;
+  case ISD::STORE: {
+    StoreSDNode *ST = cast<StoreSDNode>(Node);
+    Tmp1 = LegalizeOp(ST->getChain());    // Legalize the chain.
+    Tmp2 = LegalizeOp(ST->getBasePtr());  // Legalize the pointer.
+    int SVOffset = ST->getSrcValueOffset();
+    unsigned Alignment = ST->getAlignment();
+    bool isVolatile = ST->isVolatile();
+
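+    // Non-truncating stores are handled first; truncating stores are legalized
+    // in the else branch below.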
+    if (!ST->isTruncatingStore()) {
+      // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+      // FIXME: We shouldn't do this for TargetConstantFP's.
+      // FIXME: move this to the DAG Combiner!  Note that we can't regress due
+      // to phase ordering between legalized code and the dag combiner.  This
+      // probably means that we need to integrate dag combiner and legalizer
+      // together.
+      if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+        if (CFP->getValueType(0) == MVT::f32) {
+          Tmp3 = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32);
+        } else {
+          assert(CFP->getValueType(0) == MVT::f64 && "Unknown FP type!");
+          Tmp3 = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64);
+        }
+        Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                              SVOffset, isVolatile, Alignment);
+        break;
+      }
+      
+      switch (getTypeAction(ST->getStoredVT())) {
+      case Legal: {
+        Tmp3 = LegalizeOp(ST->getValue());
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, 
+                                        ST->getOffset());
+
+        MVT::ValueType VT = Tmp3.getValueType();
+        switch (TLI.getOperationAction(ISD::STORE, VT)) {
+        default: assert(0 && "This action is not supported yet!");
+        case TargetLowering::Legal:  break;
+        case TargetLowering::Custom:
+          Tmp1 = TLI.LowerOperation(Result, DAG);
+          if (Tmp1.Val) Result = Tmp1;
+          break;
+        case TargetLowering::Promote:
+          assert(MVT::isVector(VT) && "Unknown legal promote case!");
+          Tmp3 = DAG.getNode(ISD::BIT_CONVERT, 
+                             TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+          Result = DAG.getStore(Tmp1, Tmp3, Tmp2,
+                                ST->getSrcValue(), SVOffset, isVolatile,
+                                Alignment);
+          break;
+        }
+        break;
+      }
+      case Promote:
+        // Truncate the value and store the result.
+        Tmp3 = PromoteOp(ST->getValue());
+        Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                   SVOffset, ST->getStoredVT(),
+                                   isVolatile, Alignment);
+        break;
+
+      case Expand:
+        unsigned IncrementSize = 0;
+        SDOperand Lo, Hi;
+      
+        // If this is a vector type, then we have to calculate the increment as
+        // the product of the element size in bytes, and the number of elements
+        // in the high half of the vector.
+        if (MVT::isVector(ST->getValue().getValueType())) {
+          SDNode *InVal = ST->getValue().Val;
+          unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+          MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+
+          // Figure out if there is a simple type corresponding to this Vector
+          // type.  If so, convert to the vector type.
+          MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+          if (TLI.isTypeLegal(TVT)) {
+            // Turn this into a normal store of the vector type.
+            Tmp3 = LegalizeOp(Node->getOperand(1));
+            Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                  SVOffset, isVolatile, Alignment);
+            Result = LegalizeOp(Result);
+            break;
+          } else if (NumElems == 1) {
+            // Turn this into a normal store of the scalar type.
+            Tmp3 = ScalarizeVectorOp(Node->getOperand(1));
+            Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                  SVOffset, isVolatile, Alignment);
+            // The scalarized value type may not be legal, e.g. it might require
+            // promotion or expansion.  Relegalize the scalar store.
+            Result = LegalizeOp(Result);
+            break;
+          } else {
+            SplitVectorOp(Node->getOperand(1), Lo, Hi);
+            IncrementSize = NumElems/2 * MVT::getSizeInBits(EVT)/8;
+          }
+        } else {
+          ExpandOp(Node->getOperand(1), Lo, Hi);
+          IncrementSize = Hi.Val ? MVT::getSizeInBits(Hi.getValueType())/8 : 0;
+
+          if (!TLI.isLittleEndian())
+            std::swap(Lo, Hi);
+        }
+
+        Lo = DAG.getStore(Tmp1, Lo, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, Alignment);
+
+        if (Hi.Val == NULL) {
+          // Must be int <-> float one-to-one expansion.
+          Result = Lo;
+          break;
+        }
+
+        Tmp2 = DAG.getNode(ISD::ADD, Tmp2.getValueType(), Tmp2,
+                           getIntPtrConstant(IncrementSize));
+        assert(isTypeLegal(Tmp2.getValueType()) &&
+               "Pointers must be legal!");
+        SVOffset += IncrementSize;
+        if (Alignment > IncrementSize)
+          Alignment = IncrementSize;
+        Hi = DAG.getStore(Tmp1, Hi, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, Alignment);
+        Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo, Hi);
+        break;
+      }
+    } else {
+      // Truncating store
+      assert(isTypeLegal(ST->getValue().getValueType()) &&
+             "Cannot handle illegal TRUNCSTORE yet!");
+      Tmp3 = LegalizeOp(ST->getValue());
+    
+      // The only promote case we handle is TRUNCSTORE:i1 X into
+      //   -> TRUNCSTORE:i8 (and X, 1)
+      if (ST->getStoredVT() == MVT::i1 &&
+          TLI.getStoreXAction(MVT::i1) == TargetLowering::Promote) {
+        // Promote the bool to a mask then store.
+        Tmp3 = DAG.getNode(ISD::AND, Tmp3.getValueType(), Tmp3,
+                           DAG.getConstant(1, Tmp3.getValueType()));
+        Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                   SVOffset, MVT::i8,
+                                   isVolatile, Alignment);
+      } else if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+                 Tmp2 != ST->getBasePtr()) {
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+                                        ST->getOffset());
+      }
+
+      MVT::ValueType StVT = cast<StoreSDNode>(Result.Val)->getStoredVT();
+      switch (TLI.getStoreXAction(StVT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal: break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+        break;
+      }
+    }
+    break;
+  }
+  case ISD::PCMARKER:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::STACKSAVE:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1);
+    Tmp1 = Result.getValue(0);
+    Tmp2 = Result.getValue(1);
+    
+    switch (TLI.getOperationAction(ISD::STACKSAVE, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Tmp1 = LegalizeOp(Tmp3);
+        Tmp2 = LegalizeOp(Tmp3.getValue(1));
+      }
+      break;
+    case TargetLowering::Expand:
+      // Expand to CopyFromReg if the target set 
+      // StackPointerRegisterToSaveRestore.
+      if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+        Tmp1 = DAG.getCopyFromReg(Result.getOperand(0), SP,
+                                  Node->getValueType(0));
+        Tmp2 = Tmp1.getValue(1);
+      } else {
+        Tmp1 = DAG.getNode(ISD::UNDEF, Node->getValueType(0));
+        Tmp2 = Node->getOperand(0);
+      }
+      break;
+    }
+
+    // Since stacksave produces two values, make sure to remember that we
+    // legalized both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+    AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+    return Op.ResNo ? Tmp2 : Tmp1;
+
+  case ISD::STACKRESTORE:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+      
+    switch (TLI.getOperationAction(ISD::STACKRESTORE, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Expand:
+      // Expand to CopyToReg if the target set 
+      // StackPointerRegisterToSaveRestore.
+      if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+        Result = DAG.getCopyToReg(Tmp1, SP, Tmp2);
+      } else {
+        Result = Tmp1;
+      }
+      break;
+    }
+    break;
+
+  case ISD::READCYCLECOUNTER:
+    Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain
+    Result = DAG.UpdateNodeOperands(Result, Tmp1);
+    switch (TLI.getOperationAction(ISD::READCYCLECOUNTER,
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      Tmp1 = Result.getValue(0);
+      Tmp2 = Result.getValue(1);
+      break;
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Result, DAG);
+      Tmp1 = LegalizeOp(Result.getValue(0));
+      Tmp2 = LegalizeOp(Result.getValue(1));
+      break;
+    }
+
+    // Since rdcc produces two values, make sure to remember that we legalized
+    // both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+    AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+    return Result;
+
+  case ISD::SELECT:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Expand: assert(0 && "It's impossible to expand bools");
+    case Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the condition.
+      break;
+    case Promote:
+      Tmp1 = PromoteOp(Node->getOperand(0));  // Promote the condition.
+      // Make sure the condition is either zero or one.
+      if (!DAG.MaskedValueIsZero(Tmp1,
+                                 MVT::getIntVTBitMask(Tmp1.getValueType())^1))
+        Tmp1 = DAG.getZeroExtendInReg(Tmp1, MVT::i1);
+      break;
+    }
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // TrueVal
+    Tmp3 = LegalizeOp(Node->getOperand(2));   // FalseVal
+
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+      
+    switch (TLI.getOperationAction(ISD::SELECT, Tmp2.getValueType())) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom: {
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    }
+    case TargetLowering::Expand:
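+      // Expand SELECT into SELECT_CC: reuse the condition's own SETCC operands
+      // when present, otherwise compare the condition against zero.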
+      if (Tmp1.getOpcode() == ISD::SETCC) {
+        Result = DAG.getSelectCC(Tmp1.getOperand(0), Tmp1.getOperand(1), 
+                              Tmp2, Tmp3,
+                              cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+      } else {
+        Result = DAG.getSelectCC(Tmp1, 
+                                 DAG.getConstant(0, Tmp1.getValueType()),
+                                 Tmp2, Tmp3, ISD::SETNE);
+      }
+      break;
+    case TargetLowering::Promote: {
+      MVT::ValueType NVT =
+        TLI.getTypeToPromoteTo(ISD::SELECT, Tmp2.getValueType());
+      unsigned ExtOp, TruncOp;
+      if (MVT::isVector(Tmp2.getValueType())) {
+        ExtOp   = ISD::BIT_CONVERT;
+        TruncOp = ISD::BIT_CONVERT;
+      } else if (MVT::isInteger(Tmp2.getValueType())) {
+        ExtOp   = ISD::ANY_EXTEND;
+        TruncOp = ISD::TRUNCATE;
+      } else {
+        ExtOp   = ISD::FP_EXTEND;
+        TruncOp = ISD::FP_ROUND;
+      }
+      // Promote each of the values to the new type.
+      Tmp2 = DAG.getNode(ExtOp, NVT, Tmp2);
+      Tmp3 = DAG.getNode(ExtOp, NVT, Tmp3);
+      // Perform the larger operation, then round down.
+      Result = DAG.getNode(ISD::SELECT, NVT, Tmp1, Tmp2,Tmp3);
+      Result = DAG.getNode(TruncOp, Node->getValueType(0), Result);
+      break;
+    }
+    }
+    break;
+  case ISD::SELECT_CC: {
+    Tmp1 = Node->getOperand(0);               // LHS
+    Tmp2 = Node->getOperand(1);               // RHS
+    Tmp3 = LegalizeOp(Node->getOperand(2));   // True
+    Tmp4 = LegalizeOp(Node->getOperand(3));   // False
+    SDOperand CC = Node->getOperand(4);
+    
+    LegalizeSetCCOperands(Tmp1, Tmp2, CC);
+    
+    // If we didn't get both a LHS and RHS back from LegalizeSetCCOperands,
+    // the LHS is a legal SETCC itself.  In this case, we need to compare
+    // the result against zero to select between true and false values.
+    if (Tmp2.Val == 0) {
+      Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+      CC = DAG.getCondCode(ISD::SETNE);
+    }
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, CC);
+
+    // Everything is legal, see if we should expand this op or something.
+    switch (TLI.getOperationAction(ISD::SELECT_CC, Tmp3.getValueType())) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    }
+    break;
+  }
+  case ISD::SETCC:
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    Tmp3 = Node->getOperand(2);
+    LegalizeSetCCOperands(Tmp1, Tmp2, Tmp3);
+    
+    // If we had to Expand the SetCC operands into a SELECT node, then it may
+    // not always be possible to return a true LHS & RHS.  In this case, just
+    // return the value we legalized, which is returned in the LHS.
+    if (Tmp2.Val == 0) {
+      Result = Tmp1;
+      break;
+    }
+
+    switch (TLI.getOperationAction(ISD::SETCC, Tmp1.getValueType())) {
+    default: assert(0 && "Cannot handle this action for SETCC yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH.
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+      if (isCustom) {
+        Tmp4 = TLI.LowerOperation(Result, DAG);
+        if (Tmp4.Val) Result = Tmp4;
+      }
+      break;
+    case TargetLowering::Promote: {
+      // First step: figure out the appropriate operation to use.  SETCC is
+      // allowed to be unsupported for some legal data types; mostly this
+      // targets FP.
+      MVT::ValueType NewInTy = Node->getOperand(0).getValueType();
+      MVT::ValueType OldVT = NewInTy;
+      OldVT = OldVT;  // Suppress an unused-variable warning in NDEBUG builds.
+
+      // Scan for the appropriate larger type to use.
+      while (1) {
+        NewInTy = (MVT::ValueType)(NewInTy+1);
+
+        assert(MVT::isInteger(NewInTy) == MVT::isInteger(OldVT) &&
+               "Fell off of the edge of the integer world");
+        assert(MVT::isFloatingPoint(NewInTy) == MVT::isFloatingPoint(OldVT) &&
+               "Fell off of the edge of the floating point world");
+          
+        // If the target supports SETCC of this type, use it.
+        if (TLI.isOperationLegal(ISD::SETCC, NewInTy))
+          break;
+      }
+      if (MVT::isInteger(NewInTy))
+        assert(0 && "Cannot promote Legal Integer SETCC yet");
+      else {
+        Tmp1 = DAG.getNode(ISD::FP_EXTEND, NewInTy, Tmp1);
+        Tmp2 = DAG.getNode(ISD::FP_EXTEND, NewInTy, Tmp2);
+      }
+      Tmp1 = LegalizeOp(Tmp1);
+      Tmp2 = LegalizeOp(Tmp2);
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+      Result = LegalizeOp(Result);
+      break;
+    }
+    case TargetLowering::Expand:
+      // Expand a setcc node into a select_cc of the same condition, lhs, and
+      // rhs that selects between const 1 (true) and const 0 (false).
+      MVT::ValueType VT = Node->getValueType(0);
+      Result = DAG.getNode(ISD::SELECT_CC, VT, Tmp1, Tmp2, 
+                           DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                           Tmp3);
+      break;
+    }
+    break;
+  case ISD::MEMSET:
+  case ISD::MEMCPY:
+  case ISD::MEMMOVE: {
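+    // Legalize the chain, pointer, value/source, length, and alignment
+    // operands, then let the target lower the node or fall back to a libcall.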
+    Tmp1 = LegalizeOp(Node->getOperand(0));      // Chain
+    Tmp2 = LegalizeOp(Node->getOperand(1));      // Pointer
+
+    if (Node->getOpcode() == ISD::MEMSET) {      // memset = ubyte
+      switch (getTypeAction(Node->getOperand(2).getValueType())) {
+      case Expand: assert(0 && "Cannot expand a byte!");
+      case Legal:
+        Tmp3 = LegalizeOp(Node->getOperand(2));
+        break;
+      case Promote:
+        Tmp3 = PromoteOp(Node->getOperand(2));
+        break;
+      }
+    } else {
+      Tmp3 = LegalizeOp(Node->getOperand(2));    // memcpy/move = pointer
+    }
+
+    SDOperand Tmp4;
+    switch (getTypeAction(Node->getOperand(3).getValueType())) {
+    case Expand: {
+      // The length type is too big; just take the lo-part of the length.
+      SDOperand HiPart;
+      ExpandOp(Node->getOperand(3), Tmp4, HiPart);
+      break;
+    }
+    case Legal:
+      Tmp4 = LegalizeOp(Node->getOperand(3));
+      break;
+    case Promote:
+      Tmp4 = PromoteOp(Node->getOperand(3));
+      break;
+    }
+
+    SDOperand Tmp5;
+    switch (getTypeAction(Node->getOperand(4).getValueType())) {  // uint
+    case Expand: assert(0 && "Cannot expand this yet!");
+    case Legal:
+      Tmp5 = LegalizeOp(Node->getOperand(4));
+      break;
+    case Promote:
+      Tmp5 = PromoteOp(Node->getOperand(4));
+      break;
+    }
+
+    switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
+    default: assert(0 && "This action not implemented for this operation!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, Tmp5);
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand: {
+      // Otherwise, the target does not support this operation.  Lower the
+      // operation to an explicit libcall as appropriate.
+      MVT::ValueType IntPtr = TLI.getPointerTy();
+      const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+      TargetLowering::ArgListTy Args;
+      TargetLowering::ArgListEntry Entry;
+
+      const char *FnName = 0;
+      if (Node->getOpcode() == ISD::MEMSET) {
+        Entry.Node = Tmp2; Entry.Ty = IntPtrTy;
+        Args.push_back(Entry);
+        // Extend (or truncate) the (previously legalized) ubyte argument so it
+        // is passed to the call as an i32 value.
+        if (Tmp3.getValueType() > MVT::i32)
+          Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3);
+        else
+          Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3);
+        Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+        Args.push_back(Entry);
+        Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+        Args.push_back(Entry);
+
+        FnName = "memset";
+      } else if (Node->getOpcode() == ISD::MEMCPY ||
+                 Node->getOpcode() == ISD::MEMMOVE) {
+        Entry.Ty = IntPtrTy;
+        Entry.Node = Tmp2; Args.push_back(Entry);
+        Entry.Node = Tmp3; Args.push_back(Entry);
+        Entry.Node = Tmp4; Args.push_back(Entry);
+        FnName = Node->getOpcode() == ISD::MEMMOVE ? "memmove" : "memcpy";
+      } else {
+        assert(0 && "Unknown op!");
+      }
+
+      std::pair<SDOperand,SDOperand> CallResult =
+        TLI.LowerCallTo(Tmp1, Type::VoidTy, false, false, CallingConv::C, false,
+                        DAG.getExternalSymbol(FnName, IntPtr), Args, DAG);
+      Result = CallResult.second;
+      break;
+    }
+    }
+    break;
+  }
+
+  case ISD::SHL_PARTS:
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS: {
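+    // {SHL,SRA,SRL}_PARTS implement multi-register shifts: they take lo, hi,
+    // and shift-amount operands and produce lo and hi results.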
+    SmallVector<SDOperand, 8> Ops;
+    bool Changed = false;
+    for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+      Ops.push_back(LegalizeOp(Node->getOperand(i)));
+      Changed |= Ops.back() != Node->getOperand(i);
+    }
+    if (Changed)
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+
+    switch (TLI.getOperationAction(Node->getOpcode(),
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) {
+        SDOperand Tmp2, RetVal(0, 0);
+        for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+          Tmp2 = LegalizeOp(Tmp1.getValue(i));
+          AddLegalizedOperand(SDOperand(Node, i), Tmp2);
+          if (i == Op.ResNo)
+            RetVal = Tmp2;
+        }
+        assert(RetVal.Val && "Illegal result number");
+        return RetVal;
+      }
+      break;
+    }
+
+    // Since these produce multiple values, make sure to remember that we
+    // legalized all of them.
+    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+      AddLegalizedOperand(SDOperand(Node, i), Result.getValue(i));
+    return Result.getValue(Op.ResNo);
+  }
+
+    // Binary operators
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
+  case ISD::UDIV:
+  case ISD::SDIV:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+    case Expand: assert(0 && "Not possible");
+    case Legal:
+      Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the RHS.
+      break;
+    case Promote:
+      Tmp2 = PromoteOp(Node->getOperand(1));  // Promote the RHS.
+      break;
+    }
+    
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+      
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default: assert(0 && "BinOp legalize operation not supported");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Expand: {
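+      // Expanded i32 integer divides become libcalls; anything else reaching
+      // Expand must be a vector op, which is unrolled into scalar code below.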
+      if (Node->getValueType(0) == MVT::i32) {
+        switch (Node->getOpcode()) {
+        default:  assert(0 && "Do not know how to expand this integer BinOp!");
+        case ISD::UDIV:
+        case ISD::SDIV: {
+          RTLIB::Libcall LC = Node->getOpcode() == ISD::UDIV
+            ? RTLIB::UDIV_I32 : RTLIB::SDIV_I32;
+          SDOperand Dummy;
+          bool isSigned = Node->getOpcode() == ISD::SDIV;
+          Result = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Dummy);
+          break;
+        }
+        }
+        break;
+      }
+
+      assert(MVT::isVector(Node->getValueType(0)) &&
+             "Cannot expand this binary operator!");
+      // Expand the operation into a bunch of nasty scalar code.
+      SmallVector<SDOperand, 8> Ops;
+      MVT::ValueType EltVT = MVT::getVectorElementType(Node->getValueType(0));
+      MVT::ValueType PtrVT = TLI.getPointerTy();
+      for (unsigned i = 0, e = MVT::getVectorNumElements(Node->getValueType(0));
+           i != e; ++i) {
+        SDOperand Idx = DAG.getConstant(i, PtrVT);
+        SDOperand LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1, Idx);
+        SDOperand RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2, Idx);
+        Ops.push_back(DAG.getNode(Node->getOpcode(), EltVT, LHS, RHS));
+      }
+      Result = DAG.getNode(ISD::BUILD_VECTOR, Node->getValueType(0), 
+                           &Ops[0], Ops.size());
+      break;
+    }
+    case TargetLowering::Promote: {
+      switch (Node->getOpcode()) {
+      default:  assert(0 && "Do not know how to promote this BinOp!");
+      case ISD::AND:
+      case ISD::OR:
+      case ISD::XOR: {
+        MVT::ValueType OVT = Node->getValueType(0);
+        MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+        assert(MVT::isVector(OVT) && "Cannot promote this BinOp!");
+        // Bit convert each of the values to the new type.
+        Tmp1 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp1);
+        Tmp2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp2);
+        Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+        // Bit convert the result back to the original type.
+        Result = DAG.getNode(ISD::BIT_CONVERT, OVT, Result);
+        break;
+      }
+      }
+    }
+    }
+    break;
+    
+  case ISD::FCOPYSIGN:  // FCOPYSIGN does not require LHS/RHS to match type!
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+      case Expand: assert(0 && "Not possible");
+      case Legal:
+        Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the RHS.
+        break;
+      case Promote:
+        Tmp2 = PromoteOp(Node->getOperand(1));  // Promote the RHS.
+        break;
+    }
+      
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default: assert(0 && "Operation not supported");
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Legal: break;
+    case TargetLowering::Expand: {
+      // If this target supports fabs/fneg natively and select is cheap,
+      // do this efficiently.
+      if (!TLI.isSelectExpensive() &&
+          TLI.getOperationAction(ISD::FABS, Tmp1.getValueType()) ==
+          TargetLowering::Legal &&
+          TLI.getOperationAction(ISD::FNEG, Tmp1.getValueType()) ==
+          TargetLowering::Legal) {
+        // Get the sign bit of the RHS.
+        MVT::ValueType IVT = 
+          Tmp2.getValueType() == MVT::f32 ? MVT::i32 : MVT::i64;
+        SDOperand SignBit = DAG.getNode(ISD::BIT_CONVERT, IVT, Tmp2);
+        SignBit = DAG.getSetCC(TLI.getSetCCResultTy(),
+                               SignBit, DAG.getConstant(0, IVT), ISD::SETLT);
+        // Get the absolute value of the result.
+        SDOperand AbsVal = DAG.getNode(ISD::FABS, Tmp1.getValueType(), Tmp1);
+        // Select between the nabs and abs value based on the sign bit of
+        // the input.
+        Result = DAG.getNode(ISD::SELECT, AbsVal.getValueType(), SignBit,
+                             DAG.getNode(ISD::FNEG, AbsVal.getValueType(), 
+                                         AbsVal),
+                             AbsVal);
+        Result = LegalizeOp(Result);
+        break;
+      }
+      
+      // Otherwise, do bitwise ops!
+      MVT::ValueType NVT = 
+        Node->getValueType(0) == MVT::f32 ? MVT::i32 : MVT::i64;
+      Result = ExpandFCOPYSIGNToBitwiseOps(Node, NVT, DAG, TLI);
+      Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), Result);
+      Result = LegalizeOp(Result);
+      break;
+    }
+    }
+    break;
+    
+  case ISD::ADDC:
+  case ISD::SUBC:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    // Since this produces two values, make sure to remember that we legalized
+    // both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result;
+
+  case ISD::ADDE:
+  case ISD::SUBE:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    Tmp3 = LegalizeOp(Node->getOperand(2));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+    // Since this produces two values, make sure to remember that we legalized
+    // both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result;
+    
+  case ISD::BUILD_PAIR: {
+    MVT::ValueType PairTy = Node->getValueType(0);
+    // TODO: handle the case where the Lo and Hi operands are not of legal type
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // Lo
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // Hi
+    switch (TLI.getOperationAction(ISD::BUILD_PAIR, PairTy)) {
+    case TargetLowering::Promote:
+    case TargetLowering::Custom:
+      assert(0 && "Cannot promote/custom this yet!");
+    case TargetLowering::Legal:
+      if (Tmp1 != Node->getOperand(0) || Tmp2 != Node->getOperand(1))
+        Result = DAG.getNode(ISD::BUILD_PAIR, PairTy, Tmp1, Tmp2);
+      break;
+    case TargetLowering::Expand:
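+      // Construct the pair manually: zero extend Lo, shift Hi into the high
+      // half, and OR the two together.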
+      Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, PairTy, Tmp1);
+      Tmp2 = DAG.getNode(ISD::ANY_EXTEND, PairTy, Tmp2);
+      Tmp2 = DAG.getNode(ISD::SHL, PairTy, Tmp2,
+                         DAG.getConstant(MVT::getSizeInBits(PairTy)/2, 
+                                         TLI.getShiftAmountTy()));
+      Result = DAG.getNode(ISD::OR, PairTy, Tmp1, Tmp2);
+      break;
+    }
+    break;
+  }
+
+  case ISD::UREM:
+  case ISD::SREM:
+  case ISD::FREM:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // RHS
+
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    case TargetLowering::Promote: assert(0 && "Cannot promote this yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand:
+      unsigned DivOpc= (Node->getOpcode() == ISD::UREM) ? ISD::UDIV : ISD::SDIV;
+      bool isSigned = DivOpc == ISD::SDIV;
+      if (MVT::isInteger(Node->getValueType(0))) {
+        if (TLI.getOperationAction(DivOpc, Node->getValueType(0)) ==
+            TargetLowering::Legal) {
+          // X % Y -> X-X/Y*Y
+          MVT::ValueType VT = Node->getValueType(0);
+          Result = DAG.getNode(DivOpc, VT, Tmp1, Tmp2);
+          Result = DAG.getNode(ISD::MUL, VT, Result, Tmp2);
+          Result = DAG.getNode(ISD::SUB, VT, Tmp1, Result);
+        } else {
+          assert(Node->getValueType(0) == MVT::i32 &&
+                 "Cannot expand this binary operator!");
+          RTLIB::Libcall LC = Node->getOpcode() == ISD::UREM
+            ? RTLIB::UREM_I32 : RTLIB::SREM_I32;
+          SDOperand Dummy;
+          Result = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Dummy);
+        }
+      } else {
+        // Floating point mod -> fmod libcall.
+        RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32
+          ? RTLIB::REM_F32 : RTLIB::REM_F64;
+        SDOperand Dummy;
+        Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                               false/*sign irrelevant*/, Dummy);
+      }
+      break;
+    }
+    break;
+  case ISD::VAARG: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.
+
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+      Result = Result.getValue(0);
+      Tmp1 = Result.getValue(1);
+
+      if (isCustom) {
+        Tmp2 = TLI.LowerOperation(Result, DAG);
+        if (Tmp2.Val) {
+          Result = LegalizeOp(Tmp2);
+          Tmp1 = LegalizeOp(Tmp2.getValue(1));
+        }
+      }
+      break;
+    case TargetLowering::Expand: {
+      SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2));
+      SDOperand VAList = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp2,
+                                     SV->getValue(), SV->getOffset());
+      // Increment the pointer, VAList, to the next vaarg
+      Tmp3 = DAG.getNode(ISD::ADD, TLI.getPointerTy(), VAList, 
+                         DAG.getConstant(MVT::getSizeInBits(VT)/8, 
+                                         TLI.getPointerTy()));
+      // Store the incremented VAList to the legalized pointer
+      Tmp3 = DAG.getStore(VAList.getValue(1), Tmp3, Tmp2, SV->getValue(),
+                          SV->getOffset());
+      // Load the actual argument out of the pointer VAList
+      Result = DAG.getLoad(VT, Tmp3, VAList, NULL, 0);
+      Tmp1 = LegalizeOp(Result.getValue(1));
+      Result = LegalizeOp(Result);
+      break;
+    }
+    }
+    // Since VAARG produces two values, make sure to remember that we 
+    // legalized both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Result);
+    AddLegalizedOperand(SDOperand(Node, 1), Tmp1);
+    return Op.ResNo ? Tmp1 : Result;
+  }
+    
+  case ISD::VACOPY: 
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the dest pointer.
+    Tmp3 = LegalizeOp(Node->getOperand(2));  // Legalize the source pointer.
+
+    switch (TLI.getOperationAction(ISD::VACOPY, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3,
+                                      Node->getOperand(3), Node->getOperand(4));
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand:
+      // This defaults to loading a pointer from the input and storing it to the
+      // output, returning the chain.
+      SrcValueSDNode *SVD = cast<SrcValueSDNode>(Node->getOperand(3));
+      SrcValueSDNode *SVS = cast<SrcValueSDNode>(Node->getOperand(4));
+      Tmp4 = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp3, SVD->getValue(),
+                         SVD->getOffset());
+      Result = DAG.getStore(Tmp4.getValue(1), Tmp4, Tmp2, SVS->getValue(),
+                            SVS->getOffset());
+      break;
+    }
+    break;
+
+  case ISD::VAEND: 
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.
+
+    switch (TLI.getOperationAction(ISD::VAEND, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand:
+      Result = Tmp1; // Default to a no-op, return the chain
+      break;
+    }
+    break;
+    
+  case ISD::VASTART: 
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.
+
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+    
+    switch (TLI.getOperationAction(ISD::VASTART, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    }
+    break;
+    
+  case ISD::ROTL:
+  case ISD::ROTR:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // RHS
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default:
+      assert(0 && "ROTL/ROTR legalize operation not supported");
+      break;
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Promote:
+      assert(0 && "Do not know how to promote ROTL/ROTR");
+      break;
+    case TargetLowering::Expand:
+      assert(0 && "Do not know how to expand ROTL/ROTR");
+      break;
+    }
+    break;
+    
+  case ISD::BSWAP:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // Op
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    case TargetLowering::Custom:
+      assert(0 && "Cannot custom legalize this yet!");
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    case TargetLowering::Promote: {
+      MVT::ValueType OVT = Tmp1.getValueType();
+      MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+      unsigned DiffBits = MVT::getSizeInBits(NVT) - MVT::getSizeInBits(OVT);
+
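+      // Zero extend into the wider type and byte swap there; that leaves the
+      // original bytes in the high part, so shift them back down afterwards.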
+      Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
+      Tmp1 = DAG.getNode(ISD::BSWAP, NVT, Tmp1);
+      Result = DAG.getNode(ISD::SRL, NVT, Tmp1,
+                           DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+      break;
+    }
+    case TargetLowering::Expand:
+      Result = ExpandBSWAP(Tmp1);
+      break;
+    }
+    break;
+    
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // Op
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    case TargetLowering::Custom: assert(0 && "Cannot custom handle this yet!");
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    case TargetLowering::Promote: {
+      MVT::ValueType OVT = Tmp1.getValueType();
+      MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+
+      // Zero extend the argument.
+      Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
+      // Perform the larger operation, then subtract if needed.
+      Tmp1 = DAG.getNode(Node->getOpcode(), Node->getValueType(0), Tmp1);
+      switch (Node->getOpcode()) {
+      case ISD::CTPOP:
+        Result = Tmp1;
+        break;
+      case ISD::CTTZ:
+        // If Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+        Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1,
+                            DAG.getConstant(MVT::getSizeInBits(NVT), NVT),
+                            ISD::SETEQ);
+        Result = DAG.getNode(ISD::SELECT, NVT, Tmp2,
+                           DAG.getConstant(MVT::getSizeInBits(OVT),NVT), Tmp1);
+        break;
+      case ISD::CTLZ:
+        // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+        Result = DAG.getNode(ISD::SUB, NVT, Tmp1,
+                             DAG.getConstant(MVT::getSizeInBits(NVT) -
+                                             MVT::getSizeInBits(OVT), NVT));
+        break;
+      }
+      break;
+    }
+    case TargetLowering::Expand:
+      Result = ExpandBitCount(Node->getOpcode(), Tmp1);
+      break;
+    }
+    break;
+
+    // Unary operators
+  case ISD::FABS:
+  case ISD::FNEG:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    case TargetLowering::Promote:
+    case TargetLowering::Custom:
+     isCustom = true;
+     // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand:
+      switch (Node->getOpcode()) {
+      default: assert(0 && "Unreachable!");
+      case ISD::FNEG:
+        // Expand Y = FNEG(X) ->  Y = SUB -0.0, X
+        Tmp2 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+        Result = DAG.getNode(ISD::FSUB, Node->getValueType(0), Tmp2, Tmp1);
+        break;
+      case ISD::FABS: {
+        // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+        MVT::ValueType VT = Node->getValueType(0);
+        Tmp2 = DAG.getConstantFP(0.0, VT);
+        Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1, Tmp2, ISD::SETUGT);
+        Tmp3 = DAG.getNode(ISD::FNEG, VT, Tmp1);
+        Result = DAG.getNode(ISD::SELECT, VT, Tmp2, Tmp1, Tmp3);
+        break;
+      }
+      case ISD::FSQRT:
+      case ISD::FSIN:
+      case ISD::FCOS: {
+        MVT::ValueType VT = Node->getValueType(0);
+        RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+        switch(Node->getOpcode()) {
+        case ISD::FSQRT:
+          LC = VT == MVT::f32 ? RTLIB::SQRT_F32 : RTLIB::SQRT_F64;
+          break;
+        case ISD::FSIN:
+          LC = VT == MVT::f32 ? RTLIB::SIN_F32 : RTLIB::SIN_F64;
+          break;
+        case ISD::FCOS:
+          LC = VT == MVT::f32 ? RTLIB::COS_F32 : RTLIB::COS_F64;
+          break;
+        default: assert(0 && "Unreachable!");
+        }
+        SDOperand Dummy;
+        Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                               false/*sign irrelevant*/, Dummy);
+        break;
+      }
+      }
+      break;
+    }
+    break;
+  case ISD::FPOWI: {
+    // We always lower FPOWI into a libcall.  No target supports it yet.
+    RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32
+      ? RTLIB::POWI_F32 : RTLIB::POWI_F64;
+    SDOperand Dummy;
+    Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                           false/*sign irrelevant*/, Dummy);
+    break;
+  }
+  case ISD::BIT_CONVERT:
+    if (!isTypeLegal(Node->getOperand(0).getValueType())) {
+      Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+    } else if (MVT::isVector(Op.getOperand(0).getValueType())) {
+      // The input has to be a vector type; we have to either scalarize it, pack
+      // it, or convert it, based on whether the input vector type is legal.
+      SDNode *InVal = Node->getOperand(0).Val;
+      unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+      MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+    
+      // Figure out if there is a simple type corresponding to this Vector
+      // type.  If so, convert to the vector type.
+      MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+      if (TLI.isTypeLegal(TVT)) {
+        // Turn this into a bit convert of the vector input.
+        Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), 
+                             LegalizeOp(Node->getOperand(0)));
+        break;
+      } else if (NumElems == 1) {
+        // Turn this into a bit convert of the scalar input.
+        Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), 
+                             ScalarizeVectorOp(Node->getOperand(0)));
+        break;
+      } else {
+        // FIXME: UNIMP!  Store then reload
+        assert(0 && "Cast from unsupported vector type not implemented yet!");
+      }
+    } else {
+      switch (TLI.getOperationAction(ISD::BIT_CONVERT,
+                                     Node->getOperand(0).getValueType())) {
+      default: assert(0 && "Unknown operation action!");
+      case TargetLowering::Expand:
+        Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+        break;
+      case TargetLowering::Legal:
+        Tmp1 = LegalizeOp(Node->getOperand(0));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1);
+        break;
+      }
+    }
+    break;
+      
+    // Conversion operators.  The source and destination have different types.
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP: {
+    bool isSigned = Node->getOpcode() == ISD::SINT_TO_FP;
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      switch (TLI.getOperationAction(Node->getOpcode(),
+                                     Node->getOperand(0).getValueType())) {
+      default: assert(0 && "Unknown operation action!");
+      case TargetLowering::Custom:
+        isCustom = true;
+        // FALLTHROUGH
+      case TargetLowering::Legal:
+        Tmp1 = LegalizeOp(Node->getOperand(0));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1);
+        if (isCustom) {
+          Tmp1 = TLI.LowerOperation(Result, DAG);
+          if (Tmp1.Val) Result = Tmp1;
+        }
+        break;
+      case TargetLowering::Expand:
+        Result = ExpandLegalINT_TO_FP(isSigned,
+                                      LegalizeOp(Node->getOperand(0)),
+                                      Node->getValueType(0));
+        break;
+      case TargetLowering::Promote:
+        Result = PromoteLegalINT_TO_FP(LegalizeOp(Node->getOperand(0)),
+                                       Node->getValueType(0),
+                                       isSigned);
+        break;
+      }
+      break;
+    case Expand:
+      Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP,
+                             Node->getValueType(0), Node->getOperand(0));
+      break;
+    case Promote:
+      Tmp1 = PromoteOp(Node->getOperand(0));
+      if (isSigned) {
+        Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, Tmp1.getValueType(),
+                 Tmp1, DAG.getValueType(Node->getOperand(0).getValueType()));
+      } else {
+        Tmp1 = DAG.getZeroExtendInReg(Tmp1,
+                                      Node->getOperand(0).getValueType());
+      }
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      Result = LegalizeOp(Result);  // The 'op' is not necessarily legal!
+      break;
+    }
+    break;
+  }
+  case ISD::TRUNCATE:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    case Expand:
+      ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+
+      // Since the result is legal, we should just be able to truncate the low
+      // part of the source.
+      Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Tmp1);
+      break;
+    case Promote:
+      Result = PromoteOp(Node->getOperand(0));
+      Result = DAG.getNode(ISD::TRUNCATE, Op.getValueType(), Result);
+      break;
+    }
+    break;
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+
+      switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))){
+      default: assert(0 && "Unknown operation action!");
+      case TargetLowering::Custom:
+        isCustom = true;
+        // FALLTHROUGH
+      case TargetLowering::Legal:
+        Result = DAG.UpdateNodeOperands(Result, Tmp1);
+        if (isCustom) {
+          Tmp1 = TLI.LowerOperation(Result, DAG);
+          if (Tmp1.Val) Result = Tmp1;
+        }
+        break;
+      case TargetLowering::Promote:
+        Result = PromoteLegalFP_TO_INT(Tmp1, Node->getValueType(0),
+                                       Node->getOpcode() == ISD::FP_TO_SINT);
+        break;
+      case TargetLowering::Expand:
+        if (Node->getOpcode() == ISD::FP_TO_UINT) {
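+          // Expand the unsigned conversion in terms of FP_TO_SINT: inputs below
+          // 2^(N-1) convert directly; larger inputs have 2^(N-1) subtracted
+          // first, are converted, and then get the top bit put back with XOR.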
+          SDOperand True, False;
+          MVT::ValueType VT =  Node->getOperand(0).getValueType();
+          MVT::ValueType NVT = Node->getValueType(0);
+          unsigned ShiftAmt = MVT::getSizeInBits(Node->getValueType(0))-1;
+          Tmp2 = DAG.getConstantFP((double)(1ULL << ShiftAmt), VT);
+          Tmp3 = DAG.getSetCC(TLI.getSetCCResultTy(),
+                            Node->getOperand(0), Tmp2, ISD::SETLT);
+          True = DAG.getNode(ISD::FP_TO_SINT, NVT, Node->getOperand(0));
+          False = DAG.getNode(ISD::FP_TO_SINT, NVT,
+                              DAG.getNode(ISD::FSUB, VT, Node->getOperand(0),
+                                          Tmp2));
+          False = DAG.getNode(ISD::XOR, NVT, False, 
+                              DAG.getConstant(1ULL << ShiftAmt, NVT));
+          Result = DAG.getNode(ISD::SELECT, NVT, Tmp3, True, False);
+          break;
+        } else {
+          assert(0 && "Do not know how to expand FP_TO_SINT yet!");
+        }
+        break;
+      }
+      break;
+    case Expand: {
+      // Convert f32 / f64 to i32 / i64.
+      MVT::ValueType VT = Op.getValueType();
+      RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+      switch (Node->getOpcode()) {
+      case ISD::FP_TO_SINT:
+        if (Node->getOperand(0).getValueType() == MVT::f32)
+          LC = (VT == MVT::i32)
+            ? RTLIB::FPTOSINT_F32_I32 : RTLIB::FPTOSINT_F32_I64;
+        else
+          LC = (VT == MVT::i32)
+            ? RTLIB::FPTOSINT_F64_I32 : RTLIB::FPTOSINT_F64_I64;
+        break;
+      case ISD::FP_TO_UINT:
+        if (Node->getOperand(0).getValueType() == MVT::f32)
+          LC = (VT == MVT::i32)
+            ? RTLIB::FPTOUINT_F32_I32 : RTLIB::FPTOUINT_F32_I64;
+        else
+          LC = (VT == MVT::i32)
+            ? RTLIB::FPTOUINT_F64_I32 : RTLIB::FPTOUINT_F64_I64;
+        break;
+      default: assert(0 && "Unreachable!");
+      }
+      SDOperand Dummy;
+      Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                             false/*sign irrelevant*/, Dummy);
+      break;
+    }
+    case Promote:
+      Tmp1 = PromoteOp(Node->getOperand(0));
+      Result = DAG.UpdateNodeOperands(Result, LegalizeOp(Tmp1));
+      Result = LegalizeOp(Result);
+      break;
+    }
+    break;
+
+  case ISD::FP_ROUND:
+    if (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)) == 
+        TargetLowering::Expand) {
+      // The only way we can lower this is to turn it into a TRUNCSTORE,
+      // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+      // NOTE: there is a choice here between constantly creating new stack
+      // slots and always reusing the same one.  We currently always create
+      // new ones, as reuse may inhibit scheduling.
+      MVT::ValueType VT = Op.getValueType();    // 32
+      const Type *Ty = MVT::getTypeForValueType(VT);
+      uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+      unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+      MachineFunction &MF = DAG.getMachineFunction();
+      int SSFI =
+        MF.getFrameInfo()->CreateStackObject(TySize, Align);
+      SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+      Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0),
+                                 StackSlot, NULL, 0, VT);
+      Result = DAG.getLoad(VT, Result, StackSlot, NULL, 0, VT);
+      break;
+    }
+    // FALL THROUGH
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::FP_EXTEND:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Expand: assert(0 && "Shouldn't need to expand other operators here!");
+    case Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    case Promote:
+      switch (Node->getOpcode()) {
+      case ISD::ANY_EXTEND:
+        Tmp1 = PromoteOp(Node->getOperand(0));
+        Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Tmp1);
+        break;
+      case ISD::ZERO_EXTEND:
+        Result = PromoteOp(Node->getOperand(0));
+        Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Result);
+        Result = DAG.getZeroExtendInReg(Result,
+                                        Node->getOperand(0).getValueType());
+        break;
+      case ISD::SIGN_EXTEND:
+        Result = PromoteOp(Node->getOperand(0));
+        Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Result);
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+                             Result,
+                          DAG.getValueType(Node->getOperand(0).getValueType()));
+        break;
+      case ISD::FP_EXTEND:
+        Result = PromoteOp(Node->getOperand(0));
+        if (Result.getValueType() != Op.getValueType())
+          // Dynamically dead while we have only 2 FP types.
+          Result = DAG.getNode(ISD::FP_EXTEND, Op.getValueType(), Result);
+        break;
+      case ISD::FP_ROUND:
+        Result = PromoteOp(Node->getOperand(0));
+        Result = DAG.getNode(Node->getOpcode(), Op.getValueType(), Result);
+        break;
+      }
+    }
+    break;
+  case ISD::FP_ROUND_INREG:
+  case ISD::SIGN_EXTEND_INREG: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    MVT::ValueType ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+
+    // If this operation is not supported, convert it to a shl/shr or load/store
+    // pair.
+    switch (TLI.getOperationAction(Node->getOpcode(), ExtraVT)) {
+    default: assert(0 && "This action not supported for this op yet!");
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+      break;
+    case TargetLowering::Expand:
+      // If this is an integer extend and shifts are supported, do that.
+      if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) {
+        // NOTE: we could fall back on load/store here too for targets without
+        // SAR.  However, it is doubtful that any exist.
+        unsigned BitsDiff = MVT::getSizeInBits(Node->getValueType(0)) -
+                            MVT::getSizeInBits(ExtraVT);
+        SDOperand ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+        Result = DAG.getNode(ISD::SHL, Node->getValueType(0),
+                             Node->getOperand(0), ShiftCst);
+        Result = DAG.getNode(ISD::SRA, Node->getValueType(0),
+                             Result, ShiftCst);
+      } else if (Node->getOpcode() == ISD::FP_ROUND_INREG) {
+        // The only way we can lower this is to turn it into a TRUNCSTORE,
+        // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+        // NOTE: there is a choice here between constantly creating new stack
+        // slots and always reusing the same one.  We currently always create
+        // new ones, as reuse may inhibit scheduling.
+        const Type *Ty = MVT::getTypeForValueType(ExtraVT);
+        uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI =
+          MF.getFrameInfo()->CreateStackObject(TySize, Align);
+        SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0),
+                                   StackSlot, NULL, 0, ExtraVT);
+        Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0),
+                                Result, StackSlot, NULL, 0, ExtraVT);
+      } else {
+        assert(0 && "Unknown op");
+      }
+      break;
+    }
+    break;
+  }
+  }
+  
+  assert(Result.getValueType() == Op.getValueType() &&
+         "Bad legalization!");
+  
+  // Make sure that the generated code is itself legal.
+  if (Result != Op)
+    Result = LegalizeOp(Result);
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+/// PromoteOp - Given an operation that produces a value in an invalid type,
+/// promote it to compute the value in a larger type.  The produced value will
+/// have the correct bits for the low portion of the register, but no guarantee
+/// is made about the top bits: it may be zero, sign-extended, or garbage.
+SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+  assert(getTypeAction(VT) == Promote &&
+         "Caller should expand or legalize operands that are not promotable!");
+  assert(NVT > VT && MVT::isInteger(NVT) == MVT::isInteger(VT) &&
+         "Cannot promote to smaller type!");
+
+  SDOperand Tmp1, Tmp2, Tmp3;
+  SDOperand Result;
+  SDNode *Node = Op.Val;
+
+  DenseMap<SDOperand, SDOperand>::iterator I = PromotedNodes.find(Op);
+  if (I != PromotedNodes.end()) return I->second;
+
+  switch (Node->getOpcode()) {
+  case ISD::CopyFromReg:
+    assert(0 && "CopyFromReg must be legal!");
+  default:
+#ifndef NDEBUG
+    cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to promote this operator!");
+    abort();
+  case ISD::UNDEF:
+    Result = DAG.getNode(ISD::UNDEF, NVT);
+    break;
+  case ISD::Constant:
+    if (VT != MVT::i1)
+      Result = DAG.getNode(ISD::SIGN_EXTEND, NVT, Op);
+    else
+      Result = DAG.getNode(ISD::ZERO_EXTEND, NVT, Op);
+    assert(isa<ConstantSDNode>(Result) && "Didn't constant fold zext?");
+    break;
+  case ISD::ConstantFP:
+    Result = DAG.getNode(ISD::FP_EXTEND, NVT, Op);
+    assert(isa<ConstantFPSDNode>(Result) && "Didn't constant fold fp_extend?");
+    break;
+
+  case ISD::SETCC:
+    assert(isTypeLegal(TLI.getSetCCResultTy()) && "SetCC type is not legal??");
+    Result = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(),Node->getOperand(0),
+                         Node->getOperand(1), Node->getOperand(2));
+    break;
+    
+  case ISD::TRUNCATE:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      Result = LegalizeOp(Node->getOperand(0));
+      assert(Result.getValueType() >= NVT &&
+             "This truncation doesn't make sense!");
+      if (Result.getValueType() > NVT)    // Truncate to NVT instead of VT
+        Result = DAG.getNode(ISD::TRUNCATE, NVT, Result);
+      break;
+    case Promote:
+      // The truncation is not required, because we don't guarantee anything
+      // about high bits anyway.
+      Result = PromoteOp(Node->getOperand(0));
+      break;
+    case Expand:
+      ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+      // Truncate the low part of the expanded value to the result type
+      Result = DAG.getNode(ISD::TRUNCATE, NVT, Tmp1);
+    }
+    break;
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Expand: assert(0 && "BUG: Smaller reg should have been promoted!");
+    case Legal:
+      // Input is legal?  Just do extend all the way to the larger type.
+      Result = DAG.getNode(Node->getOpcode(), NVT, Node->getOperand(0));
+      break;
+    case Promote:
+      // Promote the reg if it's smaller.
+      Result = PromoteOp(Node->getOperand(0));
+      // The high bits are not guaranteed to be anything.  Insert an extend.
+      if (Node->getOpcode() == ISD::SIGN_EXTEND)
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Result,
+                         DAG.getValueType(Node->getOperand(0).getValueType()));
+      else if (Node->getOpcode() == ISD::ZERO_EXTEND)
+        Result = DAG.getZeroExtendInReg(Result,
+                                        Node->getOperand(0).getValueType());
+      break;
+    }
+    break;
+  case ISD::BIT_CONVERT:
+    Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+    Result = PromoteOp(Result);
+    break;
+    
+  case ISD::FP_EXTEND:
+    assert(0 && "Case not implemented.  Dynamically dead with 2 FP types!");
+  case ISD::FP_ROUND:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Expand: assert(0 && "BUG: Cannot expand FP regs!");
+    case Promote:  assert(0 && "Unreachable with 2 FP types!");
+    case Legal:
+      // Input is legal?  Do an FP_ROUND_INREG.
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Node->getOperand(0),
+                           DAG.getValueType(VT));
+      break;
+    }
+    break;
+
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      // No extra round required here.
+      Result = DAG.getNode(Node->getOpcode(), NVT, Node->getOperand(0));
+      break;
+
+    case Promote:
+      Result = PromoteOp(Node->getOperand(0));
+      if (Node->getOpcode() == ISD::SINT_TO_FP)
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+                             Result,
+                         DAG.getValueType(Node->getOperand(0).getValueType()));
+      else
+        Result = DAG.getZeroExtendInReg(Result,
+                                        Node->getOperand(0).getValueType());
+      // No extra round required here.
+      Result = DAG.getNode(Node->getOpcode(), NVT, Result);
+      break;
+    case Expand:
+      Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, NVT,
+                             Node->getOperand(0));
+      // Round if we cannot tolerate excess precision.
+      if (NoExcessFPPrecision)
+        Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                             DAG.getValueType(VT));
+      break;
+    }
+    break;
+
+  case ISD::SIGN_EXTEND_INREG:
+    Result = PromoteOp(Node->getOperand(0));
+    Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Result, 
+                         Node->getOperand(1));
+    break;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+    case Expand:
+      Tmp1 = Node->getOperand(0);
+      break;
+    case Promote:
+      // The promoted input has already been rounded, so we don't have to do
+      // anything special.
+      Tmp1 = PromoteOp(Node->getOperand(0));
+      break;
+    }
+    // If we're promoting a UINT to a larger size, check to see if the new node
+    // will be legal.  If it isn't, check to see if FP_TO_SINT is legal, since
+    // we can use that instead.  This allows us to generate better code for
+    // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not
+    // legal, such as PowerPC.
+    if (Node->getOpcode() == ISD::FP_TO_UINT && 
+        !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+        (TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) ||
+         TLI.getOperationAction(ISD::FP_TO_SINT, NVT)==TargetLowering::Custom)){
+      Result = DAG.getNode(ISD::FP_TO_SINT, NVT, Tmp1);
+    } else {
+      Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+    }
+    break;
+
+  case ISD::FABS:
+  case ISD::FNEG:
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    assert(Tmp1.getValueType() == NVT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+    // NOTE: we do not have to do any extra rounding here for
+    // NoExcessFPPrecision, because we know the input will have the appropriate
+    // precision, and these operations don't modify precision at all.
+    break;
+
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    assert(Tmp1.getValueType() == NVT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+    if (NoExcessFPPrecision)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+
+  case ISD::FPOWI: {
+    // Promote f32 powi to f64 powi.  Note that this could insert a libcall
+    // directly as well, which may be better.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    assert(Tmp1.getValueType() == NVT);
+    Result = DAG.getNode(ISD::FPOWI, NVT, Tmp1, Node->getOperand(1));
+    if (NoExcessFPPrecision)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+  }
+    
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+    // The input may have strange things in the top bits of the registers, but
+    // these operations don't care.  They may have weird bits going out, but
+    // that too is okay if they are integer operations.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp2 = PromoteOp(Node->getOperand(1));
+    assert(Tmp1.getValueType() == NVT && Tmp2.getValueType() == NVT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+    break;
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp2 = PromoteOp(Node->getOperand(1));
+    assert(Tmp1.getValueType() == NVT && Tmp2.getValueType() == NVT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+    
+    // Floating point operations will give excess precision that we may not be
+    // able to tolerate.  If we DO allow excess precision, just leave it,
+    // otherwise excise it.
+    // FIXME: Why would we need to round FP ops more than integer ones?
+    //     Is Round(Add(Add(A,B),C)) != Round(Add(Round(Add(A,B)), C))
+    if (NoExcessFPPrecision)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+
+  case ISD::SDIV:
+  case ISD::SREM:
+    // These operators require that their input be sign extended.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp2 = PromoteOp(Node->getOperand(1));
+    if (MVT::isInteger(NVT)) {
+      Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+                         DAG.getValueType(VT));
+      Tmp2 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp2,
+                         DAG.getValueType(VT));
+    }
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+
+    // Perform FP_ROUND: this is probably overly pessimistic.
+    if (MVT::isFloatingPoint(NVT) && NoExcessFPPrecision)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::FCOPYSIGN:
+    // These operators require that their input be fp extended.
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+      case Legal:
+        Tmp1 = LegalizeOp(Node->getOperand(0));
+        break;
+      case Promote:
+        Tmp1 = PromoteOp(Node->getOperand(0));
+        break;
+      case Expand:
+        assert(0 && "not implemented");
+    }
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+      case Legal:
+        Tmp2 = LegalizeOp(Node->getOperand(1));
+        break;
+      case Promote:
+        Tmp2 = PromoteOp(Node->getOperand(1));
+        break;
+      case Expand:
+        assert(0 && "not implemented");
+    }
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+    
+    // Perform FP_ROUND: this is probably overly pessimistic.
+    if (NoExcessFPPrecision && Node->getOpcode() != ISD::FCOPYSIGN)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+
+  case ISD::UDIV:
+  case ISD::UREM:
+    // These operators require that their input be zero extended.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp2 = PromoteOp(Node->getOperand(1));
+    assert(MVT::isInteger(NVT) && "Operators don't apply to FP!");
+    Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+    Tmp2 = DAG.getZeroExtendInReg(Tmp2, VT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+    break;
+
+  case ISD::SHL:
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Result = DAG.getNode(ISD::SHL, NVT, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::SRA:
+    // The input value must be properly sign extended.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+                       DAG.getValueType(VT));
+    Result = DAG.getNode(ISD::SRA, NVT, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::SRL:
+    // The input value must be properly zero extended.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+    Result = DAG.getNode(ISD::SRL, NVT, Tmp1, Node->getOperand(1));
+    break;
+
+  case ISD::VAARG:
+    Tmp1 = Node->getOperand(0);   // Get the chain.
+    Tmp2 = Node->getOperand(1);   // Get the pointer.
+    if (TLI.getOperationAction(ISD::VAARG, VT) == TargetLowering::Custom) {
+      Tmp3 = DAG.getVAArg(VT, Tmp1, Tmp2, Node->getOperand(2));
+      Result = TLI.CustomPromoteOperation(Tmp3, DAG);
+    } else {
+      SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2));
+      SDOperand VAList = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp2,
+                                     SV->getValue(), SV->getOffset());
+      // Increment the pointer, VAList, to the next vaarg
+      Tmp3 = DAG.getNode(ISD::ADD, TLI.getPointerTy(), VAList, 
+                         DAG.getConstant(MVT::getSizeInBits(VT)/8, 
+                                         TLI.getPointerTy()));
+      // Store the incremented VAList to the legalized pointer
+      Tmp3 = DAG.getStore(VAList.getValue(1), Tmp3, Tmp2, SV->getValue(),
+                          SV->getOffset());
+      // Load the actual argument out of the pointer VAList
+      Result = DAG.getExtLoad(ISD::EXTLOAD, NVT, Tmp3, VAList, NULL, 0, VT);
+    }
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(Node)
+      ? ISD::EXTLOAD : LD->getExtensionType();
+    Result = DAG.getExtLoad(ExtType, NVT,
+                            LD->getChain(), LD->getBasePtr(),
+                            LD->getSrcValue(), LD->getSrcValueOffset(),
+                            LD->getLoadedVT(),
+                            LD->isVolatile(),
+                            LD->getAlignment());
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+  }
+  case ISD::SELECT:
+    Tmp2 = PromoteOp(Node->getOperand(1));   // Promote the true value.
+    Tmp3 = PromoteOp(Node->getOperand(2));   // Promote the false value.
+    Result = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), Tmp2, Tmp3);
+    break;
+  case ISD::SELECT_CC:
+    Tmp2 = PromoteOp(Node->getOperand(2));   // True
+    Tmp3 = PromoteOp(Node->getOperand(3));   // False
+    Result = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+                         Node->getOperand(1), Tmp2, Tmp3, Node->getOperand(4));
+    break;
+  case ISD::BSWAP:
+    Tmp1 = Node->getOperand(0);
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
+    Tmp1 = DAG.getNode(ISD::BSWAP, NVT, Tmp1);
+    Result = DAG.getNode(ISD::SRL, NVT, Tmp1,
+                         DAG.getConstant(MVT::getSizeInBits(NVT) -
+                                         MVT::getSizeInBits(VT),
+                                         TLI.getShiftAmountTy()));
+    break;
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+    // Zero extend the argument
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Node->getOperand(0));
+    // Perform the larger operation, then subtract if needed.
+    Tmp1 = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+    switch(Node->getOpcode()) {
+    case ISD::CTPOP:
+      Result = Tmp1;
+      break;
+    case ISD::CTTZ:
+      // if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+      Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1,
+                          DAG.getConstant(MVT::getSizeInBits(NVT), NVT),
+                          ISD::SETEQ);
+      Result = DAG.getNode(ISD::SELECT, NVT, Tmp2,
+                           DAG.getConstant(MVT::getSizeInBits(VT), NVT), Tmp1);
+      break;
+    case ISD::CTLZ:
+      // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+      Result = DAG.getNode(ISD::SUB, NVT, Tmp1,
+                           DAG.getConstant(MVT::getSizeInBits(NVT) -
+                                           MVT::getSizeInBits(VT), NVT));
+      break;
+    }
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    Result = PromoteOp(ExpandEXTRACT_SUBVECTOR(Op));
+    break;
+  case ISD::EXTRACT_VECTOR_ELT:
+    Result = PromoteOp(ExpandEXTRACT_VECTOR_ELT(Op));
+    break;
+  }
+
+  assert(Result.Val && "Didn't set a result!");
+
+  // Make sure the result is itself legal.
+  Result = LegalizeOp(Result);
+  
+  // Remember that we promoted this!
+  AddPromotedOperand(Op, Result);
+  return Result;
+}
+
+/// ExpandEXTRACT_VECTOR_ELT - Expand an EXTRACT_VECTOR_ELT operation into
+/// a legal EXTRACT_VECTOR_ELT operation, scalar code, or memory traffic,
+/// based on the vector type. The return type of this matches the element type
+/// of the vector, which may not be legal for the target.
+SDOperand SelectionDAGLegalize::ExpandEXTRACT_VECTOR_ELT(SDOperand Op) {
+  // We know that operand #0 is the Vec vector.  If the index is a constant
+  // or if the invec is a supported hardware type, we can use it.  Otherwise,
+  // lower to a store then an indexed load.
+  SDOperand Vec = Op.getOperand(0);
+  SDOperand Idx = Op.getOperand(1);
+  
+  SDNode *InVal = Vec.Val;
+  MVT::ValueType TVT = InVal->getValueType(0);
+  unsigned NumElems = MVT::getVectorNumElements(TVT);
+  
+  switch (TLI.getOperationAction(ISD::EXTRACT_VECTOR_ELT, TVT)) {
+  default: assert(0 && "This action is not supported yet!");
+  case TargetLowering::Custom: {
+    Vec = LegalizeOp(Vec);
+    Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+    SDOperand Tmp3 = TLI.LowerOperation(Op, DAG);
+    if (Tmp3.Val)
+      return Tmp3;
+    break;
+  }
+  case TargetLowering::Legal:
+    if (isTypeLegal(TVT)) {
+      Vec = LegalizeOp(Vec);
+      Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+      Op = LegalizeOp(Op);
+    }
+    break;
+  case TargetLowering::Expand:
+    break;
+  }
+
+  if (NumElems == 1) {
+    // This must be an access of the only element.  Return it.
+    Op = ScalarizeVectorOp(Vec);
+  } else if (!TLI.isTypeLegal(TVT) && isa<ConstantSDNode>(Idx)) {
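+    // With a constant index we can split the vector and extract from whichever
+    // half actually contains the element.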
+    ConstantSDNode *CIdx = cast<ConstantSDNode>(Idx);
+    SDOperand Lo, Hi;
+    SplitVectorOp(Vec, Lo, Hi);
+    if (CIdx->getValue() < NumElems/2) {
+      Vec = Lo;
+    } else {
+      Vec = Hi;
+      Idx = DAG.getConstant(CIdx->getValue() - NumElems/2,
+                            Idx.getValueType());
+    }
+  
+    // It's now an extract from the appropriate high or low part.  Recurse.
+    Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+    Op = ExpandEXTRACT_VECTOR_ELT(Op);
+  } else {
+    // Store the value to a temporary stack slot, then LOAD the scalar
+    // element back out.
+    SDOperand StackPtr = CreateStackTemporary(Vec.getValueType());
+    SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Vec, StackPtr, NULL, 0);
+
+    // Add the offset to the index.
+    unsigned EltSize = MVT::getSizeInBits(Op.getValueType())/8;
+    Idx = DAG.getNode(ISD::MUL, Idx.getValueType(), Idx,
+                      DAG.getConstant(EltSize, Idx.getValueType()));
+    StackPtr = DAG.getNode(ISD::ADD, Idx.getValueType(), Idx, StackPtr);
+
+    Op = DAG.getLoad(Op.getValueType(), Ch, StackPtr, NULL, 0);
+  }
+  return Op;
+}
+
+/// ExpandEXTRACT_SUBVECTOR - Expand an EXTRACT_SUBVECTOR operation.  For now
+/// we assume the operation can be split if it is not already legal.
+SDOperand SelectionDAGLegalize::ExpandEXTRACT_SUBVECTOR(SDOperand Op) {
+  // We know that operand #0 is the Vec vector.  For now we assume the index
+  // is a constant and that the extracted result is a supported hardware type.
+  SDOperand Vec = Op.getOperand(0);
+  SDOperand Idx = LegalizeOp(Op.getOperand(1));
+  
+  unsigned NumElems = MVT::getVectorNumElements(Vec.getValueType());
+  
+  if (NumElems == MVT::getVectorNumElements(Op.getValueType())) {
+    // This must be an access of the desired vector length.  Return it.
+    return Vec;
+  }
+
+  ConstantSDNode *CIdx = cast<ConstantSDNode>(Idx);
+  SDOperand Lo, Hi;
+  SplitVectorOp(Vec, Lo, Hi);
+  if (CIdx->getValue() < NumElems/2) {
+    Vec = Lo;
+  } else {
+    Vec = Hi;
+    Idx = DAG.getConstant(CIdx->getValue() - NumElems/2, Idx.getValueType());
+  }
+  
+  // It's now an extract from the appropriate high or low part.  Recurse.
+  Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+  return ExpandEXTRACT_SUBVECTOR(Op);
+}
+
+/// LegalizeSetCCOperands - Attempts to create a legal LHS and RHS for a SETCC
+/// with condition CC on the current target.  This usually involves legalizing
+/// or promoting the arguments.  In the case where LHS and RHS must be expanded,
+/// there may be no choice but to create a new SetCC node to represent the
+/// legalized value of setcc lhs, rhs.  In this case, the value is returned in
+/// LHS, and the SDOperand returned in RHS has a nil SDNode value.
+void SelectionDAGLegalize::LegalizeSetCCOperands(SDOperand &LHS,
+                                                 SDOperand &RHS,
+                                                 SDOperand &CC) {
+  SDOperand Tmp1, Tmp2, Result;    
+  
+  switch (getTypeAction(LHS.getValueType())) {
+  case Legal:
+    Tmp1 = LegalizeOp(LHS);   // LHS
+    Tmp2 = LegalizeOp(RHS);   // RHS
+    break;
+  case Promote:
+    Tmp1 = PromoteOp(LHS);   // LHS
+    Tmp2 = PromoteOp(RHS);   // RHS
+
+    // If this is an FP compare, the operands have already been extended.
+    if (MVT::isInteger(LHS.getValueType())) {
+      MVT::ValueType VT = LHS.getValueType();
+      MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+
+      // Otherwise, we have to insert explicit sign or zero extends.  Note
+      // that we could insert sign extends for ALL conditions, but zero extend
+      // is cheaper on many machines (an AND instead of two shifts), so prefer
+      // it.
+      switch (cast<CondCodeSDNode>(CC)->get()) {
+      default: assert(0 && "Unknown integer comparison!");
+      case ISD::SETEQ:
+      case ISD::SETNE:
+      case ISD::SETUGE:
+      case ISD::SETUGT:
+      case ISD::SETULE:
+      case ISD::SETULT:
+        // ALL of these operations will work if we either sign or zero extend
+        // the operands (including the unsigned comparisons!).  Zero extend is
+        // usually a simpler/cheaper operation, so prefer it.
+        Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+        Tmp2 = DAG.getZeroExtendInReg(Tmp2, VT);
+        break;
+      case ISD::SETGE:
+      case ISD::SETGT:
+      case ISD::SETLT:
+      case ISD::SETLE:
+        Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+                           DAG.getValueType(VT));
+        Tmp2 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp2,
+                           DAG.getValueType(VT));
+        break;
+      }
+    }
+    break;
+  case Expand: {
+    MVT::ValueType VT = LHS.getValueType();
+    if (VT == MVT::f32 || VT == MVT::f64) {
+      // Expand into one or more soft-fp libcall(s).
+      RTLIB::Libcall LC1, LC2 = RTLIB::UNKNOWN_LIBCALL;
+      switch (cast<CondCodeSDNode>(CC)->get()) {
+      case ISD::SETEQ:
+      case ISD::SETOEQ:
+        LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+        break;
+      case ISD::SETNE:
+      case ISD::SETUNE:
+        LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+        break;
+      case ISD::SETGE:
+      case ISD::SETOGE:
+        LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+        break;
+      case ISD::SETLT:
+      case ISD::SETOLT:
+        LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+        break;
+      case ISD::SETLE:
+      case ISD::SETOLE:
+        LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+        break;
+      case ISD::SETGT:
+      case ISD::SETOGT:
+        LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+        break;
+      case ISD::SETUO:
+        LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+        break;
+      case ISD::SETO:
+        LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+        break;
+      default:
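+        // The remaining predicates need two libcalls whose results are OR'd
+        // together below; e.g. SETULT is lowered as SETUO | SETOLT.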
+        LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+        switch (cast<CondCodeSDNode>(CC)->get()) {
+        case ISD::SETONE:
+          // SETONE = SETOLT | SETOGT
+          LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+          // Fallthrough
+        case ISD::SETUGT:
+          LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+          break;
+        case ISD::SETUGE:
+          LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+          break;
+        case ISD::SETULT:
+          LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+          break;
+        case ISD::SETULE:
+          LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+          break;
+        case ISD::SETUEQ:
+          LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+          break;
+        default: assert(0 && "Unsupported FP setcc!");
+        }
+      }
+      
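+      // Glue LHS and RHS together with a MERGE_VALUES node so that
+      // ExpandLibCall sees both comparison operands as call arguments.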
+      SDOperand Dummy;
+      Tmp1 = ExpandLibCall(TLI.getLibcallName(LC1),
+                           DAG.getNode(ISD::MERGE_VALUES, VT, LHS, RHS).Val, 
+                           false /*sign irrelevant*/, Dummy);
+      Tmp2 = DAG.getConstant(0, MVT::i32);
+      CC = DAG.getCondCode(TLI.getCmpLibcallCC(LC1));
+      if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+        Tmp1 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), Tmp1, Tmp2, CC);
+        LHS = ExpandLibCall(TLI.getLibcallName(LC2),
+                            DAG.getNode(ISD::MERGE_VALUES, VT, LHS, RHS).Val, 
+                            false /*sign irrelevant*/, Dummy);
+        Tmp2 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), LHS, Tmp2,
+                           DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+        Tmp1 = DAG.getNode(ISD::OR, Tmp1.getValueType(), Tmp1, Tmp2);
+        Tmp2 = SDOperand();
+      }
+      LHS = Tmp1;
+      RHS = Tmp2;
+      return;
+    }
+
+    SDOperand LHSLo, LHSHi, RHSLo, RHSHi;
+    ExpandOp(LHS, LHSLo, LHSHi);
+    ExpandOp(RHS, RHSLo, RHSHi);    
+    switch (cast<CondCodeSDNode>(CC)->get()) {
+    case ISD::SETEQ:
+    case ISD::SETNE:
+      if (RHSLo == RHSHi)
+        if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo))
+          if (RHSCST->isAllOnesValue()) {
+            // Comparison to -1.
+            Tmp1 = DAG.getNode(ISD::AND, LHSLo.getValueType(), LHSLo, LHSHi);
+            Tmp2 = RHSLo;
+            break;
+          }
+
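+      // In general LHS==RHS iff ((LHSLo^RHSLo) | (LHSHi^RHSHi)) == 0, so
+      // build that value and compare it against zero with the original CC.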
+      Tmp1 = DAG.getNode(ISD::XOR, LHSLo.getValueType(), LHSLo, RHSLo);
+      Tmp2 = DAG.getNode(ISD::XOR, LHSLo.getValueType(), LHSHi, RHSHi);
+      Tmp1 = DAG.getNode(ISD::OR, Tmp1.getValueType(), Tmp1, Tmp2);
+      Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+      break;
+    default:
+      // If this is a comparison of the sign bit, just look at the top part.
+      // X > -1,  x < 0
+      if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(RHS))
+        if ((cast<CondCodeSDNode>(CC)->get() == ISD::SETLT && 
+             CST->getValue() == 0) ||             // X < 0
+            (cast<CondCodeSDNode>(CC)->get() == ISD::SETGT &&
+             CST->isAllOnesValue())) {            // X > -1
+          Tmp1 = LHSHi;
+          Tmp2 = RHSHi;
+          break;
+        }
+
+      // FIXME: This generated code sucks.
+      ISD::CondCode LowCC;
+      ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+      switch (CCCode) {
+      default: assert(0 && "Unknown integer setcc!");
+      case ISD::SETLT:
+      case ISD::SETULT: LowCC = ISD::SETULT; break;
+      case ISD::SETGT:
+      case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+      case ISD::SETLE:
+      case ISD::SETULE: LowCC = ISD::SETULE; break;
+      case ISD::SETGE:
+      case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+      }
+
+      // Tmp1 = lo(op1) < lo(op2)   // Always unsigned comparison
+      // Tmp2 = hi(op1) < hi(op2)   // Signedness depends on operands
+      // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+      // NOTE: on targets without efficient SELECT of bools, we can always use
+      // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+      TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL);
+      Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSLo, RHSLo, LowCC,
+                               false, DagCombineInfo);
+      if (!Tmp1.Val)
+        Tmp1 = DAG.getSetCC(TLI.getSetCCResultTy(), LHSLo, RHSLo, LowCC);
+      Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi,
+                               CCCode, false, DagCombineInfo);
+      if (!Tmp2.Val)
+        Tmp2 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), LHSHi, RHSHi, CC);
+      
+      ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.Val);
+      ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.Val);
+      if ((Tmp1C && Tmp1C->getValue() == 0) ||
+          (Tmp2C && Tmp2C->getValue() == 0 &&
+           (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+            CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+          (Tmp2C && Tmp2C->getValue() == 1 &&
+           (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+            CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+        // If the low part is known false, return the high part.
+        // For LE / GE, if high part is known false, ignore the low part.
+        // For LT / GT, if high part is known true, ignore the low part.
+        Tmp1 = Tmp2;
+        Tmp2 = SDOperand();
+      } else {
+        Result = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi,
+                                   ISD::SETEQ, false, DagCombineInfo);
+        if (!Result.Val)
+          Result=DAG.getSetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi, ISD::SETEQ);
+        Result = LegalizeOp(DAG.getNode(ISD::SELECT, Tmp1.getValueType(),
+                                        Result, Tmp1, Tmp2));
+        Tmp1 = Result;
+        Tmp2 = SDOperand();
+      }
+    }
+  }
+  }
+  LHS = Tmp1;
+  RHS = Tmp2;
+}
+
+/// ExpandBIT_CONVERT - Expand a BIT_CONVERT node into a store/load combination.
+/// The resultant code need not be legal.  Note that SrcOp is the input operand
+/// to the BIT_CONVERT, not the BIT_CONVERT node itself.
+SDOperand SelectionDAGLegalize::ExpandBIT_CONVERT(MVT::ValueType DestVT, 
+                                                  SDOperand SrcOp) {
+  // Create the stack frame object.
+  SDOperand FIPtr = CreateStackTemporary(DestVT);
+  
+  // Emit a store to the stack slot.
+  SDOperand Store = DAG.getStore(DAG.getEntryNode(), SrcOp, FIPtr, NULL, 0);
+  // Result is a load from the stack slot.
+  return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0);
+}
+
+SDOperand SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+  // Create a vector sized/aligned stack slot, store the value to element #0,
+  // then load the whole vector back out.
+  SDOperand StackPtr = CreateStackTemporary(Node->getValueType(0));
+  SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Node->getOperand(0), StackPtr,
+                              NULL, 0);
+  return DAG.getLoad(Node->getValueType(0), Ch, StackPtr, NULL, 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDOperand SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+  
+  // If the only non-undef value is the low element, turn this into a 
+  // SCALAR_TO_VECTOR node.  If this is { X, X, X, X }, determine X.
+  unsigned NumElems = Node->getNumOperands();
+  bool isOnlyLowElement = true;
+  SDOperand SplatValue = Node->getOperand(0);
+  std::map<SDOperand, std::vector<unsigned> > Values;
+  Values[SplatValue].push_back(0);
+  bool isConstant = true;
+  if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+      SplatValue.getOpcode() != ISD::UNDEF)
+    isConstant = false;
+  
+  for (unsigned i = 1; i < NumElems; ++i) {
+    SDOperand V = Node->getOperand(i);
+    Values[V].push_back(i);
+    if (V.getOpcode() != ISD::UNDEF)
+      isOnlyLowElement = false;
+    if (SplatValue != V)
+      SplatValue = SDOperand(0,0);
+
+    // If this isn't a constant element or an undef, we can't use a constant
+    // pool load.
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+        V.getOpcode() != ISD::UNDEF)
+      isConstant = false;
+  }
+  
+  if (isOnlyLowElement) {
+    // If the low element is an undef too, then this whole thing is an undef.
+    if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
+      return DAG.getNode(ISD::UNDEF, Node->getValueType(0));
+    // Otherwise, turn this into a scalar_to_vector node.
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0),
+                       Node->getOperand(0));
+  }
+  
+  // If all elements are constants, create a load from the constant pool.
+  if (isConstant) {
+    MVT::ValueType VT = Node->getValueType(0);
+    const Type *OpNTy = 
+      MVT::getTypeForValueType(Node->getOperand(0).getValueType());
+    std::vector<Constant*> CV;
+    for (unsigned i = 0, e = NumElems; i != e; ++i) {
+      if (ConstantFPSDNode *V = 
+          dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+        CV.push_back(ConstantFP::get(OpNTy, V->getValue()));
+      } else if (ConstantSDNode *V = 
+                 dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+        CV.push_back(ConstantInt::get(OpNTy, V->getValue()));
+      } else {
+        assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+        CV.push_back(UndefValue::get(OpNTy));
+      }
+    }
+    Constant *CP = ConstantVector::get(CV);
+    SDOperand CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+    return DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+  }
+  
+  if (SplatValue.Val) {   // Splat of one value?
+    // Build the shuffle constant vector: <0, 0, 0, 0>
+    MVT::ValueType MaskVT = 
+      MVT::getIntVectorWithNumElements(NumElems);
+    SDOperand Zero = DAG.getConstant(0, MVT::getVectorElementType(MaskVT));
+    std::vector<SDOperand> ZeroVec(NumElems, Zero);
+    SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+                                      &ZeroVec[0], ZeroVec.size());
+
+    // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+    if (isShuffleLegal(Node->getValueType(0), SplatMask)) {
+      // Get the splatted value into the low element of a vector register.
+      SDOperand LowValVec = 
+        DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0), SplatValue);
+    
+      // Return shuffle(LowValVec, undef, <0,0,0,0>)
+      return DAG.getNode(ISD::VECTOR_SHUFFLE, Node->getValueType(0), LowValVec,
+                         DAG.getNode(ISD::UNDEF, Node->getValueType(0)),
+                         SplatMask);
+    }
+  }
+  
+  // If there are only two unique elements, we may be able to turn this into a
+  // vector shuffle.
+  if (Values.size() == 2) {
+    // Build the shuffle constant vector: e.g. <0, 4, 0, 4>
+    MVT::ValueType MaskVT = 
+      MVT::getIntVectorWithNumElements(NumElems);
+    std::vector<SDOperand> MaskVec(NumElems);
+    unsigned i = 0;
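+    // Each unique value becomes its own SCALAR_TO_VECTOR operand, so mask
+    // index 0 selects the first value and index NumElems selects the second.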
+    for (std::map<SDOperand,std::vector<unsigned> >::iterator I=Values.begin(),
+           E = Values.end(); I != E; ++I) {
+      for (std::vector<unsigned>::iterator II = I->second.begin(),
+             EE = I->second.end(); II != EE; ++II)
+        MaskVec[*II] = DAG.getConstant(i, MVT::getVectorElementType(MaskVT));
+      i += NumElems;
+    }
+    SDOperand ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+                                        &MaskVec[0], MaskVec.size());
+
+    // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+    if (TLI.isOperationLegal(ISD::SCALAR_TO_VECTOR, Node->getValueType(0)) &&
+        isShuffleLegal(Node->getValueType(0), ShuffleMask)) {
+      SmallVector<SDOperand, 8> Ops;
+      for(std::map<SDOperand,std::vector<unsigned> >::iterator I=Values.begin(),
+            E = Values.end(); I != E; ++I) {
+        SDOperand Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0),
+                                   I->first);
+        Ops.push_back(Op);
+      }
+      Ops.push_back(ShuffleMask);
+
+      // Return shuffle(LoValVec, HiValVec, <0,1,0,1>)
+      return DAG.getNode(ISD::VECTOR_SHUFFLE, Node->getValueType(0), 
+                         &Ops[0], Ops.size());
+    }
+  }
+  
+  // Otherwise, we can't handle this case efficiently.  Allocate a sufficiently
+  // aligned object on the stack, store each element into it, then load
+  // the result as a vector.
+  MVT::ValueType VT = Node->getValueType(0);
+  // Create the stack frame object.
+  SDOperand FIPtr = CreateStackTemporary(VT);
+  
+  // Emit a store of each element to the stack slot.
+  SmallVector<SDOperand, 8> Stores;
+  unsigned TypeByteSize = 
+    MVT::getSizeInBits(Node->getOperand(0).getValueType())/8;
+  // Store (in the right endianness) the elements to memory.
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    
+    unsigned Offset = TypeByteSize*i;
+    
+    SDOperand Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr, Idx);
+    
+    Stores.push_back(DAG.getStore(DAG.getEntryNode(), Node->getOperand(i), Idx, 
+                                  NULL, 0));
+  }
+  
+  SDOperand StoreChain;
+  if (!Stores.empty())    // Not all undef elements?
+    StoreChain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                             &Stores[0], Stores.size());
+  else
+    StoreChain = DAG.getEntryNode();
+  
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, StoreChain, FIPtr, NULL, 0);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDOperand SelectionDAGLegalize::CreateStackTemporary(MVT::ValueType VT) {
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  unsigned ByteSize = MVT::getSizeInBits(VT)/8;
+  const Type *Ty = MVT::getTypeForValueType(VT);
+  unsigned StackAlign = (unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty);
+  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+  return DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+void SelectionDAGLegalize::ExpandShiftParts(unsigned NodeOp,
+                                            SDOperand Op, SDOperand Amt,
+                                            SDOperand &Lo, SDOperand &Hi) {
+  // Expand the subcomponents.
+  SDOperand LHSL, LHSH;
+  ExpandOp(Op, LHSL, LHSH);
+
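+  // A *_PARTS node takes both halves plus the shift amount and yields both
+  // result halves as its two values.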
+  SDOperand Ops[] = { LHSL, LHSH, Amt };
+  MVT::ValueType VT = LHSL.getValueType();
+  Lo = DAG.getNode(NodeOp, DAG.getNodeValueTypes(VT, VT), 2, Ops, 3);
+  Hi = Lo.getValue(1);
+}
+
+
+/// ExpandShift - Try to find a clever way to expand this shift operation out to
+/// smaller elements.  If we can't find a way that is more efficient than a
+/// libcall on this target, return false.  Otherwise, return true with the
+/// low-parts expanded into Lo and Hi.
+bool SelectionDAGLegalize::ExpandShift(unsigned Opc, SDOperand Op,SDOperand Amt,
+                                       SDOperand &Lo, SDOperand &Hi) {
+  assert((Opc == ISD::SHL || Opc == ISD::SRA || Opc == ISD::SRL) &&
+         "This is not a shift!");
+
+  MVT::ValueType NVT = TLI.getTypeToTransformTo(Op.getValueType());
+  SDOperand ShAmt = LegalizeOp(Amt);
+  MVT::ValueType ShTy = ShAmt.getValueType();
+  unsigned VTBits = MVT::getSizeInBits(Op.getValueType());
+  unsigned NVTBits = MVT::getSizeInBits(NVT);
+
+  // Handle the case when Amt is an immediate.  Other cases are currently broken
+  // and are disabled.
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Amt.Val)) {
+    unsigned Cst = CN->getValue();
+    // Expand the incoming operand to be shifted, so that we have its parts
+    SDOperand InL, InH;
+    ExpandOp(Op, InL, InH);
+    switch(Opc) {
+    case ISD::SHL:
+      if (Cst > VTBits) {
+        Lo = DAG.getConstant(0, NVT);
+        Hi = DAG.getConstant(0, NVT);
+      } else if (Cst > NVTBits) {
+        Lo = DAG.getConstant(0, NVT);
+        Hi = DAG.getNode(ISD::SHL, NVT, InL, DAG.getConstant(Cst-NVTBits,ShTy));
+      } else if (Cst == NVTBits) {
+        Lo = DAG.getConstant(0, NVT);
+        Hi = InL;
+      } else {
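+        // General case: Lo = InL << Cst and
+        // Hi = (InH << Cst) | (InL >> (NVTBits - Cst)).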
+        Lo = DAG.getNode(ISD::SHL, NVT, InL, DAG.getConstant(Cst, ShTy));
+        Hi = DAG.getNode(ISD::OR, NVT,
+           DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(Cst, ShTy)),
+           DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(NVTBits-Cst, ShTy)));
+      }
+      return true;
+    case ISD::SRL:
+      if (Cst > VTBits) {
+        Lo = DAG.getConstant(0, NVT);
+        Hi = DAG.getConstant(0, NVT);
+      } else if (Cst > NVTBits) {
+        Lo = DAG.getNode(ISD::SRL, NVT, InH, DAG.getConstant(Cst-NVTBits,ShTy));
+        Hi = DAG.getConstant(0, NVT);
+      } else if (Cst == NVTBits) {
+        Lo = InH;
+        Hi = DAG.getConstant(0, NVT);
+      } else {
+        Lo = DAG.getNode(ISD::OR, NVT,
+           DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(Cst, ShTy)),
+           DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(NVTBits-Cst, ShTy)));
+        Hi = DAG.getNode(ISD::SRL, NVT, InH, DAG.getConstant(Cst, ShTy));
+      }
+      return true;
+    case ISD::SRA:
+      if (Cst > VTBits) {
+        Hi = Lo = DAG.getNode(ISD::SRA, NVT, InH,
+                              DAG.getConstant(NVTBits-1, ShTy));
+      } else if (Cst > NVTBits) {
+        Lo = DAG.getNode(ISD::SRA, NVT, InH,
+                           DAG.getConstant(Cst-NVTBits, ShTy));
+        Hi = DAG.getNode(ISD::SRA, NVT, InH,
+                              DAG.getConstant(NVTBits-1, ShTy));
+      } else if (Cst == NVTBits) {
+        Lo = InH;
+        Hi = DAG.getNode(ISD::SRA, NVT, InH,
+                              DAG.getConstant(NVTBits-1, ShTy));
+      } else {
+        Lo = DAG.getNode(ISD::OR, NVT,
+           DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(Cst, ShTy)),
+           DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(NVTBits-Cst, ShTy)));
+        Hi = DAG.getNode(ISD::SRA, NVT, InH, DAG.getConstant(Cst, ShTy));
+      }
+      return true;
+    }
+  }
+  
+  // Okay, the shift amount isn't constant.  However, if we can tell that it is
+  // >= 32 or < 32, we can still simplify it, without knowing the actual value.
+  uint64_t Mask = NVTBits, KnownZero, KnownOne;
+  DAG.ComputeMaskedBits(Amt, Mask, KnownZero, KnownOne);
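+  // Mask is the single bit with value NVTBits, i.e. the bit that determines
+  // whether the (in-range) shift amount is >= NVTBits.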
+  
+  // If we know that the high bit of the shift amount is one, then we can do
+  // this as a couple of simple shifts.
+  if (KnownOne & Mask) {
+    // Mask out the high bit, which we know is set.
+    Amt = DAG.getNode(ISD::AND, Amt.getValueType(), Amt,
+                      DAG.getConstant(NVTBits-1, Amt.getValueType()));
+    
+    // Expand the incoming operand to be shifted, so that we have its parts
+    SDOperand InL, InH;
+    ExpandOp(Op, InL, InH);
+    switch(Opc) {
+    case ISD::SHL:
+      Lo = DAG.getConstant(0, NVT);              // Low part is zero.
+      Hi = DAG.getNode(ISD::SHL, NVT, InL, Amt); // High part from Lo part.
+      return true;
+    case ISD::SRL:
+      Hi = DAG.getConstant(0, NVT);              // Hi part is zero.
+      Lo = DAG.getNode(ISD::SRL, NVT, InH, Amt); // Lo part from Hi part.
+      return true;
+    case ISD::SRA:
+      Hi = DAG.getNode(ISD::SRA, NVT, InH,       // Sign extend high part.
+                       DAG.getConstant(NVTBits-1, Amt.getValueType()));
+      Lo = DAG.getNode(ISD::SRA, NVT, InH, Amt); // Lo part from Hi part.
+      return true;
+    }
+  }
+  
+  // If we know that the high bit of the shift amount is zero, then we can do
+  // this as a couple of simple shifts.
+  if (KnownZero & Mask) {
+    // Compute NVTBits - Amt.
+    SDOperand Amt2 = DAG.getNode(ISD::SUB, Amt.getValueType(),
+                                 DAG.getConstant(NVTBits, Amt.getValueType()),
+                                 Amt);
+    
+    // Expand the incoming operand to be shifted, so that we have its parts
+    SDOperand InL, InH;
+    ExpandOp(Op, InL, InH);
+    switch(Opc) {
+    case ISD::SHL:
+      Lo = DAG.getNode(ISD::SHL, NVT, InL, Amt);
+      Hi = DAG.getNode(ISD::OR, NVT,
+                       DAG.getNode(ISD::SHL, NVT, InH, Amt),
+                       DAG.getNode(ISD::SRL, NVT, InL, Amt2));
+      return true;
+    case ISD::SRL:
+      Hi = DAG.getNode(ISD::SRL, NVT, InH, Amt);
+      Lo = DAG.getNode(ISD::OR, NVT,
+                       DAG.getNode(ISD::SRL, NVT, InL, Amt),
+                       DAG.getNode(ISD::SHL, NVT, InH, Amt2));
+      return true;
+    case ISD::SRA:
+      Hi = DAG.getNode(ISD::SRA, NVT, InH, Amt);
+      Lo = DAG.getNode(ISD::OR, NVT,
+                       DAG.getNode(ISD::SRL, NVT, InL, Amt),
+                       DAG.getNode(ISD::SHL, NVT, InH, Amt2));
+      return true;
+    }
+  }
+  
+  return false;
+}
+
+
+// ExpandLibCall - Expand a node into a call to a libcall.  If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument.  If it does fit into a single register, return the result
+// and leave the Hi part unset.
+SDOperand SelectionDAGLegalize::ExpandLibCall(const char *Name, SDNode *Node,
+                                              bool isSigned, SDOperand &Hi) {
+  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+  // The input chain to this libcall is the entry node of the function. 
+  // Legalizing the call will automatically add the previous call to the
+  // dependence.
+  SDOperand InChain = DAG.getEntryNode();
+  
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    MVT::ValueType ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = MVT::getTypeForValueType(ArgVT);
+    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; 
+    Entry.isSExt = isSigned;
+    Args.push_back(Entry);
+  }
+  SDOperand Callee = DAG.getExternalSymbol(Name, TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = MVT::getTypeForValueType(Node->getValueType(0));
+  std::pair<SDOperand,SDOperand> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, false, CallingConv::C, false,
+                    Callee, Args, DAG);
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+  SDOperand Result;
+  switch (getTypeAction(CallInfo.first.getValueType())) {
+  default: assert(0 && "Unknown thing");
+  case Legal:
+    Result = CallInfo.first;
+    break;
+  case Expand:
+    ExpandOp(CallInfo.first, Result, Hi);
+    break;
+  }
+  return Result;
+}
+
+
+/// ExpandIntToFP - Expand a [US]INT_TO_FP operation.
+///
+SDOperand SelectionDAGLegalize::
+ExpandIntToFP(bool isSigned, MVT::ValueType DestTy, SDOperand Source) {
+  assert(getTypeAction(Source.getValueType()) == Expand &&
+         "This is not an expansion!");
+  assert(Source.getValueType() == MVT::i64 && "Only handle expand from i64!");
+
+  if (!isSigned) {
+    assert(Source.getValueType() == MVT::i64 &&
+           "This only works for 64-bit -> FP");
+    // The 64-bit value loaded will be incorrect if the 'sign bit' of the
+    // incoming integer is set.  To handle this, we dynamically test to see if
+    // it is set, and, if so, add a fudge factor.
+    SDOperand Lo, Hi;
+    ExpandOp(Source, Lo, Hi);
+
+    // If this is unsigned, and not supported, first perform the conversion to
+    // signed, then adjust the result if the sign bit is set.
+    SDOperand SignedConv = ExpandIntToFP(true, DestTy,
+                   DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), Lo, Hi));
+
+    SDOperand SignSet = DAG.getSetCC(TLI.getSetCCResultTy(), Hi,
+                                     DAG.getConstant(0, Hi.getValueType()),
+                                     ISD::SETLT);
+    SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
+    SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
+                                      SignSet, Four, Zero);
+    uint64_t FF = 0x5f800000ULL;
+    if (TLI.isLittleEndian()) FF <<= 32;
+    static Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
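+    // 0x5f800000 is 2^64 as an IEEE single; within the i64 constant it sits
+    // at byte offset 4 (with 0.0 at offset 0), so the offset selected above
+    // adds the 2^64 fudge only when the sign bit of the input was set.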
+
+    SDOperand CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+    CPIdx = DAG.getNode(ISD::ADD, TLI.getPointerTy(), CPIdx, CstOffset);
+    SDOperand FudgeInReg;
+    if (DestTy == MVT::f32)
+      FudgeInReg = DAG.getLoad(MVT::f32, DAG.getEntryNode(), CPIdx, NULL, 0);
+    else {
+      assert(DestTy == MVT::f64 && "Unexpected conversion");
+      // FIXME: Avoid the extend by constructing the right constant pool?
+      FudgeInReg = DAG.getExtLoad(ISD::EXTLOAD, MVT::f64, DAG.getEntryNode(),
+                                  CPIdx, NULL, 0, MVT::f32);
+    }
+    MVT::ValueType SCVT = SignedConv.getValueType();
+    if (SCVT != DestTy) {
+      // The destination type needs to be expanded as well.  The FADD we are
+      // now constructing will be expanded into a libcall.
+      if (MVT::getSizeInBits(SCVT) != MVT::getSizeInBits(DestTy)) {
+        assert(SCVT == MVT::i32 && DestTy == MVT::f64);
+        SignedConv = DAG.getNode(ISD::BUILD_PAIR, MVT::i64,
+                                 SignedConv, SignedConv.getValue(1));
+      }
+      SignedConv = DAG.getNode(ISD::BIT_CONVERT, DestTy, SignedConv);
+    }
+    return DAG.getNode(ISD::FADD, DestTy, SignedConv, FudgeInReg);
+  }
+
+  // Check to see if the target has a custom way to lower this.  If so, use it.
+  switch (TLI.getOperationAction(ISD::SINT_TO_FP, Source.getValueType())) {
+  default: assert(0 && "This action not implemented for this operation!");
+  case TargetLowering::Legal:
+  case TargetLowering::Expand:
+    break;   // This case is handled below.
+  case TargetLowering::Custom: {
+    SDOperand NV = TLI.LowerOperation(DAG.getNode(ISD::SINT_TO_FP, DestTy,
+                                                  Source), DAG);
+    if (NV.Val)
+      return LegalizeOp(NV);
+    break;   // The target decided this was legal after all
+  }
+  }
+
+  // Expand the source, then glue it back together for the call.  We must expand
+  // the source in case it is shared (this pass of legalize must traverse it).
+  SDOperand SrcLo, SrcHi;
+  ExpandOp(Source, SrcLo, SrcHi);
+  Source = DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), SrcLo, SrcHi);
+
+  RTLIB::Libcall LC;
+  if (DestTy == MVT::f32)
+    LC = RTLIB::SINTTOFP_I64_F32;
+  else {
+    assert(DestTy == MVT::f64 && "Unknown fp value type!");
+    LC = RTLIB::SINTTOFP_I64_F64;
+  }
+  
+  assert(TLI.getLibcallName(LC) && "Don't know how to expand this SINT_TO_FP!");
+  Source = DAG.getNode(ISD::SINT_TO_FP, DestTy, Source);
+  SDOperand UnusedHiPart;
+  return ExpandLibCall(TLI.getLibcallName(LC), Source.Val, isSigned,
+                       UnusedHiPart);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it.  At this point, we know that the result and operand types are
+/// legal for the target.
+SDOperand SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+                                                     SDOperand Op0,
+                                                     MVT::ValueType DestVT) {
+  if (Op0.getValueType() == MVT::i32) {
+    // simple 32-bit [signed|unsigned] integer to float/double expansion
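+    // The trick: build the bit pattern of the double 2^52 + (uint32)x in a
+    // stack slot, load it, then subtract the bias (2^52, plus 2^31 for the
+    // signed mapping) to recover the exact converted value.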
+    
+    // get the stack frame index of an 8-byte buffer, pessimistically aligned
+    MachineFunction &MF = DAG.getMachineFunction();
+    const Type *F64Type = MVT::getTypeForValueType(MVT::f64);
+    unsigned StackAlign =
+      (unsigned)TLI.getTargetData()->getPrefTypeAlignment(F64Type);
+    int SSFI = MF.getFrameInfo()->CreateStackObject(8, StackAlign);
+    // get address of 8 byte buffer
+    SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+    // word offset constant for Hi/Lo address computation
+    SDOperand WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+    // set up Hi and Lo (into buffer) address based on endian
+    SDOperand Hi = StackSlot;
+    SDOperand Lo = DAG.getNode(ISD::ADD, TLI.getPointerTy(), StackSlot,WordOff);
+    if (TLI.isLittleEndian())
+      std::swap(Hi, Lo);
+    
+    // if signed map to unsigned space
+    SDOperand Op0Mapped;
+    if (isSigned) {
+      // constant used to invert sign bit (signed to unsigned mapping)
+      SDOperand SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+      Op0Mapped = DAG.getNode(ISD::XOR, MVT::i32, Op0, SignBit);
+    } else {
+      Op0Mapped = Op0;
+    }
+    // store the lo of the constructed double - based on integer input
+    SDOperand Store1 = DAG.getStore(DAG.getEntryNode(),
+                                    Op0Mapped, Lo, NULL, 0);
+    // initial hi portion of constructed double
+    SDOperand InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+    // store the hi of the constructed double - biased exponent
+    SDOperand Store2=DAG.getStore(Store1, InitialHi, Hi, NULL, 0);
+    // load the constructed double
+    SDOperand Load = DAG.getLoad(MVT::f64, Store2, StackSlot, NULL, 0);
+    // FP constant to bias correct the final result
+    SDOperand Bias = DAG.getConstantFP(isSigned ?
+                                            BitsToDouble(0x4330000080000000ULL)
+                                          : BitsToDouble(0x4330000000000000ULL),
+                                     MVT::f64);
+    // subtract the bias
+    SDOperand Sub = DAG.getNode(ISD::FSUB, MVT::f64, Load, Bias);
+    // final result
+    SDOperand Result;
+    // handle final rounding
+    if (DestVT == MVT::f64) {
+      // do nothing
+      Result = Sub;
+    } else {
+      // if the destination is f32, round the f64 result down to f32
+      Result = DAG.getNode(ISD::FP_ROUND, MVT::f32, Sub);
+    }
+    return Result;
+  }
+  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  SDOperand Tmp1 = DAG.getNode(ISD::SINT_TO_FP, DestVT, Op0);
+
+  SDOperand SignSet = DAG.getSetCC(TLI.getSetCCResultTy(), Op0,
+                                   DAG.getConstant(0, Op0.getValueType()),
+                                   ISD::SETLT);
+  SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
+  SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
+                                    SignSet, Four, Zero);
+
+  // If the sign bit of the integer is set, the large number will be treated
+  // as a negative number.  To counteract this, the dynamic code adds an
+  // offset depending on the data type.
+  uint64_t FF;
+  switch (Op0.getValueType()) {
+  default: assert(0 && "Unsupported integer type!");
+  case MVT::i8 : FF = 0x43800000ULL; break;  // 2^8  (as a float)
+  case MVT::i16: FF = 0x47800000ULL; break;  // 2^16 (as a float)
+  case MVT::i32: FF = 0x4F800000ULL; break;  // 2^32 (as a float)
+  case MVT::i64: FF = 0x5F800000ULL; break;  // 2^64 (as a float)
+  }
+  if (TLI.isLittleEndian()) FF <<= 32;
+  static Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+  SDOperand CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+  CPIdx = DAG.getNode(ISD::ADD, TLI.getPointerTy(), CPIdx, CstOffset);
+  SDOperand FudgeInReg;
+  if (DestVT == MVT::f32)
+    FudgeInReg = DAG.getLoad(MVT::f32, DAG.getEntryNode(), CPIdx, NULL, 0);
+  else {
+    assert(DestVT == MVT::f64 && "Unexpected conversion");
+    FudgeInReg = LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, MVT::f64,
+                                           DAG.getEntryNode(), CPIdx,
+                                           NULL, 0, MVT::f32));
+  }
+
+  return DAG.getNode(ISD::FADD, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDOperand SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDOperand LegalOp,
+                                                      MVT::ValueType DestVT,
+                                                      bool isSigned) {
+  // First step, figure out the appropriate *INT_TO_FP operation to use.
+  MVT::ValueType NewInTy = LegalOp.getValueType();
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewInTy = (MVT::ValueType)(NewInTy+1);
+    assert(MVT::isInteger(NewInTy) && "Ran out of possibilities!");
+
+    // If the target supports SINT_TO_FP of this type, use it.
+    switch (TLI.getOperationAction(ISD::SINT_TO_FP, NewInTy)) {
+      default: break;
+      case TargetLowering::Legal:
+        if (!TLI.isTypeLegal(NewInTy))
+          break;  // Can't use this datatype.
+        // FALL THROUGH.
+      case TargetLowering::Custom:
+        OpToUse = ISD::SINT_TO_FP;
+        break;
+    }
+    if (OpToUse) break;
+    if (isSigned) continue;
+
+    // If the target supports UINT_TO_FP of this type, use it.
+    switch (TLI.getOperationAction(ISD::UINT_TO_FP, NewInTy)) {
+      default: break;
+      case TargetLowering::Legal:
+        if (!TLI.isTypeLegal(NewInTy))
+          break;  // Can't use this datatype.
+        // FALL THROUGH.
+      case TargetLowering::Custom:
+        OpToUse = ISD::UINT_TO_FP;
+        break;
+    }
+    if (OpToUse) break;
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.  Sign or zero extend our
+  // input to the desired type, then run the operation on it.
+  return DAG.getNode(OpToUse, DestVT,
+                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                                 NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDOperand SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDOperand LegalOp,
+                                                      MVT::ValueType DestVT,
+                                                      bool isSigned) {
+  // First step, figure out the appropriate FP_TO*INT operation to use.
+  MVT::ValueType NewOutTy = DestVT;
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewOutTy = (MVT::ValueType)(NewOutTy+1);
+    assert(MVT::isInteger(NewOutTy) && "Ran out of possibilities!");
+
+    // If the target supports FP_TO_SINT returning this type, use it.
+    switch (TLI.getOperationAction(ISD::FP_TO_SINT, NewOutTy)) {
+    default: break;
+    case TargetLowering::Legal:
+      if (!TLI.isTypeLegal(NewOutTy))
+        break;  // Can't use this datatype.
+      // FALL THROUGH.
+    case TargetLowering::Custom:
+      OpToUse = ISD::FP_TO_SINT;
+      break;
+    }
+    if (OpToUse) break;
+
+    // If the target supports FP_TO_UINT of this type, use it.
+    switch (TLI.getOperationAction(ISD::FP_TO_UINT, NewOutTy)) {
+    default: break;
+    case TargetLowering::Legal:
+      if (!TLI.isTypeLegal(NewOutTy))
+        break;  // Can't use this datatype.
+      // FALL THROUGH.
+    case TargetLowering::Custom:
+      OpToUse = ISD::FP_TO_UINT;
+      break;
+    }
+    if (OpToUse) break;
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.  Truncate the result of the
+  // extended FP_TO_*INT operation to the desired size.
+  return DAG.getNode(ISD::TRUNCATE, DestVT,
+                     DAG.getNode(OpToUse, NewOutTy, LegalOp));
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operand.
+///
+SDOperand SelectionDAGLegalize::ExpandBSWAP(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType SHVT = TLI.getShiftAmountTy();
+  SDOperand Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+  switch (VT) {
+  default: assert(0 && "Unhandled Expand type in BSWAP!"); abort();
+  case MVT::i16:
+    Tmp2 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    return DAG.getNode(ISD::OR, VT, Tmp1, Tmp2);
+  case MVT::i32:
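+    // Swap all four bytes: (x<<24) | ((x<<8) & 0xFF0000) |
+    // ((x>>8) & 0xFF00) | (x>>24).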
+    Tmp4 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::AND, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+    Tmp2 = DAG.getNode(ISD::AND, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, VT, Tmp2, Tmp1);
+    return DAG.getNode(ISD::OR, VT, Tmp4, Tmp2);
+  case MVT::i64:
+    Tmp8 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp6 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp5 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp4 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp3 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::AND, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+    Tmp6 = DAG.getNode(ISD::AND, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+    Tmp5 = DAG.getNode(ISD::AND, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+    Tmp4 = DAG.getNode(ISD::AND, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+    Tmp3 = DAG.getNode(ISD::AND, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+    Tmp2 = DAG.getNode(ISD::AND, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+    Tmp8 = DAG.getNode(ISD::OR, VT, Tmp8, Tmp7);
+    Tmp6 = DAG.getNode(ISD::OR, VT, Tmp6, Tmp5);
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, VT, Tmp2, Tmp1);
+    Tmp8 = DAG.getNode(ISD::OR, VT, Tmp8, Tmp6);
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp2);
+    return DAG.getNode(ISD::OR, VT, Tmp8, Tmp4);
+  }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDOperand SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDOperand Op) {
+  switch (Opc) {
+  default: assert(0 && "Cannot expand this yet!");
+  case ISD::CTPOP: {
+    static const uint64_t mask[6] = {
+      0x5555555555555555ULL, 0x3333333333333333ULL,
+      0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+      0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+    };
+    MVT::ValueType VT = Op.getValueType();
+    MVT::ValueType ShVT = TLI.getShiftAmountTy();
+    unsigned len = MVT::getSizeInBits(VT);
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      // x = (x & mask[i]) + ((x >> (1 << i)) & mask[i])
+      SDOperand Tmp2 = DAG.getConstant(mask[i], VT);
+      SDOperand Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::ADD, VT, DAG.getNode(ISD::AND, VT, Op, Tmp2),
+                       DAG.getNode(ISD::AND, VT,
+                                   DAG.getNode(ISD::SRL, VT, Op, Tmp3),Tmp2));
+    }
+    return Op;
+  }
+  case ISD::CTLZ: {
+    // for now, we do this:
+    // x = x | (x >> 1);
+    // x = x | (x >> 2);
+    // ...
+    // x = x | (x >>16);
+    // x = x | (x >>32); // for 64-bit input
+    // return popcount(~x);
+    //
+    // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+    MVT::ValueType VT = Op.getValueType();
+    MVT::ValueType ShVT = TLI.getShiftAmountTy();
+    unsigned len = MVT::getSizeInBits(VT);
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      SDOperand Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::OR, VT, Op, DAG.getNode(ISD::SRL, VT, Op, Tmp3));
+    }
+    Op = DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(~0ULL, VT));
+    return DAG.getNode(ISD::CTPOP, VT, Op);
+  }
+  case ISD::CTTZ: {
+    // for now, we use: { return popcount(~x & (x - 1)); }
+    // unless the target has ctlz but not ctpop, in which case we use:
+    // { return 32 - nlz(~x & (x-1)); }
+    // see also http://www.hackersdelight.org/HDcode/ntz.cc
+    MVT::ValueType VT = Op.getValueType();
+    SDOperand Tmp2 = DAG.getConstant(~0ULL, VT);
+    SDOperand Tmp3 = DAG.getNode(ISD::AND, VT,
+                       DAG.getNode(ISD::XOR, VT, Op, Tmp2),
+                       DAG.getNode(ISD::SUB, VT, Op, DAG.getConstant(1, VT)));
+    // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+    if (!TLI.isOperationLegal(ISD::CTPOP, VT) &&
+        TLI.isOperationLegal(ISD::CTLZ, VT))
+      return DAG.getNode(ISD::SUB, VT,
+                         DAG.getConstant(MVT::getSizeInBits(VT), VT),
+                         DAG.getNode(ISD::CTLZ, VT, Tmp3));
+    return DAG.getNode(ISD::CTPOP, VT, Tmp3);
+  }
+  }
+}
+
+/// ExpandOp - Expand the specified SDOperand into its two component pieces
+/// Lo&Hi.  Note that the Op MUST be an expanded type.  As a result of this, the
+/// LegalizeNodes map is filled in for any results that are not expanded, the
+/// ExpandedNodes map is filled in for any results that are expanded, and the
+/// Lo/Hi values are returned.
+void SelectionDAGLegalize::ExpandOp(SDOperand Op, SDOperand &Lo, SDOperand &Hi){
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+  SDNode *Node = Op.Val;
+  assert(getTypeAction(VT) == Expand && "Not an expanded type!");
+  assert(((MVT::isInteger(NVT) && NVT < VT) || MVT::isFloatingPoint(VT) ||
+         MVT::isVector(VT)) &&
+         "Cannot expand to FP value or to larger int value!");
+
+  // See if we already expanded it.
+  DenseMap<SDOperand, std::pair<SDOperand, SDOperand> >::iterator I
+    = ExpandedNodes.find(Op);
+  if (I != ExpandedNodes.end()) {
+    Lo = I->second.first;
+    Hi = I->second.second;
+    return;
+  }
+
+  switch (Node->getOpcode()) {
+  case ISD::CopyFromReg:
+    assert(0 && "CopyFromReg must be legal!");
+  default:
+#ifndef NDEBUG
+    cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to expand this operator!");
+    abort();
+  case ISD::UNDEF:
+    NVT = TLI.getTypeToExpandTo(VT);
+    Lo = DAG.getNode(ISD::UNDEF, NVT);
+    Hi = DAG.getNode(ISD::UNDEF, NVT);
+    break;
+  case ISD::Constant: {
+    uint64_t Cst = cast<ConstantSDNode>(Node)->getValue();
+    Lo = DAG.getConstant(Cst, NVT);
+    Hi = DAG.getConstant(Cst >> MVT::getSizeInBits(NVT), NVT);
+    break;
+  }
+  case ISD::ConstantFP: {
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+    Lo = ExpandConstantFP(CFP, false, DAG, TLI);
+    if (getTypeAction(Lo.getValueType()) == Expand)
+      ExpandOp(Lo, Lo, Hi);
+    break;
+  }
+  case ISD::BUILD_PAIR:
+    // Return the operands.
+    Lo = Node->getOperand(0);
+    Hi = Node->getOperand(1);
+    break;
+    
+  case ISD::SIGN_EXTEND_INREG:
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    // sext_inreg the low part if needed.
+    Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Lo, Node->getOperand(1));
+    
+    // The high part gets the sign extension from the lo-part.  This handles
+    // things like sextinreg V:i64 from i8.
+    Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+                     DAG.getConstant(MVT::getSizeInBits(NVT)-1,
+                                     TLI.getShiftAmountTy()));
+    break;
+
+  case ISD::BSWAP: {
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    SDOperand TempLo = DAG.getNode(ISD::BSWAP, NVT, Hi);
+    Hi = DAG.getNode(ISD::BSWAP, NVT, Lo);
+    Lo = TempLo;
+    break;
+  }
+    
+  case ISD::CTPOP:
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    Lo = DAG.getNode(ISD::ADD, NVT,          // ctpop(HL) -> ctpop(H)+ctpop(L)
+                     DAG.getNode(ISD::CTPOP, NVT, Lo),
+                     DAG.getNode(ISD::CTPOP, NVT, Hi));
+    Hi = DAG.getConstant(0, NVT);
+    break;
+
+  case ISD::CTLZ: {
+    // ctlz (HL) -> ctlz(H) != 32 ? ctlz(H) : (ctlz(L)+32)
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT);
+    SDOperand HLZ = DAG.getNode(ISD::CTLZ, NVT, Hi);
+    SDOperand TopNotZero = DAG.getSetCC(TLI.getSetCCResultTy(), HLZ, BitsC,
+                                        ISD::SETNE);
+    SDOperand LowPart = DAG.getNode(ISD::CTLZ, NVT, Lo);
+    LowPart = DAG.getNode(ISD::ADD, NVT, LowPart, BitsC);
+
+    Lo = DAG.getNode(ISD::SELECT, NVT, TopNotZero, HLZ, LowPart);
+    Hi = DAG.getConstant(0, NVT);
+    break;
+  }
+
+  case ISD::CTTZ: {
+    // cttz (HL) -> cttz(L) != 32 ? cttz(L) : (cttz(H)+32)
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT);
+    SDOperand LTZ = DAG.getNode(ISD::CTTZ, NVT, Lo);
+    SDOperand BotNotZero = DAG.getSetCC(TLI.getSetCCResultTy(), LTZ, BitsC,
+                                        ISD::SETNE);
+    SDOperand HiPart = DAG.getNode(ISD::CTTZ, NVT, Hi);
+    HiPart = DAG.getNode(ISD::ADD, NVT, HiPart, BitsC);
+
+    Lo = DAG.getNode(ISD::SELECT, NVT, BotNotZero, LTZ, HiPart);
+    Hi = DAG.getConstant(0, NVT);
+    break;
+  }
+
+  case ISD::VAARG: {
+    SDOperand Ch = Node->getOperand(0);   // Legalize the chain.
+    SDOperand Ptr = Node->getOperand(1);  // Legalize the pointer.
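+    // Fetch the value as two consecutive NVT-sized va_args; the second read
+    // is chained on the first so they stay ordered.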
+    Lo = DAG.getVAArg(NVT, Ch, Ptr, Node->getOperand(2));
+    Hi = DAG.getVAArg(NVT, Lo.getValue(1), Ptr, Node->getOperand(2));
+
+    // Remember that we legalized the chain.
+    Hi = LegalizeOp(Hi);
+    AddLegalizedOperand(Op.getValue(1), Hi.getValue(1));
+    if (!TLI.isLittleEndian())
+      std::swap(Lo, Hi);
+    break;
+  }
+    
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    SDOperand Ch  = LD->getChain();    // Legalize the chain.
+    SDOperand Ptr = LD->getBasePtr();  // Legalize the pointer.
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+    int SVOffset = LD->getSrcValueOffset();
+    unsigned Alignment = LD->getAlignment();
+    bool isVolatile = LD->isVolatile();
+
+    if (ExtType == ISD::NON_EXTLOAD) {
+      Lo = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), SVOffset,
+                       isVolatile, Alignment);
+      if (VT == MVT::f32 || VT == MVT::f64) {
+        // f32->i32 or f64->i64 one to one expansion.
+        // Remember that we legalized the chain.
+        AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Lo.getValue(1)));
+        // Recursively expand the new load.
+        if (getTypeAction(NVT) == Expand)
+          ExpandOp(Lo, Lo, Hi);
+        break;
+      }
+
+      // Increment the pointer to the other half.
+      unsigned IncrementSize = MVT::getSizeInBits(Lo.getValueType())/8;
+      Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+                        getIntPtrConstant(IncrementSize));
+      SVOffset += IncrementSize;
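+      // The second half sits IncrementSize bytes past the first, so we can
+      // only claim min(Alignment, IncrementSize) alignment for it.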
+      if (Alignment > IncrementSize)
+        Alignment = IncrementSize;
+      Hi = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), SVOffset,
+                       isVolatile, Alignment);
+
+      // Build a factor node to remember that this load is independent of the
+      // other one.
+      SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo.getValue(1),
+                                 Hi.getValue(1));
+
+      // Remember that we legalized the chain.
+      AddLegalizedOperand(Op.getValue(1), LegalizeOp(TF));
+      if (!TLI.isLittleEndian())
+        std::swap(Lo, Hi);
+    } else {
+      MVT::ValueType EVT = LD->getLoadedVT();
+
+      if (VT == MVT::f64 && EVT == MVT::f32) {
+        // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+        SDOperand Load = DAG.getLoad(EVT, Ch, Ptr, LD->getSrcValue(),
+                                     SVOffset, isVolatile, Alignment);
+        // Remember that we legalized the chain.
+        AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Load.getValue(1)));
+        ExpandOp(DAG.getNode(ISD::FP_EXTEND, VT, Load), Lo, Hi);
+        break;
+      }
+    
+      if (EVT == NVT)
+        Lo = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(),
+                         SVOffset, isVolatile, Alignment);
+      else
+        Lo = DAG.getExtLoad(ExtType, NVT, Ch, Ptr, LD->getSrcValue(),
+                            SVOffset, EVT, isVolatile,
+                            Alignment);
+    
+      // Remember that we legalized the chain.
+      AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Lo.getValue(1)));
+
+      if (ExtType == ISD::SEXTLOAD) {
+        // The high part is obtained by SRA'ing all but one of the bits of the
+        // lo part.
+        unsigned LoSize = MVT::getSizeInBits(Lo.getValueType());
+        Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+                         DAG.getConstant(LoSize-1, TLI.getShiftAmountTy()));
+      } else if (ExtType == ISD::ZEXTLOAD) {
+        // The high part is just a zero.
+        Hi = DAG.getConstant(0, NVT);
+      } else /* if (ExtType == ISD::EXTLOAD) */ {
+        // The high part is undefined.
+        Hi = DAG.getNode(ISD::UNDEF, NVT);
+      }
+    }
+    break;
+  }
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: {   // Simple logical operators -> two trivial pieces.
+    SDOperand LL, LH, RL, RH;
+    ExpandOp(Node->getOperand(0), LL, LH);
+    ExpandOp(Node->getOperand(1), RL, RH);
+    Lo = DAG.getNode(Node->getOpcode(), NVT, LL, RL);
+    Hi = DAG.getNode(Node->getOpcode(), NVT, LH, RH);
+    break;
+  }
+  case ISD::SELECT: {
+    SDOperand LL, LH, RL, RH;
+    ExpandOp(Node->getOperand(1), LL, LH);
+    ExpandOp(Node->getOperand(2), RL, RH);
+    if (getTypeAction(NVT) == Expand)
+      NVT = TLI.getTypeToExpandTo(NVT);
+    Lo = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), LL, RL);
+    if (VT != MVT::f32)
+      Hi = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), LH, RH);
+    break;
+  }
+  case ISD::SELECT_CC: {
+    SDOperand TL, TH, FL, FH;
+    ExpandOp(Node->getOperand(2), TL, TH);
+    ExpandOp(Node->getOperand(3), FL, FH);
+    if (getTypeAction(NVT) == Expand)
+      NVT = TLI.getTypeToExpandTo(NVT);
+    Lo = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+                     Node->getOperand(1), TL, FL, Node->getOperand(4));
+    if (VT != MVT::f32)
+      Hi = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+                       Node->getOperand(1), TH, FH, Node->getOperand(4));
+    break;
+  }
+  case ISD::ANY_EXTEND:
+    // The low part is any extension of the input (which degenerates to a copy).
+    Lo = DAG.getNode(ISD::ANY_EXTEND, NVT, Node->getOperand(0));
+    // The high part is undefined.
+    Hi = DAG.getNode(ISD::UNDEF, NVT);
+    break;
+  case ISD::SIGN_EXTEND: {
+    // The low part is just a sign extension of the input (which degenerates to
+    // a copy).
+    Lo = DAG.getNode(ISD::SIGN_EXTEND, NVT, Node->getOperand(0));
+
+    // The high part is obtained by SRA'ing all but one of the bits of the lo
+    // part.
+    unsigned LoSize = MVT::getSizeInBits(Lo.getValueType());
+    Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+                     DAG.getConstant(LoSize-1, TLI.getShiftAmountTy()));
+    break;
+  }
+  case ISD::ZERO_EXTEND:
+    // The low part is just a zero extension of the input (which degenerates to
+    // a copy).
+    Lo = DAG.getNode(ISD::ZERO_EXTEND, NVT, Node->getOperand(0));
+
+    // The high part is just a zero.
+    Hi = DAG.getConstant(0, NVT);
+    break;
+    
+  case ISD::TRUNCATE: {
+    // The input value must be larger than this value.  Expand *it*.
+    SDOperand NewLo;
+    ExpandOp(Node->getOperand(0), NewLo, Hi);
+    
+    // The low part is now either the right size, or it is closer.  If not the
+    // right size, make an illegal truncate so we recursively expand it.
+    if (NewLo.getValueType() != Node->getValueType(0))
+      NewLo = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), NewLo);
+    ExpandOp(NewLo, Lo, Hi);
+    break;
+  }
+    
+  case ISD::BIT_CONVERT: {
+    SDOperand Tmp;
+    if (TLI.getOperationAction(ISD::BIT_CONVERT, VT) == TargetLowering::Custom){
+      // If the target wants to, allow it to lower this itself.
+      switch (getTypeAction(Node->getOperand(0).getValueType())) {
+      case Expand: assert(0 && "cannot expand FP!");
+      case Legal:   Tmp = LegalizeOp(Node->getOperand(0)); break;
+      case Promote: Tmp = PromoteOp (Node->getOperand(0)); break;
+      }
+      Tmp = TLI.LowerOperation(DAG.getNode(ISD::BIT_CONVERT, VT, Tmp), DAG);
+    }
+
+    // f32 / f64 must be expanded to i32 / i64.
+    if (VT == MVT::f32 || VT == MVT::f64) {
+      Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+      if (getTypeAction(NVT) == Expand)
+        ExpandOp(Lo, Lo, Hi);
+      break;
+    }
+
+    // If source operand will be expanded to the same type as VT, i.e.
+    // i64 <- f64, i32 <- f32, expand the source operand instead.
+    MVT::ValueType VT0 = Node->getOperand(0).getValueType();
+    if (getTypeAction(VT0) == Expand && TLI.getTypeToTransformTo(VT0) == VT) {
+      ExpandOp(Node->getOperand(0), Lo, Hi);
+      break;
+    }
+
+    // Turn this into a load/store pair by default.
+    if (Tmp.Val == 0)
+      Tmp = ExpandBIT_CONVERT(VT, Node->getOperand(0));
+    
+    ExpandOp(Tmp, Lo, Hi);
+    break;
+  }
+
+  case ISD::READCYCLECOUNTER:
+    assert(TLI.getOperationAction(ISD::READCYCLECOUNTER, VT) == 
+                 TargetLowering::Custom &&
+           "Must custom expand ReadCycleCounter");
+    Lo = TLI.LowerOperation(Op, DAG);
+    assert(Lo.Val && "Node must be custom expanded!");
+    Hi = Lo.getValue(1);
+    AddLegalizedOperand(SDOperand(Node, 1), // Remember we legalized the chain.
+                        LegalizeOp(Lo.getValue(2)));
+    break;
+
+    // These operators cannot be expanded directly, emit them as calls to
+    // library functions.
+  case ISD::FP_TO_SINT: {
+    if (TLI.getOperationAction(ISD::FP_TO_SINT, VT) == TargetLowering::Custom) {
+      SDOperand Op;
+      switch (getTypeAction(Node->getOperand(0).getValueType())) {
+      case Expand: assert(0 && "cannot expand FP!");
+      case Legal:   Op = LegalizeOp(Node->getOperand(0)); break;
+      case Promote: Op = PromoteOp (Node->getOperand(0)); break;
+      }
+
+      Op = TLI.LowerOperation(DAG.getNode(ISD::FP_TO_SINT, VT, Op), DAG);
+
+      // Now that the custom expander is done, expand the result, which is still
+      // VT.
+      if (Op.Val) {
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+
+    RTLIB::Libcall LC;
+    if (Node->getOperand(0).getValueType() == MVT::f32)
+      LC = RTLIB::FPTOSINT_F32_I64;
+    else
+      LC = RTLIB::FPTOSINT_F64_I64;
+    Lo = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                       false/*sign irrelevant*/, Hi);
+    break;
+  }
+
+  case ISD::FP_TO_UINT: {
+    if (TLI.getOperationAction(ISD::FP_TO_UINT, VT) == TargetLowering::Custom) {
+      SDOperand Op;
+      switch (getTypeAction(Node->getOperand(0).getValueType())) {
+        case Expand: assert(0 && "cannot expand FP!");
+        case Legal:   Op = LegalizeOp(Node->getOperand(0)); break;
+        case Promote: Op = PromoteOp (Node->getOperand(0)); break;
+      }
+        
+      Op = TLI.LowerOperation(DAG.getNode(ISD::FP_TO_UINT, VT, Op), DAG);
+
+      // Now that the custom expander is done, expand the result.
+      if (Op.Val) {
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+
+    RTLIB::Libcall LC;
+    if (Node->getOperand(0).getValueType() == MVT::f32)
+      LC = RTLIB::FPTOUINT_F32_I64;
+    else
+      LC = RTLIB::FPTOUINT_F64_I64;
+    Lo = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                       false/*sign irrelevant*/, Hi);
+    break;
+  }
+
+  case ISD::SHL: {
+    // If the target wants custom lowering, do so.
+    SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+    if (TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Custom) {
+      SDOperand Op = DAG.getNode(ISD::SHL, VT, Node->getOperand(0), ShiftAmt);
+      Op = TLI.LowerOperation(Op, DAG);
+      if (Op.Val) {
+        // Now that the custom expander is done, expand the result, which is
+        // still VT.
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+    
+    // If ADDC/ADDE are supported and if the shift amount is a constant 1, emit 
+    // this X << 1 as X+X.
+    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+      if (ShAmt->getValue() == 1 && TLI.isOperationLegal(ISD::ADDC, NVT) && 
+          TLI.isOperationLegal(ISD::ADDE, NVT)) {
+        SDOperand LoOps[2], HiOps[3];
+        ExpandOp(Node->getOperand(0), LoOps[0], HiOps[0]);
+        SDVTList VTList = DAG.getVTList(LoOps[0].getValueType(), MVT::Flag);
+        LoOps[1] = LoOps[0];
+        Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+
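+        // The carry out of the low-half ADDC is exactly the bit shifted into
+        // the high half, so fold it into the high-half ADDE.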
+        HiOps[1] = HiOps[0];
+        HiOps[2] = Lo.getValue(1);
+        Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+        break;
+      }
+    }
+    
+    // If we can emit an efficient shift operation, do so now.
+    if (ExpandShift(ISD::SHL, Node->getOperand(0), ShiftAmt, Lo, Hi))
+      break;
+
+    // If this target supports SHL_PARTS, use it.
+    TargetLowering::LegalizeAction Action =
+      TLI.getOperationAction(ISD::SHL_PARTS, NVT);
+    if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+        Action == TargetLowering::Custom) {
+      ExpandShiftParts(ISD::SHL_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+      break;
+    }
+
+    // Otherwise, emit a libcall.
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SHL_I64), Node,
+                       false/*left shift=unsigned*/, Hi);
+    break;
+  }
+
+  case ISD::SRA: {
+    // If the target wants custom lowering, do so.
+    SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+    if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Custom) {
+      SDOperand Op = DAG.getNode(ISD::SRA, VT, Node->getOperand(0), ShiftAmt);
+      Op = TLI.LowerOperation(Op, DAG);
+      if (Op.Val) {
+        // Now that the custom expander is done, expand the result, which is
+        // still VT.
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+    
+    // If we can emit an efficient shift operation, do so now.
+    if (ExpandShift(ISD::SRA, Node->getOperand(0), ShiftAmt, Lo, Hi))
+      break;
+
+    // If this target supports SRA_PARTS, use it.
+    TargetLowering::LegalizeAction Action =
+      TLI.getOperationAction(ISD::SRA_PARTS, NVT);
+    if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+        Action == TargetLowering::Custom) {
+      ExpandShiftParts(ISD::SRA_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+      break;
+    }
+
+    // Otherwise, emit a libcall.
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SRA_I64), Node,
+                       true/*ashr is signed*/, Hi);
+    break;
+  }
+
+  case ISD::SRL: {
+    // If the target wants custom lowering, do so.
+    SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+    if (TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Custom) {
+      SDOperand Op = DAG.getNode(ISD::SRL, VT, Node->getOperand(0), ShiftAmt);
+      Op = TLI.LowerOperation(Op, DAG);
+      if (Op.Val) {
+        // Now that the custom expander is done, expand the result, which is
+        // still VT.
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+
+    // If we can emit an efficient shift operation, do so now.
+    if (ExpandShift(ISD::SRL, Node->getOperand(0), ShiftAmt, Lo, Hi))
+      break;
+
+    // If this target supports SRL_PARTS, use it.
+    TargetLowering::LegalizeAction Action =
+      TLI.getOperationAction(ISD::SRL_PARTS, NVT);
+    if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+        Action == TargetLowering::Custom) {
+      ExpandShiftParts(ISD::SRL_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+      break;
+    }
+
+    // Otherwise, emit a libcall.
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SRL_I64), Node,
+                       false/*lshr is unsigned*/, Hi);
+    break;
+  }
+
+  case ISD::ADD:
+  case ISD::SUB: {
+    // If the target wants to custom expand this, let them.
+    if (TLI.getOperationAction(Node->getOpcode(), VT) ==
+            TargetLowering::Custom) {
+      SDOperand Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) {
+        ExpandOp(Result, Lo, Hi);
+        break;
+      }
+    }
+    
+    // Expand the subcomponents.
+    SDOperand LHSL, LHSH, RHSL, RHSH;
+    ExpandOp(Node->getOperand(0), LHSL, LHSH);
+    ExpandOp(Node->getOperand(1), RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+    SDOperand LoOps[2], HiOps[3];
+    LoOps[0] = LHSL;
+    LoOps[1] = RHSL;
+    HiOps[0] = LHSH;
+    HiOps[1] = RHSH;
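+    // Combine the low halves first; the carry/borrow from ADDC/SUBC is then
+    // consumed by the ADDE/SUBE that combines the high halves.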
+    if (Node->getOpcode() == ISD::ADD) {
+      Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+    } else {
+      Lo = DAG.getNode(ISD::SUBC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBE, VTList, HiOps, 3);
+    }
+    break;
+  }
+    
+  case ISD::ADDC:
+  case ISD::SUBC: {
+    // Expand the subcomponents.
+    SDOperand LHSL, LHSH, RHSL, RHSH;
+    ExpandOp(Node->getOperand(0), LHSL, LHSH);
+    ExpandOp(Node->getOperand(1), RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+    SDOperand LoOps[2] = { LHSL, RHSL };
+    SDOperand HiOps[3] = { LHSH, RHSH };
+    
+    if (Node->getOpcode() == ISD::ADDC) {
+      Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+    } else {
+      Lo = DAG.getNode(ISD::SUBC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBE, VTList, HiOps, 3);
+    }
+    // Remember that we legalized the flag.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Hi.getValue(1)));
+    break;
+  }
+  case ISD::ADDE:
+  case ISD::SUBE: {
+    // Expand the subcomponents.
+    SDOperand LHSL, LHSH, RHSL, RHSH;
+    ExpandOp(Node->getOperand(0), LHSL, LHSH);
+    ExpandOp(Node->getOperand(1), RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+    SDOperand LoOps[3] = { LHSL, RHSL, Node->getOperand(2) };
+    SDOperand HiOps[3] = { LHSH, RHSH };
+    
+    Lo = DAG.getNode(Node->getOpcode(), VTList, LoOps, 3);
+    HiOps[2] = Lo.getValue(1);
+    Hi = DAG.getNode(Node->getOpcode(), VTList, HiOps, 3);
+    
+    // Remember that we legalized the flag.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Hi.getValue(1)));
+    break;
+  }
+  case ISD::MUL: {
+    // If the target wants to custom expand this, let them.
+    if (TLI.getOperationAction(ISD::MUL, VT) == TargetLowering::Custom) {
+      SDOperand New = TLI.LowerOperation(Op, DAG);
+      if (New.Val) {
+        ExpandOp(New, Lo, Hi);
+        break;
+      }
+    }
+    
+    bool HasMULHS = TLI.isOperationLegal(ISD::MULHS, NVT);
+    bool HasMULHU = TLI.isOperationLegal(ISD::MULHU, NVT);
+    if (HasMULHS || HasMULHU) {
+      SDOperand LL, LH, RL, RH;
+      ExpandOp(Node->getOperand(0), LL, LH);
+      ExpandOp(Node->getOperand(1), RL, RH);
+      unsigned SH = MVT::getSizeInBits(RH.getValueType())-1;
+      // FIXME: Move this to the dag combiner.
+      // MULHS implicitly sign extends its inputs.  Check to see if ExpandOp
+      // extended the sign bit of the low half through the upper half, and if so
+      // emit a MULHS instead of the alternate sequence that is valid for any
+      // i64 x i64 multiply.
+      if (HasMULHS &&
+          // is RH an extension of the sign bit of RL?
+          RH.getOpcode() == ISD::SRA && RH.getOperand(0) == RL &&
+          RH.getOperand(1).getOpcode() == ISD::Constant &&
+          cast<ConstantSDNode>(RH.getOperand(1))->getValue() == SH &&
+          // is LH an extension of the sign bit of LL?
+          LH.getOpcode() == ISD::SRA && LH.getOperand(0) == LL &&
+          LH.getOperand(1).getOpcode() == ISD::Constant &&
+          cast<ConstantSDNode>(LH.getOperand(1))->getValue() == SH) {
+        // Low part:
+        Lo = DAG.getNode(ISD::MUL, NVT, LL, RL);
+        // High part:
+        Hi = DAG.getNode(ISD::MULHS, NVT, LL, RL);
+        break;
+      } else if (HasMULHU) {
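+        // Writing each operand as Hi*2^N + Lo, the LH*RH term lies entirely
+        // above the result, so the high half is MULHU(LL, RL) plus the two
+        // cross products LL*RH and LH*RL.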
+        // Low part:
+        Lo = DAG.getNode(ISD::MUL, NVT, LL, RL);
+        
+        // High part:
+        Hi = DAG.getNode(ISD::MULHU, NVT, LL, RL);
+        RH = DAG.getNode(ISD::MUL, NVT, LL, RH);
+        LH = DAG.getNode(ISD::MUL, NVT, LH, RL);
+        Hi = DAG.getNode(ISD::ADD, NVT, Hi, RH);
+        Hi = DAG.getNode(ISD::ADD, NVT, Hi, LH);
+        break;
+      }
+    }
+
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::MUL_I64), Node,
+                       false/*sign irrelevant*/, Hi);
+    break;
+  }
+  case ISD::SDIV:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SDIV_I64), Node, true, Hi);
+    break;
+  case ISD::UDIV:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::UDIV_I64), Node, true, Hi);
+    break;
+  case ISD::SREM:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SREM_I64), Node, true, Hi);
+    break;
+  case ISD::UREM:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::UREM_I64), Node, true, Hi);
+    break;
+
+  case ISD::FADD:
+    Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+                                          ? RTLIB::ADD_F32 : RTLIB::ADD_F64),
+                       Node, false, Hi);
+    break;
+  case ISD::FSUB:
+    Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+                                          ? RTLIB::SUB_F32 : RTLIB::SUB_F64),
+                       Node, false, Hi);
+    break;
+  case ISD::FMUL:
+    Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+                                          ? RTLIB::MUL_F32 : RTLIB::MUL_F64),
+                       Node, false, Hi);
+    break;
+  case ISD::FDIV:
+    Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+                                          ? RTLIB::DIV_F32 : RTLIB::DIV_F64),
+                       Node, false, Hi);
+    break;
+  case ISD::FP_EXTEND:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::FPEXT_F32_F64), Node, true,Hi);
+    break;
+  case ISD::FP_ROUND:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::FPROUND_F64_F32),Node,true,Hi);
+    break;
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS: {
+    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+    switch(Node->getOpcode()) {
+    case ISD::FSQRT:
+      LC = (VT == MVT::f32) ? RTLIB::SQRT_F32 : RTLIB::SQRT_F64;
+      break;
+    case ISD::FSIN:
+      LC = (VT == MVT::f32) ? RTLIB::SIN_F32 : RTLIB::SIN_F64;
+      break;
+    case ISD::FCOS:
+      LC = (VT == MVT::f32) ? RTLIB::COS_F32 : RTLIB::COS_F64;
+      break;
+    default: assert(0 && "Unreachable!");
+    }
+    Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, false, Hi);
+    break;
+  }
+  case ISD::FABS: {
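+    // fabs is a bitwise AND of the value's integer representation with a
+    // mask that clears only the sign bit.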
+    SDOperand Mask = (VT == MVT::f64)
+      ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), VT)
+      : DAG.getConstantFP(BitsToFloat(~(1U << 31)), VT);
+    Mask = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+    Lo = DAG.getNode(ISD::AND, NVT, Lo, Mask);
+    if (getTypeAction(NVT) == Expand)
+      ExpandOp(Lo, Lo, Hi);
+    break;
+  }
+  case ISD::FNEG: {
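+    // fneg is a bitwise XOR with a mask that flips only the sign bit.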
+    SDOperand Mask = (VT == MVT::f64)
+      ? DAG.getConstantFP(BitsToDouble(1ULL << 63), VT)
+      : DAG.getConstantFP(BitsToFloat(1U << 31), VT);
+    Mask = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+    Lo = DAG.getNode(ISD::XOR, NVT, Lo, Mask);
+    if (getTypeAction(NVT) == Expand)
+      ExpandOp(Lo, Lo, Hi);
+    break;
+  }
+  case ISD::FCOPYSIGN: {
+    Lo = ExpandFCOPYSIGNToBitwiseOps(Node, NVT, DAG, TLI);
+    if (getTypeAction(NVT) == Expand)
+      ExpandOp(Lo, Lo, Hi);
+    break;
+  }
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP: {
+    bool isSigned = Node->getOpcode() == ISD::SINT_TO_FP;
+    MVT::ValueType SrcVT = Node->getOperand(0).getValueType();
+    RTLIB::Libcall LC;
+    if (Node->getOperand(0).getValueType() == MVT::i64) {
+      if (VT == MVT::f32)
+        LC = isSigned ? RTLIB::SINTTOFP_I64_F32 : RTLIB::UINTTOFP_I64_F32;
+      else
+        LC = isSigned ? RTLIB::SINTTOFP_I64_F64 : RTLIB::UINTTOFP_I64_F64;
+    } else {
+      if (VT == MVT::f32)
+        LC = isSigned ? RTLIB::SINTTOFP_I32_F32 : RTLIB::UINTTOFP_I32_F32;
+      else
+        LC = isSigned ? RTLIB::SINTTOFP_I32_F64 : RTLIB::UINTTOFP_I32_F64;
+    }
+
+    // Promote the operand if needed.
+    if (getTypeAction(SrcVT) == Promote) {
+      SDOperand Tmp = PromoteOp(Node->getOperand(0));
+      Tmp = isSigned
+        ? DAG.getNode(ISD::SIGN_EXTEND_INREG, Tmp.getValueType(), Tmp,
+                      DAG.getValueType(SrcVT))
+        : DAG.getZeroExtendInReg(Tmp, SrcVT);
+      Node = DAG.UpdateNodeOperands(Op, Tmp).Val;
+    }
+
+    const char *LibCall = TLI.getLibcallName(LC);
+    if (LibCall)
+      Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Hi);
+    else  {
+      Lo = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, VT,
+                         Node->getOperand(0));
+      if (getTypeAction(Lo.getValueType()) == Expand)
+        ExpandOp(Lo, Lo, Hi);
+    }
+    break;
+  }
+  }
+
+  // Make sure the resultant values have been legalized themselves, unless this
+  // is a type that requires multi-step expansion.
+  if (getTypeAction(NVT) != Expand && NVT != MVT::isVoid) {
+    Lo = LegalizeOp(Lo);
+    if (Hi.Val)
+      // Don't legalize the high part if it is expanded to a single node.
+      Hi = LegalizeOp(Hi);
+  }
+
+  // Remember in a map if the values will be reused later.
+  bool isNew = ExpandedNodes.insert(std::make_pair(Op, std::make_pair(Lo, Hi)));
+  assert(isNew && "Value already expanded?!?");
+}
+
+/// SplitVectorOp - Given an operand of vector type, break it down into
+/// two smaller values, still of vector type.
+void SelectionDAGLegalize::SplitVectorOp(SDOperand Op, SDOperand &Lo,
+                                         SDOperand &Hi) {
+  assert(MVT::isVector(Op.getValueType()) && "Cannot split non-vector type!");
+  SDNode *Node = Op.Val;
+  unsigned NumElements = MVT::getVectorNumElements(Node->getValueType(0));
+  assert(NumElements > 1 && "Cannot split a single element vector!");
+  unsigned NewNumElts = NumElements/2;
+  MVT::ValueType NewEltVT = MVT::getVectorElementType(Node->getValueType(0));
+  MVT::ValueType NewVT = MVT::getVectorType(NewEltVT, NewNumElts);
+  
+  // See if we already split it.
+  std::map<SDOperand, std::pair<SDOperand, SDOperand> >::iterator I
+    = SplitNodes.find(Op);
+  if (I != SplitNodes.end()) {
+    Lo = I->second.first;
+    Hi = I->second.second;
+    return;
+  }
+  
+  switch (Node->getOpcode()) {
+  default: 
+#ifndef NDEBUG
+    Node->dump(&DAG);
+#endif
+    assert(0 && "Unhandled operation in SplitVectorOp!");
+  case ISD::BUILD_PAIR:
+    Lo = Node->getOperand(0);
+    Hi = Node->getOperand(1);
+    break;
+  case ISD::BUILD_VECTOR: {
+    SmallVector<SDOperand, 8> LoOps(Node->op_begin(), 
+                                    Node->op_begin()+NewNumElts);
+    Lo = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &LoOps[0], LoOps.size());
+
+    SmallVector<SDOperand, 8> HiOps(Node->op_begin()+NewNumElts, 
+                                    Node->op_end());
+    Hi = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &HiOps[0], HiOps.size());
+    break;
+  }
+  case ISD::CONCAT_VECTORS: {
+    unsigned NewNumSubvectors = Node->getNumOperands() / 2;
+    if (NewNumSubvectors == 1) {
+      Lo = Node->getOperand(0);
+      Hi = Node->getOperand(1);
+    } else {
+      SmallVector<SDOperand, 8> LoOps(Node->op_begin(), 
+                                      Node->op_begin()+NewNumSubvectors);
+      Lo = DAG.getNode(ISD::CONCAT_VECTORS, NewVT, &LoOps[0], LoOps.size());
+
+      SmallVector<SDOperand, 8> HiOps(Node->op_begin()+NewNumSubvectors, 
+                                      Node->op_end());
+      Hi = DAG.getNode(ISD::CONCAT_VECTORS, NewVT, &HiOps[0], HiOps.size());
+    }
+    break;
+  }
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: {
+    SDOperand LL, LH, RL, RH;
+    SplitVectorOp(Node->getOperand(0), LL, LH);
+    SplitVectorOp(Node->getOperand(1), RL, RH);
+    
+    Lo = DAG.getNode(Node->getOpcode(), NewVT, LL, RL);
+    Hi = DAG.getNode(Node->getOpcode(), NewVT, LH, RH);
+    break;
+  }
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    SDOperand Ch = LD->getChain();
+    SDOperand Ptr = LD->getBasePtr();
+    const Value *SV = LD->getSrcValue();
+    int SVOffset = LD->getSrcValueOffset();
+    unsigned Alignment = LD->getAlignment();
+    bool isVolatile = LD->isVolatile();
+
+    Lo = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset, isVolatile, Alignment);
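+    // Advance the pointer past the low-half elements and load the remaining
+    // elements as the high half.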
+    unsigned IncrementSize = NewNumElts * MVT::getSizeInBits(NewEltVT)/8;
+    Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+                      getIntPtrConstant(IncrementSize));
+    SVOffset += IncrementSize;
+    if (Alignment > IncrementSize)
+      Alignment = IncrementSize;
+    Hi = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset, isVolatile, Alignment);
+    
+    // Build a factor node to remember that this load is independent of the
+    // other one.
+    SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo.getValue(1),
+                               Hi.getValue(1));
+    
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(TF));
+    break;
+  }
+  case ISD::BIT_CONVERT: {
+    // We know the result is a vector.  The input may be either a vector or a
+    // scalar value.
+    SDOperand InOp = Node->getOperand(0);
+    if (!MVT::isVector(InOp.getValueType()) ||
+        MVT::getVectorNumElements(InOp.getValueType()) == 1) {
+      // The input is a scalar or single-element vector.
+      // Lower to a store/load so that it can be split.
+      // FIXME: this could probably be improved.
+      SDOperand Ptr = CreateStackTemporary(InOp.getValueType());
+
+      SDOperand St = DAG.getStore(DAG.getEntryNode(),
+                                  InOp, Ptr, NULL, 0);
+      InOp = DAG.getLoad(Op.getValueType(), St, Ptr, NULL, 0);
+    }
+    // Split the vector and convert each of the pieces now.
+    SplitVectorOp(InOp, Lo, Hi);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, NewVT, Lo);
+    Hi = DAG.getNode(ISD::BIT_CONVERT, NewVT, Hi);
+    break;
+  }
+  }
+      
+  // Remember in a map if the values will be reused later.
+  bool isNew = 
+    SplitNodes.insert(std::make_pair(Op, std::make_pair(Lo, Hi))).second;
+  assert(isNew && "Value already split?!?");
+}
+
+
+/// ScalarizeVectorOp - Given an operand of single-element vector type
+/// (e.g. v1f32), convert it into the equivalent operation that returns a
+/// scalar (e.g. f32) value.
+SDOperand SelectionDAGLegalize::ScalarizeVectorOp(SDOperand Op) {
+  assert(MVT::isVector(Op.getValueType()) &&
+         "Bad ScalarizeVectorOp invocation!");
+  SDNode *Node = Op.Val;
+  MVT::ValueType NewVT = MVT::getVectorElementType(Op.getValueType());
+  assert(MVT::getVectorNumElements(Op.getValueType()) == 1);
+  
+  // See if we already scalarized it.
+  std::map<SDOperand, SDOperand>::iterator I = ScalarizedNodes.find(Op);
+  if (I != ScalarizedNodes.end()) return I->second;
+  
+  SDOperand Result;
+  switch (Node->getOpcode()) {
+  default: 
+#ifndef NDEBUG
+    Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Unknown vector operation in ScalarizeVectorOp!");
+  case ISD::ADD:
+  case ISD::FADD:
+  case ISD::SUB:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+  case ISD::FREM:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+    Result = DAG.getNode(Node->getOpcode(),
+                         NewVT, 
+                         ScalarizeVectorOp(Node->getOperand(0)),
+                         ScalarizeVectorOp(Node->getOperand(1)));
+    break;
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+    Result = DAG.getNode(Node->getOpcode(),
+                         NewVT, 
+                         ScalarizeVectorOp(Node->getOperand(0)));
+    break;
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    SDOperand Ch = LegalizeOp(LD->getChain());     // Legalize the chain.
+    SDOperand Ptr = LegalizeOp(LD->getBasePtr());  // Legalize the pointer.
+    
+    const Value *SV = LD->getSrcValue();
+    int SVOffset = LD->getSrcValueOffset();
+    Result = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset,
+                         LD->isVolatile(), LD->getAlignment());
+
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+  }
+  case ISD::BUILD_VECTOR:
+    Result = Node->getOperand(0);
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    // Return the inserted scalar element.
+    Result = Node->getOperand(1);
+    break;
+  case ISD::CONCAT_VECTORS:
+    assert(Node->getOperand(0).getValueType() == NewVT &&
+           "Concat of non-legal vectors not yet supported!");
+    Result = Node->getOperand(0);
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    // Figure out if the scalar is the LHS or RHS and return it.
+    SDOperand EltNum = Node->getOperand(2).getOperand(0);
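+    // The mask has a single element; a non-zero index selects element 0 of
+    // the RHS, a zero index selects element 0 of the LHS.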
+    if (cast<ConstantSDNode>(EltNum)->getValue())
+      Result = ScalarizeVectorOp(Node->getOperand(1));
+    else
+      Result = ScalarizeVectorOp(Node->getOperand(0));
+    break;
+  }
+  case ISD::EXTRACT_SUBVECTOR:
+    Result = Node->getOperand(0);
+    assert(Result.getValueType() == NewVT);
+    break;
+  case ISD::BIT_CONVERT:
+    Result = DAG.getNode(ISD::BIT_CONVERT, NewVT, Op.getOperand(0));
+    break;
+  case ISD::SELECT:
+    Result = DAG.getNode(ISD::SELECT, NewVT, Op.getOperand(0),
+                         ScalarizeVectorOp(Op.getOperand(1)),
+                         ScalarizeVectorOp(Op.getOperand(2)));
+    break;
+  }
+
+  if (TLI.isTypeLegal(NewVT))
+    Result = LegalizeOp(Result);
+  bool isNew = ScalarizedNodes.insert(std::make_pair(Op, Result)).second;
+  assert(isNew && "Value already scalarized?");
+  return Result;
+}
+
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize() {
+  if (ViewLegalizeDAGs) viewGraph();
+
+  // Create a SelectionDAGLegalize object and run it over the DAG.
+  SelectionDAGLegalize(*this).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..6c50288
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+# 
+#                     The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+# 
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
new file mode 100644
index 0000000..06b2329
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
@@ -0,0 +1,725 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple two pass scheduler.  The first pass attempts to push
+// backward any lengthy instructions and critical paths.  The second pass packs
+// instructions into semi-optimal time slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// BuildSchedUnits - Build SUnits from the input selection dag.
+/// This SUnit graph is similar to the SelectionDAG, but represents flagged
+/// together nodes with a single SUnit.
+void ScheduleDAG::BuildSchedUnits() {
+  // Reserve entries in the vector for each of the SUnits we are creating.  This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+  // invalidated.
+  SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end()));
+  
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  
+  for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); NI != E; ++NI) {
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+    
+    // If this node has already been processed, stop now.
+    if (SUnitMap[NI]) continue;
+    
+    SUnit *NodeSUnit = NewSUnit(NI);
+    
+    // See if anything is flagged to this node; if so, add it to the flagged
+    // nodes.  Nodes can have at most one flag input and one flag output.
+    // Flags are required to be the last operand and result of a node.
+    
+    // Scan up, adding flagged preds to FlaggedNodes.
+    SDNode *N = NI;
+    if (N->getNumOperands() &&
+        N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+      do {
+        N = N->getOperand(N->getNumOperands()-1).Val;
+        NodeSUnit->FlaggedNodes.push_back(N);
+        SUnitMap[N] = NodeSUnit;
+      } while (N->getNumOperands() &&
+               N->getOperand(N->getNumOperands()-1).getValueType()== MVT::Flag);
+      std::reverse(NodeSUnit->FlaggedNodes.begin(),
+                   NodeSUnit->FlaggedNodes.end());
+    }
+    
+    // Scan down, adding this node and any flagged succs to FlaggedNodes if they
+    // have a user of the flag operand.
+    N = NI;
+    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+      SDOperand FlagVal(N, N->getNumValues()-1);
+      
+      // There are either zero or one users of the Flag result.
+      bool HasFlagUse = false;
+      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 
+           UI != E; ++UI)
+        if (FlagVal.isOperand(*UI)) {
+          HasFlagUse = true;
+          NodeSUnit->FlaggedNodes.push_back(N);
+          SUnitMap[N] = NodeSUnit;
+          N = *UI;
+          break;
+        }
+      if (!HasFlagUse) break;
+    }
+    
+    // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
+    // Update the SUnit
+    NodeSUnit->Node = N;
+    SUnitMap[N] = NodeSUnit;
+    
+    // Compute the latency for the node.  We use the sum of the latencies for
+    // all nodes flagged together into this SUnit.
+    if (InstrItins.isEmpty()) {
+      // No latency information.
+      NodeSUnit->Latency = 1;
+    } else {
+      NodeSUnit->Latency = 0;
+      if (N->isTargetOpcode()) {
+        unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode());
+        InstrStage *S = InstrItins.begin(SchedClass);
+        InstrStage *E = InstrItins.end(SchedClass);
+        for (; S != E; ++S)
+          NodeSUnit->Latency += S->Cycles;
+      }
+      for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) {
+        SDNode *FNode = NodeSUnit->FlaggedNodes[i];
+        if (FNode->isTargetOpcode()) {
+          unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode());
+          InstrStage *S = InstrItins.begin(SchedClass);
+          InstrStage *E = InstrItins.end(SchedClass);
+          for (; S != E; ++S)
+            NodeSUnit->Latency += S->Cycles;
+        }
+      }
+    }
+  }
+  
+  // Pass 2: add the preds, succs, etc.
+  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+    SUnit *SU = &SUnits[su];
+    SDNode *MainNode = SU->Node;
+    
+    if (MainNode->isTargetOpcode()) {
+      unsigned Opc = MainNode->getTargetOpcode();
+      for (unsigned i = 0, ee = TII->getNumOperands(Opc); i != ee; ++i) {
+        if (TII->getOperandConstraint(Opc, i, TOI::TIED_TO) != -1) {
+          SU->isTwoAddress = true;
+          break;
+        }
+      }
+      if (TII->isCommutableInstr(Opc))
+        SU->isCommutable = true;
+    }
+    
+    // Find all predecessors and successors of the group.
+    // Temporarily add N to make code simpler.
+    SU->FlaggedNodes.push_back(MainNode);
+    
+    for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) {
+      SDNode *N = SU->FlaggedNodes[n];
+      
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        SDNode *OpN = N->getOperand(i).Val;
+        if (isPassiveNode(OpN)) continue;   // Not scheduled.
+        SUnit *OpSU = SUnitMap[OpN];
+        assert(OpSU && "Node has no SUnit!");
+        if (OpSU == SU) continue;           // In the same group.
+
+        MVT::ValueType OpVT = N->getOperand(i).getValueType();
+        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+        bool isChain = OpVT == MVT::Other;
+        
+        if (SU->addPred(OpSU, isChain)) {
+          if (!isChain) {
+            SU->NumPreds++;
+            SU->NumPredsLeft++;
+          } else {
+            SU->NumChainPredsLeft++;
+          }
+        }
+        if (OpSU->addSucc(SU, isChain)) {
+          if (!isChain) {
+            OpSU->NumSuccs++;
+            OpSU->NumSuccsLeft++;
+          } else {
+            OpSU->NumChainSuccsLeft++;
+          }
+        }
+      }
+    }
+    
+    // Remove MainNode from FlaggedNodes again.
+    SU->FlaggedNodes.pop_back();
+  }
+  
+  return;
+}
+
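+/// CalculateDepths - Compute the depth of each SUnit, i.e. the length of the
+/// longest path from a unit with no predecessors, using a worklist walk over
+/// the successor edges.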
+void ScheduleDAG::CalculateDepths() {
+  std::vector<std::pair<SUnit*, unsigned> > WorkList;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i)
+    if (SUnits[i].Preds.size() == 0/* && &SUnits[i] != Entry*/)
+      WorkList.push_back(std::make_pair(&SUnits[i], 0U));
+
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back().first;
+    unsigned Depth = WorkList.back().second;
+    WorkList.pop_back();
+    if (SU->Depth == 0 || Depth > SU->Depth) {
+      SU->Depth = Depth;
+      for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+           I != E; ++I)
+        WorkList.push_back(std::make_pair(I->first, Depth+1));
+    }
+  }
+}
+
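+/// CalculateHeights - Compute the height of each SUnit, i.e. the length of
+/// the longest path to the root of the DAG, using a worklist walk over the
+/// predecessor edges.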
+void ScheduleDAG::CalculateHeights() {
+  std::vector<std::pair<SUnit*, unsigned> > WorkList;
+  SUnit *Root = SUnitMap[DAG.getRoot().Val];
+  WorkList.push_back(std::make_pair(Root, 0U));
+
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back().first;
+    unsigned Height = WorkList.back().second;
+    WorkList.pop_back();
+    if (SU->Height == 0 || Height > SU->Height) {
+      SU->Height = Height;
+      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+           I != E; ++I)
+        WorkList.push_back(std::make_pair(I->first, Height+1));
+    }
+  }
+}
+
+/// CountResults - The results of target nodes have register or immediate
+/// values first, then an optional chain, and optional flag results (which do
+/// not go into the machine instrs).
+unsigned ScheduleDAG::CountResults(SDNode *Node) {
+  unsigned N = Node->getNumValues();
+  while (N && Node->getValueType(N - 1) == MVT::Flag)
+    --N;
+  if (N && Node->getValueType(N - 1) == MVT::Other)
+    --N;    // Skip over chain result.
+  return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then flag operands.  Compute the
+/// number of actual operands that will go into the machine instr.
+unsigned ScheduleDAG::CountOperands(SDNode *Node) {
+  unsigned N = Node->getNumOperands();
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+    --N;
+  if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+    --N; // Ignore chain if it exists.
+  return N;
+}
+
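+/// getInstrOperandRegClass - Return the register class that operand Op of
+/// instruction II is expected to use, or null if Op is one of the
+/// instruction's variable operands.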
+static const TargetRegisterClass *getInstrOperandRegClass(
+        const MRegisterInfo *MRI, 
+        const TargetInstrInfo *TII,
+        const TargetInstrDescriptor *II,
+        unsigned Op) {
+  if (Op >= II->numOperands) {
+    assert((II->Flags & M_VARIABLE_OPS) && "Invalid operand # of instruction");
+    return NULL;
+  }
+  const TargetOperandInfo &toi = II->OpInfo[Op];
+  return (toi.Flags & M_LOOK_UP_PTR_REG_CLASS)
+         ? TII->getPointerRegClass() : MRI->getRegClass(toi.RegClass);
+}
+
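+/// CreateVirtualRegisters - Add a register definition to MI for each result
+/// of Node, reusing the destination of a CopyToReg user when possible and
+/// creating a fresh virtual register otherwise.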
+static void CreateVirtualRegisters(SDNode *Node,
+                                   unsigned NumResults, 
+                                   const MRegisterInfo *MRI,
+                                   MachineInstr *MI,
+                                   SSARegMap *RegMap,
+                                   const TargetInstrInfo *TII,
+                                   const TargetInstrDescriptor &II,
+                                   DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  for (unsigned i = 0; i < NumResults; ++i) {
+    // If the specific node value is only used by a CopyToReg and the dest reg
+    // is a vreg, use the CopyToReg'd destination register instead of creating
+    // a new vreg.
+    unsigned VRBase = 0;
+    for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+         UI != E; ++UI) {
+      SDNode *Use = *UI;
+      if (Use->getOpcode() == ISD::CopyToReg && 
+          Use->getOperand(2).Val == Node &&
+          Use->getOperand(2).ResNo == i) {
+        unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+        if (MRegisterInfo::isVirtualRegister(Reg)) {
+          VRBase = Reg;
+          MI->addRegOperand(Reg, true);
+          break;
+        }
+      }
+    }
+
+    if (VRBase == 0) {
+      // Create the result registers for this node and add the result regs to
+      // the machine instruction.
+      const TargetRegisterClass *RC = getInstrOperandRegClass(MRI, TII, &II, i);
+      assert(RC && "Isn't a register operand!");
+      VRBase = RegMap->createVirtualRegister(RC);
+      MI->addRegOperand(VRBase, true);
+    }
+
+    bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,i), VRBase));
+    assert(isNew && "Node emitted out of order - early");
+  }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+static unsigned getVR(SDOperand Op, DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  DenseMap<SDOperand, unsigned>::iterator I = VRBaseMap.find(Op);
+  assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+  return I->second;
+}
+
+
+/// AddOperand - Add the specified operand to the specified machine instr.  II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for 
+/// assertions only.
+void ScheduleDAG::AddOperand(MachineInstr *MI, SDOperand Op,
+                             unsigned IIOpNum,
+                             const TargetInstrDescriptor *II,
+                             DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  if (Op.isTargetOpcode()) {
+    // Note that this case is redundant with the final else block, but we
+    // include it because it is the most common and it makes the logic
+    // simpler here.
+    assert(Op.getValueType() != MVT::Other &&
+           Op.getValueType() != MVT::Flag &&
+           "Chain and flag operands should occur at end of operand list!");
+    
+    // Get/emit the operand.
+    unsigned VReg = getVR(Op, VRBaseMap);
+    const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+    bool isOptDef = (IIOpNum < TID->numOperands)
+      ? (TID->OpInfo[IIOpNum].Flags & M_OPTIONAL_DEF_OPERAND) : false;
+    MI->addRegOperand(VReg, isOptDef);
+    
+    // Verify that it is right.
+    assert(MRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+    if (II) {
+      const TargetRegisterClass *RC =
+                          getInstrOperandRegClass(MRI, TII, II, IIOpNum);
+      assert(RC && "Don't have operand info for this instruction!");
+      const TargetRegisterClass *VRC = RegMap->getRegClass(VReg);
+      if (VRC != RC) {
+        cerr << "Register class of operand and regclass of use don't agree!\n";
+#ifndef NDEBUG
+        cerr << "Operand = " << IIOpNum << "\n";
+        cerr << "Op->Val = "; Op.Val->dump(&DAG); cerr << "\n";
+        cerr << "MI = "; MI->print(cerr);
+        cerr << "VReg = " << VReg << "\n";
+        cerr << "VReg RegClass     size = " << VRC->getSize()
+             << ", align = " << VRC->getAlignment() << "\n";
+        cerr << "Expected RegClass size = " << RC->getSize()
+             << ", align = " << RC->getAlignment() << "\n";
+#endif
+        cerr << "Fatal error, aborting.\n";
+        abort();
+      }
+    }
+  } else if (ConstantSDNode *C =
+             dyn_cast<ConstantSDNode>(Op)) {
+    MI->addImmOperand(C->getValue());
+  } else if (RegisterSDNode *R =
+             dyn_cast<RegisterSDNode>(Op)) {
+    MI->addRegOperand(R->getReg(), false);
+  } else if (GlobalAddressSDNode *TGA =
+             dyn_cast<GlobalAddressSDNode>(Op)) {
+    MI->addGlobalAddressOperand(TGA->getGlobal(), TGA->getOffset());
+  } else if (BasicBlockSDNode *BB =
+             dyn_cast<BasicBlockSDNode>(Op)) {
+    MI->addMachineBasicBlockOperand(BB->getBasicBlock());
+  } else if (FrameIndexSDNode *FI =
+             dyn_cast<FrameIndexSDNode>(Op)) {
+    MI->addFrameIndexOperand(FI->getIndex());
+  } else if (JumpTableSDNode *JT =
+             dyn_cast<JumpTableSDNode>(Op)) {
+    MI->addJumpTableIndexOperand(JT->getIndex());
+  } else if (ConstantPoolSDNode *CP = 
+             dyn_cast<ConstantPoolSDNode>(Op)) {
+    int Offset = CP->getOffset();
+    unsigned Align = CP->getAlignment();
+    const Type *Type = CP->getType();
+    // MachineConstantPool wants an explicit alignment.
+    if (Align == 0) {
+      Align = TM.getTargetData()->getPreferredTypeAlignmentShift(Type);
+      if (Align == 0) {
+        // Alignment of vector types.  FIXME!
+        Align = TM.getTargetData()->getTypeSize(Type);
+        Align = Log2_64(Align);
+      }
+    }
+    
+    unsigned Idx;
+    if (CP->isMachineConstantPoolEntry())
+      Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+    else
+      Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align);
+    MI->addConstantPoolIndexOperand(Idx, Offset);
+  } else if (ExternalSymbolSDNode *ES = 
+             dyn_cast<ExternalSymbolSDNode>(Op)) {
+    MI->addExternalSymbolOperand(ES->getSymbol());
+  } else {
+    assert(Op.getValueType() != MVT::Other &&
+           Op.getValueType() != MVT::Flag &&
+           "Chain and flag operands should occur at end of operand list!");
+    unsigned VReg = getVR(Op, VRBaseMap);
+    MI->addRegOperand(VReg, false);
+    
+    // Verify that it is right.
+    assert(MRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+    if (II) {
+      const TargetRegisterClass *RC =
+                            getInstrOperandRegClass(MRI, TII, II, IIOpNum);
+      assert(RC && "Don't have operand info for this instruction!");
+      assert(RegMap->getRegClass(VReg) == RC &&
+             "Register class of operand and regclass of use don't agree!");
+    }
+  }
+  
+}
+
+// Returns the Register Class of a physical register
+static const TargetRegisterClass *getPhysicalRegisterRegClass(
+        const MRegisterInfo *MRI,
+        MVT::ValueType VT,
+        unsigned reg) {
+  assert(MRegisterInfo::isPhysicalRegister(reg) &&
+         "reg must be a physical register");
+  // Pick the register class of the right type that contains this physreg.
+  for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
+         E = MRI->regclass_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->contains(reg))
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
+
+/// EmitNode - Generate machine code for a node and its needed dependencies.
+///
+void ScheduleDAG::EmitNode(SDNode *Node, 
+                           DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  // If this is a machine instruction node, emit it directly.
+  if (Node->isTargetOpcode()) {
+    unsigned Opc = Node->getTargetOpcode();
+    const TargetInstrDescriptor &II = TII->get(Opc);
+
+    unsigned NumResults = CountResults(Node);
+    unsigned NodeOperands = CountOperands(Node);
+    unsigned NumMIOperands = NodeOperands + NumResults;
+#ifndef NDEBUG
+    assert((unsigned(II.numOperands) == NumMIOperands ||
+            (II.Flags & M_VARIABLE_OPS)) &&
+           "#operands for dag node doesn't match .td file!"); 
+#endif
+
+    // Create the new machine instruction.
+    MachineInstr *MI = new MachineInstr(II);
+    
+    // Add result register values for things that are defined by this
+    // instruction.
+    if (NumResults)
+      CreateVirtualRegisters(Node, NumResults, MRI, MI, RegMap,
+                             TII, II, VRBaseMap);
+    
+    // Emit all of the actual operands of this instruction, adding them to the
+    // instruction as appropriate.
+    for (unsigned i = 0; i != NodeOperands; ++i)
+      AddOperand(MI, Node->getOperand(i), i+NumResults, &II, VRBaseMap);
+
+    // Commute node if it has been determined to be profitable.
+    if (CommuteSet.count(Node)) {
+      MachineInstr *NewMI = TII->commuteInstruction(MI);
+      if (NewMI == 0)
+        DOUT << "Sched: COMMUTING FAILED!\n";
+      else {
+        DOUT << "Sched: COMMUTED TO: " << *NewMI;
+        if (MI != NewMI) {
+          delete MI;
+          MI = NewMI;
+        }
+      }
+    }
+
+    // Now that we have emitted all operands, emit this instruction itself.
+    if ((II.Flags & M_USES_CUSTOM_DAG_SCHED_INSERTION) == 0) {
+      BB->insert(BB->end(), MI);
+    } else {
+      // Insert this instruction into the end of the basic block, potentially
+      // taking some custom action.
+      BB = DAG.getTargetLoweringInfo().InsertAtEndOfBasicBlock(MI, BB);
+    }
+  } else {
+    switch (Node->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      Node->dump(&DAG);
+#endif
+      assert(0 && "This target-independent node should have been selected!");
+    case ISD::EntryToken: // fall thru
+    case ISD::TokenFactor:
+    case ISD::LABEL:
+      break;
+    case ISD::CopyToReg: {
+      unsigned InReg;
+      if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(2)))
+        InReg = R->getReg();
+      else
+        InReg = getVR(Node->getOperand(2), VRBaseMap);
+      unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (InReg != DestReg) { // Coalesced away the copy?
+        const TargetRegisterClass *TRC = 0;
+        // Get the target register class
+        if (MRegisterInfo::isVirtualRegister(InReg))
+          TRC = RegMap->getRegClass(InReg);
+        else
+          TRC = getPhysicalRegisterRegClass(MRI,
+                                            Node->getOperand(2).getValueType(),
+                                            InReg);
+        MRI->copyRegToReg(*BB, BB->end(), DestReg, InReg, TRC);
+      }
+      break;
+    }
+    case ISD::CopyFromReg: {
+      unsigned VRBase = 0;
+      unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (MRegisterInfo::isVirtualRegister(SrcReg)) {
+        // Just use the input register directly!
+        bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,0),SrcReg));
+        assert(isNew && "Node emitted out of order - early");
+        break;
+      }
+
+      // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+      // the CopyToReg'd destination register instead of creating a new vreg.
+      for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+           UI != E; ++UI) {
+        SDNode *Use = *UI;
+        if (Use->getOpcode() == ISD::CopyToReg && 
+            Use->getOperand(2).Val == Node) {
+          unsigned DestReg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+          if (MRegisterInfo::isVirtualRegister(DestReg)) {
+            VRBase = DestReg;
+            break;
+          }
+        }
+      }
+
+      // Figure out the register class to create for the destreg.
+      const TargetRegisterClass *TRC = 0;
+      if (VRBase) {
+        TRC = RegMap->getRegClass(VRBase);
+      } else {
+        TRC = getPhysicalRegisterRegClass(MRI, Node->getValueType(0), SrcReg);
+
+        // Create the reg, emit the copy.
+        VRBase = RegMap->createVirtualRegister(TRC);
+      }
+      MRI->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);
+
+      bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,0), VRBase));
+      assert(isNew && "Node emitted out of order - early");
+      break;
+    }
+    case ISD::INLINEASM: {
+      unsigned NumOps = Node->getNumOperands();
+      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+        --NumOps;  // Ignore the flag operand.
+      
+      // Create the inline asm machine instruction.
+      MachineInstr *MI =
+        new MachineInstr(BB, TII->get(TargetInstrInfo::INLINEASM));
+
+      // Add the asm string as an external symbol operand.
+      const char *AsmStr =
+        cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+      MI->addExternalSymbolOperand(AsmStr);
+      
+      // Add all of the operand registers to the instruction.
+      for (unsigned i = 2; i != NumOps;) {
+        unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getValue();
+        unsigned NumVals = Flags >> 3;
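+        // The low 3 bits of the flag word encode the operand kind; the
+        // remaining bits give the number of values that follow it.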
+        
+        MI->addImmOperand(Flags);
+        ++i;  // Skip the ID value.
+        
+        switch (Flags & 7) {
+        default: assert(0 && "Bad flags!");
+        case 1:  // Use of register.
+          for (; NumVals; --NumVals, ++i) {
+            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            MI->addRegOperand(Reg, false);
+          }
+          break;
+        case 2:   // Def of register.
+          for (; NumVals; --NumVals, ++i) {
+            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            MI->addRegOperand(Reg, true);
+          }
+          break;
+        case 3: { // Immediate.
+          assert(NumVals == 1 && "Unknown immediate value!");
+          if (ConstantSDNode *CS=dyn_cast<ConstantSDNode>(Node->getOperand(i))){
+            MI->addImmOperand(CS->getValue());
+          } else {
+            GlobalAddressSDNode *GA = 
+              cast<GlobalAddressSDNode>(Node->getOperand(i));
+            MI->addGlobalAddressOperand(GA->getGlobal(), GA->getOffset());
+          }
+          ++i;
+          break;
+        }
+        case 4:  // Addressing mode.
+          // The addressing mode has been selected, just add all of the
+          // operands to the machine instruction.
+          for (; NumVals; --NumVals, ++i)
+            AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+          break;
+        }
+      }
+      break;
+    }
+    }
+  }
+}
+
+void ScheduleDAG::EmitNoop() {
+  TII->insertNoop(*BB, BB->end());
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+void ScheduleDAG::EmitSchedule() {
+  // If this is the first basic block in the function, and if it has live ins
+  // that need to be copied into vregs, emit the copies into the top of the
+  // block before emitting the code for the block.
+  MachineFunction &MF = DAG.getMachineFunction();
+  if (&MF.front() == BB && MF.livein_begin() != MF.livein_end()) {
+    for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+         E = MF.livein_end(); LI != E; ++LI)
+      if (LI->second)
+        MRI->copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+                          LI->first, RegMap->getRegClass(LI->second));
+  }
+  
+  
+  // Finally, emit the code for all of the scheduled instructions.
+  DenseMap<SDOperand, unsigned> VRBaseMap;
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i]) {
+      for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++)
+        EmitNode(SU->FlaggedNodes[j], VRBaseMap);
+      EmitNode(SU->Node, VRBaseMap);
+    } else {
+      // Null SUnit* is a noop.
+      EmitNoop();
+    }
+  }
+}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      SU->dump(&DAG);
+    else
+      cerr << "**** NOOP ****\n";
+  }
+}
+
+
+/// Run - perform scheduling.
+///
+MachineBasicBlock *ScheduleDAG::Run() {
+  TII = TM.getInstrInfo();
+  MRI = TM.getRegisterInfo();
+  RegMap = BB->getParent()->getSSARegMap();
+  ConstPool = BB->getParent()->getConstantPool();
+
+  Schedule();
+  return BB;
+}
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const SelectionDAG *G) const {
+  cerr << "SU(" << NodeNum << "): ";
+  Node->dump(G);
+  cerr << "\n";
+  if (FlaggedNodes.size() != 0) {
+    for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) {
+      cerr << "    ";
+      FlaggedNodes[i]->dump(G);
+      cerr << "\n";
+    }
+  }
+}
+
+void SUnit::dumpAll(const SelectionDAG *G) const {
+  dump(G);
+
+  cerr << "  # preds left       : " << NumPredsLeft << "\n";
+  cerr << "  # succs left       : " << NumSuccsLeft << "\n";
+  cerr << "  # chain preds left : " << NumChainPredsLeft << "\n";
+  cerr << "  # chain succs left : " << NumChainSuccsLeft << "\n";
+  cerr << "  Latency            : " << Latency << "\n";
+  cerr << "  Depth              : " << Depth << "\n";
+  cerr << "  Height             : " << Height << "\n";
+
+  if (Preds.size() != 0) {
+    cerr << "  Predecessors:\n";
+    for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
+         I != E; ++I) {
+      if (I->second)
+        cerr << "   ch  #";
+      else
+        cerr << "   val #";
+      cerr << I->first << " - SU(" << I->first->NodeNum << ")\n";
+    }
+  }
+  if (Succs.size() != 0) {
+    cerr << "  Successors:\n";
+    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+         I != E; ++I) {
+      if (I->second)
+        cerr << "   ch  #";
+      else
+        cerr << "   val #";
+      cerr << I->first << " - SU(" << I->first->NodeNum << ")\n";
+    }
+  }
+  cerr << "\n";
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 0000000..9e4e46f
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,531 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+#include <queue>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+  tdListDAGScheduler("list-td", "  Top-down list scheduler",
+                     createTDListDAGScheduler);
+   
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation.  This supports
+/// top-down scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAG {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+  
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation).  Once the operands become available, the instruction is
+  /// added to the AvailableQueue.  This keeps track of each SUnit and the
+  /// number of cycles left to execute before the operation is available.
+  std::vector<std::pair<unsigned, SUnit*> > PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  HazardRecognizer *HazardRec;
+
+public:
+  ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb,
+                  const TargetMachine &tm,
+                  SchedulingPriorityQueue *availqueue,
+                  HazardRecognizer *HR)
+    : ScheduleDAG(dag, bb, tm),
+      AvailableQueue(availqueue), HazardRec(HR) {
+    }
+
+  ~ScheduleDAGList() {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+private:
+  void ReleaseSucc(SUnit *SuccSU, bool isChain);
+  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void ListScheduleTopDown();
+};
+}  // end anonymous namespace
+
+HazardRecognizer::~HazardRecognizer() {}
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+  DOUT << "********** List Scheduling **********\n";
+  
+  // Build scheduling units.
+  BuildSchedUnits();
+
+  AvailableQueue->initNodes(SUnitMap, SUnits);
+  
+  ListScheduleTopDown();
+  
+  AvailableQueue->releaseState();
+  
+  DOUT << "*** Final schedule ***\n";
+  DEBUG(dumpSchedule());
+  DOUT << "\n";
+  
+  // Emit in scheduled order
+  EmitSchedule();
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
+  if (!isChain)
+    SuccSU->NumPredsLeft--;
+  else
+    SuccSU->NumChainPredsLeft--;
+  
+  assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 &&
+         "List scheduling internal error");
+  
+  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
+    // Compute how many cycles it will be before this actually becomes
+    // available.  This is the max of the start time of all predecessors plus
+    // their latencies.
+    unsigned AvailableCycle = 0;
+    for (SUnit::pred_iterator I = SuccSU->Preds.begin(),
+         E = SuccSU->Preds.end(); I != E; ++I) {
+      // If this is a token edge, we don't need to wait for the latency of the
+      // preceding instruction (e.g. a long-latency load) unless there is also
+      // some other data dependence.
+      SUnit &Pred = *I->first;
+      unsigned PredDoneCycle = Pred.Cycle;
+      if (!I->second)
+        PredDoneCycle += Pred.Latency;
+      else if (Pred.Latency)
+        PredDoneCycle += 1;
+
+      AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
+    }
+    
+    PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU));
+  }
+}
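+
+// For example (numbers are illustrative only): if SuccSU's sole data
+// predecessor is a load scheduled at cycle 2 with latency 3, SuccSU is placed
+// on the PendingQueue with AvailableCycle 5 (2 + 3).  If the only edge were a
+// chain (token) edge instead, SuccSU would wait just one extra cycle and
+// become available at cycle 3.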
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+  
+  Sequence.push_back(SU);
+  SU->Cycle = CurCycle;
+  
+  // Top down: release successors.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    ReleaseSucc(I->first, I->second);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+
+  // All leaves to Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
+      AvailableQueue->push(&SUnits[i]);
+      SUnits[i].isAvailable = SUnits[i].isPending = true;
+    }
+  }
+  
+  // Emit the entry node first.
+  ScheduleNodeTopDown(Entry, CurCycle);
+  HazardRec->EmitInstruction(Entry->Node);
+  
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back.  Schedule the node.
+  std::vector<SUnit*> NotReady;
+  while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+    // Check to see if any of the pending instructions are ready to issue.  If
+    // so, add them to the available queue.
+    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+      if (PendingQueue[i].first == CurCycle) {
+        AvailableQueue->push(PendingQueue[i].second);
+        PendingQueue[i].second->isAvailable = true;
+        PendingQueue[i] = PendingQueue.back();
+        PendingQueue.pop_back();
+        --i; --e;
+      } else {
+        assert(PendingQueue[i].first > CurCycle && "Negative latency?");
+      }
+    }
+    
+    // If there are no instructions available, don't try to issue anything, and
+    // don't advance the hazard recognizer.
+    if (AvailableQueue->empty()) {
+      ++CurCycle;
+      continue;
+    }
+
+    SUnit *FoundSUnit = 0;
+    SDNode *FoundNode = 0;
+    
+    bool HasNoopHazards = false;
+    while (!AvailableQueue->empty()) {
+      SUnit *CurSUnit = AvailableQueue->pop();
+      
+      // Get the node represented by this SUnit.
+      FoundNode = CurSUnit->Node;
+      
+      // If this is a pseudo op, like copyfromreg, look to see if there is a
+      // real target node flagged to it.  If so, use the target node.
+      for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size(); 
+           FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i)
+        FoundNode = CurSUnit->FlaggedNodes[i];
+      
+      HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
+      if (HT == HazardRecognizer::NoHazard) {
+        FoundSUnit = CurSUnit;
+        break;
+      }
+      
+      // Remember if this is a noop hazard.
+      HasNoopHazards |= HT == HazardRecognizer::NoopHazard;
+      
+      NotReady.push_back(CurSUnit);
+    }
+    
+    // Add the nodes that aren't ready back onto the available list.
+    if (!NotReady.empty()) {
+      AvailableQueue->push_all(NotReady);
+      NotReady.clear();
+    }
+
+    // If we found a node to schedule, do it now.
+    if (FoundSUnit) {
+      ScheduleNodeTopDown(FoundSUnit, CurCycle);
+      HazardRec->EmitInstruction(FoundNode);
+      FoundSUnit->isScheduled = true;
+      AvailableQueue->ScheduledNode(FoundSUnit);
+
+      // If this is a pseudo-op node, we don't want to increment the current
+      // cycle.
+      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
+        ++CurCycle;        
+    } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall, but no other problem, just advance
+      // the current cycle and try again.
+      DOUT << "*** Advancing cycle, no work to do\n";
+      HazardRec->AdvanceCycle();
+      ++NumStalls;
+      ++CurCycle;
+    } else {
+      // Otherwise, we have no instructions to issue and we have instructions
+      // that will fault if we don't do this right.  This is the case for
+      // processors without pipeline interlocks and other cases.
+      DOUT << "*** Emitting noop\n";
+      HazardRec->EmitNoop();
+      Sequence.push_back(0);   // NULL SUnit* -> noop
+      ++NumNoops;
+      ++CurCycle;
+    }
+  }
+
+#ifndef NDEBUG
+  // Verify that all SUnits were scheduled.
+  bool AnyNotSched = false;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(&DAG);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+  }
+  assert(!AnyNotSched);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//                    LatencyPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+// 
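+// Worked example (illustrative only): for a chain A -> B -> C with node
+// latencies A=1, B=3, C=1, CalcLatency below assigns C=1, B=3+1=4 and
+// A=1+4=5, so A, the head of the longest remaining path, is preferred by
+// latency_sort over any available node with a smaller remaining-path latency.
+//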
+namespace {
+  class LatencyPriorityQueue;
+  
+  /// Sorting functions for the Available queue.
+  struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    LatencyPriorityQueue *PQ;
+    latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
+    latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+}  // end anonymous namespace
+
+namespace {
+  class LatencyPriorityQueue : public SchedulingPriorityQueue {
+    // SUnits - The SUnits for the current graph.
+    std::vector<SUnit> *SUnits;
+    
+    // Latencies - The latency (max of latency from this node to the bb exit)
+    // for each node.
+    std::vector<int> Latencies;
+
+    /// NumNodesSolelyBlocking - This vector contains, for every node in the
+    /// Queue, the number of nodes that the node is the sole unscheduled
+    /// predecessor for.  This is used as a tie-breaker heuristic for better
+    /// mobility.
+    std::vector<unsigned> NumNodesSolelyBlocking;
+
+    std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
+public:
+    LatencyPriorityQueue() : Queue(latency_sort(this)) {
+    }
+    
+    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                   std::vector<SUnit> &sunits) {
+      SUnits = &sunits;
+      // Calculate node priorities.
+      CalculatePriorities();
+    }
+    void releaseState() {
+      SUnits = 0;
+      Latencies.clear();
+    }
+    
+    unsigned getLatency(unsigned NodeNum) const {
+      assert(NodeNum < Latencies.size());
+      return Latencies[NodeNum];
+    }
+    
+    unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
+      assert(NodeNum < NumNodesSolelyBlocking.size());
+      return NumNodesSolelyBlocking[NodeNum];
+    }
+    
+    bool empty() const { return Queue.empty(); }
+    
+    virtual void push(SUnit *U) {
+      push_impl(U);
+    }
+    void push_impl(SUnit *U);
+    
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+        push_impl(Nodes[i]);
+    }
+    
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.top();
+      Queue.pop();
+      return V;
+    }
+
+    // ScheduledNode - As nodes are scheduled, we look to see if there are any
+    // successor nodes that have a single unscheduled predecessor.  If so, that
+    // single predecessor has a higher priority, since scheduling it will make
+    // the node available.
+    void ScheduledNode(SUnit *Node);
+
+private:
+    void CalculatePriorities();
+    int CalcLatency(const SUnit &SU);
+    void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
+    SUnit *getSingleUnscheduledPred(SUnit *SU);
+
+    /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
+    /// node from a priority queue.  We should roll our own heap to make this
+    /// better or something.
+    void RemoveFromPriorityQueue(SUnit *SU) {
+      std::vector<SUnit*> Temp;
+      
+      assert(!Queue.empty() && "Not in queue!");
+      while (Queue.top() != SU) {
+        Temp.push_back(Queue.top());
+        Queue.pop();
+        assert(!Queue.empty() && "Not in queue!");
+      }
+
+      // Remove the node from the PQ.
+      Queue.pop();
+      
+      // Add all the other nodes back.
+      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
+        Queue.push(Temp[i]);
+    }
+  };
+}
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+  unsigned LHSNum = LHS->NodeNum;
+  unsigned RHSNum = RHS->NodeNum;
+
+  // The most important heuristic is scheduling the critical path.
+  unsigned LHSLatency = PQ->getLatency(LHSNum);
+  unsigned RHSLatency = PQ->getLatency(RHSNum);
+  if (LHSLatency < RHSLatency) return true;
+  if (LHSLatency > RHSLatency) return false;
+  
+  // After that, if two nodes have identical latencies, look to see if one will
+  // unblock more other nodes than the other.
+  unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+  unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+  if (LHSBlocked < RHSBlocked) return true;
+  if (LHSBlocked > RHSBlocked) return false;
+  
+  // Finally, just to provide a stable ordering, use the node number as a
+  // deciding factor.
+  return LHSNum < RHSNum;
+}
+
+
+/// CalcLatency - Calculate the maximal path latency from the node to the exit.
+///
+int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
+  int &Latency = Latencies[SU.NodeNum];
+  if (Latency != -1)
+    return Latency;
+  
+  int MaxSuccLatency = 0;
+  for (SUnit::const_succ_iterator I = SU.Succs.begin(), E = SU.Succs.end();
+       I != E; ++I)
+    MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));
+
+  return Latency = MaxSuccLatency + SU.Latency;
+}
+
+/// CalculatePriorities - Calculate priorities of all scheduling units.
+void LatencyPriorityQueue::CalculatePriorities() {
+  Latencies.assign(SUnits->size(), -1);
+  NumNodesSolelyBlocking.assign(SUnits->size(), 0);
+  
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcLatency((*SUnits)[i]);
+}
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+  SUnit *OnlyAvailablePred = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    SUnit &Pred = *I->first;
+    if (!Pred.isScheduled) {
+      // We found an available, but not scheduled, predecessor.  If it's the
+      // only one we have found, keep track of it... otherwise give up.
+      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+        return 0;
+      OnlyAvailablePred = &Pred;
+    }
+  }
+      
+  return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push_impl(SUnit *SU) {
+  // Look at all of the successors of this node.  Count the number of nodes
+  // for which this node is the sole unscheduled predecessor.
+  unsigned NumNodesBlocking = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    if (getSingleUnscheduledPred(I->first) == SU)
+      ++NumNodesBlocking;
+  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+  
+  Queue.push(SU);
+}
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor.  If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    AdjustPriorityOfUnscheduledPreds(I->first);
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled.  If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet.  If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+  if (SU->isPending) return;  // All preds scheduled.
+  
+  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+  if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+  
+  // Okay, we found a single predecessor that is available, but not scheduled.
+  // Since it is available, it must be in the priority queue.  First remove it.
+  RemoveFromPriorityQueue(OnlyAvailablePred);
+
+  // Reinsert the node into the priority queue, which recomputes its
+  // NumNodesSolelyBlocking value.
+  push(OnlyAvailablePred);
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with a
+/// new hazard recognizer. This scheduler takes ownership of the hazard
+/// recognizer and deletes it when done.
+ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAGISel *IS,
+                                            SelectionDAG *DAG,
+                                            MachineBasicBlock *BB) {
+  return new ScheduleDAGList(*DAG, BB, DAG->getTarget(),
+                             new LatencyPriorityQueue(),
+                             IS->CreateTargetHazardRecognizer());
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..f95be7d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,944 @@
+//===--- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms.  The basic approach uses a priority
+// queue of available nodes to schedule.  One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+#include <queue>
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static RegisterScheduler
+  burrListDAGScheduler("list-burr",
+                       "  Bottom-up register reduction list scheduling",
+                       createBURRListDAGScheduler);
+static RegisterScheduler
+  tdrListrDAGScheduler("list-tdrr",
+                       "  Top-down register reduction list scheduling",
+                       createTDRRListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation.  This supports both top-down and bottom-up scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
+private:
+  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+  /// it is top-down.
+  bool isBottomUp;
+  
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+
+public:
+  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
+                  const TargetMachine &tm, bool isbottomup,
+                  SchedulingPriorityQueue *availqueue)
+    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
+      AvailableQueue(availqueue) {
+    }
+
+  ~ScheduleDAGRRList() {
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+private:
+  void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
+  void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
+  void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
+  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void ListScheduleTopDown();
+  void ListScheduleBottomUp();
+  void CommuteNodesToReducePressure();
+};
+}  // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+  DOUT << "********** List Scheduling **********\n";
+  
+  // Build scheduling units.
+  BuildSchedUnits();
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(&DAG));
+  CalculateDepths();
+  CalculateHeights();
+
+  AvailableQueue->initNodes(SUnitMap, SUnits);
+
+  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+  if (isBottomUp)
+    ListScheduleBottomUp();
+  else
+    ListScheduleTopDown();
+  
+  AvailableQueue->releaseState();
+
+  CommuteNodesToReducePressure();
+  
+  DOUT << "*** Final schedule ***\n";
+  DEBUG(dumpSchedule());
+  DOUT << "\n";
+  
+  // Emit in scheduled order
+  EmitSchedule();
+}
+
+/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
+/// it is not the last use of its first operand, add it to the CommuteSet if
+/// possible. It will be commuted when it is translated to a MI.
+void ScheduleDAGRRList::CommuteNodesToReducePressure() {
+  SmallPtrSet<SUnit*, 4> OperandSeen;
+  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
+    SUnit *SU = Sequence[i];
+    if (!SU) continue;
+    if (SU->isCommutable) {
+      unsigned Opc = SU->Node->getTargetOpcode();
+      unsigned NumRes = CountResults(SU->Node);
+      unsigned NumOps = CountOperands(SU->Node);
+      for (unsigned j = 0; j != NumOps; ++j) {
+        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
+          continue;
+
+        SDNode *OpN = SU->Node->getOperand(j).Val;
+        SUnit *OpSU = SUnitMap[OpN];
+        if (OpSU && OperandSeen.count(OpSU) == 1) {
+          // Ok, so SU is not the last use of OpSU, but SU is two-address so
+          // it will clobber OpSU. Try to commute SU if no other source operands
+          // are live below.
+          bool DoCommute = true;
+          for (unsigned k = 0; k < NumOps; ++k) {
+            if (k != j) {
+              OpN = SU->Node->getOperand(k).Val;
+              OpSU = SUnitMap[OpN];
+              if (OpSU && OperandSeen.count(OpSU) == 1) {
+                DoCommute = false;
+                break;
+              }
+            }
+          }
+          if (DoCommute)
+            CommuteSet.insert(SU->Node);
+        }
+
+        // Only look at the first use&def node for now.
+        break;
+      }
+    }
+
+    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (!I->second)
+        OperandSeen.insert(I->first);
+    }
+  }
+}
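+
+// Illustrative case (hypothetical values): a two-address node computing
+// t = add a, b keeps t tied to its first operand a.  If a is still read by an
+// instruction scheduled below this node but b is not, commuting the operands
+// to t = add b, a lets the instruction clobber b instead, so a does not have
+// to be preserved in an extra register.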
+
+//===----------------------------------------------------------------------===//
+//  Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the Available queue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain, 
+                                    unsigned CurCycle) {
+  // FIXME: the distance between two nodes is not always == the predecessor's
+  // latency. For example, the reader can very well read the register written
+  // by the predecessor later than the issue cycle. It also depends on the
+  // interrupt model (drain vs. freeze).
+  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
+
+  if (!isChain)
+    PredSU->NumSuccsLeft--;
+  else
+    PredSU->NumChainSuccsLeft--;
+  
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
+    cerr << "*** List scheduling failed! ***\n";
+    PredSU->dump(&DAG);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+  
+  if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
+    // EntryToken has to go last!  Special case it here.
+    if (PredSU->Node->getOpcode() != ISD::EntryToken) {
+      PredSU->isAvailable = true;
+      AvailableQueue->push(PredSU);
+    }
+  }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+  SU->Cycle = CurCycle;
+
+  AvailableQueue->ScheduledNode(SU);
+  Sequence.push_back(SU);
+
+  // Bottom up: release predecessors
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I)
+    ReleasePred(I->first, I->second, CurCycle);
+  SU->isScheduled = true;
+}
+
+/// isReady - True if node's lower cycle bound is less than or equal to the
+/// current scheduling cycle. Always true if all nodes have uniform latency 1.
+static inline bool isReady(SUnit *SU, unsigned CurCycle) {
+  return SU->CycleBound <= CurCycle;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+  unsigned CurCycle = 0;
+  // Add root to Available queue.
+  AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back. Schedule the node.
+  std::vector<SUnit*> NotReady;
+  while (!AvailableQueue->empty()) {
+    SUnit *CurNode = AvailableQueue->pop();
+    while (CurNode && !isReady(CurNode, CurCycle)) {
+      NotReady.push_back(CurNode);
+      CurNode = AvailableQueue->pop();
+    }
+    
+    // Add the nodes that aren't ready back onto the available list.
+    AvailableQueue->push_all(NotReady);
+    NotReady.clear();
+
+    if (CurNode != NULL)
+      ScheduleNodeBottomUp(CurNode, CurCycle);
+    CurCycle++;
+  }
+
+  // Add entry node last
+  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
+    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+    Sequence.push_back(Entry);
+  }
+
+  // Reverse the order if it is bottom up.
+  std::reverse(Sequence.begin(), Sequence.end());
+  
+  
+#ifndef NDEBUG
+  // Verify that all SUnits were scheduled.
+  bool AnyNotSched = false;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(&DAG);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+  }
+  assert(!AnyNotSched);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain, 
+                                    unsigned CurCycle) {
+  // FIXME: the distance between two nodes is not always == the predecessor's
+  // latency. For example, the reader can very well read the register written
+  // by the predecessor later than the issue cycle. It also depends on the
+  // interrupt model (drain vs. freeze).
+  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);
+
+  if (!isChain)
+    SuccSU->NumPredsLeft--;
+  else
+    SuccSU->NumChainPredsLeft--;
+  
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
+    cerr << "*** List scheduling failed! ***\n";
+    SuccSU->dump(&DAG);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+  
+  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
+    SuccSU->isAvailable = true;
+    AvailableQueue->push(SuccSU);
+  }
+}
+
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+  SU->Cycle = CurCycle;
+
+  AvailableQueue->ScheduledNode(SU);
+  Sequence.push_back(SU);
+
+  // Top down: release successors
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    ReleaseSucc(I->first, I->second, CurCycle);
+  SU->isScheduled = true;
+}
+
+void ScheduleDAGRRList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+
+  // All leaves to Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
+      AvailableQueue->push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+  
+  // Emit the entry node first.
+  ScheduleNodeTopDown(Entry, CurCycle);
+  CurCycle++;
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back. Schedule the node.
+  std::vector<SUnit*> NotReady;
+  while (!AvailableQueue->empty()) {
+    SUnit *CurNode = AvailableQueue->pop();
+    while (CurNode && !isReady(CurNode, CurCycle)) {
+      NotReady.push_back(CurNode);
+      CurNode = AvailableQueue->pop();
+    }
+    
+    // Add the nodes that aren't ready back onto the available list.
+    AvailableQueue->push_all(NotReady);
+    NotReady.clear();
+
+    if (CurNode != NULL)
+      ScheduleNodeTopDown(CurNode, CurCycle);
+    CurCycle++;
+  }
+  
+  
+#ifndef NDEBUG
+  // Verify that all SUnits were scheduled.
+  bool AnyNotSched = false;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (!SUnits[i].isScheduled) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(&DAG);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+  }
+  assert(!AnyNotSched);
+#endif
+}
+
+
+
+//===----------------------------------------------------------------------===//
+//                RegReductionPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+// 
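+// Worked example (illustrative only), following CalcNodeSethiUllmanNumber
+// below: operand-less leaves get the number 1; a node whose two operand
+// subtrees both carry the number 1 gets 1 + 1 = 2 (each additional equal-
+// valued operand adds one); a node with subtrees numbered 2 and 1 simply
+// inherits the larger value 2.  In the classical Sethi-Ullman scheme these
+// numbers approximate how many registers a subtree needs at once, and the
+// sort functions below use them, plus several tie-breakers, to order the
+// Available queue.
+//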
+namespace {
+  template<class SF>
+  class RegReductionPriorityQueue;
+  
+  /// Sorting functions for the Available queue.
+  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
+    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
+    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+
+  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
+    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
+    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+}  // end anonymous namespace
+
+static inline bool isCopyFromLiveIn(const SUnit *SU) {
+  SDNode *N = SU->Node;
+  return N->getOpcode() == ISD::CopyFromReg &&
+    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
+}
+
+namespace {
+  template<class SF>
+  class VISIBILITY_HIDDEN RegReductionPriorityQueue
+   : public SchedulingPriorityQueue {
+    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;
+
+  public:
+    RegReductionPriorityQueue() :
+    Queue(SF(this)) {}
+    
+    virtual void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                           std::vector<SUnit> &sunits) {}
+    virtual void releaseState() {}
+    
+    virtual unsigned getNodePriority(const SUnit *SU) const {
+      return 0;
+    }
+    
+    bool empty() const { return Queue.empty(); }
+    
+    void push(SUnit *U) {
+      Queue.push(U);
+    }
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+        Queue.push(Nodes[i]);
+    }
+    
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.top();
+      Queue.pop();
+      return V;
+    }
+
+    virtual bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
+      return false;
+    }
+  };
+
+  template<class SF>
+  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
+   : public RegReductionPriorityQueue<SF> {
+    // SUnitMap SDNode to SUnit mapping (n -> 1).
+    DenseMap<SDNode*, SUnit*> *SUnitMap;
+
+    // SUnits - The SUnits for the current graph.
+    const std::vector<SUnit> *SUnits;
+    
+    // SethiUllmanNumbers - The SethiUllman number for each node.
+    std::vector<unsigned> SethiUllmanNumbers;
+
+    const TargetInstrInfo *TII;
+  public:
+    BURegReductionPriorityQueue(const TargetInstrInfo *tii)
+      : TII(tii) {}
+
+    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                   std::vector<SUnit> &sunits) {
+      SUnitMap = &sumap;
+      SUnits = &sunits;
+      // Add pseudo dependency edges for two-address nodes.
+      AddPseudoTwoAddrDeps();
+      // Calculate node priorities.
+      CalculateSethiUllmanNumbers();
+    }
+
+    void releaseState() {
+      SUnits = 0;
+      SethiUllmanNumbers.clear();
+    }
+
+    unsigned getNodePriority(const SUnit *SU) const {
+      assert(SU->NodeNum < SethiUllmanNumbers.size());
+      unsigned Opc = SU->Node->getOpcode();
+      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
+        // CopyFromReg should be close to its def because it restricts
+        // allocation choices. But if it is a livein then perhaps we want it
+        // closer to its uses so it can be coalesced.
+        return 0xffff;
+      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+        // CopyToReg should be close to its uses to facilitate coalescing and
+        // avoid spilling.
+        return 0;
+      else if (SU->NumSuccs == 0)
+        // If SU does not have a use, i.e. it doesn't produce a value that would
+        // be consumed (e.g. store), then it terminates a chain of computation.
+        // Give it a large SethiUllman number so it will be scheduled right
+        // before its predecessors, so that it doesn't lengthen their live ranges.
+        return 0xffff;
+      else if (SU->NumPreds == 0)
+        // If SU does not have a def, schedule it close to its uses because it
+        // does not lengthen any live ranges.
+        return 0;
+      else
+        return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+    bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
+      unsigned Opc = SU1->Node->getTargetOpcode();
+      unsigned NumRes = ScheduleDAG::CountResults(SU1->Node);
+      unsigned NumOps = ScheduleDAG::CountOperands(SU1->Node);
+      for (unsigned i = 0; i != NumOps; ++i) {
+        if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) == -1)
+          continue;
+        if (SU1->Node->getOperand(i).isOperand(SU2->Node))
+          return true;
+      }
+      return false;
+    }
+  private:
+    bool canClobber(SUnit *SU, SUnit *Op);
+    void AddPseudoTwoAddrDeps();
+    void CalculateSethiUllmanNumbers();
+    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
+  };
+
+
+  template<class SF>
+  class TDRegReductionPriorityQueue : public RegReductionPriorityQueue<SF> {
+    // SUnitMap SDNode to SUnit mapping (n -> 1).
+    DenseMap<SDNode*, SUnit*> *SUnitMap;
+
+    // SUnits - The SUnits for the current graph.
+    const std::vector<SUnit> *SUnits;
+    
+    // SethiUllmanNumbers - The SethiUllman number for each node.
+    std::vector<unsigned> SethiUllmanNumbers;
+
+  public:
+    TDRegReductionPriorityQueue() {}
+
+    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                   std::vector<SUnit> &sunits) {
+      SUnitMap = &sumap;
+      SUnits = &sunits;
+      // Calculate node priorities.
+      CalculateSethiUllmanNumbers();
+    }
+
+    void releaseState() {
+      SUnits = 0;
+      SethiUllmanNumbers.clear();
+    }
+
+    unsigned getNodePriority(const SUnit *SU) const {
+      assert(SU->NodeNum < SethiUllmanNumbers.size());
+      return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+  private:
+    void CalculateSethiUllmanNumbers();
+    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
+  };
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+  unsigned MaxCycle = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    unsigned Cycle = I->first->Cycle;
+    // If there are a bunch of CopyToRegs stacked up, they should be considered
+    // to be at the same position.
+    if (I->first->Node->getOpcode() == ISD::CopyToReg)
+      Cycle = closestSucc(I->first)+1;
+    if (Cycle > MaxCycle)
+      MaxCycle = Cycle;
+  }
+  return MaxCycle;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers. Live-in operands and live-out results don't count
+/// since they are "fixed".
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->second) continue;  // ignore chain preds
+    if (I->first->Node->getOpcode() != ISD::CopyFromReg)
+      Scratches++;
+  }
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->second) continue;  // ignore chain succs
+    if (I->first->Node->getOpcode() != ISD::CopyToReg)
+      Scratches += 10;
+  }
+  return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  // There used to be a special tie breaker here that looked for
+  // two-address instructions and preferred the instruction with a
+  // def&use operand.  The special case triggered diagnostics when
+  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
+  // ordering that priority_queue requires. It didn't help much anyway
+  // because AddPseudoTwoAddrDeps already covers many of the cases
+  // where it would have applied.  In addition, it's counter-intuitive
+  // that a tie breaker would be the first thing attempted.  There's a
+  // "real" tie breaker below that is the operation of last resort.
+  // The fact that the "special tie breaker" would trigger when there
+  // wasn't otherwise a tie is what broke the strict weak ordering
+  // constraint.
+
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  if (LPriority > RPriority)
+    return true;
+  else if (LPriority == RPriority) {
+    // Try schedule def + use closer when Sethi-Ullman numbers are the same.
+    // e.g.
+    // t1 = op t2, c1
+    // t3 = op t4, c2
+    //
+    // and the following instructions are both ready.
+    // t2 = op c3
+    // t4 = op c4
+    //
+    // Then schedule t2 = op first.
+    // i.e.
+    // t4 = op c4
+    // t2 = op c3
+    // t1 = op t2, c1
+    // t3 = op t4, c2
+    //
+    // This creates more short live intervals.
+    unsigned LDist = closestSucc(left);
+    unsigned RDist = closestSucc(right);
+    if (LDist < RDist)
+      return true;
+    else if (LDist == RDist) {
+      // Intuitively, it's good to push down instructions whose results are
+      // liveout so their long live ranges won't conflict with other values
+      // which are needed inside the BB. Further prioritize liveout instructions
+      // by the number of operands which are calculated within the BB.
+      unsigned LScratch = calcMaxScratches(left);
+      unsigned RScratch = calcMaxScratches(right);
+      if (LScratch > RScratch)
+        return true;
+      else if (LScratch == RScratch)
+        if (left->Height > right->Height)
+          return true;
+        else if (left->Height == right->Height)
+          if (left->Depth < right->Depth)
+            return true;
+          else if (left->Depth == right->Depth)
+            if (left->CycleBound > right->CycleBound) 
+              return true;
+    }
+  }
+  return false;
+}
+
+// FIXME: This is probably too slow!
+static void isReachable(SUnit *SU, SUnit *TargetSU,
+                        SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) {
+  if (Reached) return;
+  if (SU == TargetSU) {
+    Reached = true;
+    return;
+  }
+  if (!Visited.insert(SU)) return;
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
+       ++I)
+    isReachable(I->first, TargetSU, Visited, Reached);
+}
+
+static bool isReachable(SUnit *SU, SUnit *TargetSU) {
+  SmallPtrSet<SUnit*, 32> Visited;
+  bool Reached = false;
+  isReachable(SU, TargetSU, Visited, Reached);
+  return Reached;
+}
+
+template<class SF>
+bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
+  if (SU->isTwoAddress) {
+    unsigned Opc = SU->Node->getTargetOpcode();
+    unsigned NumRes = ScheduleDAG::CountResults(SU->Node);
+    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
+    for (unsigned i = 0; i != NumOps; ++i) {
+      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
+        SDNode *DU = SU->Node->getOperand(i).Val;
+        if (Op == (*SUnitMap)[DU])
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule).
+template<class SF>
+void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = (SUnit *)&((*SUnits)[i]);
+    if (!SU->isTwoAddress)
+      continue;
+
+    SDNode *Node = SU->Node;
+    if (!Node->isTargetOpcode())
+      continue;
+
+    unsigned Opc = Node->getTargetOpcode();
+    unsigned NumRes = ScheduleDAG::CountResults(Node);
+    unsigned NumOps = ScheduleDAG::CountOperands(Node);
+    for (unsigned j = 0; j != NumOps; ++j) {
+      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
+        SDNode *DU = SU->Node->getOperand(j).Val;
+        SUnit *DUSU = (*SUnitMap)[DU];
+        if (!DUSU) continue;
+        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
+             I != E; ++I) {
+          if (I->second) continue;
+          SUnit *SuccSU = I->first;
+          if (SuccSU != SU &&
+              (!canClobber(SuccSU, DUSU) ||
+               (!SU->isCommutable && SuccSU->isCommutable))){
+            if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) {
+              DOUT << "Adding an edge from SU # " << SU->NodeNum
+                   << " to SU #" << SuccSU->NodeNum << "\n";
+              if (SU->addPred(SuccSU, true))
+                SU->NumChainPredsLeft++;
+              if (SuccSU->addSucc(SU, true))
+                SuccSU->NumChainSuccsLeft++;
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 
+/// Smaller number is the higher priority.
+template<class SF>
+unsigned BURegReductionPriorityQueue<SF>::
+CalcNodeSethiUllmanNumber(const SUnit *SU) {
+  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
+  if (SethiUllmanNumber != 0)
+    return SethiUllmanNumber;
+
+  unsigned Extra = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->second) continue;  // ignore chain preds
+    SUnit *PredSU = I->first;
+    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
+    if (PredSethiUllman > SethiUllmanNumber) {
+      SethiUllmanNumber = PredSethiUllman;
+      Extra = 0;
+    } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
+      Extra++;
+  }
+
+  SethiUllmanNumber += Extra;
+
+  if (SethiUllmanNumber == 0)
+    SethiUllmanNumber = 1;
+  
+  return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
+  
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
+}
+
+static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
+  unsigned Sum = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    SUnit *SuccSU = I->first;
+    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+         EE = SuccSU->Preds.end(); II != EE; ++II) {
+      SUnit *PredSU = II->first;
+      if (!PredSU->isScheduled)
+        Sum++;
+    }
+  }
+
+  return Sum;
+}
+
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  bool LIsTarget = left->Node->isTargetOpcode();
+  bool RIsTarget = right->Node->isTargetOpcode();
+  bool LIsFloater = LIsTarget && left->NumPreds == 0;
+  bool RIsFloater = RIsTarget && right->NumPreds == 0;
+  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
+  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;
+
+  if (left->NumSuccs == 0 && right->NumSuccs != 0)
+    return false;
+  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+    return true;
+
+  // Special tie breaker: if two nodes share an operand, the one that uses it
+  // as a def&use operand is preferred.
+  if (LIsTarget && RIsTarget) {
+    if (left->isTwoAddress && !right->isTwoAddress) {
+      SDNode *DUNode = left->Node->getOperand(0).Val;
+      if (DUNode->isOperand(right->Node))
+        RBonus += 2;
+    }
+    if (!left->isTwoAddress && right->isTwoAddress) {
+      SDNode *DUNode = right->Node->getOperand(0).Val;
+      if (DUNode->isOperand(left->Node))
+        LBonus += 2;
+    }
+  }
+  if (LIsFloater)
+    LBonus -= 2;
+  if (RIsFloater)
+    RBonus -= 2;
+  if (left->NumSuccs == 1)
+    LBonus += 2;
+  if (right->NumSuccs == 1)
+    RBonus += 2;
+
+  if (LPriority+LBonus < RPriority+RBonus)
+    return true;
+  else if (LPriority == RPriority)
+    if (left->Depth < right->Depth)
+      return true;
+    else if (left->Depth == right->Depth)
+      if (left->NumSuccsLeft > right->NumSuccsLeft)
+        return true;
+      else if (left->NumSuccsLeft == right->NumSuccsLeft)
+        if (left->CycleBound > right->CycleBound) 
+          return true;
+  return false;
+}
+
+/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 
+/// Smaller number is the higher priority.
+template<class SF>
+unsigned TDRegReductionPriorityQueue<SF>::
+CalcNodeSethiUllmanNumber(const SUnit *SU) {
+  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
+  if (SethiUllmanNumber != 0)
+    return SethiUllmanNumber;
+
+  unsigned Opc = SU->Node->getOpcode();
+  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+    SethiUllmanNumber = 0xffff;
+  else if (SU->NumSuccsLeft == 0)
+    // If SU does not have a use, i.e. it doesn't produce a value that would
+    // be consumed (e.g. store), then it terminates a chain of computation.
+    // Give it a small SethiUllman number so it will be scheduled right before
+    // its predecessors, so that it doesn't lengthen their live ranges.
+    SethiUllmanNumber = 0;
+  else if (SU->NumPredsLeft == 0 &&
+           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
+    SethiUllmanNumber = 0xffff;
+  else {
+    int Extra = 0;
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (I->second) continue;  // ignore chain preds
+      SUnit *PredSU = I->first;
+      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
+      if (PredSethiUllman > SethiUllmanNumber) {
+        SethiUllmanNumber = PredSethiUllman;
+        Extra = 0;
+      } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
+        Extra++;
+    }
+
+    SethiUllmanNumber += Extra;
+  }
+  
+  return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
+  
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+                                                    SelectionDAG *DAG,
+                                                    MachineBasicBlock *BB) {
+  const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
+  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
+                           new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII));
+}
+
+llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+                                                    SelectionDAG *DAG,
+                                                    MachineBasicBlock *BB) {
+  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
+                              new TDRegReductionPriorityQueue<td_ls_rr_sort>());
+}
+
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
new file mode 100644
index 0000000..62854f7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
@@ -0,0 +1,1156 @@
+//===-- ScheduleDAGSimple.cpp - Implement a trivial DAG scheduler ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple two pass scheduler.  The first pass attempts to push
+// backward any lengthy instructions and critical paths.  The second pass packs
+// instructions into semi-optimal time slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+static RegisterScheduler
+  bfsDAGScheduler("none", "  No scheduling: breadth first sequencing",
+                  createBFS_DAGScheduler);
+static RegisterScheduler
+  simpleDAGScheduler("simple",
+                     "  Simple two pass scheduling: minimize critical path "
+                     "and maximize processor utilization",
+                      createSimpleDAGScheduler);
+static RegisterScheduler
+  noitinDAGScheduler("simple-noitin",
+                     "  Simple two pass scheduling: Same as simple "
+                     "except using generic latency",
+                     createNoItinsDAGScheduler);
+                     
+class NodeInfo;
+typedef NodeInfo *NodeInfoPtr;
+typedef std::vector<NodeInfoPtr>           NIVector;
+typedef std::vector<NodeInfoPtr>::iterator NIIterator;
+
+//===--------------------------------------------------------------------===//
+///
+/// Node group -  This struct is used to manage flagged node groups.
+///
+class NodeGroup {
+public:
+  NodeGroup     *Next;
+private:
+  NIVector      Members;                // Group member nodes
+  NodeInfo      *Dominator;             // Node with highest latency
+  unsigned      Latency;                // Total latency of the group
+  int           Pending;                // Number of visits pending before
+                                        // adding to order  
+
+public:
+  // Ctor.
+  NodeGroup() : Next(NULL), Dominator(NULL), Pending(0) {}
+
+  // Accessors
+  inline void setDominator(NodeInfo *D) { Dominator = D; }
+  inline NodeInfo *getTop() { return Members.front(); }
+  inline NodeInfo *getBottom() { return Members.back(); }
+  inline NodeInfo *getDominator() { return Dominator; }
+  inline void setLatency(unsigned L) { Latency = L; }
+  inline unsigned getLatency() { return Latency; }
+  inline int getPending() const { return Pending; }
+  inline void setPending(int P)  { Pending = P; }
+  inline int addPending(int I)  { return Pending += I; }
+
+  // Pass thru
+  inline bool group_empty() { return Members.empty(); }
+  inline NIIterator group_begin() { return Members.begin(); }
+  inline NIIterator group_end() { return Members.end(); }
+  inline void group_push_back(const NodeInfoPtr &NI) {
+    Members.push_back(NI);
+  }
+  inline NIIterator group_insert(NIIterator Pos, const NodeInfoPtr &NI) {
+    return Members.insert(Pos, NI);
+  }
+  inline void group_insert(NIIterator Pos, NIIterator First,
+                           NIIterator Last) {
+    Members.insert(Pos, First, Last);
+  }
+
+  static void Add(NodeInfo *D, NodeInfo *U);
+};
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeInfo - This struct tracks information used to schedule a node.
+///
+class NodeInfo {
+private:
+  int           Pending;                // Number of visits pending before
+                                        // adding to order
+public:
+  SDNode        *Node;                  // DAG node
+  InstrStage    *StageBegin;            // First stage in itinerary
+  InstrStage    *StageEnd;              // Last+1 stage in itinerary
+  unsigned      Latency;                // Total cycles to complete instr
+  bool          IsCall : 1;             // Is function call
+  bool          IsLoad : 1;             // Is memory load
+  bool          IsStore : 1;            // Is memory store
+  unsigned      Slot;                   // Node's time slot
+  NodeGroup     *Group;                 // Grouping information
+#ifndef NDEBUG
+  unsigned      Preorder;               // Index before scheduling
+#endif
+
+  // Ctor.
+  NodeInfo(SDNode *N = NULL)
+    : Pending(0)
+    , Node(N)
+    , StageBegin(NULL)
+    , StageEnd(NULL)
+    , Latency(0)
+    , IsCall(false)
+    , IsLoad(false)
+    , IsStore(false)
+    , Slot(0)
+    , Group(NULL)
+#ifndef NDEBUG
+    , Preorder(0)
+#endif
+  {}
+
+  // Accessors
+  inline bool isInGroup() const {
+    assert((!Group || !Group->group_empty()) && "Group with no members");
+    return Group != NULL;
+  }
+  inline bool isGroupDominator() const {
+    return isInGroup() && Group->getDominator() == this;
+  }
+  inline int getPending() const {
+    return Group ? Group->getPending() : Pending;
+  }
+  inline void setPending(int P) {
+    if (Group) Group->setPending(P);
+    else       Pending = P;
+  }
+  inline int addPending(int I) {
+    if (Group) return Group->addPending(I);
+    else       return Pending += I;
+  }
+};
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeGroupIterator - Iterates over all the nodes indicated by the node
+/// info. If the node is in a group then iterate over the members of the
+/// group, otherwise just the node info.
+///
+class NodeGroupIterator {
+private:
+  NodeInfo   *NI;                       // Node info
+  NIIterator NGI;                       // Node group iterator
+  NIIterator NGE;                       // Node group iterator end
+
+public:
+  // Ctor.
+  NodeGroupIterator(NodeInfo *N) : NI(N) {
+    // If the node is in a group then set up the group iterator.  Otherwise
+    // the group iterators will trip first time out.
+    if (N->isInGroup()) {
+      // get Group
+      NodeGroup *Group = NI->Group;
+      NGI = Group->group_begin();
+      NGE = Group->group_end();
+      // Prevent this node from being used (it will be in the members list).
+      NI = NULL;
+    }
+  }
+
+  /// next - Return the next node info, otherwise NULL.
+  ///
+  NodeInfo *next() {
+    // If members list
+    if (NGI != NGE) return *NGI++;
+    // Use node as the result (may be NULL)
+    NodeInfo *Result = NI;
+    // Only use once
+    NI = NULL;
+    // Return node or NULL
+    return Result;
+  }
+};
+//===--------------------------------------------------------------------===//
+
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeGroupOpIterator - Iterates over all the operands of a node.  If the
+/// node is a member of a group, this iterates over all the operands of all
+/// the members of the group.
+///
+class NodeGroupOpIterator {
+private:
+  NodeInfo            *NI;              // Node containing operands
+  NodeGroupIterator   GI;               // Node group iterator
+  SDNode::op_iterator OI;               // Operand iterator
+  SDNode::op_iterator OE;               // Operand iterator end
+
+  /// CheckNode - Test if the node has more operands.  If not, get the next
+  /// node, skipping over nodes that have no operands.
+  void CheckNode() {
+    // Only if operands are exhausted first
+    while (OI == OE) {
+      // Get next node info
+      NodeInfo *NI = GI.next();
+      // Exit if nodes are exhausted
+      if (!NI) return;
+      // Get node itself
+      SDNode *Node = NI->Node;
+      // Set up the operand iterators
+      OI = Node->op_begin();
+      OE = Node->op_end();
+    }
+  }
+
+public:
+  // Ctor.
+  NodeGroupOpIterator(NodeInfo *N)
+    : NI(N), GI(N), OI(SDNode::op_iterator()), OE(SDNode::op_iterator()) {}
+
+  /// isEnd - Returns true when no more operands are available.
+  ///
+  inline bool isEnd() { CheckNode(); return OI == OE; }
+
+  /// next - Returns the next available operand.
+  ///
+  inline SDOperand next() {
+    assert(OI != OE &&
+           "Not checking for end of NodeGroupOpIterator correctly");
+    return *OI++;
+  }
+};
+
+
+//===----------------------------------------------------------------------===//
+///
+/// BitsIterator - Provides iteration through individual bits in a bit vector.
+///
+template<class T>
+class BitsIterator {
+private:
+  T Bits;                               // Bits left to iterate through
+
+public:
+  /// Ctor.
+  BitsIterator(T Initial) : Bits(Initial) {}
+  
+  /// Next - Returns the next bit set or zero if exhausted.
+  inline T Next() {
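+    // Note: (Bits & -Bits) relies on two's-complement negation to isolate
+    // the lowest set bit; repeated calls peel the bits off one at a time.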
+    // Get the rightmost bit set
+    T Result = Bits & -Bits;
+    // Remove from rest
+    Bits &= ~Result;
+    // Return single bit or zero
+    return Result;
+  }
+};
+  
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+///
+/// ResourceTally - Manages the use of resources over time intervals.  Each
+/// item (slot) in the tally vector represents the resources used at a given
+/// moment.  A bit set to 1 indicates that a resource is in use; otherwise it
+/// is available.  The tally is assumed to be large enough to schedule all
+/// current instructions (asserts otherwise).
+///
+template<class T>
+class ResourceTally {
+private:
+  std::vector<T> Tally;                 // Resources used per slot
+  typedef typename std::vector<T>::iterator Iter;
+                                        // Tally iterator 
+  
+  /// SlotsAvailable - Returns true if all units are available.
+  ///
+  bool SlotsAvailable(Iter Begin, unsigned N, unsigned ResourceSet,
+                      unsigned &Resource) {
+    assert(N && "Must check availability with N != 0");
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+    
+    // Iterate through each candidate resource; only units already free in
+    // the first slot are considered.
+    BitsIterator<T> Resources(ResourceSet & ~*Begin);
+    while (unsigned Res = Resources.Next()) {
+      // Check if resource is available for next N slots
+      Iter Interval = End;
+      do {
+        Interval--;
+        if (*Interval & Res) break;
+      } while (Interval != Begin);
+      
+      // If available for N
+      if (Interval == Begin) {
+        // Success
+        Resource = Res;
+        return true;
+      }
+    }
+    
+    // No luck
+    Resource = 0;
+    return false;
+  }
+  
+  /// RetrySlot - Finds a good candidate slot to retry search.
+  Iter RetrySlot(Iter Begin, unsigned N, unsigned ResourceSet) {
+    assert(N && "Must check availability with N != 0");
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+    
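+    // Scan backward from the end of the interval, clearing units seen in
+    // use; once every candidate unit has been seen busy, the search can
+    // resume just after the slot that exhausted them.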
+    while (Begin != End--) {
+      // Clear units in use
+      ResourceSet &= ~*End;
+      // If no units left then we should go no further 
+      if (!ResourceSet) return End + 1;
+    }
+    // Made it all the way through
+    return Begin;
+  }
+  
+  /// FindAndReserveStages - Return true if the stages can be completed. If
+  /// so mark as busy.
+  bool FindAndReserveStages(Iter Begin,
+                            InstrStage *Stage, InstrStage *StageEnd) {
+    // If at last stage then we're done
+    if (Stage == StageEnd) return true;
+    // Get number of cycles for current stage
+    unsigned N = Stage->Cycles;
+    // Check to see if N slots are available, if not fail
+    unsigned Resource;
+    if (!SlotsAvailable(Begin, N, Stage->Units, Resource)) return false;
+    // Check to see if remaining stages are available, if not fail
+    if (!FindAndReserveStages(Begin + N, Stage + 1, StageEnd)) return false;
+    // Reserve resource
+    Reserve(Begin, N, Resource);
+    // Success
+    return true;
+  }
+
+  /// Reserve - Mark busy (set) the specified N slots.
+  void Reserve(Iter Begin, unsigned N, unsigned Resource) {
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+ 
+    // Set resource bit in each slot
+    for (; Begin < End; Begin++)
+      *Begin |= Resource;
+  }
+
+  /// FindSlots - Starting from Begin, locate consecutive slots where all
+  /// stages can be completed.  Returns the address of the first slot.
+  Iter FindSlots(Iter Begin, InstrStage *StageBegin, InstrStage *StageEnd) {
+    // Track position      
+    Iter Cursor = Begin;
+    
+    // Try all possible slots forward
+    while (true) {
+      // Try at cursor, if successful return position.
+      if (FindAndReserveStages(Cursor, StageBegin, StageEnd)) return Cursor;
+      // Locate a better position
+      Cursor = RetrySlot(Cursor + 1, StageBegin->Cycles, StageBegin->Units);
+    }
+  }
+  
+public:
+  /// Initialize - Resize and zero the tally to the specified number of time
+  /// slots.
+  inline void Initialize(unsigned N) {
+    Tally.assign(N, 0);   // Initialize tally to all zeros.
+  }
+
+  /// FindAndReserve - Locate an ideal slot for the specified stages and mark
+  /// as busy.
+  unsigned FindAndReserve(unsigned Slot, InstrStage *StageBegin,
+                          InstrStage *StageEnd) {
+    // Where to begin 
+    Iter Begin = Tally.begin() + Slot;
+    // Find a free slot
+    Iter Where = FindSlots(Begin, StageBegin, StageEnd);
+    // Distance is slot number
+    unsigned Final = Where - Tally.begin();
+    return Final;
+  }
+
+};
+
+//===----------------------------------------------------------------------===//
+///
+/// ScheduleDAGSimple - Simple two pass scheduler.
+///
+class VISIBILITY_HIDDEN ScheduleDAGSimple : public ScheduleDAG {
+private:
+  bool NoSched;                         // Just do a BFS schedule, nothing fancy
+  bool NoItins;                         // Don't use itineraries?
+  ResourceTally<unsigned> Tally;        // Resource usage tally
+  unsigned NSlots;                      // Total latency
+  static const unsigned NotFound = ~0U; // Search marker
+
+  unsigned NodeCount;                   // Number of nodes in DAG
+  std::map<SDNode *, NodeInfo *> Map;   // Map nodes to info
+  bool HasGroups;                       // True if there are any groups
+  NodeInfo *Info;                       // Info for nodes being scheduled
+  NIVector Ordering;                    // Emit ordering of nodes
+  NodeGroup *HeadNG, *TailNG;           // Keep track of allocated NodeGroups
+  
+public:
+
+  // Ctor.
+  ScheduleDAGSimple(bool noSched, bool noItins, SelectionDAG &dag,
+                    MachineBasicBlock *bb, const TargetMachine &tm)
+    : ScheduleDAG(dag, bb, tm), NoSched(noSched), NoItins(noItins), NSlots(0),
+    NodeCount(0), HasGroups(false), Info(NULL), HeadNG(NULL), TailNG(NULL) {
+    assert(&TII && "Target doesn't provide instr info?");
+    assert(&MRI && "Target doesn't provide register info?");
+  }
+
+  virtual ~ScheduleDAGSimple() {
+    if (Info)
+      delete[] Info;
+    
+    NodeGroup *NG = HeadNG;
+    while (NG) {
+      NodeGroup *NextSU = NG->Next;
+      delete NG;
+      NG = NextSU;
+    }
+  }
+
+  void Schedule();
+
+  /// getNI - Returns the node info for the specified node.
+  ///
+  NodeInfo *getNI(SDNode *Node) { return Map[Node]; }
+  
+private:
+  static bool isDefiner(NodeInfo *A, NodeInfo *B);
+  void IncludeNode(NodeInfo *NI);
+  void VisitAll();
+  void GatherSchedulingInfo();
+  void FakeGroupDominators(); 
+  bool isStrongDependency(NodeInfo *A, NodeInfo *B);
+  bool isWeakDependency(NodeInfo *A, NodeInfo *B);
+  void ScheduleBackward();
+  void ScheduleForward();
+  
+  void AddToGroup(NodeInfo *D, NodeInfo *U);
+  /// PrepareNodeInfo - Set up the basic minimum node info for scheduling.
+  /// 
+  void PrepareNodeInfo();
+  
+  /// IdentifyGroups - Put flagged nodes into groups.
+  ///
+  void IdentifyGroups();
+  
+  /// print - Print ordering to specified output stream.
+  ///
+  void print(std::ostream &O) const;
+  void print(std::ostream *O) const { if (O) print(*O); }
+  
+  void dump(const char *tag) const;
+  
+  virtual void dump() const;
+  
+  /// EmitAll - Emit all nodes in schedule sorted order.
+  ///
+  void EmitAll();
+
+  /// printNI - Print node info.
+  ///
+  void printNI(std::ostream &O, NodeInfo *NI) const;
+  void printNI(std::ostream *O, NodeInfo *NI) const { if (O) printNI(*O, NI); }
+  
+  /// printChanges - Highlight changes in order caused by scheduling.
+  ///
+  void printChanges(unsigned Index) const;
+};
+
+//===----------------------------------------------------------------------===//
+/// Special case itineraries.
+///
+enum {
+  CallLatency = 40,          // To push calls back in time
+
+  RSInteger   = 0xC0000000,  // Two integer units
+  RSFloat     = 0x30000000,  // Two float units
+  RSLoadStore = 0x0C000000,  // Two load store units
+  RSBranch    = 0x02000000   // One branch unit
+};
+static InstrStage LoadStage  = { 5, RSLoadStore };
+static InstrStage StoreStage = { 2, RSLoadStore };
+static InstrStage IntStage   = { 2, RSInteger };
+static InstrStage FloatStage = { 3, RSFloat };
+//===----------------------------------------------------------------------===//
+
+} // namespace
+
+//===----------------------------------------------------------------------===//
+
+/// PrepareNodeInfo - Set up the basic minimum node info for scheduling.
+/// 
+void ScheduleDAGSimple::PrepareNodeInfo() {
+  // Allocate node information
+  Info = new NodeInfo[NodeCount];
+  
+  unsigned i = 0;
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I, ++i) {
+    // Fast reference to node schedule info
+    NodeInfo* NI = &Info[i];
+    // Set up map
+    Map[I] = NI;
+    // Set node
+    NI->Node = I;
+    // Set pending visit count
+    NI->setPending(I->use_size());
+  }
+}
+
+/// IdentifyGroups - Put flagged nodes into groups.
+///
+void ScheduleDAGSimple::IdentifyGroups() {
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    NodeInfo* NI = &Info[i];
+    SDNode *Node = NI->Node;
+    
+    // For each operand (in reverse to only look at flags)
+    for (unsigned N = Node->getNumOperands(); 0 < N--;) {
+      // Get operand
+      SDOperand Op = Node->getOperand(N);
+      // No more flags to walk
+      if (Op.getValueType() != MVT::Flag) break;
+      // Add to node group
+      AddToGroup(getNI(Op.Val), NI);
+      // Let everyone else know
+      HasGroups = true;
+    }
+  }
+}
+
+/// CountInternalUses - Returns the number of edges between the two nodes.
+///
+static unsigned CountInternalUses(NodeInfo *D, NodeInfo *U) {
+  unsigned N = 0;
+  for (unsigned M = U->Node->getNumOperands(); 0 < M--;) {
+    SDOperand Op = U->Node->getOperand(M);
+    if (Op.Val == D->Node) N++;
+  }
+  
+  return N;
+}
+
+//===----------------------------------------------------------------------===//
+/// AddToGroup - Adds a definer and user pair to a node group.
+///
+void ScheduleDAGSimple::AddToGroup(NodeInfo *D, NodeInfo *U) {
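+  // Four cases are handled below: both nodes already grouped (merge the two
+  // groups), only the definer grouped (absorb the user), only the user
+  // grouped (absorb the definer), or neither grouped (start a new group).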
+  // Get current groups
+  NodeGroup *DGroup = D->Group;
+  NodeGroup *UGroup = U->Group;
+  // If both are members of groups
+  if (DGroup && UGroup) {
+    // They may already be connected by another edge
+    if (DGroup == UGroup) return;
+    // Add the pending users count
+    DGroup->addPending(UGroup->getPending());
+    // For each member of the users group
+    NodeGroupIterator UNGI(U);
+    while (NodeInfo *UNI = UNGI.next() ) {
+      // Change the group
+      UNI->Group = DGroup;
+      // For each member of the definers group
+      NodeGroupIterator DNGI(D);
+      while (NodeInfo *DNI = DNGI.next() ) {
+        // Remove internal edges
+        DGroup->addPending(-CountInternalUses(DNI, UNI));
+      }
+    }
+    // Merge the two lists
+    DGroup->group_insert(DGroup->group_end(),
+                         UGroup->group_begin(), UGroup->group_end());
+  } else if (DGroup) {
+    // Make user member of definers group
+    U->Group = DGroup;
+    // Add the user's uses to the definer's group pending count
+    DGroup->addPending(U->Node->use_size());
+    // For each member of the definers group
+    NodeGroupIterator DNGI(D);
+    while (NodeInfo *DNI = DNGI.next() ) {
+      // Remove internal edges
+      DGroup->addPending(-CountInternalUses(DNI, U));
+    }
+    DGroup->group_push_back(U);
+  } else if (UGroup) {
+    // Make definer member of users group
+    D->Group = UGroup;
+    // Add the definer's uses to the user's group pending count
+    UGroup->addPending(D->Node->use_size());
+    // For each member of the users group
+    NodeGroupIterator UNGI(U);
+    while (NodeInfo *UNI = UNGI.next() ) {
+      // Remove internal edges
+      UGroup->addPending(-CountInternalUses(D, UNI));
+    }
+    UGroup->group_insert(UGroup->group_begin(), D);
+  } else {
+    D->Group = U->Group = DGroup = new NodeGroup();
+    DGroup->addPending(D->Node->use_size() + U->Node->use_size() -
+                       CountInternalUses(D, U));
+    DGroup->group_push_back(D);
+    DGroup->group_push_back(U);
+    
+    if (HeadNG == NULL)
+      HeadNG = DGroup;
+    if (TailNG != NULL)
+      TailNG->Next = DGroup;
+    TailNG = DGroup;
+  }
+}
+
+
+/// print - Print ordering to specified output stream.
+///
+void ScheduleDAGSimple::print(std::ostream &O) const {
+#ifndef NDEBUG
+  O << "Ordering\n";
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    printNI(O, NI);
+    O << "\n";
+    if (NI->isGroupDominator()) {
+      NodeGroup *Group = NI->Group;
+      for (NIIterator NII = Group->group_begin(), E = Group->group_end();
+           NII != E; NII++) {
+        O << "    ";
+        printNI(O, *NII);
+        O << "\n";
+      }
+    }
+  }
+#endif
+}
+
+void ScheduleDAGSimple::dump(const char *tag) const {
+  cerr << tag; dump();
+}
+
+void ScheduleDAGSimple::dump() const {
+  print(cerr);
+}
+
+
+/// EmitAll - Emit all nodes in schedule sorted order.
+///
+void ScheduleDAGSimple::EmitAll() {
+  // If this is the first basic block in the function, and if it has live ins
+  // that need to be copied into vregs, emit the copies into the top of the
+  // block before emitting the code for the block.
+  MachineFunction &MF = DAG.getMachineFunction();
+  if (&MF.front() == BB && MF.livein_begin() != MF.livein_end()) {
+    for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+         E = MF.livein_end(); LI != E; ++LI)
+      if (LI->second)
+        MRI->copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+                          LI->first, RegMap->getRegClass(LI->second));
+  }
+  
+  DenseMap<SDOperand, unsigned> VRBaseMap;
+  
+  // For each node in the ordering
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    // Get the scheduling info
+    NodeInfo *NI = Ordering[i];
+    if (NI->isInGroup()) {
+      NodeGroupIterator NGI(Ordering[i]);
+      while (NodeInfo *NI = NGI.next()) EmitNode(NI->Node, VRBaseMap);
+    } else {
+      EmitNode(NI->Node, VRBaseMap);
+    }
+  }
+}
+
+/// isFlagDefiner - Returns true if the node defines a flag result.
+static bool isFlagDefiner(SDNode *A) {
+  unsigned N = A->getNumValues();
+  return N && A->getValueType(N - 1) == MVT::Flag;
+}
+
+/// isFlagUser - Returns true if the node uses a flag result.
+///
+static bool isFlagUser(SDNode *A) {
+  unsigned N = A->getNumOperands();
+  return N && A->getOperand(N - 1).getValueType() == MVT::Flag;
+}
+
+/// printNI - Print node info.
+///
+void ScheduleDAGSimple::printNI(std::ostream &O, NodeInfo *NI) const {
+#ifndef NDEBUG
+  SDNode *Node = NI->Node;
+  O << " "
+    << std::hex << Node << std::dec
+    << ", Lat=" << NI->Latency
+    << ", Slot=" << NI->Slot
+    << ", ARITY=(" << Node->getNumOperands() << ","
+    << Node->getNumValues() << ")"
+    << " " << Node->getOperationName(&DAG);
+  if (isFlagDefiner(Node)) O << "<#";
+  if (isFlagUser(Node)) O << ">#";
+#endif
+}
+
+/// printChanges - Highlight changes in order caused by scheduling.
+///
+void ScheduleDAGSimple::printChanges(unsigned Index) const {
+#ifndef NDEBUG
+  // Get the ordered node count
+  unsigned N = Ordering.size();
+  // Determine if any changes
+  unsigned i = 0;
+  for (; i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    if (NI->Preorder != i) break;
+  }
+  
+  if (i < N) {
+    cerr << Index << ". New Ordering\n";
+    
+    for (i = 0; i < N; i++) {
+      NodeInfo *NI = Ordering[i];
+      cerr << "  " << NI->Preorder << ". ";
+      printNI(cerr, NI);
+      cerr << "\n";
+      if (NI->isGroupDominator()) {
+        NodeGroup *Group = NI->Group;
+        for (NIIterator NII = Group->group_begin(), E = Group->group_end();
+             NII != E; NII++) {
+          cerr << "          ";
+          printNI(cerr, *NII);
+          cerr << "\n";
+        }
+      }
+    }
+  } else {
+    cerr << Index << ". No Changes\n";
+  }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+/// isDefiner - Return true if node A is a definer for B.
+///
+bool ScheduleDAGSimple::isDefiner(NodeInfo *A, NodeInfo *B) {
+  // While there are A nodes
+  NodeGroupIterator NII(A);
+  while (NodeInfo *NI = NII.next()) {
+    // Extract node
+    SDNode *Node = NI->Node;
+    // While there are operands in nodes of B
+    NodeGroupOpIterator NGOI(B);
+    while (!NGOI.isEnd()) {
+      SDOperand Op = NGOI.next();
+      // If node from A defines a node in B
+      if (Node == Op.Val) return true;
+    }
+  }
+  return false;
+}
+
+/// IncludeNode - Add node to NodeInfo vector.
+///
+void ScheduleDAGSimple::IncludeNode(NodeInfo *NI) {
+  // Get node
+  SDNode *Node = NI->Node;
+  // Ignore entry node
+  if (Node->getOpcode() == ISD::EntryToken) return;
+  // Check current count for node
+  int Count = NI->getPending();
+  // If the node is already in list
+  if (Count < 0) return;
+  // Decrement count to indicate a visit
+  Count--;
+  // If count has gone to zero then add node to list
+  if (!Count) {
+    // Add node
+    if (NI->isInGroup()) {
+      Ordering.push_back(NI->Group->getDominator());
+    } else {
+      Ordering.push_back(NI);
+    }
+    // indicate node has been added
+    Count--;
+  }
+  // Mark as visited with new count 
+  NI->setPending(Count);
+}
+
+/// GatherSchedulingInfo - Get latency and resource information about each node.
+///
+void ScheduleDAGSimple::GatherSchedulingInfo() {
+  // Get instruction itineraries for the target
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  
+  // For each node
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    // Get node info
+    NodeInfo* NI = &Info[i];
+    SDNode *Node = NI->Node;
+    
+    // If there are no itineraries (or we were told not to use them), fall
+    // back to the built-in special-case stages for machine instructions
+    if (InstrItins.isEmpty() || NoItins) {
+      // If machine opcode
+      if (Node->isTargetOpcode()) {
+        // Get return type to guess which processing unit handles it
+        MVT::ValueType VT = Node->getValueType(0);
+        // Get machine opcode
+        MachineOpCode TOpc = Node->getTargetOpcode();
+        NI->IsCall = TII->isCall(TOpc);
+        NI->IsLoad = TII->isLoad(TOpc);
+        NI->IsStore = TII->isStore(TOpc);
+
+        if (TII->isLoad(TOpc))             NI->StageBegin = &LoadStage;
+        else if (TII->isStore(TOpc))       NI->StageBegin = &StoreStage;
+        else if (MVT::isInteger(VT))       NI->StageBegin = &IntStage;
+        else if (MVT::isFloatingPoint(VT)) NI->StageBegin = &FloatStage;
+        if (NI->StageBegin) NI->StageEnd = NI->StageBegin + 1;
+      }
+    } else if (Node->isTargetOpcode()) {
+      // Get machine opcode
+      MachineOpCode TOpc = Node->getTargetOpcode();
+      // Check to see if it is a call
+      NI->IsCall = TII->isCall(TOpc);
+      // Get itinerary stages for instruction
+      unsigned II = TII->getSchedClass(TOpc);
+      NI->StageBegin = InstrItins.begin(II);
+      NI->StageEnd = InstrItins.end(II);
+    }
+    
+    // One slot for the instruction itself
+    NI->Latency = 1;
+    
+    // Add long latency for a call to push it back in time
+    if (NI->IsCall) NI->Latency += CallLatency;
+    
+    // Sum up all the latencies
+    for (InstrStage *Stage = NI->StageBegin, *E = NI->StageEnd;
+        Stage != E; Stage++) {
+      NI->Latency += Stage->Cycles;
+    }
+    
+    // Sum up all the latencies for max tally size
+    NSlots += NI->Latency;
+  }
+  
+  // Unify metrics if in a group
+  if (HasGroups) {
+    for (unsigned i = 0, N = NodeCount; i < N; i++) {
+      NodeInfo* NI = &Info[i];
+      
+      if (NI->isInGroup()) {
+        NodeGroup *Group = NI->Group;
+        
+        if (!Group->getDominator()) {
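+          // Choose the member with the largest latency as the dominator and
+          // assign it the accumulated latency of the remaining members.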
+          NIIterator NGI = Group->group_begin(), NGE = Group->group_end();
+          NodeInfo *Dominator = *NGI;
+          unsigned Latency = 0;
+          
+          for (NGI++; NGI != NGE; NGI++) {
+            NodeInfo* NGNI = *NGI;
+            Latency += NGNI->Latency;
+            if (Dominator->Latency < NGNI->Latency) Dominator = NGNI;
+          }
+          
+          Dominator->Latency = Latency;
+          Group->setDominator(Dominator);
+        }
+      }
+    }
+  }
+}
+
+/// VisitAll - Visit each node breadth-first to produce an initial ordering.
+/// The walk produces the ordering in reverse; it is corrected at the end.
+void ScheduleDAGSimple::VisitAll() {
+  // Add first element to list
+  NodeInfo *NI = getNI(DAG.getRoot().Val);
+  if (NI->isInGroup()) {
+    Ordering.push_back(NI->Group->getDominator());
+  } else {
+    Ordering.push_back(NI);
+  }
+
+  // Iterate through all nodes that have been added
+  for (unsigned i = 0; i < Ordering.size(); i++) { // note: size() varies
+    // Visit all operands
+    NodeGroupOpIterator NGI(Ordering[i]);
+    while (!NGI.isEnd()) {
+      // Get next operand
+      SDOperand Op = NGI.next();
+      // Get node
+      SDNode *Node = Op.Val;
+      // Ignore passive nodes
+      if (isPassiveNode(Node)) continue;
+      // Check out node
+      IncludeNode(getNI(Node));
+    }
+  }
+
+  // Add entry node last (IncludeNode filters entry nodes)
+  if (DAG.getEntryNode().Val != DAG.getRoot().Val)
+    Ordering.push_back(getNI(DAG.getEntryNode().Val));
+    
+  // Reverse the order
+  std::reverse(Ordering.begin(), Ordering.end());
+}
+
+/// FakeGroupDominators - Set dominators for non-scheduling.
+/// 
+void ScheduleDAGSimple::FakeGroupDominators() {
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    NodeInfo* NI = &Info[i];
+    
+    if (NI->isInGroup()) {
+      NodeGroup *Group = NI->Group;
+      
+      if (!Group->getDominator()) {
+        Group->setDominator(NI);
+      }
+    }
+  }
+}
+
+/// isStrongDependency - Return true if node A has results used by node B,
+/// i.e., B must wait for the latency of A.
+bool ScheduleDAGSimple::isStrongDependency(NodeInfo *A, NodeInfo *B) {
+  // If A defines for B then it's a strong dependency or
+  // if a load follows a store (they may alias, so don't take a chance).
+  return isDefiner(A, B) || (A->IsStore && B->IsLoad);
+}
+
+/// isWeakDependency - Return true if node A produces a result that will
+/// conflict with operands of B.  It is assumed that isStrongDependency has
+/// already been called.
+bool ScheduleDAGSimple::isWeakDependency(NodeInfo *A, NodeInfo *B) {
+  // TODO check for conflicting real registers and aliases
+#if 0 // FIXME - Since we are in SSA form and not checking register aliasing
+  return A->Node->getOpcode() == ISD::EntryToken || isStrongDependency(B, A);
+#else
+  return A->Node->getOpcode() == ISD::EntryToken;
+#endif
+}
+
+/// ScheduleBackward - Schedule instructions so that any long latency
+/// instructions and the critical path get pushed back in time. Time is run in
+/// reverse to allow reuse of the Tally and to eliminate the overhead of
+/// biasing every slot index against NSlots.
+void ScheduleDAGSimple::ScheduleBackward() {
+  // Size and clear the resource tally
+  Tally.Initialize(NSlots);
+  // Get number of nodes to schedule
+  unsigned N = Ordering.size();
+  
+  // For each node being scheduled
+  for (unsigned i = N; 0 < i--;) {
+    NodeInfo *NI = Ordering[i];
+    // Track insertion
+    unsigned Slot = NotFound;
+    
+    // Compare against those previously scheduled nodes
+    unsigned j = i + 1;
+    for (; j < N; j++) {
+      // Get following instruction
+      NodeInfo *Other = Ordering[j];
+      
+      // Check dependency against previously inserted nodes
+      if (isStrongDependency(NI, Other)) {
+        Slot = Other->Slot + Other->Latency;
+        break;
+      } else if (isWeakDependency(NI, Other)) {
+        Slot = Other->Slot;
+        break;
+      }
+    }
+    
+    // If independent of others (or first entry)
+    if (Slot == NotFound) Slot = 0;
+    
+#if 0 // FIXME - measure later
+    // Find a slot where the needed resources are available
+    if (NI->StageBegin != NI->StageEnd)
+      Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+#endif
+      
+    // Set node slot
+    NI->Slot = Slot;
+    
+    // Insert sort based on slot
+    j = i + 1;
+    for (; j < N; j++) {
+      // Get following instruction
+      NodeInfo *Other = Ordering[j];
+      // Should we look further (remember slots are in reverse time)
+      if (Slot >= Other->Slot) break;
+      // Shuffle other into ordering
+      Ordering[j - 1] = Other;
+    }
+    // Insert node in proper slot
+    if (j != i + 1) Ordering[j - 1] = NI;
+  }
+}
+
+/// ScheduleForward - Schedule instructions to maximize packing.
+///
+void ScheduleDAGSimple::ScheduleForward() {
+  // Size and clear the resource tally
+  Tally.Initialize(NSlots);
+  // Get number of nodes to schedule
+  unsigned N = Ordering.size();
+  
+  // For each node being scheduled
+  for (unsigned i = 0; i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    // Track insertion
+    unsigned Slot = NotFound;
+    
+    // Compare against those previously scheduled nodes
+    unsigned j = i;
+    for (; 0 < j--;) {
+      // Get prior instruction
+      NodeInfo *Other = Ordering[j];
+      
+      // Check dependency against previously inserted nodes
+      if (isStrongDependency(Other, NI)) {
+        Slot = Other->Slot + Other->Latency;
+        break;
+      } else if (Other->IsCall || isWeakDependency(Other, NI)) {
+        Slot = Other->Slot;
+        break;
+      }
+    }
+    
+    // If independent of others (or first entry)
+    if (Slot == NotFound) Slot = 0;
+    
+    // Find a slot where the needed resources are available
+    if (NI->StageBegin != NI->StageEnd)
+      Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+      
+    // Set node slot
+    NI->Slot = Slot;
+    
+    // Insert sort based on slot
+    j = i;
+    for (; 0 < j--;) {
+      // Get prior instruction
+      NodeInfo *Other = Ordering[j];
+      // Should we look further
+      if (Slot >= Other->Slot) break;
+      // Shuffle other into ordering
+      Ordering[j + 1] = Other;
+    }
+    // Insert node in proper slot
+    if (j != i) Ordering[j + 1] = NI;
+  }
+}
+
+/// Schedule - Order nodes according to selected style.
+///
+void ScheduleDAGSimple::Schedule() {
+  // Number the nodes
+  NodeCount = std::distance(DAG.allnodes_begin(), DAG.allnodes_end());
+
+  // Set up minimum info for scheduling
+  PrepareNodeInfo();
+  // Construct node groups for flagged nodes
+  IdentifyGroups();
+  
+  // Test to see if scheduling should occur
+  bool ShouldSchedule = NodeCount > 3 && !NoSched;
+  // Don't waste time if the block is only an entry and a return
+  if (ShouldSchedule) {
+    // Get latency and resource requirements
+    GatherSchedulingInfo();
+  } else if (HasGroups) {
+    // Make sure all the groups have dominators
+    FakeGroupDominators();
+  }
+
+  // Breadth first walk of DAG
+  VisitAll();
+
+#ifndef NDEBUG
+  static unsigned Count = 0;
+  Count++;
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    NI->Preorder = i;
+  }
+#endif  
+  
+  // Don't waste time if the block is only an entry and a return
+  if (ShouldSchedule) {
+    // Push back long instructions and critical path
+    ScheduleBackward();
+    
+    // Pack instructions to maximize resource utilization
+    ScheduleForward();
+  }
+  
+  DEBUG(printChanges(Count));
+  
+  // Emit in scheduled order
+  EmitAll();
+}
+
+
+/// createSimpleDAGScheduler - This creates a simple two-pass instruction
+/// scheduler using instruction itineraries.
+llvm::ScheduleDAG* llvm::createSimpleDAGScheduler(SelectionDAGISel *IS,
+                                                  SelectionDAG *DAG,
+                                                  MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(false, false, *DAG, BB, DAG->getTarget());
+}
+
+/// createNoItinsDAGScheduler - This creates a simple two-pass instruction
+/// scheduler without using instruction itineraries.
+llvm::ScheduleDAG* llvm::createNoItinsDAGScheduler(SelectionDAGISel *IS,
+                                                   SelectionDAG *DAG,
+                                                   MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(false, true, *DAG, BB, DAG->getTarget());
+}
+
+/// createBFS_DAGScheduler - This creates a simple breadth-first instruction
+/// scheduler.
+llvm::ScheduleDAG* llvm::createBFS_DAGScheduler(SelectionDAGISel *IS,
+                                                SelectionDAG *DAG,
+                                                MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(true, false, *DAG, BB,  DAG->getTarget());
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..9803ab8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,3694 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const MVT::ValueType *VTs, unsigned NumVTs) {
+  SDVTList Res = {VTs, NumVTs};
+  return Res;
+}
+
+//===----------------------------------------------------------------------===//
+//                              ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(double V) const {
+  return DoubleToBits(V) == DoubleToBits(Value);
+}
+
+//===----------------------------------------------------------------------===//
+//                              ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).Val;
+  
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+  
+  unsigned i = 0, e = N->getNumOperands();
+  
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+  
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+  
+  // Do not accept build_vectors that aren't all constants or which have non-~0
+  // elements.
+  SDOperand NotZero = N->getOperand(i);
+  if (isa<ConstantSDNode>(NotZero)) {
+    if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(NotZero)) {
+    MVT::ValueType VT = NotZero.getValueType();
+    if (VT== MVT::f64) {
+      if (DoubleToBits(cast<ConstantFPSDNode>(NotZero)->getValue()) !=
+          (uint64_t)-1)
+        return false;
+    } else {
+      if (FloatToBits(cast<ConstantFPSDNode>(NotZero)->getValue()) !=
+          (uint32_t)-1)
+        return false;
+    }
+  } else
+    return false;
+  
+  // Okay, we have at least one ~0 value, check to see if the rest match or are
+  // undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != NotZero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).Val;
+  
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+  
+  unsigned i = 0, e = N->getNumOperands();
+  
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+  
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+  
+  // Do not accept build_vectors that aren't all constants or which have
+  // non-zero elements.
+  SDOperand Zero = N->getOperand(i);
+  if (isa<ConstantSDNode>(Zero)) {
+    if (!cast<ConstantSDNode>(Zero)->isNullValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(Zero)) {
+    if (!cast<ConstantFPSDNode>(Zero)->isExactlyValue(0.0))
+      return false;
+  } else
+    return false;
+  
+  // Okay, we have at least one zero value, check to see if the rest match or are
+  // undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != Zero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+  // To perform this operation, we just need to swap the L and G bits of the
+  // operation.
+  unsigned OldL = (Operation >> 2) & 1;
+  unsigned OldG = (Operation >> 1) & 1;
+  return ISD::CondCode((Operation & ~6) |  // Keep the N, U, E bits
+                       (OldL << 1) |       // New G bit
+                       (OldG << 2));        // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+  unsigned Operation = Op;
+  if (isInteger)
+    Operation ^= 7;   // Flip L, G, E bits, but not U.
+  else
+    Operation ^= 15;  // Flip all of the condition bits.
+  if (Operation > ISD::SETTRUE2)
+    Operation &= ~8;     // Don't let N and U bits get set.
+  return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if the result is an unsigned comparison.  Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+  switch (Opcode) {
+  default: assert(0 && "Illegal integer setcc operation!");
+  case ISD::SETEQ:
+  case ISD::SETNE: return 0;
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETGT:
+  case ISD::SETGE: return 1;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUGT:
+  case ISD::SETUGE: return 2;
+  }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)).  This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                       bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed integer setcc with an unsigned integer setcc.
+    return ISD::SETCC_INVALID;
+
+  unsigned Op = Op1 | Op2;  // Combine all of the condition bits.
+
+  // If the N and U bits get set then the resultant comparison DOES suddenly
+  // care about orderedness, and is true when ordered.
+  if (Op > ISD::SETTRUE2)
+    Op &= ~16;     // Clear the U bit if the N bit is set.
+  
+  // Canonicalize illegal integer setcc's.
+  if (isInteger && Op == ISD::SETUNE)  // e.g. SETUGT | SETULT
+    Op = ISD::SETNE;
+  
+  return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)).  This
+/// function returns zero if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                        bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed setcc with an unsigned setcc.
+    return ISD::SETCC_INVALID;
+
+  // Combine all of the condition bits.
+  ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+  
+  // Canonicalize illegal integer setcc's.
+  if (isInteger) {
+    switch (Result) {
+    default: break;
+    case ISD::SETUO : Result = ISD::SETFALSE; break;  // SETUGT & SETULT
+    case ISD::SETUEQ: Result = ISD::SETEQ   ; break;  // SETUGE & SETULE
+    case ISD::SETOLT: Result = ISD::SETULT  ; break;  // SETULT & SETNE
+    case ISD::SETOGT: Result = ISD::SETUGT  ; break;  // SETUGT & SETNE
+    }
+  }
+  
+  return Result;
+}
+
+const TargetMachine &SelectionDAG::getTarget() const {
+  return TLI.getTargetMachine();
+}
+
+//===----------------------------------------------------------------------===//
+//                           SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC)  {
+  ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+  ID.AddPointer(VTList.VTs);  
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+                              const SDOperand *Ops, unsigned NumOps) {
+  for (; NumOps; --NumOps, ++Ops) {
+    ID.AddPointer(Ops->Val);
+    ID.AddInteger(Ops->ResNo);
+  }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+                          unsigned short OpC, SDVTList VTList, 
+                          const SDOperand *OpList, unsigned N) {
+  AddNodeIDOpcode(ID, OpC);
+  AddNodeIDValueTypes(ID, VTList);
+  AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+  switch (N->getOpcode()) {
+  default: break;  // Normal nodes don't need extra info.
+  case ISD::TargetConstant:
+  case ISD::Constant:
+    ID.AddInteger(cast<ConstantSDNode>(N)->getValue());
+    break;
+  case ISD::TargetConstantFP:
+  case ISD::ConstantFP:
+    ID.AddDouble(cast<ConstantFPSDNode>(N)->getValue());
+    break;
+  case ISD::TargetGlobalAddress:
+  case ISD::GlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::GlobalTLSAddress: {
+    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+    ID.AddPointer(GA->getGlobal());
+    ID.AddInteger(GA->getOffset());
+    break;
+  }
+  case ISD::BasicBlock:
+    ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+    break;
+  case ISD::Register:
+    ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+    break;
+  case ISD::SRCVALUE: {
+    SrcValueSDNode *SV = cast<SrcValueSDNode>(N);
+    ID.AddPointer(SV->getValue());
+    ID.AddInteger(SV->getOffset());
+    break;
+  }
+  case ISD::FrameIndex:
+  case ISD::TargetFrameIndex:
+    ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+    break;
+  case ISD::JumpTable:
+  case ISD::TargetJumpTable:
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+    break;
+  case ISD::ConstantPool:
+  case ISD::TargetConstantPool: {
+    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+    ID.AddInteger(CP->getAlignment());
+    ID.AddInteger(CP->getOffset());
+    if (CP->isMachineConstantPoolEntry())
+      CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+    else
+      ID.AddPointer(CP->getConstVal());
+    break;
+  }
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    ID.AddInteger(LD->getAddressingMode());
+    ID.AddInteger(LD->getExtensionType());
+    ID.AddInteger(LD->getLoadedVT());
+    ID.AddPointer(LD->getSrcValue());
+    ID.AddInteger(LD->getSrcValueOffset());
+    ID.AddInteger(LD->getAlignment());
+    ID.AddInteger(LD->isVolatile());
+    break;
+  }
+  case ISD::STORE: {
+    StoreSDNode *ST = cast<StoreSDNode>(N);
+    ID.AddInteger(ST->getAddressingMode());
+    ID.AddInteger(ST->isTruncatingStore());
+    ID.AddInteger(ST->getStoredVT());
+    ID.AddPointer(ST->getSrcValue());
+    ID.AddInteger(ST->getSrcValueOffset());
+    ID.AddInteger(ST->getAlignment());
+    ID.AddInteger(ST->isVolatile());
+    break;
+  }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//                              SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted.
+  HandleSDNode Dummy(getRoot());
+
+  SmallVector<SDNode*, 128> DeadNodes;
+  
+  // Add all obviously-dead nodes to the DeadNodes worklist.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+    if (I->use_empty())
+      DeadNodes.push_back(I);
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.back();
+    DeadNodes.pop_back();
+    
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list.  This is safe to do, as there are
+    // no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+      SDNode *Operand = I->Val;
+      Operand->removeUser(N);
+      
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+    if (N->OperandsNeedDelete)
+      delete[] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+    
+    // Finally, remove N itself.
+    AllNodes.erase(N);
+  }
+  
+  // If the root changed (e.g. it was a dead load), update the root.
+  setRoot(Dummy.getValue());
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, std::vector<SDNode*> &Deleted) {
+  SmallVector<SDNode*, 16> DeadNodes;
+  DeadNodes.push_back(N);
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.back();
+    DeadNodes.pop_back();
+    
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list.  This is safe to do, as there are
+    // no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+      SDNode *Operand = I->Val;
+      Operand->removeUser(N);
+      
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+    if (N->OperandsNeedDelete)
+      delete[] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+    
+    // Finally, remove N itself.
+    Deleted.push_back(N);
+    AllNodes.erase(N);
+  }
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+  assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+  // First take this out of the appropriate CSE map.
+  RemoveNodeFromCSEMaps(N);
+
+  // Finally, remove uses due to operands of this node, remove from the 
+  // AllNodes list, and delete the node.
+  DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+
+  // Remove it from the AllNodes list.
+  AllNodes.remove(N);
+    
+  // Drop all of the operands and decrement used nodes use counts.
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+    I->Val->removeUser(N);
+  if (N->OperandsNeedDelete)
+    delete[] N->OperandList;
+  N->OperandList = 0;
+  N->NumOperands = 0;
+  
+  delete N;
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it.  This is useful when we're about to delete or repurpose
+/// the node.  We don't want future requests for structurally identical nodes
+/// to return N anymore.
+void SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+  bool Erased = false;
+  switch (N->getOpcode()) {
+  case ISD::HANDLENODE: return;  // noop.
+  case ISD::STRING:
+    Erased = StringNodes.erase(cast<StringSDNode>(N)->getValue());
+    break;
+  case ISD::CONDCODE:
+    assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+           "Cond code doesn't exist!");
+    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+    break;
+  case ISD::ExternalSymbol:
+    Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::TargetExternalSymbol:
+    Erased =
+      TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::VALUETYPE:
+    Erased = ValueTypeNodes[cast<VTSDNode>(N)->getVT()] != 0;
+    ValueTypeNodes[cast<VTSDNode>(N)->getVT()] = 0;
+    break;
+  default:
+    // Remove it from the CSE Map.
+    Erased = CSEMap.RemoveNode(N);
+    break;
+  }
+#ifndef NDEBUG
+  // Verify that the node was actually in one of the CSE maps, unless it has a 
+  // flag result (which cannot be CSE'd) or is one of the special cases that are
+  // not subject to CSE.
+  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+      !N->isTargetOpcode()) {
+    N->dump(this);
+    cerr << "\n";
+    assert(0 && "Node is not in map!");
+  }
+#endif
+}
+
+/// AddNonLeafNodeToCSEMaps - Add the specified node back to the CSE maps.  It
+/// has been taken out and modified in some way.  If the specified node already
+/// exists in the CSE maps, do not modify the maps, but return the existing node
+/// instead.  If it doesn't exist, add it and return null.
+///
+SDNode *SelectionDAG::AddNonLeafNodeToCSEMaps(SDNode *N) {
+  assert(N->getNumOperands() && "This is a leaf node!");
+  if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+    return 0;    // Never add these nodes.
+  
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return 0;   // Never CSE anything that produces a flag.
+  
+  SDNode *New = CSEMap.GetOrInsertNode(N);
+  if (New != N) return New;  // Node already existed.
+  return 0;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized, 
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDOperand Op,
+                                           void *&InsertPos) {
+  if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+    return 0;    // Never add these nodes.
+  
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return 0;   // Never CSE anything that produces a flag.
+  
+  SDOperand Ops[] = { Op };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized, 
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, 
+                                           SDOperand Op1, SDOperand Op2,
+                                           void *&InsertPos) {
+  if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+    return 0;    // Never add these nodes.
+  
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return 0;   // Never CSE anything that produces a flag.
+                                              
+  SDOperand Ops[] = { Op1, Op2 };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized, 
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, 
+                                           const SDOperand *Ops,unsigned NumOps,
+                                           void *&InsertPos) {
+  if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+    return 0;    // Never add these nodes.
+  
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return 0;   // Never CSE anything that produces a flag.
+  
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+  
+  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    ID.AddInteger(LD->getAddressingMode());
+    ID.AddInteger(LD->getExtensionType());
+    ID.AddInteger(LD->getLoadedVT());
+    ID.AddPointer(LD->getSrcValue());
+    ID.AddInteger(LD->getSrcValueOffset());
+    ID.AddInteger(LD->getAlignment());
+    ID.AddInteger(LD->isVolatile());
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    ID.AddInteger(ST->getAddressingMode());
+    ID.AddInteger(ST->isTruncatingStore());
+    ID.AddInteger(ST->getStoredVT());
+    ID.AddPointer(ST->getSrcValue());
+    ID.AddInteger(ST->getSrcValueOffset());
+    ID.AddInteger(ST->getAlignment());
+    ID.AddInteger(ST->isVolatile());
+  }
+  
+  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+
+SelectionDAG::~SelectionDAG() {
+  while (!AllNodes.empty()) {
+    SDNode *N = AllNodes.begin();
+    N->SetNextInBucket(0);
+    if (N->OperandsNeedDelete)
+      delete [] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+    AllNodes.pop_front();
+  }
+}
+
+SDOperand SelectionDAG::getZeroExtendInReg(SDOperand Op, MVT::ValueType VT) {
+  if (Op.getValueType() == VT) return Op;
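+  // Build a mask covering the low bits of VT and AND it with Op to clear
+  // the high bits.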
+  int64_t Imm = ~0ULL >> (64-MVT::getSizeInBits(VT));
+  return getNode(ISD::AND, Op.getValueType(), Op,
+                 getConstant(Imm, Op.getValueType()));
+}
+
+SDOperand SelectionDAG::getString(const std::string &Val) {
+  StringSDNode *&N = StringNodes[Val];
+  if (!N) {
+    N = new StringSDNode(Val);
+    AllNodes.push_back(N);
+  }
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getConstant(uint64_t Val, MVT::ValueType VT, bool isT) {
+  assert(MVT::isInteger(VT) && "Cannot create FP integer constant!");
+  assert(!MVT::isVector(VT) && "Cannot create Vector ConstantSDNodes!");
+  
+  // Mask out any bits that are not valid for this constant.
+  Val &= MVT::getIntVTBitMask(VT);
+
+  unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Val);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new ConstantSDNode(isT, Val, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+
+SDOperand SelectionDAG::getConstantFP(double Val, MVT::ValueType VT,
+                                      bool isTarget) {
+  assert(MVT::isFloatingPoint(VT) && "Cannot create integer FP constant!");
+  MVT::ValueType EltVT =
+    MVT::isVector(VT) ? MVT::getVectorElementType(VT) : VT;
+  if (EltVT == MVT::f32)
+    Val = (float)Val;  // Mask out extra precision.
+
+  // Do the map lookup using the actual bit pattern for the floating point
+  // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+  // we don't have issues with SNANs.
+  unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+  ID.AddDouble(Val);
+  void *IP = 0;
+  SDNode *N = NULL;
+  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+    if (!MVT::isVector(VT))
+      return SDOperand(N, 0);
+  if (!N) {
+    N = new ConstantFPSDNode(isTarget, Val, EltVT);
+    CSEMap.InsertNode(N, IP);
+    AllNodes.push_back(N);
+  }
+
+  SDOperand Result(N, 0);
+  if (MVT::isVector(VT)) {
+    SmallVector<SDOperand, 8> Ops;
+    Ops.assign(MVT::getVectorNumElements(VT), Result);
+    Result = getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+  }
+  return Result;
+}
+
+SDOperand SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+                                         MVT::ValueType VT, int Offset,
+                                         bool isTargetGA) {
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  unsigned Opc;
+  if (GVar && GVar->isThreadLocal())
+    Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+  else
+    Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddPointer(GV);
+  ID.AddInteger(Offset);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+   return SDOperand(E, 0);
+  SDNode *N = new GlobalAddressSDNode(isTargetGA, GV, VT, Offset);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getFrameIndex(int FI, MVT::ValueType VT,
+                                      bool isTarget) {
+  unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(FI);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new FrameIndexSDNode(FI, VT, isTarget);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getJumpTable(int JTI, MVT::ValueType VT, bool isTarget){
+  unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(JTI);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new JumpTableSDNode(JTI, VT, isTarget);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getConstantPool(Constant *C, MVT::ValueType VT,
+                                        unsigned Alignment, int Offset,
+                                        bool isTarget) {
+  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  ID.AddPointer(C);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+
+SDOperand SelectionDAG::getConstantPool(MachineConstantPoolValue *C,
+                                        MVT::ValueType VT,
+                                        unsigned Alignment, int Offset,
+                                        bool isTarget) {
+  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  C->AddSelectionDAGCSEId(ID);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+
+SDOperand SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(MBB);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new BasicBlockSDNode(MBB);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getValueType(MVT::ValueType VT) {
+  if ((unsigned)VT >= ValueTypeNodes.size())
+    ValueTypeNodes.resize(VT+1);
+  if (ValueTypeNodes[VT] == 0) {
+    ValueTypeNodes[VT] = new VTSDNode(VT);
+    AllNodes.push_back(ValueTypeNodes[VT]);
+  }
+
+  return SDOperand(ValueTypeNodes[VT], 0);
+}
+
+SDOperand SelectionDAG::getExternalSymbol(const char *Sym, MVT::ValueType VT) {
+  SDNode *&N = ExternalSymbols[Sym];
+  if (N) return SDOperand(N, 0);
+  N = new ExternalSymbolSDNode(false, Sym, VT);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTargetExternalSymbol(const char *Sym,
+                                                MVT::ValueType VT) {
+  SDNode *&N = TargetExternalSymbols[Sym];
+  if (N) return SDOperand(N, 0);
+  N = new ExternalSymbolSDNode(true, Sym, VT);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getCondCode(ISD::CondCode Cond) {
+  if ((unsigned)Cond >= CondCodeNodes.size())
+    CondCodeNodes.resize(Cond+1);
+  
+  if (CondCodeNodes[Cond] == 0) {
+    CondCodeNodes[Cond] = new CondCodeSDNode(Cond);
+    AllNodes.push_back(CondCodeNodes[Cond]);
+  }
+  return SDOperand(CondCodeNodes[Cond], 0);
+}
+
+SDOperand SelectionDAG::getRegister(unsigned RegNo, MVT::ValueType VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+  ID.AddInteger(RegNo);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new RegisterSDNode(RegNo, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getSrcValue(const Value *V, int Offset) {
+  assert((!V || isa<PointerType>(V->getType())) &&
+         "SrcValue is not a pointer?");
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(V);
+  ID.AddInteger(Offset);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new SrcValueSDNode(V, Offset);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::FoldSetCC(MVT::ValueType VT, SDOperand N1,
+                                  SDOperand N2, ISD::CondCode Cond) {
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return getConstant(1, VT);
+    
+  case ISD::SETOEQ:
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETONE:
+  case ISD::SETO:
+  case ISD::SETUO:
+  case ISD::SETUEQ:
+  case ISD::SETUNE:
+    assert(!MVT::isInteger(N1.getValueType()) && "Illegal setcc for integer!");
+    break;
+  }
+  
+  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val)) {
+    uint64_t C2 = N2C->getValue();
+    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) {
+      uint64_t C1 = N1C->getValue();
+      
+      // Sign extend the operands if required
+      if (ISD::isSignedIntSetCC(Cond)) {
+        C1 = N1C->getSignExtended();
+        C2 = N2C->getSignExtended();
+      }
+      
+      switch (Cond) {
+      default: assert(0 && "Unknown integer setcc!");
+      case ISD::SETEQ:  return getConstant(C1 == C2, VT);
+      case ISD::SETNE:  return getConstant(C1 != C2, VT);
+      case ISD::SETULT: return getConstant(C1 <  C2, VT);
+      case ISD::SETUGT: return getConstant(C1 >  C2, VT);
+      case ISD::SETULE: return getConstant(C1 <= C2, VT);
+      case ISD::SETUGE: return getConstant(C1 >= C2, VT);
+      case ISD::SETLT:  return getConstant((int64_t)C1 <  (int64_t)C2, VT);
+      case ISD::SETGT:  return getConstant((int64_t)C1 >  (int64_t)C2, VT);
+      case ISD::SETLE:  return getConstant((int64_t)C1 <= (int64_t)C2, VT);
+      case ISD::SETGE:  return getConstant((int64_t)C1 >= (int64_t)C2, VT);
+      }
+    }
+  }
+  if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.Val))
+    if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.Val)) {
+      double C1 = N1C->getValue(), C2 = N2C->getValue();
+      
+      switch (Cond) {
+      default: break; // FIXME: Implement the rest of these!
+      case ISD::SETEQ:  return getConstant(C1 == C2, VT);
+      case ISD::SETNE:  return getConstant(C1 != C2, VT);
+      case ISD::SETLT:  return getConstant(C1 < C2, VT);
+      case ISD::SETGT:  return getConstant(C1 > C2, VT);
+      case ISD::SETLE:  return getConstant(C1 <= C2, VT);
+      case ISD::SETGE:  return getConstant(C1 >= C2, VT);
+      }
+    } else {
+      // Ensure that the constant occurs on the RHS.
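+      // Swapping the operands also requires swapping the condition code, e.g.
+      // setlt(C, X) becomes setgt(X, C).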
+      return getSetCC(VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+    }
+      
+  // Could not fold it.
+  return SDOperand();
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
+/// this predicate to simplify operations downstream.  Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDOperand Op, uint64_t Mask, 
+                                     unsigned Depth) const {
+  // The masks are not wide enough to represent this type!  Should use APInt.
+  if (Op.getValueType() == MVT::i128)
+    return false;
+  
+  uint64_t KnownZero, KnownOne;
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+  return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets.  This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDOperand Op, uint64_t Mask, 
+                                     uint64_t &KnownZero, uint64_t &KnownOne,
+                                     unsigned Depth) const {
+  KnownZero = KnownOne = 0;   // Don't know anything.
+  if (Depth == 6 || Mask == 0)
+    return;  // Limit search depth.
+  
+  // The masks are not wide enough to represent this type!  Should use APInt.
+  if (Op.getValueType() == MVT::i128)
+    return;
+  
+  uint64_t KnownZero2, KnownOne2;
+
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask;
+    KnownZero = ~KnownOne & Mask;
+    return;
+  case ISD::AND:
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    Mask &= ~KnownZero;
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    return;
+  case ISD::OR:
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    Mask &= ~KnownOne;
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    return;
+  case ISD::XOR: {
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 are known to be set if set in only one of the LHS, RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::SELECT:
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SELECT_CC:
+    ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
+      KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
+    return;
+  case ISD::SHL:
+    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero <<= SA->getValue();
+      KnownOne  <<= SA->getValue();
+      KnownZero |= (1ULL << SA->getValue())-1;  // low bits known zero.
+    }
+    return;
+  case ISD::SRL:
+    // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+      ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask,
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= MVT::getSizeInBits(VT)-ShAmt;
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    return;
+  case ISD::SRA:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+
+      // Compute the new bits that are at the top now.
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+
+      uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask;
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
+      if (HighBits & Mask)
+        InDemandedMask |= MVT::getIntVTSignBit(VT);
+      
+      ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+      
+      // Handle the sign bits.
+      uint64_t SignBit = MVT::getIntVTSignBit(VT);
+      SignBit >>= ShAmt;  // Adjust to where it is now in the mask.
+      
+      if (KnownZero & SignBit) {       
+        KnownZero |= HighBits;  // New bits are known zero.
+      } else if (KnownOne & SignBit) {
+        KnownOne  |= HighBits;  // New bits are known one.
+      }
+    }
+    return;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    
+    // Sign extension.  Compute the demanded bits in the result that are not 
+    // present in the input.
+    uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask;
+
+    uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
+    int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT);
+    
+    // If the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    if (NewBits)
+      InputDemandedBits |= InSignBit;
+    
+    ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+                      KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    if (KnownZero & InSignBit) {          // Input sign bit known clear
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne & InSignBit) {    // Input sign bit known set
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                              // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne  &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP: {
+    MVT::ValueType VT = Op.getValueType();
+    unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
+    KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
+    KnownOne  = 0;
+    return;
+  }
+  case ISD::LOAD: {
+    if (ISD::isZEXTLoad(Op.Val)) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op);
+      MVT::ValueType VT = LD->getLoadedVT();
+      KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask;
+    }
+    return;
+  }
+  case ISD::ZERO_EXTEND: {
+    uint64_t InMask  = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+    uint64_t NewBits = (~InMask) & Mask;
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 
+                      KnownOne, Depth+1);
+    KnownZero |= NewBits & Mask;
+    KnownOne  &= ~NewBits;
+    return;
+  }
+  case ISD::SIGN_EXTEND: {
+    MVT::ValueType InVT = Op.getOperand(0).getValueType();
+    unsigned InBits    = MVT::getSizeInBits(InVT);
+    uint64_t InMask    = MVT::getIntVTBitMask(InVT);
+    uint64_t InSignBit = 1ULL << (InBits-1);
+    uint64_t NewBits   = (~InMask) & Mask;
+    uint64_t InDemandedBits = Mask & InMask;
+
+    // If any of the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    if (NewBits & Mask)
+      InDemandedBits |= InSignBit;
+    
+    ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, 
+                      KnownOne, Depth+1);
+    // If the sign bit is known zero or one, the top bits match.
+    if (KnownZero & InSignBit) {
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne & InSignBit) {
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {   // Otherwise, top bits aren't known.
+      KnownOne  &= ~NewBits;
+      KnownZero &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::ANY_EXTEND: {
+    MVT::ValueType VT = Op.getOperand(0).getValueType();
+    ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT),
+                      KnownZero, KnownOne, Depth+1);
+    return;
+  }
+  case ISD::TRUNCATE: {
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+    KnownZero &= OutMask;
+    KnownOne &= OutMask;
+    break;
+  }
+  case ISD::AssertZext: {
+    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    uint64_t InMask = MVT::getIntVTBitMask(VT);
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 
+                      KnownOne, Depth+1);
+    KnownZero |= (~InMask) & Mask;
+    return;
+  }
+  case ISD::ADD: {
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // The low bits known to be clear in both the LHS and the RHS remain clear
+    // in the sum.  For example, 8+(X<<3) is known to have the low 3 bits
+    // clear.
+    uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), 
+                                     CountTrailingZeros_64(~KnownZero2));
+    
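+    // CountTrailingZeros_64(~KnownZero) is the length of the run of low bits
+    // known to be zero in each operand; an add preserves at least the shorter
+    // of the two runs.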
+    KnownZero = (1ULL << KnownZeroOut) - 1;
+    KnownOne = 0;
+    return;
+  }
+  case ISD::SUB: {
+    ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+    if (!CLHS) return;
+
+    // We know that the top bits of C-X are clear if X contains fewer bits
+    // than C (i.e. no wrap-around can happen).  For example, 20-X is
+    // positive if we can prove that X is >= 0 and < 16.
+    MVT::ValueType VT = CLHS->getValueType(0);
+    if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) {  // sign bit clear
+      unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1);
+      uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit
+      MaskV = ~MaskV & MVT::getIntVTBitMask(VT);
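+      // MaskV now covers the bits at and above the highest set bit of C+1; if
+      // X is known zero in all of them, then X <= C and C-X cannot wrap
+      // around zero.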
+      ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);
+
+      // If all of the MaskV bits are known to be zero, then we know the output
+      // top bits are zero, because we now know that the output is from [0-C].
+      if ((KnownZero & MaskV) == MaskV) {
+        unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue());
+        KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask;  // Top bits known zero.
+        KnownOne = 0;   // No one bits known.
+      } else {
+        KnownZero = KnownOne = 0;  // Otherwise, nothing known.
+      }
+    }
+    return;
+  }
+  default:
+    // Allow the target to implement this method for its nodes.
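+    // Note that the INTRINSIC_* case labels below jump directly into the body
+    // of this 'if', so intrinsic nodes also reach the target hook even though
+    // their opcodes are below BUILTIN_OP_END.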
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_VOID:
+      TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this);
+    }
+    return;
+  }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits.  We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information.  For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{
+  MVT::ValueType VT = Op.getValueType();
+  assert(MVT::isInteger(VT) && "Invalid VT!");
+  unsigned VTBits = MVT::getSizeInBits(VT);
+  unsigned Tmp, Tmp2;
+  
+  if (Depth == 6)
+    return 1;  // Limit search depth.
+
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::AssertSext:
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    return VTBits-Tmp+1;
+  case ISD::AssertZext:
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    return VTBits-Tmp;
+    
+  case ISD::Constant: {
+    uint64_t Val = cast<ConstantSDNode>(Op)->getValue();
+    // If negative, invert the bits, then look at it.
+    if (Val & MVT::getIntVTSignBit(VT))
+      Val = ~Val;
+    
+    // Shift the bits so they are the leading bits in the int64_t.
+    Val <<= 64-VTBits;
+    
+    // Return # leading zeros.  We use 'min' here in case Val was zero before
+    // shifting.  We don't want to return '64' as for an i32 "0".
+    return std::min(VTBits, CountLeadingZeros_64(Val));
+  }
+    
+  case ISD::SIGN_EXTEND:
+    Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType());
+    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+    
+  case ISD::SIGN_EXTEND_INREG:
+    // Max of the input and what this extends.
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    Tmp = VTBits-Tmp+1;
+    
+    Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    return std::max(Tmp, Tmp2);
+
+  case ISD::SRA:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    // SRA X, C   -> adds C sign bits.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      Tmp += C->getValue();
+      if (Tmp > VTBits) Tmp = VTBits;
+    }
+    return Tmp;
+  case ISD::SHL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      // shl destroys sign bits.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (C->getValue() >= VTBits ||      // Bad shift.
+          C->getValue() >= Tmp) break;    // Shifted all sign bits out.
+      return Tmp - C->getValue();
+    }
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:    // NOT is handled here.
+    // Logical binary ops preserve the number of sign bits.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    return std::min(Tmp, Tmp2);
+
+  case ISD::SELECT:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    return std::min(Tmp, Tmp2);
+    
+  case ISD::SETCC:
+    // If setcc returns 0/-1, all bits are sign bits.
+    if (TLI.getSetCCResultContents() ==
+        TargetLowering::ZeroOrNegativeOneSetCCResult)
+      return VTBits;
+    break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned RotAmt = C->getValue() & (VTBits-1);
+      
+      // Handle rotate right by N like a rotate left by 32-N.
+      if (Op.getOpcode() == ISD::ROTR)
+        RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+      // If we aren't rotating out all of the known-in sign bits, return the
+      // number that are left.  This handles rotl(sext(x), 1) for example.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (Tmp > RotAmt+1) return Tmp-RotAmt;
+    }
+    break;
+  case ISD::ADD:
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+      
+    // Special case decrementing a value (ADD X, -1):
+    if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+      if (CRHS->isAllOnesValue()) {
+        uint64_t KnownZero, KnownOne;
+        uint64_t Mask = MVT::getIntVTBitMask(VT);
+        ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+        
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero|1) == Mask)
+          return VTBits;
+        
+        // If we are subtracting one from a positive number, there is no carry
+        // out of the result.
+        if (KnownZero & MVT::getIntVTSignBit(VT))
+          return Tmp;
+      }
+      
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+    return std::min(Tmp, Tmp2)-1;
+    break;
+    
+  case ISD::SUB:
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+      
+    // Handle NEG.
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+      if (CLHS->getValue() == 0) {
+        uint64_t KnownZero, KnownOne;
+        uint64_t Mask = MVT::getIntVTBitMask(VT);
+        ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero|1) == Mask)
+          return VTBits;
+        
+        // If the input is known to be positive (the sign bit is known clear),
+        // the output of the NEG has the same number of sign bits as the input.
+        if (KnownZero & MVT::getIntVTSignBit(VT))
+          return Tmp2;
+        
+        // Otherwise, we treat this like a SUB.
+      }
+    
+    // Sub can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    return std::min(Tmp, Tmp2)-1;
+    break;
+  case ISD::TRUNCATE:
+    // FIXME: it's tricky to do anything useful for this, but it is an important
+    // case for targets like X86.
+    break;
+  }
+  
+  // Handle LOADX separately here. EXTLOAD case will fallthrough.
+  if (Op.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LD = cast<LoadSDNode>(Op);
+    unsigned ExtType = LD->getExtensionType();
+    switch (ExtType) {
+    default: break;
+    case ISD::SEXTLOAD:    // '17' bits known
+      Tmp = MVT::getSizeInBits(LD->getLoadedVT());
+      return VTBits-Tmp+1;
+    case ISD::ZEXTLOAD:    // '16' bits known
+      Tmp = MVT::getSizeInBits(LD->getLoadedVT());
+      return VTBits-Tmp;
+    }
+  }
+
+  // Allow the target to implement this method for its nodes.
+  if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 
+      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+      Op.getOpcode() == ISD::INTRINSIC_VOID) {
+    unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+    if (NumBits > 1) return NumBits;
+  }
+  
+  // Finally, if we can prove that the top bits of the result are 0's or 1's,
+  // use this information.
+  uint64_t KnownZero, KnownOne;
+  uint64_t Mask = MVT::getIntVTBitMask(VT);
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+  
+  uint64_t SignBit = MVT::getIntVTSignBit(VT);
+  if (KnownZero & SignBit) {        // SignBit is 0
+    Mask = KnownZero;
+  } else if (KnownOne & SignBit) {  // SignBit is 1;
+    Mask = KnownOne;
+  } else {
+    // Nothing known.
+    return 1;
+  }
+  
+  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
+  // the number of identical bits in the top of the input value.
+  Mask ^= ~0ULL;
+  Mask <<= 64-VTBits;
+  // Return # leading zeros.  We use 'min' here in case Mask was zero before
+  // shifting.  We don't want to return '64' as for an i32 "0".
+  return std::min(VTBits, CountLeadingZeros_64(Mask));
+}
+
+
+/// getNode - Gets or creates the specified node.
+///
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new SDNode(Opcode, SDNode::getSDVTList(VT));
+  CSEMap.InsertNode(N, IP);
+  
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand Operand) {
+  unsigned Tmp1;
+  // Constant fold unary operations with an integer constant operand.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.Val)) {
+    uint64_t Val = C->getValue();
+    switch (Opcode) {
+    default: break;
+    case ISD::SIGN_EXTEND: return getConstant(C->getSignExtended(), VT);
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND: return getConstant(Val, VT);
+    case ISD::TRUNCATE:    return getConstant(Val, VT);
+    case ISD::SINT_TO_FP:  return getConstantFP(C->getSignExtended(), VT);
+    case ISD::UINT_TO_FP:  return getConstantFP(C->getValue(), VT);
+    case ISD::BIT_CONVERT:
+      if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+        return getConstantFP(BitsToFloat(Val), VT);
+      else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+        return getConstantFP(BitsToDouble(Val), VT);
+      break;
+    case ISD::BSWAP:
+      switch(VT) {
+      default: assert(0 && "Invalid bswap!"); break;
+      case MVT::i16: return getConstant(ByteSwap_16((unsigned short)Val), VT);
+      case MVT::i32: return getConstant(ByteSwap_32((unsigned)Val), VT);
+      case MVT::i64: return getConstant(ByteSwap_64(Val), VT);
+      }
+      break;
+    case ISD::CTPOP:
+      switch(VT) {
+      default: assert(0 && "Invalid ctpop!"); break;
+      case MVT::i1: return getConstant(Val != 0, VT);
+      case MVT::i8: 
+        Tmp1 = (unsigned)Val & 0xFF;
+        return getConstant(CountPopulation_32(Tmp1), VT);
+      case MVT::i16:
+        Tmp1 = (unsigned)Val & 0xFFFF;
+        return getConstant(CountPopulation_32(Tmp1), VT);
+      case MVT::i32:
+        return getConstant(CountPopulation_32((unsigned)Val), VT);
+      case MVT::i64:
+        return getConstant(CountPopulation_64(Val), VT);
+      }
+    case ISD::CTLZ:
+      switch(VT) {
+      default: assert(0 && "Invalid ctlz!"); break;
+      case MVT::i1: return getConstant(Val == 0, VT);
+      case MVT::i8: 
+        Tmp1 = (unsigned)Val & 0xFF;
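+        // CountLeadingZeros_32 counts within a 32-bit word; the top 24 bits
+        // never belong to an i8 value, so subtract them off.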
+        return getConstant(CountLeadingZeros_32(Tmp1)-24, VT);
+      case MVT::i16:
+        Tmp1 = (unsigned)Val & 0xFFFF;
+        return getConstant(CountLeadingZeros_32(Tmp1)-16, VT);
+      case MVT::i32:
+        return getConstant(CountLeadingZeros_32((unsigned)Val), VT);
+      case MVT::i64:
+        return getConstant(CountLeadingZeros_64(Val), VT);
+      }
+    case ISD::CTTZ:
+      switch(VT) {
+      default: assert(0 && "Invalid cttz!"); break;
+      case MVT::i1: return getConstant(Val == 0, VT);
+      case MVT::i8: 
+        Tmp1 = (unsigned)Val | 0x100;
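+        // Bit 8 acts as a sentinel so an all-zero i8 value reports 8 trailing
+        // zeros rather than 32.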
+        return getConstant(CountTrailingZeros_32(Tmp1), VT);
+      case MVT::i16:
+        Tmp1 = (unsigned)Val | 0x10000;
+        return getConstant(CountTrailingZeros_32(Tmp1), VT);
+      case MVT::i32:
+        return getConstant(CountTrailingZeros_32((unsigned)Val), VT);
+      case MVT::i64:
+        return getConstant(CountTrailingZeros_64(Val), VT);
+      }
+    }
+  }
+
+  // Constant fold unary operations with a floating point constant operand.
+  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.Val))
+    switch (Opcode) {
+    case ISD::FNEG:
+      return getConstantFP(-C->getValue(), VT);
+    case ISD::FABS:
+      return getConstantFP(fabs(C->getValue()), VT);
+    case ISD::FP_ROUND:
+    case ISD::FP_EXTEND:
+      return getConstantFP(C->getValue(), VT);
+    case ISD::FP_TO_SINT:
+      return getConstant((int64_t)C->getValue(), VT);
+    case ISD::FP_TO_UINT:
+      return getConstant((uint64_t)C->getValue(), VT);
+    case ISD::BIT_CONVERT:
+      if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+        return getConstant(FloatToBits(C->getValue()), VT);
+      else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+        return getConstant(DoubleToBits(C->getValue()), VT);
+      break;
+    }
+
+  unsigned OpOpcode = Operand.Val->getOpcode();
+  switch (Opcode) {
+  case ISD::TokenFactor:
+    return Operand;         // Factor of one node?  No factor.
+  case ISD::FP_ROUND:
+  case ISD::FP_EXTEND:
+    assert(MVT::isFloatingPoint(VT) &&
+           MVT::isFloatingPoint(Operand.getValueType()) && "Invalid FP cast!");
+    break;
+  case ISD::SIGN_EXTEND:
+    assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+           "Invalid SIGN_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType() < VT && "Invalid sext node, dst < src!");
+    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+      return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+    break;
+  case ISD::ZERO_EXTEND:
+    assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+           "Invalid ZERO_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType() < VT && "Invalid zext node, dst < src!");
+    if (OpOpcode == ISD::ZERO_EXTEND)   // (zext (zext x)) -> (zext x)
+      return getNode(ISD::ZERO_EXTEND, VT, Operand.Val->getOperand(0));
+    break;
+  case ISD::ANY_EXTEND:
+    assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+           "Invalid ANY_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType() < VT && "Invalid anyext node, dst < src!");
+    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+      // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
+      return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+    break;
+  case ISD::TRUNCATE:
+    assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+           "Invalid TRUNCATE!");
+    if (Operand.getValueType() == VT) return Operand;   // noop truncate
+    assert(Operand.getValueType() > VT && "Invalid truncate node, src < dst!");
+    if (OpOpcode == ISD::TRUNCATE)
+      return getNode(ISD::TRUNCATE, VT, Operand.Val->getOperand(0));
+    else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+             OpOpcode == ISD::ANY_EXTEND) {
+      // If the source is smaller than the dest, we still need an extend.
+      if (Operand.Val->getOperand(0).getValueType() < VT)
+        return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+      else if (Operand.Val->getOperand(0).getValueType() > VT)
+        return getNode(ISD::TRUNCATE, VT, Operand.Val->getOperand(0));
+      else
+        return Operand.Val->getOperand(0);
+    }
+    break;
+  case ISD::BIT_CONVERT:
+    // Basic sanity checking.
+    assert(MVT::getSizeInBits(VT) == MVT::getSizeInBits(Operand.getValueType())
+           && "Cannot BIT_CONVERT between types of different sizes!");
+    if (VT == Operand.getValueType()) return Operand;  // noop conversion.
+    if (OpOpcode == ISD::BIT_CONVERT)  // bitconv(bitconv(x)) -> bitconv(x)
+      return getNode(ISD::BIT_CONVERT, VT, Operand.getOperand(0));
+    if (OpOpcode == ISD::UNDEF)
+      return getNode(ISD::UNDEF, VT);
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    assert(MVT::isVector(VT) && !MVT::isVector(Operand.getValueType()) &&
+           MVT::getVectorElementType(VT) == Operand.getValueType() &&
+           "Illegal SCALAR_TO_VECTOR node!");
+    break;
+  case ISD::FNEG:
+    if (OpOpcode == ISD::FSUB)   // -(X-Y) -> (Y-X)
+      return getNode(ISD::FSUB, VT, Operand.Val->getOperand(1),
+                     Operand.Val->getOperand(0));
+    if (OpOpcode == ISD::FNEG)  // --X -> X
+      return Operand.Val->getOperand(0);
+    break;
+  case ISD::FABS:
+    if (OpOpcode == ISD::FNEG)  // abs(-X) -> abs(X)
+      return getNode(ISD::FABS, VT, Operand.Val->getOperand(0));
+    break;
+  }
+
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+    FoldingSetNodeID ID;
+    SDOperand Ops[1] = { Operand };
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    N = new UnarySDNode(Opcode, VTs, Operand);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new UnarySDNode(Opcode, VTs, Operand);
+  }
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand N1, SDOperand N2) {
+#ifndef NDEBUG
+  switch (Opcode) {
+  case ISD::TokenFactor:
+    assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+           N2.getValueType() == MVT::Other && "Invalid token factor!");
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::MULHU:
+  case ISD::MULHS:
+    assert(MVT::isInteger(VT) && "This operator does not apply to FP types!");
+    // fall through
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::SREM:
+    assert(MVT::isInteger(N1.getValueType()) && "Should use F* for FP ops");
+    // fall through.
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
+  case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
+    assert(N1.getValueType() == VT &&
+           MVT::isFloatingPoint(N1.getValueType()) && 
+           MVT::isFloatingPoint(N2.getValueType()) &&
+           "Invalid FCOPYSIGN!");
+    break;
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:
+    assert(VT == N1.getValueType() &&
+           "Shift operators return type must be the same as their first arg");
+    assert(MVT::isInteger(VT) && MVT::isInteger(N2.getValueType()) &&
+           VT != MVT::i1 && "Shifts only work on integers");
+    break;
+  case ISD::FP_ROUND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg round!");
+    assert(MVT::isFloatingPoint(VT) && MVT::isFloatingPoint(EVT) &&
+           "Cannot FP_ROUND_INREG integer types");
+    assert(EVT <= VT && "Not rounding down!");
+    break;
+  }
+  case ISD::AssertSext:
+  case ISD::AssertZext:
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(MVT::isInteger(VT) && MVT::isInteger(EVT) &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(EVT <= VT && "Not extending!");
+  }
+
+  default: break;
+  }
+#endif
+
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+  if (N1C) {
+    if (Opcode == ISD::SIGN_EXTEND_INREG) {
+      int64_t Val = N1C->getValue();
+      unsigned FromBits = MVT::getSizeInBits(cast<VTSDNode>(N2)->getVT());
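+      // Shift the narrow value to the top of the i64 and arithmetic-shift it
+      // back down, replicating its sign bit through the upper bits.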
+      Val <<= 64-FromBits;
+      Val >>= 64-FromBits;
+      return getConstant(Val, VT);
+    }
+    
+    if (N2C) {
+      uint64_t C1 = N1C->getValue(), C2 = N2C->getValue();
+      switch (Opcode) {
+      case ISD::ADD: return getConstant(C1 + C2, VT);
+      case ISD::SUB: return getConstant(C1 - C2, VT);
+      case ISD::MUL: return getConstant(C1 * C2, VT);
+      case ISD::UDIV:
+        if (C2) return getConstant(C1 / C2, VT);
+        break;
+      case ISD::UREM :
+        if (C2) return getConstant(C1 % C2, VT);
+        break;
+      case ISD::SDIV :
+        if (C2) return getConstant(N1C->getSignExtended() /
+                                   N2C->getSignExtended(), VT);
+        break;
+      case ISD::SREM :
+        if (C2) return getConstant(N1C->getSignExtended() %
+                                   N2C->getSignExtended(), VT);
+        break;
+      case ISD::AND  : return getConstant(C1 & C2, VT);
+      case ISD::OR   : return getConstant(C1 | C2, VT);
+      case ISD::XOR  : return getConstant(C1 ^ C2, VT);
+      case ISD::SHL  : return getConstant(C1 << C2, VT);
+      case ISD::SRL  : return getConstant(C1 >> C2, VT);
+      case ISD::SRA  : return getConstant(N1C->getSignExtended() >>(int)C2, VT);
+      case ISD::ROTL : 
+        return getConstant((C1 << C2) | (C1 >> (MVT::getSizeInBits(VT) - C2)),
+                           VT);
+      case ISD::ROTR : 
+        return getConstant((C1 >> C2) | (C1 << (MVT::getSizeInBits(VT) - C2)), 
+                           VT);
+      default: break;
+      }
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1C, N2C);
+        std::swap(N1, N2);
+      }
+    }
+  }
+
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.Val);
+  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.Val);
+  if (N1CFP) {
+    if (N2CFP) {
+      double C1 = N1CFP->getValue(), C2 = N2CFP->getValue();
+      switch (Opcode) {
+      case ISD::FADD: return getConstantFP(C1 + C2, VT);
+      case ISD::FSUB: return getConstantFP(C1 - C2, VT);
+      case ISD::FMUL: return getConstantFP(C1 * C2, VT);
+      case ISD::FDIV:
+        if (C2) return getConstantFP(C1 / C2, VT);
+        break;
+      case ISD::FREM :
+        if (C2) return getConstantFP(fmod(C1, C2), VT);
+        break;
+      case ISD::FCOPYSIGN: {
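+        // Perform copysign at the bit level: the union exposes the IEEE bits
+        // of C1 so its sign bit can be replaced with the sign bit of C2.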
+        union {
+          double   F;
+          uint64_t I;
+        } u1;
+        u1.F = C1;
+        if (int64_t(DoubleToBits(C2)) < 0)  // Sign bit of RHS set?
+          u1.I |= 1ULL << 63;      // Set the sign bit of the LHS.
+        else 
+          u1.I &= (1ULL << 63)-1;  // Clear the sign bit of the LHS.
+        return getConstantFP(u1.F, VT);
+      }
+      default: break;
+      }
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1CFP, N2CFP);
+        std::swap(N1, N2);
+      }
+    }
+  }
+  
+  // Canonicalize an UNDEF to the RHS, even over a constant.
+  if (N1.getOpcode() == ISD::UNDEF) {
+    if (isCommutativeBinOp(Opcode)) {
+      std::swap(N1, N2);
+    } else {
+      switch (Opcode) {
+      case ISD::FP_ROUND_INREG:
+      case ISD::SIGN_EXTEND_INREG:
+      case ISD::SUB:
+      case ISD::FSUB:
+      case ISD::FDIV:
+      case ISD::FREM:
+      case ISD::SRA:
+        return N1;     // fold op(undef, arg2) -> undef
+      case ISD::UDIV:
+      case ISD::SDIV:
+      case ISD::UREM:
+      case ISD::SREM:
+      case ISD::SRL:
+      case ISD::SHL:
+        if (!MVT::isVector(VT)) 
+          return getConstant(0, VT);    // fold op(undef, arg2) -> 0
+        // For vectors, we can't easily build an all zero vector, just return
+        // the LHS.
+        return N2;
+      }
+    }
+  }
+  
+  // Fold a bunch of operators when the RHS is undef. 
+  if (N2.getOpcode() == ISD::UNDEF) {
+    switch (Opcode) {
+    case ISD::ADD:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::SUB:
+    case ISD::FADD:
+    case ISD::FSUB:
+    case ISD::FMUL:
+    case ISD::FDIV:
+    case ISD::FREM:
+    case ISD::UDIV:
+    case ISD::SDIV:
+    case ISD::UREM:
+    case ISD::SREM:
+    case ISD::XOR:
+      return N2;       // fold op(arg1, undef) -> undef
+    case ISD::MUL: 
+    case ISD::AND:
+    case ISD::SRL:
+    case ISD::SHL:
+      if (!MVT::isVector(VT)) 
+        return getConstant(0, VT);  // fold op(arg1, undef) -> 0
+      // For vectors, we can't easily build an all zero vector, just return
+      // the LHS.
+      return N1;
+    case ISD::OR:
+      if (!MVT::isVector(VT)) 
+        return getConstant(MVT::getIntVTBitMask(VT), VT);
+      // For vectors, we can't easily build an all one vector, just return
+      // the LHS.
+      return N1;
+    case ISD::SRA:
+      return N1;
+    }
+  }
+
+  // Fold operations.
+  switch (Opcode) {
+  case ISD::TokenFactor:
+    // Fold trivial token factors.
+    if (N1.getOpcode() == ISD::EntryToken) return N2;
+    if (N2.getOpcode() == ISD::EntryToken) return N1;
+    break;
+      
+  case ISD::AND:
+    // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's
+    // worth handling here.
+    if (N2C && N2C->getValue() == 0)
+      return N2;
+    break;
+  case ISD::OR:
+  case ISD::XOR:
+    // (X ^| 0) -> X.  This commonly occurs when legalizing i64 values, so it's
+    // worth handling here.
+    if (N2C && N2C->getValue() == 0)
+      return N1;
+    break;
+  case ISD::FP_ROUND_INREG:
+    if (cast<VTSDNode>(N2)->getVT() == VT) return N1;  // Not actually rounding.
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+    if (EVT == VT) return N1;  // Not actually extending
+    break;
+  }
+  case ISD::EXTRACT_VECTOR_ELT:
+    assert(N2C && "Bad EXTRACT_VECTOR_ELT!");
+
+    // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+    // expanding copies of large vectors from registers.
+    if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+        N1.getNumOperands() > 0) {
+      unsigned Factor =
+        MVT::getVectorNumElements(N1.getOperand(0).getValueType());
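+      // Each concatenated operand supplies 'Factor' elements, so extract from
+      // the operand holding the requested element, re-indexed within it.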
+      return getNode(ISD::EXTRACT_VECTOR_ELT, VT,
+                     N1.getOperand(N2C->getValue() / Factor),
+                     getConstant(N2C->getValue() % Factor, N2.getValueType()));
+    }
+
+    // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+    // expanding large vector constants.
+    if (N1.getOpcode() == ISD::BUILD_VECTOR)
+      return N1.getOperand(N2C->getValue());
+
+    // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+    // operations are lowered to scalars.
+    if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT)
+      if (ConstantSDNode *IEC = dyn_cast<ConstantSDNode>(N1.getOperand(2))) {
+        if (IEC == N2C)
+          return N1.getOperand(1);
+        else
+          return getNode(ISD::EXTRACT_VECTOR_ELT, VT, N1.getOperand(0), N2);
+      }
+    break;
+  case ISD::EXTRACT_ELEMENT:
+    assert(N2C && (unsigned)N2C->getValue() < 2 && "Bad EXTRACT_ELEMENT!");
+    
+    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+    // 64-bit integers into 32-bit parts.  Instead of building the extract of
+    // the BUILD_PAIR, only to have legalize rip it apart, just do it now. 
+    if (N1.getOpcode() == ISD::BUILD_PAIR)
+      return N1.getOperand(N2C->getValue());
+    
+    // EXTRACT_ELEMENT of a constant int is also very common.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      unsigned Shift = MVT::getSizeInBits(VT) * N2C->getValue();
+      return getConstant(C->getValue() >> Shift, VT);
+    }
+    break;
+
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+#if 0
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+    if (N2.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N2.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, VT, N1, N2.getOperand(0));
+    else if (N2.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N2.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = MVT::getSizeInBits(VT);
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, VT, N1, N2.getOperand(0));
+      }
+    break;
+#endif
+  }
+
+  // Memoize this node if possible.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDOperand Ops[] = { N1, N2 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    N = new BinarySDNode(Opcode, VTs, N1, N2);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new BinarySDNode(Opcode, VTs, N1, N2);
+  }
+
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand N1, SDOperand N2, SDOperand N3) {
+  // Perform various simplifications.
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+  switch (Opcode) {
+  case ISD::SETCC: {
+    // Use FoldSetCC to simplify SETCC's.
+    SDOperand Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get());
+    if (Simp.Val) return Simp;
+    break;
+  }
+  case ISD::SELECT:
+    if (N1C)
+      if (N1C->getValue())
+        return N2;             // select true, X, Y -> X
+      else
+        return N3;             // select false, X, Y -> Y
+
+    if (N2 == N3) return N2;   // select C, X, X -> X
+    break;
+  case ISD::BRCOND:
+    if (N2C)
+      if (N2C->getValue()) // Unconditional branch
+        return getNode(ISD::BR, MVT::Other, N1, N3);
+      else
+        return N1;         // Never-taken branch
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+           MVT::isVector(VT) && MVT::isVector(N3.getValueType()) &&
+           N3.getOpcode() == ISD::BUILD_VECTOR &&
+           MVT::getVectorNumElements(VT) == N3.getNumOperands() &&
+           "Illegal VECTOR_SHUFFLE node!");
+    break;
+  case ISD::BIT_CONVERT:
+    // Fold bit_convert nodes from a type to themselves.
+    if (N1.getValueType() == VT)
+      return N1;
+    break;
+  }
+
+  // Memoize node if it doesn't produce a flag.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDOperand Ops[] = { N1, N2, N3 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    N = new TernarySDNode(Opcode, VTs, N1, N2, N3);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new TernarySDNode(Opcode, VTs, N1, N2, N3);
+  }
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand N1, SDOperand N2, SDOperand N3,
+                                SDOperand N4) {
+  SDOperand Ops[] = { N1, N2, N3, N4 };
+  return getNode(Opcode, VT, Ops, 4);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand N1, SDOperand N2, SDOperand N3,
+                                SDOperand N4, SDOperand N5) {
+  SDOperand Ops[] = { N1, N2, N3, N4, N5 };
+  return getNode(Opcode, VT, Ops, 5);
+}
+
+SDOperand SelectionDAG::getLoad(MVT::ValueType VT,
+                                SDOperand Chain, SDOperand Ptr,
+                                const Value *SV, int SVOffset,
+                                bool isVolatile, unsigned Alignment) {
+  if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
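+      // MVT::iPTR has no fixed IR type, so recover the pointee type from the
+      // source Value and use its ABI alignment instead.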
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for load must be a pointer");
+      Ty = PT->getElementType();
+    }  
+    assert(Ty && "Could not get type information for load");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(ISD::NON_EXTLOAD);
+  ID.AddInteger(VT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new LoadSDNode(Ops, VTs, ISD::UNINDEXED,
+                             ISD::NON_EXTLOAD, VT, SV, SVOffset, Alignment,
+                             isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, MVT::ValueType VT,
+                                   SDOperand Chain, SDOperand Ptr,
+                                   const Value *SV,
+                                   int SVOffset, MVT::ValueType EVT,
+                                   bool isVolatile, unsigned Alignment) {
+  // If the requested extension loads to the same type it loads from, there is
+  // nothing to extend; return a normal load.
+  if (VT == EVT)
+    ExtType = ISD::NON_EXTLOAD;
+
+  if (MVT::isVector(VT))
+    assert(EVT == MVT::getVectorElementType(VT) && "Invalid vector extload!");
+  else
+    assert(EVT < VT && "Should only be an extending load, not truncating!");
+  assert((ExtType == ISD::EXTLOAD || MVT::isInteger(VT)) &&
+         "Cannot sign/zero extend a FP/Vector load!");
+  assert(MVT::isInteger(VT) == MVT::isInteger(EVT) &&
+         "Cannot convert from FP to Int or Int -> FP!");
+
+  if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for load must be a pointer");
+      Ty = PT->getElementType();
+    }  
+    assert(Ty && "Could not get type information for load");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(ExtType);
+  ID.AddInteger(EVT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new LoadSDNode(Ops, VTs, ISD::UNINDEXED, ExtType, EVT,
+                             SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand
+SelectionDAG::getIndexedLoad(SDOperand OrigLoad, SDOperand Base,
+                             SDOperand Offset, ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already a indexed load!");
+  MVT::ValueType VT = OrigLoad.getValueType();
+  SDVTList VTs = getVTList(VT, Base.getValueType(), MVT::Other);
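+  // An indexed load produces an extra result of pointer type that carries the
+  // updated base address, in addition to the loaded value and the chain.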
+  SDOperand Ops[] = { LD->getChain(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(AM);
+  ID.AddInteger(LD->getExtensionType());
+  ID.AddInteger(LD->getLoadedVT());
+  ID.AddPointer(LD->getSrcValue());
+  ID.AddInteger(LD->getSrcValueOffset());
+  ID.AddInteger(LD->getAlignment());
+  ID.AddInteger(LD->isVolatile());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new LoadSDNode(Ops, VTs, AM,
+                             LD->getExtensionType(), LD->getLoadedVT(),
+                             LD->getSrcValue(), LD->getSrcValueOffset(),
+                             LD->getAlignment(), LD->isVolatile());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
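+
+// Sketch of a typical caller: a combiner that has matched a plain load plus a
+// separate address increment (hypothetical LD, Base and Off) can fuse them
+// into one pre-incremented load.  The new node has three results: the loaded
+// value, the updated base address, and the chain:
+//
+//   SDOperand PreInc = DAG.getIndexedLoad(SDOperand(LD, 0), Base, Off,
+//                                         ISD::PRE_INC);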
+
+SDOperand SelectionDAG::getStore(SDOperand Chain, SDOperand Val,
+                                 SDOperand Ptr, const Value *SV, int SVOffset,
+                                 bool isVolatile, unsigned Alignment) {
+  MVT::ValueType VT = Val.getValueType();
+
+  if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for store must be a pointer");
+      Ty = PT->getElementType();
+    }
+    assert(Ty && "Could not get type information for store");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(false);
+  ID.AddInteger(VT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, ISD::UNINDEXED, false,
+                              VT, SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTruncStore(SDOperand Chain, SDOperand Val,
+                                      SDOperand Ptr, const Value *SV,
+                                      int SVOffset, MVT::ValueType SVT,
+                                      bool isVolatile, unsigned Alignment) {
+  MVT::ValueType VT = Val.getValueType();
+  bool isTrunc = VT != SVT;
+
+  assert(VT > SVT && "Not a truncation?");
+  assert(MVT::isInteger(VT) == MVT::isInteger(SVT) &&
+         "Can't do FP-INT conversion!");
+
+  if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for store must be a pointer");
+      Ty = PT->getElementType();
+    }
+    assert(Ty && "Could not get type information for store");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(isTrunc);
+  ID.AddInteger(SVT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, ISD::UNINDEXED, isTrunc,
+                              SVT, SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
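+
+// Illustrative sketch (hypothetical Chain/Val/Ptr/SrcVal): getStore writes a
+// value out at its natural type, while getTruncStore narrows it on the way to
+// memory, e.g. storing only the low 8 bits of an i32 value:
+//
+//   SDOperand St  = DAG.getStore(Chain, Val, Ptr, SrcVal, 0);
+//   SDOperand StT = DAG.getTruncStore(Chain, Val, Ptr, SrcVal, 0, MVT::i8);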
+
+SDOperand
+SelectionDAG::getIndexedStore(SDOperand OrigStore, SDOperand Base,
+                              SDOperand Offset, ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already an indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDOperand Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(AM);
+  ID.AddInteger(ST->isTruncatingStore());
+  ID.AddInteger(ST->getStoredVT());
+  ID.AddPointer(ST->getSrcValue());
+  ID.AddInteger(ST->getSrcValueOffset());
+  ID.AddInteger(ST->getAlignment());
+  ID.AddInteger(ST->isVolatile());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, AM,
+                              ST->isTruncatingStore(), ST->getStoredVT(),
+                              ST->getSrcValue(), ST->getSrcValueOffset(),
+                              ST->getAlignment(), ST->isVolatile());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getVAArg(MVT::ValueType VT,
+                                 SDOperand Chain, SDOperand Ptr,
+                                 SDOperand SV) {
+  SDOperand Ops[] = { Chain, Ptr, SV };
+  return getNode(ISD::VAARG, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                const SDOperand *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, VT);
+  case 1: return getNode(Opcode, VT, Ops[0]);
+  case 2: return getNode(Opcode, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+  
+  switch (Opcode) {
+  default: break;
+  case ISD::SELECT_CC: {
+    assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+    assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+           "LHS and RHS of condition must have same type!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "True and False arms of SelectCC must have same type!");
+    assert(Ops[2].getValueType() == VT &&
+           "select_cc node must be of same type as true and false value!");
+    break;
+  }
+  case ISD::BR_CC: {
+    assert(NumOps == 5 && "BR_CC takes 5 operands!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "LHS/RHS of comparison should match types!");
+    break;
+  }
+  }
+
+  // Memoize nodes.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    N = new SDNode(Opcode, VTs, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new SDNode(Opcode, VTs, Ops, NumOps);
+  }
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
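+
+// The memoization above means nodes are uniqued by opcode, value types and
+// operands, except for nodes producing MVT::Flag, which must stay distinct.
+// A hypothetical caller (X and Y are placeholder operands) therefore gets node
+// identity for free:
+//
+//   SDOperand A = DAG.getNode(ISD::ADD, MVT::i32, X, Y);
+//   SDOperand B = DAG.getNode(ISD::ADD, MVT::i32, X, Y);
+//   // A.Val == B.Val -- the second call found the first node in CSEMap.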
+
+SDOperand SelectionDAG::getNode(unsigned Opcode,
+                                std::vector<MVT::ValueType> &ResultTys,
+                                const SDOperand *Ops, unsigned NumOps) {
+  return getNode(Opcode, getNodeValueTypes(ResultTys), ResultTys.size(),
+                 Ops, NumOps);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode,
+                                const MVT::ValueType *VTs, unsigned NumVTs,
+                                const SDOperand *Ops, unsigned NumOps) {
+  if (NumVTs == 1)
+    return getNode(Opcode, VTs[0], Ops, NumOps);
+  return getNode(Opcode, makeVTList(VTs, NumVTs), Ops, NumOps);
+}  
+  
+SDOperand SelectionDAG::getNode(unsigned Opcode, SDVTList VTList,
+                                const SDOperand *Ops, unsigned NumOps) {
+  if (VTList.NumVTs == 1)
+    return getNode(Opcode, VTList.VTs[0], Ops, NumOps);
+
+  switch (Opcode) {
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+#if 0
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+  case ISD::SHL_PARTS:
+    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, VT, N1, N2, N3.getOperand(0));
+    else if (N3.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = MVT::getSizeInBits(VT)*2;
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, VT, N1, N2, N3.getOperand(0));
+      }
+    break;
+#endif
+  }
+
+  // Memoize the node unless it returns a flag.
+  SDNode *N;
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    if (NumOps == 1)
+      N = new UnarySDNode(Opcode, VTList, Ops[0]);
+    else if (NumOps == 2)
+      N = new BinarySDNode(Opcode, VTList, Ops[0], Ops[1]);
+    else if (NumOps == 3)
+      N = new TernarySDNode(Opcode, VTList, Ops[0], Ops[1], Ops[2]);
+    else
+      N = new SDNode(Opcode, VTList, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    if (NumOps == 1)
+      N = new UnarySDNode(Opcode, VTList, Ops[0]);
+    else if (NumOps == 2)
+      N = new BinarySDNode(Opcode, VTList, Ops[0], Ops[1]);
+    else if (NumOps == 3)
+      N = new TernarySDNode(Opcode, VTList, Ops[0], Ops[1], Ops[2]);
+    else
+      N = new SDNode(Opcode, VTList, Ops, NumOps);
+  }
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT) {
+  if (!MVT::isExtendedVT(VT))
+    return makeVTList(SDNode::getValueTypeList(VT), 1);
+
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() == 1 && (*I)[0] == VT)
+      return makeVTList(&(*I)[0], 1);
+  }
+  std::vector<MVT::ValueType> V;
+  V.push_back(VT);
+  VTList.push_front(V);
+  return makeVTList(&(*VTList.begin())[0], 1);
+}
+
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT1, MVT::ValueType VT2) {
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() == 2 && (*I)[0] == VT1 && (*I)[1] == VT2)
+      return makeVTList(&(*I)[0], 2);
+  }
+  std::vector<MVT::ValueType> V;
+  V.push_back(VT1);
+  V.push_back(VT2);
+  VTList.push_front(V);
+  return makeVTList(&(*VTList.begin())[0], 2);
+}
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT1, MVT::ValueType VT2,
+                                 MVT::ValueType VT3) {
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() == 3 && (*I)[0] == VT1 && (*I)[1] == VT2 &&
+        (*I)[2] == VT3)
+      return makeVTList(&(*I)[0], 3);
+  }
+  std::vector<MVT::ValueType> V;
+  V.push_back(VT1);
+  V.push_back(VT2);
+  V.push_back(VT3);
+  VTList.push_front(V);
+  return makeVTList(&(*VTList.begin())[0], 3);
+}
+
+SDVTList SelectionDAG::getVTList(const MVT::ValueType *VTs, unsigned NumVTs) {
+  switch (NumVTs) {
+    case 0: assert(0 && "Cannot have nodes without results!");
+    case 1: return getVTList(VTs[0]);
+    case 2: return getVTList(VTs[0], VTs[1]);
+    case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+    default: break;
+  }
+
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() != NumVTs || VTs[0] != (*I)[0] || VTs[1] != (*I)[1]) continue;
+   
+    bool NoMatch = false;
+    for (unsigned i = 2; i != NumVTs; ++i)
+      if (VTs[i] != (*I)[i]) {
+        NoMatch = true;
+        break;
+      }
+    if (!NoMatch)
+      return makeVTList(&*I->begin(), NumVTs);
+  }
+  
+  VTList.push_front(std::vector<MVT::ValueType>(VTs, VTs+NumVTs));
+  return makeVTList(&*VTList.begin()->begin(), NumVTs);
+}
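+
+// Each distinct combination of value types is cached above, so repeated
+// requests return a pointer into the same stored vector rather than
+// duplicating the array.  A typical request for a value-plus-chain node:
+//
+//   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+//   // Asking for (i32, Other) again yields the same VTs.VTs pointer, keeping
+//   // per-node value-type lists cheap to store and compare.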
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands.  If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists.  If the resultant node does not exist in the DAG, the
+/// input node is returned.  As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand Op) {
+  SDNode *N = InN.Val;
+  assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+  
+  // Check to see if there is no change.
+  if (Op == N->getOperand(0)) return InN;
+  
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+    return SDOperand(Existing, InN.ResNo);
+  
+  // Nope, it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    RemoveNodeFromCSEMaps(N);
+  
+  // Now we update the operands.
+  N->OperandList[0].Val->removeUser(N);
+  Op.Val->addUser(N);
+  N->OperandList[0] = Op;
+  
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand Op1, SDOperand Op2) {
+  SDNode *N = InN.Val;
+  assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+  
+  // Check to see if there is no change.
+  if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+    return InN;   // No operands changed, just return the input node.
+  
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+    return SDOperand(Existing, InN.ResNo);
+  
+  // Nope, it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    RemoveNodeFromCSEMaps(N);
+  
+  // Now we update the operands.
+  if (N->OperandList[0] != Op1) {
+    N->OperandList[0].Val->removeUser(N);
+    Op1.Val->addUser(N);
+    N->OperandList[0] = Op1;
+  }
+  if (N->OperandList[1] != Op2) {
+    N->OperandList[1].Val->removeUser(N);
+    Op2.Val->addUser(N);
+    N->OperandList[1] = Op2;
+  }
+  
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2, SDOperand Op3) {
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2, 
+                   SDOperand Op3, SDOperand Op4) {
+  SDOperand Ops[] = { Op1, Op2, Op3, Op4 };
+  return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2,
+                   SDOperand Op3, SDOperand Op4, SDOperand Op5) {
+  SDOperand Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+  return UpdateNodeOperands(N, Ops, 5);
+}
+
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand *Ops, unsigned NumOps) {
+  SDNode *N = InN.Val;
+  assert(N->getNumOperands() == NumOps &&
+         "Update with wrong number of operands");
+  
+  // Check to see if there is no change.
+  bool AnyChange = false;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    if (Ops[i] != N->getOperand(i)) {
+      AnyChange = true;
+      break;
+    }
+  }
+  
+  // No operands changed, just return the input node.
+  if (!AnyChange) return InN;
+  
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+    return SDOperand(Existing, InN.ResNo);
+  
+  // Nope, it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    RemoveNodeFromCSEMaps(N);
+  
+  // Now we update the operands.
+  for (unsigned i = 0; i != NumOps; ++i) {
+    if (N->OperandList[i] != Ops[i]) {
+      N->OperandList[i].Val->removeUser(N);
+      Ops[i].Val->addUser(N);
+      N->OperandList[i] = Ops[i];
+    }
+  }
+
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
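+
+// Sketch of intended use (N and NewAddr are hypothetical): a transformation
+// that wants to rewrite one operand of an existing node goes through
+// UpdateNodeOperands rather than building a fresh node, so CSE is preserved
+// either way:
+//
+//   SDOperand Res = DAG.UpdateNodeOperands(N, NewAddr, N.getOperand(1));
+//   // Res is either N itself, mutated in place, or a pre-existing node that
+//   // already had exactly these operands.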
+
+
+/// MorphNodeTo - This frees the operands of the current node, resets the
+/// opcode, types, and operands to the specified values.  This should only be
+/// used by the SelectionDAG class.
+void SDNode::MorphNodeTo(unsigned Opc, SDVTList L,
+                         const SDOperand *Ops, unsigned NumOps) {
+  NodeType = Opc;
+  ValueList = L.VTs;
+  NumValues = L.NumVTs;
+  
+  // Clear the operands list, updating used nodes to remove this from their
+  // use list.
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+    I->Val->removeUser(this);
+  
+  // If NumOps is larger than the # of operands we currently have, reallocate
+  // the operand list.
+  if (NumOps > NumOperands) {
+    if (OperandsNeedDelete)
+      delete [] OperandList;
+    OperandList = new SDOperand[NumOps];
+    OperandsNeedDelete = true;
+  }
+  
+  // Assign the new operands.
+  NumOperands = NumOps;
+  
+  for (unsigned i = 0, e = NumOps; i != e; ++i) {
+    OperandList[i] = Ops[i];
+    SDNode *N = OperandList[i].Val;
+    N->Uses.push_back(this);
+  }
+}
+
+/// SelectNodeTo - These are used for target selectors to *mutate* the
+/// specified node to have the specified return type, Target opcode, and
+/// operands.  Note that target opcodes are stored as
+/// ISD::BUILTIN_OP_END+TargetOpcode in the node opcode field.
+///
+/// Note that SelectNodeTo returns the resultant node.  If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT) {
+  SDVTList VTs = getVTList(VT);
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+   
+  RemoveNodeFromCSEMaps(N);
+  
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0);
+
+  CSEMap.InsertNode(N, IP);
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT, SDOperand Op1) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT);
+  SDOperand Ops[] = { Op1 };
+  
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 1);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+                                       
+  RemoveNodeFromCSEMaps(N);
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 1);
+  CSEMap.InsertNode(N, IP);
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT, SDOperand Op1,
+                                   SDOperand Op2) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT);
+  SDOperand Ops[] = { Op1, Op2 };
+  
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+                                       
+  RemoveNodeFromCSEMaps(N);
+  
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+  
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT, SDOperand Op1,
+                                   SDOperand Op2, SDOperand Op3) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT);
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+                                       
+  RemoveNodeFromCSEMaps(N);
+  
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT, const SDOperand *Ops,
+                                   unsigned NumOps) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT);
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, NumOps);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+                                       
+  RemoveNodeFromCSEMaps(N);
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, NumOps);
+  
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, 
+                                   MVT::ValueType VT1, MVT::ValueType VT2,
+                                   SDOperand Op1, SDOperand Op2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  FoldingSetNodeID ID;
+  SDOperand Ops[] = { Op1, Op2 };
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+
+  RemoveNodeFromCSEMaps(N);
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT1, MVT::ValueType VT2,
+                                   SDOperand Op1, SDOperand Op2, 
+                                   SDOperand Op3) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+
+  RemoveNodeFromCSEMaps(N);
+
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
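+
+// Illustrative sketch: inside a target's instruction selector, SelectNodeTo is
+// how a DAG node is morphed into a machine node in place.  With a made-up
+// target opcode MyTarget::ADDrr (purely hypothetical), selecting an i32 add
+// could look like:
+//
+//   return CurDAG->SelectNodeTo(N.Val, MyTarget::ADDrr, MVT::i32,
+//                               N.getOperand(0), N.getOperand(1));
+//   // If an identical machine node already exists, that node is returned and
+//   // N itself is left alone.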
+
+
+/// getTargetNode - These are used for target selectors to create a new node
+/// with specified return type(s), target opcode, and operands.
+///
+/// Note that getTargetNode returns the resultant node.  If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+                                    SDOperand Op1) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+                                    SDOperand Op1, SDOperand Op2) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+                                    SDOperand Op1, SDOperand Op2,
+                                    SDOperand Op3) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+                                    const SDOperand *Ops, unsigned NumOps) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops, NumOps).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, SDOperand Op1) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, &Op1, 1).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, SDOperand Op1,
+                                    SDOperand Op2) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+  SDOperand Ops[] = { Op1, Op2 };
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, 2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, SDOperand Op1,
+                                    SDOperand Op2, SDOperand Op3) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, 3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, 
+                                    MVT::ValueType VT2,
+                                    const SDOperand *Ops, unsigned NumOps) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, NumOps).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, MVT::ValueType VT3,
+                                    SDOperand Op1, SDOperand Op2) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+  SDOperand Ops[] = { Op1, Op2 };
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, 2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, MVT::ValueType VT3,
+                                    SDOperand Op1, SDOperand Op2,
+                                    SDOperand Op3) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, 3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, 
+                                    MVT::ValueType VT2, MVT::ValueType VT3,
+                                    const SDOperand *Ops, unsigned NumOps) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, NumOps).Val;
+}
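+
+// Where SelectNodeTo mutates an existing node, getTargetNode creates a brand
+// new machine node.  With the same hypothetical MyTarget::ADDrr opcode and
+// placeholder operands LHS/RHS:
+//
+//   SDNode *Add = CurDAG->getTargetNode(MyTarget::ADDrr, MVT::i32, LHS, RHS);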
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From/To have a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDOperand FromN, SDOperand ToN,
+                                      std::vector<SDNode*> *Deleted) {
+  SDNode *From = FromN.Val, *To = ToN.Val;
+  assert(From->getNumValues() == 1 && To->getNumValues() == 1 &&
+         "Cannot replace with this method!");
+  assert(From != To && "Cannot replace uses of a node with itself");
+  
+  while (!From->use_empty()) {
+    // Process users until they are all gone.
+    SDNode *U = *From->use_begin();
+    
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(U);
+    
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
+        From->removeUser(U);
+        I->Val = To;
+        To->addUser(U);
+      }
+
+    // Now that we have modified U, add it back to the CSE maps.  If it already
+    // exists there, recursively merge the results together.
+    if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+      ReplaceAllUsesWith(U, Existing, Deleted);
+      // U is now dead.
+      if (Deleted) Deleted->push_back(U);
+      DeleteNodeNotInCSEMaps(U);
+    }
+  }
+}
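+
+// Sketch of a typical caller (Old and New are hypothetical single-result
+// values): one call rewires every user; users that become duplicates of
+// existing nodes are merged recursively and reported through the optional
+// Deleted vector so the caller can drop its stale pointers:
+//
+//   std::vector<SDNode*> Deleted;
+//   DAG.ReplaceAllUsesWith(Old, New, &Deleted);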
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From/To have matching types and numbers of result
+/// values.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+                                      std::vector<SDNode*> *Deleted) {
+  assert(From != To && "Cannot replace uses of a node with itself");
+  assert(From->getNumValues() == To->getNumValues() &&
+         "Cannot use this version of ReplaceAllUsesWith!");
+  if (From->getNumValues() == 1) {  // If possible, use the faster version.
+    ReplaceAllUsesWith(SDOperand(From, 0), SDOperand(To, 0), Deleted);
+    return;
+  }
+  
+  while (!From->use_empty()) {
+    // Process users until they are all gone.
+    SDNode *U = *From->use_begin();
+    
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(U);
+    
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
+        From->removeUser(U);
+        I->Val = To;
+        To->addUser(U);
+      }
+        
+    // Now that we have modified U, add it back to the CSE maps.  If it already
+    // exists there, recursively merge the results together.
+    if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+      ReplaceAllUsesWith(U, Existing, Deleted);
+      // U is now dead.
+      if (Deleted) Deleted->push_back(U);
+      DeleteNodeNotInCSEMaps(U);
+    }
+  }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values.  To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+                                      const SDOperand *To,
+                                      std::vector<SDNode*> *Deleted) {
+  if (From->getNumValues() == 1 && To[0].Val->getNumValues() == 1) {
+    // Degenerate case handled above.
+    ReplaceAllUsesWith(SDOperand(From, 0), To[0], Deleted);
+    return;
+  }
+
+  while (!From->use_empty()) {
+    // Process users until they are all gone.
+    SDNode *U = *From->use_begin();
+    
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(U);
+    
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
+        const SDOperand &ToOp = To[I->ResNo];
+        From->removeUser(U);
+        *I = ToOp;
+        ToOp.Val->addUser(U);
+      }
+        
+    // Now that we have modified U, add it back to the CSE maps.  If it already
+    // exists there, recursively merge the results together.
+    if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+      ReplaceAllUsesWith(U, Existing, Deleted);
+      // U is now dead.
+      if (Deleted) Deleted->push_back(U);
+      DeleteNodeNotInCSEMaps(U);
+    }
+  }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.Val alone.  The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDOperand From, SDOperand To,
+                                             std::vector<SDNode*> &Deleted) {
+  assert(From != To && "Cannot replace a value with itself");
+  // Handle the simple, trivial case efficiently.
+  if (From.Val->getNumValues() == 1 && To.Val->getNumValues() == 1) {
+    ReplaceAllUsesWith(From, To, &Deleted);
+    return;
+  }
+  
+  // Get all of the users of From.Val.  We want these in a nice,
+  // deterministically ordered and uniqued set, so we use a SmallSetVector.
+  SmallSetVector<SDNode*, 16> Users(From.Val->use_begin(), From.Val->use_end());
+
+  while (!Users.empty()) {
+    // We know that this user uses some value of From.  If it is the right
+    // value, update it.
+    SDNode *User = Users.back();
+    Users.pop_back();
+    
+    for (SDOperand *Op = User->OperandList,
+         *E = User->OperandList+User->NumOperands; Op != E; ++Op) {
+      if (*Op == From) {
+        // Okay, we know this user needs to be updated.  Remove its old self
+        // from the CSE maps.
+        RemoveNodeFromCSEMaps(User);
+        
+        // Update all operands that match "From".
+        for (; Op != E; ++Op) {
+          if (*Op == From) {
+            From.Val->removeUser(User);
+            *Op = To;
+            To.Val->addUser(User);
+          }
+        }
+                   
+        // Now that we have modified User, add it back to the CSE maps.  If it
+        // already exists there, recursively merge the results together.
+        if (SDNode *Existing = AddNonLeafNodeToCSEMaps(User)) {
+          unsigned NumDeleted = Deleted.size();
+          ReplaceAllUsesWith(User, Existing, &Deleted);
+          
+          // User is now dead.
+          Deleted.push_back(User);
+          DeleteNodeNotInCSEMaps(User);
+          
+          // We have to be careful here, because ReplaceAllUsesWith could have
+          // deleted a user of From, which means there may be dangling pointers
+          // in the "Users" setvector.  Scan over the deleted node pointers and
+          // remove them from the setvector.
+          for (unsigned i = NumDeleted, e = Deleted.size(); i != e; ++i)
+            Users.remove(Deleted[i]);
+        }
+        break;   // Exit the operand scanning loop.
+      }
+    }
+  }
+}
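+
+// Unlike the full ReplaceAllUsesWith, this lets a caller retarget a single
+// result of a multi-result node.  For a hypothetical LoadSDNode *LD whose
+// loaded value (result 0) has been folded to FoldedVal while its chain
+// (result 1) must survive:
+//
+//   std::vector<SDNode*> Deleted;
+//   DAG.ReplaceAllUsesOfValueWith(SDOperand(LD, 0), FoldedVal, Deleted);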
+
+
+/// AssignNodeIds - Assign a unique node id to each node in the DAG, based on
+/// its position in the AllNodes list. It returns the maximum id.
+unsigned SelectionDAG::AssignNodeIds() {
+  unsigned Id = 0;
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I){
+    SDNode *N = I;
+    N->setNodeId(Id++);
+  }
+  return Id;
+}
+
+/// AssignTopologicalOrder - Assign a unique node id to each node in the DAG
+/// based on its topological order. It returns the maximum id and, by
+/// reference, a vector of the SDNode pointers in the assigned order.
+unsigned SelectionDAG::AssignTopologicalOrder(std::vector<SDNode*> &TopOrder) {
+  unsigned DAGSize = AllNodes.size();
+  std::vector<unsigned> InDegree(DAGSize);
+  std::vector<SDNode*> Sources;
+
+  // Use a two-pass approach to avoid using a std::map, which is slow.
+  unsigned Id = 0;
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I){
+    SDNode *N = I;
+    N->setNodeId(Id++);
+    unsigned Degree = N->use_size();
+    InDegree[N->getNodeId()] = Degree;
+    if (Degree == 0)
+      Sources.push_back(N);
+  }
+
+  TopOrder.clear();
+  while (!Sources.empty()) {
+    SDNode *N = Sources.back();
+    Sources.pop_back();
+    TopOrder.push_back(N);
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+      SDNode *P = I->Val;
+      unsigned Degree = --InDegree[P->getNodeId()];
+      if (Degree == 0)
+        Sources.push_back(P);
+    }
+  }
+
+  // Second pass, assign the actual topological order as node ids.
+  Id = 0;
+  for (std::vector<SDNode*>::iterator TI = TopOrder.begin(),TE = TopOrder.end();
+       TI != TE; ++TI)
+    (*TI)->setNodeId(Id++);
+
+  return Id;
+}
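+
+// The loop above is a standard worklist (Kahn-style) topological sort over the
+// use graph: nodes with no remaining uses are emitted first, so every node is
+// numbered before any of its operands.  A hypothetical client relying on that
+// invariant:
+//
+//   std::vector<SDNode*> Order;
+//   unsigned MaxId = DAG.AssignTopologicalOrder(Order);
+//   // Order[i] has node id i, MaxId equals the number of nodes, and each
+//   // node appears in Order before all of its operands.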
+
+
+
+//===----------------------------------------------------------------------===//
+//                              SDNode Class
+//===----------------------------------------------------------------------===//
+
+// Out-of-line virtual method to give class a home.
+void SDNode::ANCHOR() {}
+void UnarySDNode::ANCHOR() {}
+void BinarySDNode::ANCHOR() {}
+void TernarySDNode::ANCHOR() {}
+void HandleSDNode::ANCHOR() {}
+void StringSDNode::ANCHOR() {}
+void ConstantSDNode::ANCHOR() {}
+void ConstantFPSDNode::ANCHOR() {}
+void GlobalAddressSDNode::ANCHOR() {}
+void FrameIndexSDNode::ANCHOR() {}
+void JumpTableSDNode::ANCHOR() {}
+void ConstantPoolSDNode::ANCHOR() {}
+void BasicBlockSDNode::ANCHOR() {}
+void SrcValueSDNode::ANCHOR() {}
+void RegisterSDNode::ANCHOR() {}
+void ExternalSymbolSDNode::ANCHOR() {}
+void CondCodeSDNode::ANCHOR() {}
+void VTSDNode::ANCHOR() {}
+void LoadSDNode::ANCHOR() {}
+void StoreSDNode::ANCHOR() {}
+
+HandleSDNode::~HandleSDNode() {
+  SDVTList VTs = { 0, 0 };
+  MorphNodeTo(ISD::HANDLENODE, VTs, 0, 0);  // Drops operand uses.
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
+                                         MVT::ValueType VT, int o)
+  : SDNode(isa<GlobalVariable>(GA) &&
+           dyn_cast<GlobalVariable>(GA)->isThreadLocal() ?
+           // Thread Local
+           (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
+           // Non Thread Local
+           (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
+           getSDVTList(VT)), Offset(o) {
+  TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) {
+  AddNodeIDNode(ID, this);
+}
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+MVT::ValueType *SDNode::getValueTypeList(MVT::ValueType VT) {
+  static MVT::ValueType VTs[MVT::LAST_VALUETYPE];
+  VTs[VT] = VT;
+  return &VTs[VT];
+}
+  
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value.  This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  // If there is only one value, this is easy.
+  if (getNumValues() == 1)
+    return use_size() == NUses;
+  if (Uses.size() < NUses) return false;
+
+  SDOperand TheValue(const_cast<SDNode *>(this), Value);
+
+  SmallPtrSet<SDNode*, 32> UsersHandled;
+
+  for (SDNode::use_iterator UI = Uses.begin(), E = Uses.end(); UI != E; ++UI) {
+    SDNode *User = *UI;
+    if (User->getNumOperands() == 1 ||
+        UsersHandled.insert(User))     // First time we've seen this?
+      for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+        if (User->getOperand(i) == TheValue) {
+          if (NUses == 0)
+            return false;   // too many uses
+          --NUses;
+        }
+  }
+
+  // Found exactly the right number of uses?
+  return NUses == 0;
+}
+
+
+/// isOnlyUse - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUse(SDNode *N) const {
+  bool Seen = false;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDNode *User = *I;
+    if (User == this)
+      Seen = true;
+    else
+      return false;
+  }
+
+  return Seen;
+}
+
+/// isOperand - Return true if this node is an operand of N.
+///
+bool SDOperand::isOperand(SDNode *N) const {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (*this == N->getOperand(i))
+      return true;
+  return false;
+}
+
+bool SDNode::isOperand(SDNode *N) const {
+  for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+    if (this == N->OperandList[i].Val)
+      return true;
+  return false;
+}
+
+static void findPredecessor(SDNode *N, const SDNode *P, bool &found,
+                            SmallPtrSet<SDNode *, 32> &Visited) {
+  if (found || !Visited.insert(N))
+    return;
+
+  for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) {
+    SDNode *Op = N->getOperand(i).Val;
+    if (Op == P) {
+      found = true;
+      return;
+    }
+    findPredecessor(Op, P, found, Visited);
+  }
+}
+
+/// isPredecessor - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by recursively traversing
+/// up the operands.
+/// NOTE: this is an expensive method. Use it carefully.
+bool SDNode::isPredecessor(SDNode *N) const {
+  SmallPtrSet<SDNode *, 32> Visited;
+  bool found = false;
+  findPredecessor(N, this, found, Visited);
+  return found;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+  assert(Num < NumOperands && "Invalid child # of SDNode!");
+  return cast<ConstantSDNode>(OperandList[Num])->getValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+  switch (getOpcode()) {
+  default:
+    if (getOpcode() < ISD::BUILTIN_OP_END)
+      return "<<Unknown DAG Node>>";
+    else {
+      if (G) {
+        if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+          if (getOpcode()-ISD::BUILTIN_OP_END < TII->getNumOpcodes())
+            return TII->getName(getOpcode()-ISD::BUILTIN_OP_END);
+
+        TargetLowering &TLI = G->getTargetLoweringInfo();
+        const char *Name =
+          TLI.getTargetNodeName(getOpcode());
+        if (Name) return Name;
+      }
+
+      return "<<Unknown Target Node>>";
+    }
+   
+  case ISD::PCMARKER:      return "PCMarker";
+  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+  case ISD::SRCVALUE:      return "SrcValue";
+  case ISD::EntryToken:    return "EntryToken";
+  case ISD::TokenFactor:   return "TokenFactor";
+  case ISD::AssertSext:    return "AssertSext";
+  case ISD::AssertZext:    return "AssertZext";
+
+  case ISD::STRING:        return "String";
+  case ISD::BasicBlock:    return "BasicBlock";
+  case ISD::VALUETYPE:     return "ValueType";
+  case ISD::Register:      return "Register";
+
+  case ISD::Constant:      return "Constant";
+  case ISD::ConstantFP:    return "ConstantFP";
+  case ISD::GlobalAddress: return "GlobalAddress";
+  case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+  case ISD::FrameIndex:    return "FrameIndex";
+  case ISD::JumpTable:     return "JumpTable";
+  case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+  case ISD::RETURNADDR: return "RETURNADDR";
+  case ISD::FRAMEADDR: return "FRAMEADDR";
+  case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+  case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+  case ISD::EHSELECTION: return "EHSELECTION";
+  case ISD::EH_RETURN: return "EH_RETURN";
+  case ISD::ConstantPool:  return "ConstantPool";
+  case ISD::ExternalSymbol: return "ExternalSymbol";
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IID = cast<ConstantSDNode>(getOperand(0))->getValue();
+    return Intrinsic::getName((Intrinsic::ID)IID);
+  }
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned IID = cast<ConstantSDNode>(getOperand(1))->getValue();
+    return Intrinsic::getName((Intrinsic::ID)IID);
+  }
+
+  case ISD::BUILD_VECTOR:   return "BUILD_VECTOR";
+  case ISD::TargetConstant: return "TargetConstant";
+  case ISD::TargetConstantFP:return "TargetConstantFP";
+  case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+  case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+  case ISD::TargetFrameIndex: return "TargetFrameIndex";
+  case ISD::TargetJumpTable:  return "TargetJumpTable";
+  case ISD::TargetConstantPool:  return "TargetConstantPool";
+  case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+
+  case ISD::CopyToReg:     return "CopyToReg";
+  case ISD::CopyFromReg:   return "CopyFromReg";
+  case ISD::UNDEF:         return "undef";
+  case ISD::MERGE_VALUES:  return "merge_values";
+  case ISD::INLINEASM:     return "inlineasm";
+  case ISD::LABEL:         return "label";
+  case ISD::HANDLENODE:    return "handlenode";
+  case ISD::FORMAL_ARGUMENTS: return "formal_arguments";
+  case ISD::CALL:          return "call";
+    
+  // Unary operators
+  case ISD::FABS:   return "fabs";
+  case ISD::FNEG:   return "fneg";
+  case ISD::FSQRT:  return "fsqrt";
+  case ISD::FSIN:   return "fsin";
+  case ISD::FCOS:   return "fcos";
+  case ISD::FPOWI:  return "fpowi";
+
+  // Binary operators
+  case ISD::ADD:    return "add";
+  case ISD::SUB:    return "sub";
+  case ISD::MUL:    return "mul";
+  case ISD::MULHU:  return "mulhu";
+  case ISD::MULHS:  return "mulhs";
+  case ISD::SDIV:   return "sdiv";
+  case ISD::UDIV:   return "udiv";
+  case ISD::SREM:   return "srem";
+  case ISD::UREM:   return "urem";
+  case ISD::AND:    return "and";
+  case ISD::OR:     return "or";
+  case ISD::XOR:    return "xor";
+  case ISD::SHL:    return "shl";
+  case ISD::SRA:    return "sra";
+  case ISD::SRL:    return "srl";
+  case ISD::ROTL:   return "rotl";
+  case ISD::ROTR:   return "rotr";
+  case ISD::FADD:   return "fadd";
+  case ISD::FSUB:   return "fsub";
+  case ISD::FMUL:   return "fmul";
+  case ISD::FDIV:   return "fdiv";
+  case ISD::FREM:   return "frem";
+  case ISD::FCOPYSIGN: return "fcopysign";
+
+  case ISD::SETCC:       return "setcc";
+  case ISD::SELECT:      return "select";
+  case ISD::SELECT_CC:   return "select_cc";
+  case ISD::INSERT_VECTOR_ELT:   return "insert_vector_elt";
+  case ISD::EXTRACT_VECTOR_ELT:  return "extract_vector_elt";
+  case ISD::CONCAT_VECTORS:      return "concat_vectors";
+  case ISD::EXTRACT_SUBVECTOR:   return "extract_subvector";
+  case ISD::SCALAR_TO_VECTOR:    return "scalar_to_vector";
+  case ISD::VECTOR_SHUFFLE:      return "vector_shuffle";
+  case ISD::CARRY_FALSE:         return "carry_false";
+  case ISD::ADDC:        return "addc";
+  case ISD::ADDE:        return "adde";
+  case ISD::SUBC:        return "subc";
+  case ISD::SUBE:        return "sube";
+  case ISD::SHL_PARTS:   return "shl_parts";
+  case ISD::SRA_PARTS:   return "sra_parts";
+  case ISD::SRL_PARTS:   return "srl_parts";
+
+  // Conversion operators.
+  case ISD::SIGN_EXTEND: return "sign_extend";
+  case ISD::ZERO_EXTEND: return "zero_extend";
+  case ISD::ANY_EXTEND:  return "any_extend";
+  case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+  case ISD::TRUNCATE:    return "truncate";
+  case ISD::FP_ROUND:    return "fp_round";
+  case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+  case ISD::FP_EXTEND:   return "fp_extend";
+
+  case ISD::SINT_TO_FP:  return "sint_to_fp";
+  case ISD::UINT_TO_FP:  return "uint_to_fp";
+  case ISD::FP_TO_SINT:  return "fp_to_sint";
+  case ISD::FP_TO_UINT:  return "fp_to_uint";
+  case ISD::BIT_CONVERT: return "bit_convert";
+
+    // Control flow instructions
+  case ISD::BR:      return "br";
+  case ISD::BRIND:   return "brind";
+  case ISD::BR_JT:   return "br_jt";
+  case ISD::BRCOND:  return "brcond";
+  case ISD::BR_CC:   return "br_cc";
+  case ISD::RET:     return "ret";
+  case ISD::CALLSEQ_START:  return "callseq_start";
+  case ISD::CALLSEQ_END:    return "callseq_end";
+
+    // Other operators
+  case ISD::LOAD:               return "load";
+  case ISD::STORE:              return "store";
+  case ISD::VAARG:              return "vaarg";
+  case ISD::VACOPY:             return "vacopy";
+  case ISD::VAEND:              return "vaend";
+  case ISD::VASTART:            return "vastart";
+  case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+  case ISD::EXTRACT_ELEMENT:    return "extract_element";
+  case ISD::BUILD_PAIR:         return "build_pair";
+  case ISD::STACKSAVE:          return "stacksave";
+  case ISD::STACKRESTORE:       return "stackrestore";
+    
+  // Block memory operations.
+  case ISD::MEMSET:  return "memset";
+  case ISD::MEMCPY:  return "memcpy";
+  case ISD::MEMMOVE: return "memmove";
+
+  // Bit manipulation
+  case ISD::BSWAP:   return "bswap";
+  case ISD::CTPOP:   return "ctpop";
+  case ISD::CTTZ:    return "cttz";
+  case ISD::CTLZ:    return "ctlz";
+
+  // Debug info
+  case ISD::LOCATION: return "location";
+  case ISD::DEBUG_LOC: return "debug_loc";
+
+  case ISD::CONDCODE:
+    switch (cast<CondCodeSDNode>(this)->get()) {
+    default: assert(0 && "Unknown setcc condition!");
+    case ISD::SETOEQ:  return "setoeq";
+    case ISD::SETOGT:  return "setogt";
+    case ISD::SETOGE:  return "setoge";
+    case ISD::SETOLT:  return "setolt";
+    case ISD::SETOLE:  return "setole";
+    case ISD::SETONE:  return "setone";
+
+    case ISD::SETO:    return "seto";
+    case ISD::SETUO:   return "setuo";
+    case ISD::SETUEQ:  return "setue";
+    case ISD::SETUGT:  return "setugt";
+    case ISD::SETUGE:  return "setuge";
+    case ISD::SETULT:  return "setult";
+    case ISD::SETULE:  return "setule";
+    case ISD::SETUNE:  return "setune";
+
+    case ISD::SETEQ:   return "seteq";
+    case ISD::SETGT:   return "setgt";
+    case ISD::SETGE:   return "setge";
+    case ISD::SETLT:   return "setlt";
+    case ISD::SETLE:   return "setle";
+    case ISD::SETNE:   return "setne";
+    }
+  }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+  switch (AM) {
+  default:
+    return "";
+  case ISD::PRE_INC:
+    return "<pre-inc>";
+  case ISD::PRE_DEC:
+    return "<pre-dec>";
+  case ISD::POST_INC:
+    return "<post-inc>";
+  case ISD::POST_DEC:
+    return "<post-dec>";
+  }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+  cerr << (void*)this << ": ";
+
+  for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+    if (i) cerr << ",";
+    if (getValueType(i) == MVT::Other)
+      cerr << "ch";
+    else
+      cerr << MVT::getValueTypeString(getValueType(i));
+  }
+  cerr << " = " << getOperationName(G);
+
+  cerr << " ";
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    if (i) cerr << ", ";
+    cerr << (void*)getOperand(i).Val;
+    if (unsigned RN = getOperand(i).ResNo)
+      cerr << ":" << RN;
+  }
+
+  if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+    cerr << "<" << CSDN->getValue() << ">";
+  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+    cerr << "<" << CSDN->getValue() << ">";
+  } else if (const GlobalAddressSDNode *GADN =
+             dyn_cast<GlobalAddressSDNode>(this)) {
+    int offset = GADN->getOffset();
+    cerr << "<";
+    WriteAsOperand(*cerr.stream(), GADN->getGlobal()) << ">";
+    if (offset > 0)
+      cerr << " + " << offset;
+    else
+      cerr << " " << offset;
+  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+    cerr << "<" << FIDN->getIndex() << ">";
+  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+    cerr << "<" << JTDN->getIndex() << ">";
+  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+    int offset = CP->getOffset();
+    if (CP->isMachineConstantPoolEntry())
+      cerr << "<" << *CP->getMachineCPVal() << ">";
+    else
+      cerr << "<" << *CP->getConstVal() << ">";
+    if (offset > 0)
+      cerr << " + " << offset;
+    else
+      cerr << " " << offset;
+  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+    cerr << "<";
+    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+    if (LBB)
+      cerr << LBB->getName() << " ";
+    cerr << (const void*)BBDN->getBasicBlock() << ">";
+  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+    if (G && R->getReg() && MRegisterInfo::isPhysicalRegister(R->getReg())) {
+      cerr << " " <<G->getTarget().getRegisterInfo()->getName(R->getReg());
+    } else {
+      cerr << " #" << R->getReg();
+    }
+  } else if (const ExternalSymbolSDNode *ES =
+             dyn_cast<ExternalSymbolSDNode>(this)) {
+    cerr << "'" << ES->getSymbol() << "'";
+  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+    if (M->getValue())
+      cerr << "<" << M->getValue() << ":" << M->getOffset() << ">";
+    else
+      cerr << "<null:" << M->getOffset() << ">";
+  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+    cerr << ":" << MVT::getValueTypeString(N->getVT());
+  } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+    bool doExt = true;
+    switch (LD->getExtensionType()) {
+    default: doExt = false; break;
+    case ISD::EXTLOAD:
+      cerr << " <anyext ";
+      break;
+    case ISD::SEXTLOAD:
+      cerr << " <sext ";
+      break;
+    case ISD::ZEXTLOAD:
+      cerr << " <zext ";
+      break;
+    }
+    if (doExt)
+      cerr << MVT::getValueTypeString(LD->getLoadedVT()) << ">";
+
+    const char *AM = getIndexedModeName(LD->getAddressingMode());
+    if (AM != "")
+      cerr << " " << AM;
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+    if (ST->isTruncatingStore())
+      cerr << " <trunc "
+           << MVT::getValueTypeString(ST->getStoredVT()) << ">";
+
+    const char *AM = getIndexedModeName(ST->getAddressingMode());
+    if (AM != "")
+      cerr << " " << AM;
+  }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (N->getOperand(i).Val->hasOneUse())
+      DumpNodes(N->getOperand(i).Val, indent+2, G);
+    else
+      cerr << "\n" << std::string(indent+2, ' ')
+           << (void*)N->getOperand(i).Val << ": <multiple use>";
+
+
+  cerr << "\n" << std::string(indent, ' ');
+  N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+  cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
+  std::vector<const SDNode*> Nodes;
+  for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+       I != E; ++I)
+    Nodes.push_back(I);
+  
+  std::sort(Nodes.begin(), Nodes.end());
+
+  for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+    if (!Nodes[i]->hasOneUse() && Nodes[i] != getRoot().Val)
+      DumpNodes(Nodes[i], 2, this);
+  }
+
+  if (getRoot().Val) DumpNodes(getRoot().Val, 2, this);
+
+  cerr << "\n\n";
+}
+
+const Type *ConstantPoolSDNode::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..9256faa
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,4904 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ParameterAttributes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+          cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+          cl::desc("Pop up a window to show sched dags as they are processed"));
+#else
+static const bool ViewISelDAGs = 0, ViewSchedDAGs = 0;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+namespace {
+  cl::opt<RegisterScheduler::FunctionPassCtor, false,
+          RegisterPassParser<RegisterScheduler> >
+  ISHeuristic("pre-RA-sched",
+              cl::init(&createDefaultScheduler),
+              cl::desc("Instruction schedulers available (before register allocation):"));
+
+  static RegisterScheduler
+  defaultListDAGScheduler("default", "  Best scheduler for the target",
+                          createDefaultScheduler);
+} // namespace
+
+namespace { struct AsmOperandInfo; }
+
+namespace {
+  /// RegsForValue - This struct represents the physical registers that a
+  /// particular value is assigned and the type information about the value.
+  /// This is needed because values can be promoted into larger registers or
+  /// expanded into multiple smaller registers than the original value type.
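+  /// For example, on a target whose widest legal integer type is i32, an i64
+  /// value is expanded into two i32 registers (Regs.size() == 2,
+  /// RegVT == MVT::i32, ValueVT == MVT::i64), while an i16 value is typically
+  /// promoted into a single i32 register (RegVT == MVT::i32,
+  /// ValueVT == MVT::i16).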
+  struct VISIBILITY_HIDDEN RegsForValue {
+    /// Regs - This list holds the register (for legal and promoted values)
+    /// or register set (for expanded values) that the value should be assigned
+    /// to.
+    std::vector<unsigned> Regs;
+    
+    /// RegVT - The value type of each register.
+    ///
+    MVT::ValueType RegVT;
+    
+    /// ValueVT - The value type of the LLVM value, which may be promoted from
+    /// RegVT or made from merging the two expanded parts.
+    MVT::ValueType ValueVT;
+    
+    RegsForValue() : RegVT(MVT::Other), ValueVT(MVT::Other) {}
+    
+    RegsForValue(unsigned Reg, MVT::ValueType regvt, MVT::ValueType valuevt)
+      : RegVT(regvt), ValueVT(valuevt) {
+        Regs.push_back(Reg);
+    }
+    RegsForValue(const std::vector<unsigned> &regs, 
+                 MVT::ValueType regvt, MVT::ValueType valuevt)
+      : Regs(regs), RegVT(regvt), ValueVT(valuevt) {
+    }
+    
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+    /// this value and returns the result as a ValueVT value.  This uses 
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    SDOperand getCopyFromRegs(SelectionDAG &DAG,
+                              SDOperand &Chain, SDOperand *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+    /// specified value into the registers specified by this object.  This uses 
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    void getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
+                       SDOperand &Chain, SDOperand *Flag) const;
+    
+    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+    /// operand list.  This adds the code marker and includes the number of 
+    /// values added into it.
+    void AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
+                              std::vector<SDOperand> &Ops) const;
+  };
+}
+
+namespace llvm {
+  //===--------------------------------------------------------------------===//
+  /// createDefaultScheduler - This creates an instruction scheduler appropriate
+  /// for the target.
+  ScheduleDAG* createDefaultScheduler(SelectionDAGISel *IS,
+                                      SelectionDAG *DAG,
+                                      MachineBasicBlock *BB) {
+    TargetLowering &TLI = IS->getTargetLowering();
+    
+    if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) {
+      return createTDListDAGScheduler(IS, DAG, BB);
+    } else {
+      assert(TLI.getSchedulingPreference() ==
+           TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
+      return createBURRListDAGScheduler(IS, DAG, BB);
+    }
+  }
+
+
+  //===--------------------------------------------------------------------===//
+  /// FunctionLoweringInfo - This contains information that is global to a
+  /// function that is used when lowering a region of the function.
+  class FunctionLoweringInfo {
+  public:
+    TargetLowering &TLI;
+    Function &Fn;
+    MachineFunction &MF;
+    SSARegMap *RegMap;
+
+    FunctionLoweringInfo(TargetLowering &TLI, Function &Fn,MachineFunction &MF);
+
+    /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+    std::map<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+    /// ValueMap - Since we emit code for the function a basic block at a time,
+    /// we must remember which virtual registers hold the values that are used
+    /// across basic blocks.
+    DenseMap<const Value*, unsigned> ValueMap;
+
+    /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+    /// the entry block.  This allows the allocas to be efficiently referenced
+    /// anywhere in the function.
+    std::map<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+    SmallSet<Instruction*, 8> CatchInfoLost;
+    SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+    unsigned MakeReg(MVT::ValueType VT) {
+      return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
+    }
+    
+    /// isExportedInst - Return true if the specified value is an instruction
+    /// exported from its block.
+    bool isExportedInst(const Value *V) {
+      return ValueMap.count(V);
+    }
+
+    unsigned CreateRegForValue(const Value *V);
+    
+    unsigned InitializeRegForValue(const Value *V) {
+      unsigned &R = ValueMap[V];
+      assert(R == 0 && "Already initialized this value register!");
+      return R = CreateRegForValue(V);
+    }
+  };
+}
+
+/// isSelector - Return true if this instruction is a call to the
+/// eh.selector intrinsic.
+static bool isSelector(Instruction *I) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+    return II->getIntrinsicID() == Intrinsic::eh_selector;
+  return false;
+}
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a 
+/// switch instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+  if (isa<PHINode>(I)) return true;
+  BasicBlock *BB = I->getParent();
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+    if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI) ||
+        // FIXME: Remove switchinst special case.
+        isa<SwitchInst>(*UI))
+      return true;
+  return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true.  Uses by switch instructions are treated as uses
+/// outside the entry block, since a switch may expand into multiple basic
+/// blocks.
+static bool isOnlyUsedInEntryBlock(Argument *A) {
+  BasicBlock *Entry = A->getParent()->begin();
+  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+    if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+      return false;  // Use not in entry block.
+  return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli,
+                                           Function &fn, MachineFunction &mf)
+    : TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) {
+
+  // Create a vreg for each argument register that is not dead and is used
+  // outside of the entry block for the function.
+  for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end();
+       AI != E; ++AI)
+    if (!isOnlyUsedInEntryBlock(AI))
+      InitializeRegForValue(AI);
+
+  // Initialize the mapping of values to registers.  This is only set up for
+  // instruction values that are used outside of the block that defines
+  // them.
+  Function::iterator BB = Fn.begin(), EB = Fn.end();
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+        const Type *Ty = AI->getAllocatedType();
+        uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+        unsigned Align = 
+          std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+                   AI->getAlignment());
+
+        TySize *= CUI->getZExtValue();   // Get total allocated size.
+        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+        StaticAllocaMap[AI] =
+          MF.getFrameInfo()->CreateStackObject(TySize, Align);
+      }
+
+  for (; BB != EB; ++BB)
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+        if (!isa<AllocaInst>(I) ||
+            !StaticAllocaMap.count(cast<AllocaInst>(I)))
+          InitializeRegForValue(I);
+
+  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
+  // also creates the initial PHI MachineInstrs, though none of the input
+  // operands are populated.
+  for (BB = Fn.begin(), EB = Fn.end(); BB != EB; ++BB) {
+    MachineBasicBlock *MBB = new MachineBasicBlock(BB);
+    MBBMap[BB] = MBB;
+    MF.getBasicBlockList().push_back(MBB);
+
+    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+    // appropriate.
+    PHINode *PN;
+    for (BasicBlock::iterator I = BB->begin();(PN = dyn_cast<PHINode>(I)); ++I){
+      if (PN->use_empty()) continue;
+      
+      MVT::ValueType VT = TLI.getValueType(PN->getType());
+      unsigned NumRegisters = TLI.getNumRegisters(VT);
+      unsigned PHIReg = ValueMap[PN];
+      assert(PHIReg && "PHI node does not have an assigned virtual register!");
+      const TargetInstrInfo *TII = TLI.getTargetMachine().getInstrInfo();
+      for (unsigned i = 0; i != NumRegisters; ++i)
+        BuildMI(MBB, TII->get(TargetInstrInfo::PHI), PHIReg+i);
+    }
+  }
+}
+
+/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types.  Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
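+/// For example, an i64 value on a 32-bit target typically needs two i32
+/// registers (getNumRegisters returns 2), so two consecutive virtual registers
+/// are created and the number of the first one is returned.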
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+  MVT::ValueType VT = TLI.getValueType(V->getType());
+  
+  unsigned NumRegisters = TLI.getNumRegisters(VT);
+  MVT::ValueType RegisterVT = TLI.getRegisterType(VT);
+
+  unsigned R = MakeReg(RegisterVT);
+  for (unsigned i = 1; i != NumRegisters; ++i)
+    MakeReg(RegisterVT);
+
+  return R;
+}
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLowering - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can overload any lowering method.
+///
+namespace llvm {
+class SelectionDAGLowering {
+  MachineBasicBlock *CurMBB;
+
+  DenseMap<const Value*, SDOperand> NodeMap;
+
+  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
+  /// them up and then emit token factor nodes when possible.  This allows us to
+  /// get simple disambiguation between loads without worrying about alias
+  /// analysis.
+  std::vector<SDOperand> PendingLoads;
+
+  /// Case - A struct to record the low and high bounds of a switch case
+  /// range, and the case's target basic block.
+  struct Case {
+    Constant* Low;
+    Constant* High;
+    MachineBasicBlock* BB;
+
+    Case() : Low(0), High(0), BB(0) { }
+    Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+      Low(low), High(high), BB(bb) { }
+    uint64_t size() const {
+      uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
+      uint64_t rLow  = cast<ConstantInt>(Low)->getSExtValue();
+      return (rHigh - rLow + 1ULL);
+    }
+  };
+
+  struct CaseBits {
+    uint64_t Mask;
+    MachineBasicBlock* BB;
+    unsigned Bits;
+
+    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+      Mask(mask), BB(bb), Bits(bits) { }
+  };
+
+  typedef std::vector<Case>           CaseVector;
+  typedef std::vector<CaseBits>       CaseBitsVector;
+  typedef CaseVector::iterator        CaseItr;
+  typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+  /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+  /// of conditional branches.
+  struct CaseRec {
+    CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+    CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+    /// CaseBB - The MBB in which to emit the compare and branch
+    MachineBasicBlock *CaseBB;
+    /// LT, GE - If nonzero, we know the current case value must be less-than or
+    /// greater-than-or-equal-to these Constants.
+    Constant *LT;
+    Constant *GE;
+    /// Range - A pair of iterators representing the range of case values to be
+    /// processed at this point in the binary search tree.
+    CaseRange Range;
+  };
+
+  typedef std::vector<CaseRec> CaseRecVector;
+
+  /// The comparison function for sorting the switch case values in the vector.
+  /// WARNING: Case ranges should be disjoint!
+  struct CaseCmp {
+    bool operator () (const Case& C1, const Case& C2) {
+      assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+      const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+      const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+      return CI1->getValue().slt(CI2->getValue());
+    }
+  };
+
+  struct CaseBitsCmp {
+    bool operator () (const CaseBits& C1, const CaseBits& C2) {
+      return C1.Bits > C2.Bits;
+    }
+  };
+
+  unsigned Clusterify(CaseVector& Cases, const SwitchInst &SI);
+  
+public:
+  // TLI - This is information that describes the available target features we
+  // need for lowering.  This indicates when operations are unavailable,
+  // implemented with a libcall, etc.
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+  const TargetData *TD;
+
+  /// SwitchCases - Vector of CaseBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<SelectionDAGISel::CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<SelectionDAGISel::JumpTableBlock> JTCases;
+  std::vector<SelectionDAGISel::BitTestBlock> BitTestCases;
+  
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
+                       FunctionLoweringInfo &funcinfo)
+    : TLI(tli), DAG(dag), TD(DAG.getTarget().getTargetData()),
+      FuncInfo(funcinfo) {
+  }
+
+  /// getRoot - Return the current virtual root of the Selection DAG.
+  ///
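+  /// Any pending loads are first folded into the root: a single pending load
+  /// simply becomes the new root, while several pending loads L1..Ln are
+  /// combined into one TokenFactor(L1, ..., Ln) node so that later chained
+  /// operations depend on all of them.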
+  SDOperand getRoot() {
+    if (PendingLoads.empty())
+      return DAG.getRoot();
+
+    if (PendingLoads.size() == 1) {
+      SDOperand Root = PendingLoads[0];
+      DAG.setRoot(Root);
+      PendingLoads.clear();
+      return Root;
+    }
+
+    // Otherwise, we have to make a token factor node.
+    SDOperand Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                                 &PendingLoads[0], PendingLoads.size());
+    PendingLoads.clear();
+    DAG.setRoot(Root);
+    return Root;
+  }
+
+  SDOperand CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+  void visit(Instruction &I) { visit(I.getOpcode(), I); }
+
+  void visit(unsigned Opcode, User &I) {
+    // Note: this doesn't use InstVisitor, because it has to work with
+    // ConstantExpr's in addition to instructions.
+    switch (Opcode) {
+    default: assert(0 && "Unknown instruction type encountered!");
+             abort();
+      // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+    case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
+#include "llvm/Instruction.def"
+    }
+  }
+
+  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+  SDOperand getLoadFrom(const Type *Ty, SDOperand Ptr,
+                        const Value *SV, SDOperand Root,
+                        bool isVolatile, unsigned Alignment);
+
+  SDOperand getIntPtrConstant(uint64_t Val) {
+    return DAG.getConstant(Val, TLI.getPointerTy());
+  }
+
+  SDOperand getValue(const Value *V);
+
+  void setValue(const Value *V, SDOperand NewN) {
+    SDOperand &N = NodeMap[V];
+    assert(N.Val == 0 && "Already set a value for this node!");
+    N = NewN;
+  }
+  
+  void GetRegistersForValue(AsmOperandInfo &OpInfo, bool HasEarlyClobber,
+                            std::set<unsigned> &OutputRegs, 
+                            std::set<unsigned> &InputRegs);
+
+  void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+                            unsigned Opc);
+  bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+  void ExportFromCurrentBlock(Value *V);
+  void LowerCallTo(Instruction &I,
+                   const Type *CalledValueTy, unsigned CallingConv,
+                   bool IsTailCall, SDOperand Callee, unsigned OpIdx,
+                   MachineBasicBlock *LandingPad = NULL);
+  
+  // Terminator instructions.
+  void visitRet(ReturnInst &I);
+  void visitBr(BranchInst &I);
+  void visitSwitch(SwitchInst &I);
+  void visitUnreachable(UnreachableInst &I) { /* noop */ }
+
+  // Helpers for visitSwitch
+  bool handleSmallSwitchRange(CaseRec& CR,
+                              CaseRecVector& WorkList,
+                              Value* SV,
+                              MachineBasicBlock* Default);
+  bool handleJTSwitchCase(CaseRec& CR,
+                          CaseRecVector& WorkList,
+                          Value* SV,
+                          MachineBasicBlock* Default);
+  bool handleBTSplitSwitchCase(CaseRec& CR,
+                               CaseRecVector& WorkList,
+                               Value* SV,
+                               MachineBasicBlock* Default);
+  bool handleBitTestsSwitchCase(CaseRec& CR,
+                                CaseRecVector& WorkList,
+                                Value* SV,
+                                MachineBasicBlock* Default);  
+  void visitSwitchCase(SelectionDAGISel::CaseBlock &CB);
+  void visitBitTestHeader(SelectionDAGISel::BitTestBlock &B);
+  void visitBitTestCase(MachineBasicBlock* NextMBB,
+                        unsigned Reg,
+                        SelectionDAGISel::BitTestCase &B);
+  void visitJumpTable(SelectionDAGISel::JumpTable &JT);
+  void visitJumpTableHeader(SelectionDAGISel::JumpTable &JT,
+                            SelectionDAGISel::JumpTableHeader &JTH);
+  
+  // These all get lowered before this pass.
+  void visitInvoke(InvokeInst &I);
+  void visitUnwind(UnwindInst &I);
+
+  void visitBinary(User &I, unsigned OpCode);
+  void visitShift(User &I, unsigned Opcode);
+  void visitAdd(User &I) { 
+    if (I.getType()->isFPOrFPVector())
+      visitBinary(I, ISD::FADD);
+    else
+      visitBinary(I, ISD::ADD);
+  }
+  void visitSub(User &I);
+  void visitMul(User &I) {
+    if (I.getType()->isFPOrFPVector())
+      visitBinary(I, ISD::FMUL);
+    else
+      visitBinary(I, ISD::MUL);
+  }
+  void visitURem(User &I) { visitBinary(I, ISD::UREM); }
+  void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
+  void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
+  void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
+  void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
+  void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
+  void visitAnd (User &I) { visitBinary(I, ISD::AND); }
+  void visitOr  (User &I) { visitBinary(I, ISD::OR); }
+  void visitXor (User &I) { visitBinary(I, ISD::XOR); }
+  void visitShl (User &I) { visitShift(I, ISD::SHL); }
+  void visitLShr(User &I) { visitShift(I, ISD::SRL); }
+  void visitAShr(User &I) { visitShift(I, ISD::SRA); }
+  void visitICmp(User &I);
+  void visitFCmp(User &I);
+  // Visit the conversion instructions
+  void visitTrunc(User &I);
+  void visitZExt(User &I);
+  void visitSExt(User &I);
+  void visitFPTrunc(User &I);
+  void visitFPExt(User &I);
+  void visitFPToUI(User &I);
+  void visitFPToSI(User &I);
+  void visitUIToFP(User &I);
+  void visitSIToFP(User &I);
+  void visitPtrToInt(User &I);
+  void visitIntToPtr(User &I);
+  void visitBitCast(User &I);
+
+  void visitExtractElement(User &I);
+  void visitInsertElement(User &I);
+  void visitShuffleVector(User &I);
+
+  void visitGetElementPtr(User &I);
+  void visitSelect(User &I);
+
+  void visitMalloc(MallocInst &I);
+  void visitFree(FreeInst &I);
+  void visitAlloca(AllocaInst &I);
+  void visitLoad(LoadInst &I);
+  void visitStore(StoreInst &I);
+  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
+  void visitCall(CallInst &I);
+  void visitInlineAsm(CallInst &I);
+  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+
+  void visitVAStart(CallInst &I);
+  void visitVAArg(VAArgInst &I);
+  void visitVAEnd(CallInst &I);
+  void visitVACopy(CallInst &I);
+
+  void visitMemIntrinsic(CallInst &I, unsigned Op);
+
+  void visitUserOp1(Instruction &I) {
+    assert(0 && "UserOp1 should not exist at instruction selection time!");
+    abort();
+  }
+  void visitUserOp2(Instruction &I) {
+    assert(0 && "UserOp2 should not exist at instruction selection time!");
+    abort();
+  }
+};
+} // end namespace llvm
+
+
+/// getCopyFromParts - Create a value that contains the
+/// specified legal parts combined into the value they represent.
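+/// For example, on a 32-bit target two i32 parts are reassembled into an i64
+/// value with BUILD_PAIR (swapping the two parts first on big-endian targets),
+/// and a single i32 part holding a promoted i16 value is truncated back to
+/// i16, applying the caller-supplied assertion node first when one is given.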
+static SDOperand getCopyFromParts(SelectionDAG &DAG,
+                                  const SDOperand *Parts,
+                                  unsigned NumParts,
+                                  MVT::ValueType PartVT,
+                                  MVT::ValueType ValueVT,
+                                  ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+  if (!MVT::isVector(ValueVT) || NumParts == 1) {
+    SDOperand Val = Parts[0];
+
+    // If the value was expanded, copy from the top part.
+    if (NumParts > 1) {
+      assert(NumParts == 2 &&
+             "Cannot expand to more than 2 elts yet!");
+      SDOperand Hi = Parts[1];
+      if (!DAG.getTargetLoweringInfo().isLittleEndian())
+        std::swap(Val, Hi);
+      return DAG.getNode(ISD::BUILD_PAIR, ValueVT, Val, Hi);
+    }
+
+    // Otherwise, if the value was promoted or extended, truncate it to the
+    // appropriate type.
+    if (PartVT == ValueVT)
+      return Val;
+  
+    if (MVT::isVector(PartVT)) {
+      assert(MVT::isVector(ValueVT) && "Unknown vector conversion!");
+      return DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+    }
+  
+    if (MVT::isInteger(PartVT) &&
+        MVT::isInteger(ValueVT)) {
+      if (ValueVT < PartVT) {
+        // For a truncate, see if we have any information indicating whether
+        // the discarded high bits are known to be zero or to be sign-extension
+        // bits.
+        if (AssertOp != ISD::DELETED_NODE)
+          Val = DAG.getNode(AssertOp, PartVT, Val,
+                            DAG.getValueType(ValueVT));
+        return DAG.getNode(ISD::TRUNCATE, ValueVT, Val);
+      } else {
+        return DAG.getNode(ISD::ANY_EXTEND, ValueVT, Val);
+      }
+    }
+  
+    if (MVT::isFloatingPoint(PartVT) &&
+        MVT::isFloatingPoint(ValueVT))
+      return DAG.getNode(ISD::FP_ROUND, ValueVT, Val);
+
+    if (MVT::getSizeInBits(PartVT) == 
+        MVT::getSizeInBits(ValueVT))
+      return DAG.getNode(ISD::BIT_CONVERT, ValueVT, Val);
+
+    assert(0 && "Unknown mismatch!");
+  }
+
+  // Handle a multi-element vector.
+  MVT::ValueType IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs =
+    DAG.getTargetLoweringInfo()
+      .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+                              RegisterVT);
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+  assert(RegisterVT == Parts[0].getValueType() &&
+         "Part type doesn't match part!");
+
+  // Assemble the parts into intermediate operands.
+  SmallVector<SDOperand, 8> Ops(NumIntermediates);
+  if (NumIntermediates == NumParts) {
+    // If the register was not expanded, truncate or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      Ops[i] = getCopyFromParts(DAG, &Parts[i], 1,
+                                PartVT, IntermediateVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, build the intermediate operands
+    // from the parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      Ops[i] = getCopyFromParts(DAG, &Parts[i * Factor], Factor,
+                                PartVT, IntermediateVT);
+  }
+  
+  // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
+  // operands.
+  return DAG.getNode(MVT::isVector(IntermediateVT) ?
+                       ISD::CONCAT_VECTORS :
+                       ISD::BUILD_VECTOR,
+                     ValueVT, &Ops[0], NumIntermediates);
+}
+
+/// getCopyToParts - Create a series of nodes that contain the
+/// specified value split into legal parts.
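+/// For example, an i64 value on a 32-bit target is split into two i32 parts
+/// with EXTRACT_ELEMENT (the parts are stored high part first on big-endian
+/// targets), while an i16 value promoted to a single i32 part is widened with
+/// ANY_EXTEND.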
+static void getCopyToParts(SelectionDAG &DAG,
+                           SDOperand Val,
+                           SDOperand *Parts,
+                           unsigned NumParts,
+                           MVT::ValueType PartVT) {
+  MVT::ValueType ValueVT = Val.getValueType();
+
+  if (!MVT::isVector(ValueVT) || NumParts == 1) {
+    // If the value was expanded, copy from the parts.
+    if (NumParts > 1) {
+      for (unsigned i = 0; i != NumParts; ++i)
+        Parts[i] = DAG.getNode(ISD::EXTRACT_ELEMENT, PartVT, Val,
+                               DAG.getConstant(i, MVT::i32));
+      if (!DAG.getTargetLoweringInfo().isLittleEndian())
+        std::reverse(Parts, Parts + NumParts);
+      return;
+    }
+
+    // If there is a single part and the types differ, this must be
+    // a promotion.
+    if (PartVT != ValueVT) {
+      if (MVT::isVector(PartVT)) {
+        assert(MVT::isVector(ValueVT) &&
+               "Not a vector-vector cast?");
+        Val = DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+      } else if (MVT::isInteger(PartVT) && MVT::isInteger(ValueVT)) {
+        if (PartVT < ValueVT)
+          Val = DAG.getNode(ISD::TRUNCATE, PartVT, Val);
+        else
+          Val = DAG.getNode(ISD::ANY_EXTEND, PartVT, Val);
+      } else if (MVT::isFloatingPoint(PartVT) &&
+                 MVT::isFloatingPoint(ValueVT)) {
+        Val = DAG.getNode(ISD::FP_EXTEND, PartVT, Val);
+      } else if (MVT::getSizeInBits(PartVT) == 
+                 MVT::getSizeInBits(ValueVT)) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+      } else {
+        assert(0 && "Unknown mismatch!");
+      }
+    }
+    Parts[0] = Val;
+    return;
+  }
+
+  // Handle a multi-element vector.
+  MVT::ValueType IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs =
+    DAG.getTargetLoweringInfo()
+      .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+                              RegisterVT);
+  unsigned NumElements = MVT::getVectorNumElements(ValueVT);
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+  // Split the vector into intermediate operands.
+  SmallVector<SDOperand, 8> Ops(NumIntermediates);
+  for (unsigned i = 0; i != NumIntermediates; ++i)
+    if (MVT::isVector(IntermediateVT))
+      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR,
+                           IntermediateVT, Val,
+                           DAG.getConstant(i * (NumElements / NumIntermediates),
+                                           MVT::i32));
+    else
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                           IntermediateVT, Val, 
+                           DAG.getConstant(i, MVT::i32));
+
+  // Split the intermediate operands into legal parts.
+  if (NumParts == NumIntermediates) {
+    // If the register was not expanded, promote or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      getCopyToParts(DAG, Ops[i], &Parts[i], 1, PartVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each intermediate value
+    // into legal parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      getCopyToParts(DAG, Ops[i], &Parts[i * Factor], Factor, PartVT);
+  }
+}
+
+
+SDOperand SelectionDAGLowering::getValue(const Value *V) {
+  SDOperand &N = NodeMap[V];
+  if (N.Val) return N;
+  
+  const Type *VTy = V->getType();
+  MVT::ValueType VT = TLI.getValueType(VTy);
+  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      visit(CE->getOpcode(), *CE);
+      SDOperand N1 = NodeMap[V];
+      assert(N1.Val && "visit didn't populate the ValueMap!");
+      return N1;
+    } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
+      return N = DAG.getGlobalAddress(GV, VT);
+    } else if (isa<ConstantPointerNull>(C)) {
+      return N = DAG.getConstant(0, TLI.getPointerTy());
+    } else if (isa<UndefValue>(C)) {
+      if (!isa<VectorType>(VTy))
+        return N = DAG.getNode(ISD::UNDEF, VT);
+
+      // Create a BUILD_VECTOR of undef nodes.
+      const VectorType *PTy = cast<VectorType>(VTy);
+      unsigned NumElements = PTy->getNumElements();
+      MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
+
+      SmallVector<SDOperand, 8> Ops;
+      Ops.assign(NumElements, DAG.getNode(ISD::UNDEF, PVT));
+      
+      // Create a BUILD_VECTOR node with the appropriate vector type.
+      MVT::ValueType VT = MVT::getVectorType(PVT, NumElements);
+      return N = DAG.getNode(ISD::BUILD_VECTOR, VT,
+                             &Ops[0], Ops.size());
+    } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+      return N = DAG.getConstantFP(CFP->getValue(), VT);
+    } else if (const VectorType *PTy = dyn_cast<VectorType>(VTy)) {
+      unsigned NumElements = PTy->getNumElements();
+      MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
+      
+      // Now that we know the number and type of the elements, push a
+      // Constant or ConstantFP node onto the ops list for each element of
+      // the vector constant.
+      SmallVector<SDOperand, 8> Ops;
+      if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+        for (unsigned i = 0; i != NumElements; ++i)
+          Ops.push_back(getValue(CP->getOperand(i)));
+      } else {
+        assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+        SDOperand Op;
+        if (MVT::isFloatingPoint(PVT))
+          Op = DAG.getConstantFP(0, PVT);
+        else
+          Op = DAG.getConstant(0, PVT);
+        Ops.assign(NumElements, Op);
+      }
+      
+      // Create a BUILD_VECTOR node.
+      MVT::ValueType VT = MVT::getVectorType(PVT, NumElements);
+      return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0],
+                                      Ops.size());
+    } else {
+      // Canonicalize all constant ints to be unsigned.
+      return N = DAG.getConstant(cast<ConstantInt>(C)->getZExtValue(),VT);
+    }
+  }
+      
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    std::map<const AllocaInst*, int>::iterator SI =
+    FuncInfo.StaticAllocaMap.find(AI);
+    if (SI != FuncInfo.StaticAllocaMap.end())
+      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+  }
+      
+  unsigned InReg = FuncInfo.ValueMap[V];
+  assert(InReg && "Value not in map!");
+  
+  MVT::ValueType RegisterVT = TLI.getRegisterType(VT);
+  unsigned NumRegs = TLI.getNumRegisters(VT);
+
+  std::vector<unsigned> Regs(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i)
+    Regs[i] = InReg + i;
+
+  RegsForValue RFV(Regs, RegisterVT, VT);
+  SDOperand Chain = DAG.getEntryNode();
+
+  return RFV.getCopyFromRegs(DAG, Chain, NULL);
+}
+
+
+void SelectionDAGLowering::visitRet(ReturnInst &I) {
+  if (I.getNumOperands() == 0) {
+    DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot()));
+    return;
+  }
+  SmallVector<SDOperand, 8> NewValues;
+  NewValues.push_back(getRoot());
+  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+    SDOperand RetOp = getValue(I.getOperand(i));
+    
+    // If this is an integer return value, we need to promote it ourselves to
+    // the full width of a register, since getCopyToParts and Legalize will use
+    // ANY_EXTEND rather than sign/zero.
+    // FIXME: C calling convention requires the return type to be promoted to
+    // at least 32-bit. But this is not necessary for non-C calling conventions.
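+    // For example, an i8 or i16 return value is extended here to i32 (or to
+    // whatever i32 is transformed to), using SIGN_EXTEND or ZERO_EXTEND when
+    // the sext/zext parameter attribute is present on the return value, and
+    // ANY_EXTEND otherwise.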
+    if (MVT::isInteger(RetOp.getValueType()) && 
+        RetOp.getValueType() < MVT::i64) {
+      MVT::ValueType TmpVT;
+      if (TLI.getTypeAction(MVT::i32) == TargetLowering::Promote)
+        TmpVT = TLI.getTypeToTransformTo(MVT::i32);
+      else
+        TmpVT = MVT::i32;
+      const FunctionType *FTy = I.getParent()->getParent()->getFunctionType();
+      const ParamAttrsList *Attrs = FTy->getParamAttrs();
+      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+      if (Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt))
+        ExtendKind = ISD::SIGN_EXTEND;
+      if (Attrs && Attrs->paramHasAttr(0, ParamAttr::ZExt))
+        ExtendKind = ISD::ZERO_EXTEND;
+      RetOp = DAG.getNode(ExtendKind, TmpVT, RetOp);
+      NewValues.push_back(RetOp);
+      NewValues.push_back(DAG.getConstant(false, MVT::i32));
+    } else {
+      MVT::ValueType VT = RetOp.getValueType();
+      unsigned NumParts = TLI.getNumRegisters(VT);
+      MVT::ValueType PartVT = TLI.getRegisterType(VT);
+      SmallVector<SDOperand, 4> Parts(NumParts);
+      getCopyToParts(DAG, RetOp, &Parts[0], NumParts, PartVT);
+      for (unsigned i = 0; i < NumParts; ++i) {
+        NewValues.push_back(Parts[i]);
+        NewValues.push_back(DAG.getConstant(false, MVT::i32));
+      }
+    }
+  }
+  DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other,
+                          &NewValues[0], NewValues.size()));
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
+  // No need to export constants.
+  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+  
+  // Already exported?
+  if (FuncInfo.isExportedInst(V)) return;
+
+  unsigned Reg = FuncInfo.InitializeRegForValue(V);
+  PendingLoads.push_back(CopyValueToVirtualRegister(V, Reg));
+}
+
+bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V,
+                                                    const BasicBlock *FromBB) {
+  // The operands of the setcc have to be in this block.  We don't know
+  // how to export them from some other block.
+  if (Instruction *VI = dyn_cast<Instruction>(V)) {
+    // Can export from current BB.
+    if (VI->getParent() == FromBB)
+      return true;
+    
+    // Is already exported, noop.
+    return FuncInfo.isExportedInst(V);
+  }
+  
+  // If this is an argument, we can export it if the BB is the entry block or
+  // if it is already exported.
+  if (isa<Argument>(V)) {
+    if (FromBB == &FromBB->getParent()->getEntryBlock())
+      return true;
+
+    // Otherwise, can only export this if it is already exported.
+    return FuncInfo.isExportedInst(V);
+  }
+  
+  // Otherwise, constants can always be exported.
+  return true;
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getParent() == BB;
+  return true;
+}
+
+/// FindMergedConditions - If Cond is an and/or expression matching Opc,
+/// recursively split it into a sequence of conditional branches, one per leaf
+/// condition; otherwise record Cond as a single CaseBlock branching to
+/// TBB/FBB.
+void SelectionDAGLowering::FindMergedConditions(Value *Cond,
+                                                MachineBasicBlock *TBB,
+                                                MachineBasicBlock *FBB,
+                                                MachineBasicBlock *CurBB,
+                                                unsigned Opc) {
+  // If this node is not part of the or/and tree, emit it as a branch.
+  Instruction *BOp = dyn_cast<Instruction>(Cond);
+
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || 
+      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    const BasicBlock *BB = CurBB->getBasicBlock();
+    
+    // If the leaf of the tree is a comparison, merge the condition into 
+    // the caseblock.
+    if ((isa<ICmpInst>(Cond) || isa<FCmpInst>(Cond)) &&
+        // The operands of the cmp have to be in this block.  We don't know
+        // how to export them from some other block.  If this is the first block
+        // of the sequence, no exporting is needed.
+        (CurBB == CurMBB ||
+         (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+          isExportableFromCurrentBlock(BOp->getOperand(1), BB)))) {
+      BOp = cast<Instruction>(Cond);
+      ISD::CondCode Condition;
+      if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+        switch (IC->getPredicate()) {
+        default: assert(0 && "Unknown icmp predicate opcode!");
+        case ICmpInst::ICMP_EQ:  Condition = ISD::SETEQ;  break;
+        case ICmpInst::ICMP_NE:  Condition = ISD::SETNE;  break;
+        case ICmpInst::ICMP_SLE: Condition = ISD::SETLE;  break;
+        case ICmpInst::ICMP_ULE: Condition = ISD::SETULE; break;
+        case ICmpInst::ICMP_SGE: Condition = ISD::SETGE;  break;
+        case ICmpInst::ICMP_UGE: Condition = ISD::SETUGE; break;
+        case ICmpInst::ICMP_SLT: Condition = ISD::SETLT;  break;
+        case ICmpInst::ICMP_ULT: Condition = ISD::SETULT; break;
+        case ICmpInst::ICMP_SGT: Condition = ISD::SETGT;  break;
+        case ICmpInst::ICMP_UGT: Condition = ISD::SETUGT; break;
+        }
+      } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+        ISD::CondCode FPC, FOC;
+        switch (FC->getPredicate()) {
+        default: assert(0 && "Unknown fcmp predicate opcode!");
+        case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+        case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+        case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+        case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+        case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+        case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+        case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+        case FCmpInst::FCMP_ORD:   FOC = ISD::SETEQ; FPC = ISD::SETO;   break;
+        case FCmpInst::FCMP_UNO:   FOC = ISD::SETNE; FPC = ISD::SETUO;  break;
+        case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+        case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+        case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+        case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+        case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+        case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+        case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
+        }
+        if (FiniteOnlyFPMath())
+          Condition = FOC;
+        else 
+          Condition = FPC;
+      } else {
+        Condition = ISD::SETEQ; // silence warning.
+        assert(0 && "Unknown compare instruction");
+      }
+      
+      SelectionDAGISel::CaseBlock CB(Condition, BOp->getOperand(0), 
+                                     BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+      SwitchCases.push_back(CB);
+      return;
+    }
+    
+    // Create a CaseBlock record representing this branch.
+    SelectionDAGISel::CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(),
+                                   NULL, TBB, FBB, CurBB);
+    SwitchCases.push_back(CB);
+    return;
+  }
+  
+  
+  //  Create TmpBB after CurBB.
+  MachineFunction::iterator BBI = CurBB;
+  MachineBasicBlock *TmpBB = new MachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->getBasicBlockList().insert(++BBI, TmpBB);
+  
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+  
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+  
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    //  This requires creation of TmpBB after CurBB.
+    
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+    
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+static bool 
+ShouldEmitAsBranches(const std::vector<SelectionDAGISel::CaseBlock> &Cases) {
+  if (Cases.size() != 2) return true;
+  
+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+    return false;
+  }
+  
+  return true;
+}
+
+void SelectionDAGLowering::visitBr(BranchInst &I) {
+  // Update machine-CFG edges.
+  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  if (I.isUnconditional()) {
+    // If this is not a fall-through branch, emit the branch.
+    if (Succ0MBB != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+                              DAG.getBasicBlock(Succ0MBB)));
+
+    // Update machine-CFG edges.
+    CurMBB->addSuccessor(Succ0MBB);
+
+    return;
+  }
+
+  // If this condition is one of the special cases we handle, do special stuff
+  // now.
+  Value *CondVal = I.getCondition();
+  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  // If this is a series of conditions that are or'd or and'd together, emit
+  // this as a sequence of branches instead of setcc's with and/or operations.
+  // For example, instead of something like:
+  //     cmp A, B
+  //     C = seteq 
+  //     cmp D, E
+  //     F = setle 
+  //     or C, F
+  //     jnz foo
+  // Emit:
+  //     cmp A, B
+  //     je foo
+  //     cmp D, E
+  //     jle foo
+  //
+  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+    if (BOp->hasOneUse() && 
+        (BOp->getOpcode() == Instruction::And ||
+         BOp->getOpcode() == Instruction::Or)) {
+      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+      // If the compares in later blocks need to use values not currently
+      // exported from this block, export them now.  This block should always
+      // be the first entry.
+      assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+      
+      // Allow some cases to be rejected.
+      if (ShouldEmitAsBranches(SwitchCases)) {
+        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+        }
+        
+        // Emit the branch for this block.
+        visitSwitchCase(SwitchCases[0]);
+        SwitchCases.erase(SwitchCases.begin());
+        return;
+      }
+      
+      // Okay, we decided not to do this, remove any inserted MBB's and clear
+      // SwitchCases.
+      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+        CurMBB->getParent()->getBasicBlockList().erase(SwitchCases[i].ThisBB);
+      
+      SwitchCases.clear();
+    }
+  }
+  
+  // Create a CaseBlock record representing this branch.
+  SelectionDAGISel::CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(),
+                                 NULL, Succ0MBB, Succ1MBB, CurMBB);
+  // Use visitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGLowering::visitSwitchCase(SelectionDAGISel::CaseBlock &CB) {
+  SDOperand Cond;
+  SDOperand CondLHS = getValue(CB.CmpLHS);
+  
+  // Build the setcc now. 
+  if (CB.CmpMHS == NULL) {
+    // Fold "(X == true)" to X and "(X == false)" to !X to
+    // handle common cases produced by branch lowering.
+    if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ)
+      Cond = CondLHS;
+    else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) {
+      SDOperand True = DAG.getConstant(1, CondLHS.getValueType());
+      Cond = DAG.getNode(ISD::XOR, CondLHS.getValueType(), CondLHS, True);
+    } else
+      Cond = DAG.getSetCC(MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+  } else {
+    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+    uint64_t Low = cast<ConstantInt>(CB.CmpLHS)->getSExtValue();
+    uint64_t High  = cast<ConstantInt>(CB.CmpRHS)->getSExtValue();
+
+    SDOperand CmpOp = getValue(CB.CmpMHS);
+    MVT::ValueType VT = CmpOp.getValueType();
+
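+    // A check for Low <= X <= High is folded into a single unsigned
+    // comparison: (X - Low) <=u (High - Low), relying on unsigned wraparound
+    // to reject values below Low.  For example, the range [5, 8] becomes
+    // (X - 5) <=u 3.  When Low is the minimum signed value the subtraction is
+    // unnecessary and a plain signed X <= High suffices.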
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+      Cond = DAG.getSetCC(MVT::i1, CmpOp, DAG.getConstant(High, VT), ISD::SETLE);
+    } else {
+      SDOperand SUB = DAG.getNode(ISD::SUB, VT, CmpOp, DAG.getConstant(Low, VT));
+      Cond = DAG.getSetCC(MVT::i1, SUB,
+                          DAG.getConstant(High-Low, VT), ISD::SETULE);
+    }
+    
+  }
+  
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+  
+  // If the lhs block is the next block, invert the condition so that we can
+  // fall through to the lhs instead of the rhs block.
+  if (CB.TrueBB == NextBlock) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    SDOperand True = DAG.getConstant(1, Cond.getValueType());
+    Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
+  }
+  SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), Cond,
+                                 DAG.getBasicBlock(CB.TrueBB));
+  if (CB.FalseBB == NextBlock)
+    DAG.setRoot(BrCond);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond, 
+                            DAG.getBasicBlock(CB.FalseBB)));
+  // Update successor info
+  CurMBB->addSuccessor(CB.TrueBB);
+  CurMBB->addSuccessor(CB.FalseBB);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGLowering::visitJumpTable(SelectionDAGISel::JumpTable &JT) {
+  // Emit the code for the jump table
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  MVT::ValueType PTy = TLI.getPointerTy();
+  SDOperand Index = DAG.getCopyFromReg(getRoot(), JT.Reg, PTy);
+  SDOperand Table = DAG.getJumpTable(JT.JTI, PTy);
+  DAG.setRoot(DAG.getNode(ISD::BR_JT, MVT::Other, Index.getValue(1),
+                          Table, Index));
+  return;
+}
+
+/// visitJumpTableHeader - This function emits the code that computes the jump
+/// table index from the switch case value.
+void SelectionDAGLowering::visitJumpTableHeader(SelectionDAGISel::JumpTable &JT,
+                                         SelectionDAGISel::JumpTableHeader &JTH) {
+  // Subtract the lowest switch case value from the value being switched on,
+  // and conditionally branch to the default MBB if the result is greater than
+  // the difference between the smallest and largest case values.
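+  // For example, for a switch over the case values 100..115 this emits
+  //   Index = X - 100;  if (Index >u 15) goto default;
+  // and copies the index into a virtual register for use by the jump table
+  // block.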
+  SDOperand SwitchOp = getValue(JTH.SValue);
+  MVT::ValueType VT = SwitchOp.getValueType();
+  SDOperand SUB = DAG.getNode(ISD::SUB, VT, SwitchOp,
+                              DAG.getConstant(JTH.First, VT));
+  
+  // The SDNode we just created, which holds the value being switched on
+  // minus the smallest case value, needs to be copied to a virtual register
+  // so it can be used as an index into the jump table in a subsequent basic
+  // block.  This value may be smaller or larger than the target's pointer
+  // type, and therefore may require extension or truncation.
+  if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(TLI.getPointerTy()))
+    SwitchOp = DAG.getNode(ISD::TRUNCATE, TLI.getPointerTy(), SUB);
+  else
+    SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(), SUB);
+  
+  unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDOperand CopyTo = DAG.getCopyToReg(getRoot(), JumpTableReg, SwitchOp);
+  JT.Reg = JumpTableReg;
+
+  // Emit the range check for the jump table, and branch to the default
+  // block for the switch statement if the value being switched on exceeds
+  // the largest case in the switch.
+  SDOperand CMP = DAG.getSetCC(TLI.getSetCCResultTy(), SUB,
+                               DAG.getConstant(JTH.Last-JTH.First,VT),
+                               ISD::SETUGT);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, CopyTo, CMP,
+                                 DAG.getBasicBlock(JT.Default));
+
+  if (JT.MBB == NextBlock)
+    DAG.setRoot(BrCond);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond, 
+                            DAG.getBasicBlock(JT.MBB)));
+
+  return;
+}
+
+/// visitBitTestHeader - This function emits the code that produces the value
+/// used by the subsequent "bit test" blocks.
+void SelectionDAGLowering::visitBitTestHeader(SelectionDAGISel::BitTestBlock &B) {
+  // Subtract the minimum value
+  SDOperand SwitchOp = getValue(B.SValue);
+  MVT::ValueType VT = SwitchOp.getValueType();
+  SDOperand SUB = DAG.getNode(ISD::SUB, VT, SwitchOp,
+                              DAG.getConstant(B.First, VT));
+
+  // Check range
+  SDOperand RangeCmp = DAG.getSetCC(TLI.getSetCCResultTy(), SUB,
+                                    DAG.getConstant(B.Range, VT),
+                                    ISD::SETUGT);
+
+  SDOperand ShiftOp;
+  if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(TLI.getShiftAmountTy()))
+    ShiftOp = DAG.getNode(ISD::TRUNCATE, TLI.getShiftAmountTy(), SUB);
+  else
+    ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, TLI.getShiftAmountTy(), SUB);
+
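+  // The bit test strategy materializes 1 << (X - First) once here, and each
+  // subsequent bit-test block simply ANDs it with a precomputed mask of case
+  // values.  For example, with First == 10, the cases {10, 12, 15} targeting
+  // one block are tested with the single mask 0b100101.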
+  // Make desired shift
+  SDOperand SwitchVal = DAG.getNode(ISD::SHL, TLI.getPointerTy(),
+                                    DAG.getConstant(1, TLI.getPointerTy()),
+                                    ShiftOp);
+
+  unsigned SwitchReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDOperand CopyTo = DAG.getCopyToReg(getRoot(), SwitchReg, SwitchVal);
+  B.Reg = SwitchReg;
+
+  SDOperand BrRange = DAG.getNode(ISD::BRCOND, MVT::Other, CopyTo, RangeCmp,
+                                  DAG.getBasicBlock(B.Default));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+  if (MBB == NextBlock)
+    DAG.setRoot(BrRange);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, CopyTo,
+                            DAG.getBasicBlock(MBB)));
+
+  CurMBB->addSuccessor(B.Default);
+  CurMBB->addSuccessor(MBB);
+
+  return;
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
+                                            unsigned Reg,
+                                            SelectionDAGISel::BitTestCase &B) {
+  // Emit bit tests and jumps
+  SDOperand SwitchVal = DAG.getCopyFromReg(getRoot(), Reg, TLI.getPointerTy());
+  
+  SDOperand AndOp = DAG.getNode(ISD::AND, TLI.getPointerTy(),
+                                SwitchVal,
+                                DAG.getConstant(B.Mask,
+                                                TLI.getPointerTy()));
+  SDOperand AndCmp = DAG.getSetCC(TLI.getSetCCResultTy(), AndOp,
+                                  DAG.getConstant(0, TLI.getPointerTy()),
+                                  ISD::SETNE);
+  SDOperand BrAnd = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
+                                AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  if (NextMBB == NextBlock)
+    DAG.setRoot(BrAnd);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrAnd,
+                            DAG.getBasicBlock(NextMBB)));
+
+  CurMBB->addSuccessor(B.TargetBB);
+  CurMBB->addSuccessor(NextMBB);
+
+  return;
+}
+
+void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
+  // Retrieve successors.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  LowerCallTo(I, I.getCalledValue()->getType(),
+              I.getCallingConv(),
+              false,
+              getValue(I.getOperand(0)),
+              3, LandingPad);
+
+  // If the value of the invoke is used outside of its defining block, make it
+  // available as a virtual register.
+  if (!I.use_empty()) {
+    DenseMap<const Value*, unsigned>::iterator VMI = FuncInfo.ValueMap.find(&I);
+    if (VMI != FuncInfo.ValueMap.end())
+      DAG.setRoot(CopyValueToVirtualRegister(&I, VMI->second));
+  }
+
+  // Drop into normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+                          DAG.getBasicBlock(Return)));
+
+  // Update successor info
+  CurMBB->addSuccessor(Return);
+  CurMBB->addSuccessor(LandingPad);
+}
+
+void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific comparisons, suitable
+/// for small case ranges.
+bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
+                                                  CaseRecVector& WorkList,
+                                                  Value* SV,
+                                                  MachineBasicBlock* Default) {
+  Case& BackCase  = *(CR.Range.second-1);
+  
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;  
+  
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();  
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  // TODO: If any two of the cases have the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once.  For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+    
+  // Rearrange the case blocks so that the last one falls through if possible.
+  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+    // The last case block won't fall through into 'NextBlock' if we emit the
+    // branches in this order.  See if rearranging a case value would help.
+    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+      if (I->BB == NextBlock) {
+        std::swap(*I, BackCase);
+        break;
+      }
+    }
+  }
+  
+  // Create a CaseBlock record representing a conditional branch to
+  // the Case's target mbb if the value being switched on (SV) is equal
+  // to C.
+  MachineBasicBlock *CurBlock = CR.CaseBB;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock *FallThrough;
+    if (I != E-1) {
+      FallThrough = new MachineBasicBlock(CurBlock->getBasicBlock());
+      CurMF->getBasicBlockList().insert(BBI, FallThrough);
+    } else {
+      // If the last case doesn't match, go to the default block.
+      FallThrough = Default;
+    }
+
+    Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range containing exactly one case.
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETLE;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
+    SelectionDAGISel::CaseBlock CB(CC, LHS, RHS, MHS,
+                                   I->BB, FallThrough, CurBlock);
+    
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block.  Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == CurMBB)
+      visitSwitchCase(CB);
+    else
+      SwitchCases.push_back(CB);
+    
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return (TLI.isOperationLegal(ISD::BR_JT, MVT::Other) ||
+          TLI.isOperationLegal(ISD::BRIND, MVT::Other));
+}
+  
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
+                                              CaseRecVector& WorkList,
+                                              Value* SV,
+                                              MachineBasicBlock* Default) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  int64_t First = cast<ConstantInt>(FrontCase.Low)->getSExtValue();
+  int64_t Last  = cast<ConstantInt>(BackCase.High)->getSExtValue();
+
+  uint64_t TSize = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize <= 3)
+    return false;
+  
+  double Density = (double)TSize / (double)((Last - First) + 1ULL);  
+  if (Density < 0.4)
+    return false;
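+  // For example (illustrative): the cases {0, 1, 2, 100} give TSize = 4 over
+  // a span of 101 values, so Density ~= 0.04 and no jump table is emitted;
+  // a dense set such as {0 .. 9} gives Density = 1.0 and passes this check.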
+
+  DOUT << "Lowering jump table\n"
+       << "First entry: " << First << ". Last entry: " << Last << "\n"
+       << "Size: " << TSize << ". Density: " << Density << "\n\n";
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Create a new basic block to hold the code for loading the address
+  // of the jump table, and jumping to it.  Update successor information;
+  // we will either branch to the default case for the switch, or the jump
+  // table.
+  MachineBasicBlock *JumpTableBB = new MachineBasicBlock(LLVMBB);
+  CurMF->getBasicBlockList().insert(BBI, JumpTableBB);
+  CR.CaseBB->addSuccessor(Default);
+  CR.CaseBB->addSuccessor(JumpTableBB);
+                
+  // Build a vector of destination BBs, corresponding to each target
+  // of the jump table. If the value of the jump table slot corresponds to
+  // a case statement, push the case's BB onto the vector, otherwise, push
+  // the default BB.
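+  // For example (illustrative): the cases 1 -> A and 3 -> B over the range
+  // [1, 3] produce the three slots {A, Default, B}.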
+  std::vector<MachineBasicBlock*> DestBBs;
+  int64_t TEI = First;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+    int64_t Low = cast<ConstantInt>(I->Low)->getSExtValue();
+    int64_t High = cast<ConstantInt>(I->High)->getSExtValue();
+    
+    if ((Low <= TEI) && (TEI <= High)) {
+      DestBBs.push_back(I->BB);
+      if (TEI==High)
+        ++I;
+    } else {
+      DestBBs.push_back(Default);
+    }
+  }
+  
+  // Update successor info. Add one edge to each unique successor.
+  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());  
+  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), 
+         E = DestBBs.end(); I != E; ++I) {
+    if (!SuccsHandled[(*I)->getNumber()]) {
+      SuccsHandled[(*I)->getNumber()] = true;
+      JumpTableBB->addSuccessor(*I);
+    }
+  }
+      
+  // Create a jump table index for this jump table, or return an existing
+  // one.
+  unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
+  
+  // Set the jump table information so that we can codegen it as a second
+  // MachineBasicBlock
+  SelectionDAGISel::JumpTable JT(-1U, JTI, JumpTableBB, Default);
+  SelectionDAGISel::JumpTableHeader JTH(First, Last, SV, CR.CaseBB,
+                                        (CR.CaseBB == CurMBB));
+  if (CR.CaseBB == CurMBB)
+    visitJumpTableHeader(JT, JTH);
+        
+  JTCases.push_back(SelectionDAGISel::JumpTableBlock(JTH, JT));
+
+  return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   Value* SV,
+                                                   MachineBasicBlock* Default) {
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();  
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+
+  int64_t First = cast<ConstantInt>(FrontCase.Low)->getSExtValue();
+  int64_t Last  = cast<ConstantInt>(BackCase.High)->getSExtValue();
+  double FMetric = 0;
+  CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select the optimal pivot, maximizing the summed density of the LHS and
+  // RHS halves. This will (heuristically) allow us to emit jump tables later.
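+  // For example (illustrative): for the cases {0, 1, 2, 100, 101}, splitting
+  // at the gap between 2 and 100 gives LDensity = RDensity = 1.0 and a gap
+  // factor of Log2_64(98) = 6, so Metric = 12; every other split scores
+  // lower, and the pivot lands on the large hole in the value range.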
+  uint64_t TSize = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  uint64_t LSize = FrontCase.size();
+  uint64_t RSize = TSize-LSize;
+  DOUT << "Selecting best pivot: \n"
+       << "First: " << First << ", Last: " << Last <<"\n"
+       << "LSize: " << LSize << ", RSize: " << RSize << "\n";
+  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+       J!=E; ++I, ++J) {
+    int64_t LEnd = cast<ConstantInt>(I->High)->getSExtValue();
+    int64_t RBegin = cast<ConstantInt>(J->Low)->getSExtValue();
+    assert((RBegin-LEnd>=1) && "Invalid case distance");
+    double LDensity = (double)LSize / (double)((LEnd - First) + 1ULL);
+    double RDensity = (double)RSize / (double)((Last - RBegin) + 1ULL);
+    double Metric = Log2_64(RBegin-LEnd)*(LDensity+RDensity);
+    // Should always split in some non-trivial place
+    DOUT <<"=>Step\n"
+         << "LEnd: " << LEnd << ", RBegin: " << RBegin << "\n"
+         << "LDensity: " << LDensity << ", RDensity: " << RDensity << "\n"
+         << "Metric: " << Metric << "\n"; 
+    if (FMetric < Metric) {
+      Pivot = J;
+      FMetric = Metric;
+      DOUT << "Current metric set to: " << FMetric << "\n";
+    }
+
+    LSize += J->size();
+    RSize -= J->size();
+  }
+  if (areJTsAllowed(TLI)) {
+    // If our case is dense we *really* should handle it earlier!
+    assert((FMetric > 0) && "Should handle dense range earlier!");
+  } else {
+    Pivot = CR.Range.first + Size/2;
+  }
+  
+  CaseRange LHSR(CR.Range.first, Pivot);
+  CaseRange RHSR(Pivot, CR.Range.second);
+  Constant *C = Pivot->Low;
+  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+      
+  // We know that we branch to the LHS if the Value being switched on is
+  // less than the Pivot value, C.  We use this to optimize our binary 
+  // tree a bit, by recognizing that if SV is greater than or equal to the
+  // LHS's Case Value, and that Case Value is exactly one less than the 
+  // Pivot's Value, then we can branch directly to the LHS's Target,
+  // rather than creating a leaf node for it.
+  if ((LHSR.second - LHSR.first) == 1 &&
+      LHSR.first->High == CR.GE &&
+      cast<ConstantInt>(C)->getSExtValue() ==
+      (cast<ConstantInt>(CR.GE)->getSExtValue() + 1LL)) {
+    TrueBB = LHSR.first->BB;
+  } else {
+    TrueBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, TrueBB);
+    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+  }
+  
+  // Similar to the optimization above, if the Value being switched on is
+  // known to be less than the Constant CR.LT, and the current Case Value
+  // is CR.LT - 1, then we can branch directly to the target block for
+  // the current Case Value, rather than emitting a RHS leaf node for it.
+  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+      cast<ConstantInt>(RHSR.first->Low)->getSExtValue() ==
+      (cast<ConstantInt>(CR.LT)->getSExtValue() - 1LL)) {
+    FalseBB = RHSR.first->BB;
+  } else {
+    FalseBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, FalseBB);
+    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on (SV) is less than C.
+  // Otherwise, branch to the RHS node.
+  SelectionDAGISel::CaseBlock CB(ISD::SETLT, SV, C, NULL,
+                                 TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == CurMBB)
+    visitSwitchCase(CB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - if the current case range has few destinations
+/// and its span is less than the machine word bitwidth, encode the case range
+/// into a series of masks and emit bit tests with these masks.
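+///
+/// For example (illustrative): with a 32-bit word, the cases {0,2,4,6} -> A
+/// and {1,3,5} -> B become the masks 0x55 for A and 0x2A for B, so each
+/// destination is reached through a single AND-and-compare instead of a
+/// chain of individual comparisons.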
+bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
+                                                    CaseRecVector& WorkList,
+                                                    Value* SV,
+                                                    MachineBasicBlock* Default){
+  unsigned IntPtrBits = MVT::getSizeInBits(TLI.getPointerTy());
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();  
+
+  unsigned numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // A single case counts as one comparison, a case range as two.
+    if (I->Low == I->High)
+      numCmps +=1;
+    else
+      numCmps +=2;
+  }
+    
+  // Count unique destinations
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique destinations.
+      return false;
+  }
+  DOUT << "Total number of unique destinations: " << Dests.size() << "\n"
+       << "Total number of comparisons: " << numCmps << "\n";
+  
+  // Compute span of values.
+  Constant* minValue = FrontCase.Low;
+  Constant* maxValue = BackCase.High;
+  uint64_t range = cast<ConstantInt>(maxValue)->getSExtValue() -
+                   cast<ConstantInt>(minValue)->getSExtValue();
+  DOUT << "Compare range: " << range << "\n"
+       << "Low bound: " << cast<ConstantInt>(minValue)->getSExtValue() << "\n"
+       << "High bound: " << cast<ConstantInt>(maxValue)->getSExtValue() << "\n";
+  
+  if (range>=IntPtrBits ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+  
+  DOUT << "Emitting bit tests\n";
+  int64_t lowBound = 0;
+    
+  // If all the case values already fit in a machine word without subtracting
+  // minValue, we can skip the subtraction entirely.
+  if (cast<ConstantInt>(minValue)->getSExtValue() >= 0 &&
+      cast<ConstantInt>(maxValue)->getSExtValue() <  IntPtrBits) {
+    range = cast<ConstantInt>(maxValue)->getSExtValue();
+  } else {
+    lowBound = cast<ConstantInt>(minValue)->getSExtValue();
+  }
+    
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+    
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+    
+    uint64_t lo = cast<ConstantInt>(I->Low)->getSExtValue() - lowBound;
+    uint64_t hi = cast<ConstantInt>(I->High)->getSExtValue() - lowBound;
+    
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |=  1ULL << j;
+      CasesBits[i].Bits++;
+    }
+      
+  }
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+  
+  SelectionDAGISel::BitTestInfo BTC;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DOUT << "Cases:\n";
+  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+    DOUT << "Mask: " << CasesBits[i].Mask << ", Bits: " << CasesBits[i].Bits
+         << ", BB: " << CasesBits[i].BB << "\n";
+
+    MachineBasicBlock *CaseBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, CaseBB);
+    BTC.push_back(SelectionDAGISel::BitTestCase(CasesBits[i].Mask,
+                                                CaseBB,
+                                                CasesBits[i].BB));
+  }
+  
+  SelectionDAGISel::BitTestBlock BTB(lowBound, range, SV,
+                                     -1U, (CR.CaseBB == CurMBB),
+                                     CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == CurMBB)
+    visitBitTestHeader(BTB);
+  
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
+
+
+// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+unsigned SelectionDAGLowering::Clusterify(CaseVector& Cases,
+                                          const SwitchInst& SI) {
+  unsigned numCmps = 0;
+
+  // Start with "simple" cases
+  for (unsigned i = 1; i < SI.getNumSuccessors(); ++i) {
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    Cases.push_back(Case(SI.getSuccessorValue(i),
+                         SI.getSuccessorValue(i),
+                         SMBB));
+  }
+  sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters.
+  if (Cases.size()>=2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it
+    for (CaseItr I=Cases.begin(), J=++(Cases.begin()); J!=Cases.end(); ) {
+      int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+      int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
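+      // For example (illustrative): the neighboring cases 3 -> BB1 and
+      // 4 -> BB1 collapse into the single range [3, 4] -> BB1.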
+      if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {  
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // If there is only the default destination, branch to it if it is not the
+  // next basic block.  Otherwise, just fall through.
+  if (SI.getNumOperands() == 2) {
+    // Update machine-CFG edges.
+
+    // If this is not a fall-through branch, emit the branch.
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+                              DAG.getBasicBlock(Default)));
+
+    CurMBB->addSuccessor(Default);
+    return;
+  }
+  
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  unsigned numCmps = Clusterify(Cases, SI);
+  DOUT << "Clusterify finished. Total clusters: " << Cases.size()
+       << ". Total compares: " << numCmps << "\n";
+
+  // Get the Value to be switched on and default basic blocks, which will be
+  // inserted into CaseBlock records, representing basic blocks in the binary
+  // search tree.
+  Value *SV = SI.getOperand(0);
+
+  // Push the initial CaseRec onto the worklist
+  CaseRecVector WorkList;
+  WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+  while (!WorkList.empty()) {
+    // Grab a record representing a case range to process off the worklist
+    CaseRec CR = WorkList.back();
+    WorkList.pop_back();
+
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+      continue;
+    
+    // If the range has few cases (three or fewer), emit a series of specific
+    // tests.
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+      continue;
+    
+    // If the switch covers more than 3 case values, is at least 40% dense,
+    // and the target supports indirect branches, then emit a jump table
+    // rather than lowering the switch to a binary tree of conditional branches.
+    if (handleJTSwitchCase(CR, WorkList, SV, Default))
+      continue;
+          
+    // Emit a binary tree. We need to pick a pivot and push left and right
+    // ranges onto the worklist. Leaves are handled via handleSmallSwitchRange().
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+  }
+}
+
+
+void SelectionDAGLowering::visitSub(User &I) {
+  // -0.0 - X --> fneg
+  const Type *Ty = I.getType();
+  if (isa<VectorType>(Ty)) {
+    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+      const VectorType *DestTy = cast<VectorType>(I.getType());
+      const Type *ElTy = DestTy->getElementType();
+      if (ElTy->isFloatingPoint()) {
+        unsigned VL = DestTy->getNumElements();
+        std::vector<Constant*> NZ(VL, ConstantFP::get(ElTy, -0.0));
+        Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+        if (CV == CNZ) {
+          SDOperand Op2 = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
+          return;
+        }
+      }
+    }
+  }
+  if (Ty->isFloatingPoint()) {
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+      if (CFP->isExactlyValue(-0.0)) {
+        SDOperand Op2 = getValue(I.getOperand(1));
+        setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
+        return;
+      }
+  }
+
+  visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB);
+}
+
+void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
+  SDOperand Op1 = getValue(I.getOperand(0));
+  SDOperand Op2 = getValue(I.getOperand(1));
+  
+  setValue(&I, DAG.getNode(OpCode, Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
+  SDOperand Op1 = getValue(I.getOperand(0));
+  SDOperand Op2 = getValue(I.getOperand(1));
+  
+  if (MVT::getSizeInBits(TLI.getShiftAmountTy()) <
+      MVT::getSizeInBits(Op2.getValueType()))
+    Op2 = DAG.getNode(ISD::TRUNCATE, TLI.getShiftAmountTy(), Op2);
+  else if (TLI.getShiftAmountTy() > Op2.getValueType())
+    Op2 = DAG.getNode(ISD::ANY_EXTEND, TLI.getShiftAmountTy(), Op2);
+  
+  setValue(&I, DAG.getNode(Opcode, Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitICmp(User &I) {
+  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+  if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+    predicate = IC->getPredicate();
+  else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+    predicate = ICmpInst::Predicate(IC->getPredicate());
+  SDOperand Op1 = getValue(I.getOperand(0));
+  SDOperand Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Opcode;
+  switch (predicate) {
+    case ICmpInst::ICMP_EQ  : Opcode = ISD::SETEQ; break;
+    case ICmpInst::ICMP_NE  : Opcode = ISD::SETNE; break;
+    case ICmpInst::ICMP_UGT : Opcode = ISD::SETUGT; break;
+    case ICmpInst::ICMP_UGE : Opcode = ISD::SETUGE; break;
+    case ICmpInst::ICMP_ULT : Opcode = ISD::SETULT; break;
+    case ICmpInst::ICMP_ULE : Opcode = ISD::SETULE; break;
+    case ICmpInst::ICMP_SGT : Opcode = ISD::SETGT; break;
+    case ICmpInst::ICMP_SGE : Opcode = ISD::SETGE; break;
+    case ICmpInst::ICMP_SLT : Opcode = ISD::SETLT; break;
+    case ICmpInst::ICMP_SLE : Opcode = ISD::SETLE; break;
+    default:
+      assert(!"Invalid ICmp predicate value");
+      Opcode = ISD::SETEQ;
+      break;
+  }
+  setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitFCmp(User &I) {
+  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+  if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+    predicate = FC->getPredicate();
+  else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+    predicate = FCmpInst::Predicate(FC->getPredicate());
+  SDOperand Op1 = getValue(I.getOperand(0));
+  SDOperand Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Condition, FOC, FPC;
+  switch (predicate) {
+    case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+    case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+    case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+    case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+    case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+    case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+    case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+    case FCmpInst::FCMP_ORD:   FOC = ISD::SETEQ; FPC = ISD::SETO;   break;
+    case FCmpInst::FCMP_UNO:   FOC = ISD::SETNE; FPC = ISD::SETUO;  break;
+    case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+    case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+    case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+    case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+    case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+    case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+    case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
+    default:
+      assert(!"Invalid FCmp predicate value");
+      FOC = FPC = ISD::SETFALSE;
+      break;
+  }
+  if (FiniteOnlyFPMath())
+    Condition = FOC;
+  else 
+    Condition = FPC;
+  setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitSelect(User &I) {
+  SDOperand Cond     = getValue(I.getOperand(0));
+  SDOperand TrueVal  = getValue(I.getOperand(1));
+  SDOperand FalseVal = getValue(I.getOperand(2));
+  setValue(&I, DAG.getNode(ISD::SELECT, TrueVal.getValueType(), Cond,
+                           TrueVal, FalseVal));
+}
+
+
+void SelectionDAGLowering::visitTrunc(User &I) {
+  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N));
+}
+
+void SelectionDAGLowering::visitZExt(User &I) {
+  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitSExt(User &I) {
+  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason. So, nothing much to do.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPTrunc(User &I) {
+  // FPTrunc is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPExt(User &I){ 
+  // FPExt is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToUI(User &I) { 
+  // FPToUI is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToSI(User &I) {
+  // FPToSI is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestVT, N));
+}
+
+void SelectionDAGLowering::visitUIToFP(User &I) { 
+  // UIToFP is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestVT, N));
+}
+
+void SelectionDAGLowering::visitSIToFP(User &I){ 
+  // SIToFP is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestVT, N));
+}
+
+void SelectionDAGLowering::visitPtrToInt(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType SrcVT = N.getValueType();
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  SDOperand Result;
+  if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT))
+    Result = DAG.getNode(ISD::TRUNCATE, DestVT, N);
+  else 
+    // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+    Result = DAG.getNode(ISD::ZERO_EXTEND, DestVT, N);
+  setValue(&I, Result);
+}
+
+void SelectionDAGLowering::visitIntToPtr(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType SrcVT = N.getValueType();
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT))
+    setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N));
+  else 
+    // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+    setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitBitCast(User &I) { 
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+
+  // BitCast assures us that source and destination are the same size so this 
+  // is either a BIT_CONVERT or a no-op.
+  if (DestVT != N.getValueType())
+    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, DestVT, N)); // convert types
+  else
+    setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGLowering::visitInsertElement(User &I) {
+  SDOperand InVec = getValue(I.getOperand(0));
+  SDOperand InVal = getValue(I.getOperand(1));
+  SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(),
+                                getValue(I.getOperand(2)));
+
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT,
+                           TLI.getValueType(I.getType()),
+                           InVec, InVal, InIdx));
+}
+
+void SelectionDAGLowering::visitExtractElement(User &I) {
+  SDOperand InVec = getValue(I.getOperand(0));
+  SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(),
+                                getValue(I.getOperand(1)));
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                           TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+void SelectionDAGLowering::visitShuffleVector(User &I) {
+  SDOperand V1   = getValue(I.getOperand(0));
+  SDOperand V2   = getValue(I.getOperand(1));
+  SDOperand Mask = getValue(I.getOperand(2));
+
+  setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE,
+                           TLI.getValueType(I.getType()),
+                           V1, V2, Mask));
+}
+
+
+void SelectionDAGLowering::visitGetElementPtr(User &I) {
+  SDOperand N = getValue(I.getOperand(0));
+  const Type *Ty = I.getOperand(0)->getType();
+
+  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+       OI != E; ++OI) {
+    Value *Idx = *OI;
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N,
+                        getIntPtrConstant(Offset));
+      }
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->getZExtValue() == 0) continue;
+        uint64_t Offs = 
+            TD->getTypeSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
+        continue;
+      }
+      
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = TD->getTypeSize(Ty);
+      SDOperand IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      if (IdxN.getValueType() < N.getValueType()) {
+        IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
+      } else if (IdxN.getValueType() > N.getValueType())
+        IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
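+      // For example (illustrative): indexing an array of 8-byte elements
+      // becomes (IdxN << 3) rather than a multiply by 8.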
+      if (isPowerOf2_64(ElementSize)) {
+        unsigned Amt = Log2_64(ElementSize);
+        IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
+                           DAG.getConstant(Amt, TLI.getShiftAmountTy()));
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+        continue;
+      }
+      
+      SDOperand Scale = getIntPtrConstant(ElementSize);
+      IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
+      N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+    }
+  }
+  setValue(&I, N);
+}
+
+void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  const Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDOperand AllocSize = getValue(I.getArraySize());
+  MVT::ValueType IntPtr = TLI.getPointerTy();
+  if (IntPtr < AllocSize.getValueType())
+    AllocSize = DAG.getNode(ISD::TRUNCATE, IntPtr, AllocSize);
+  else if (IntPtr > AllocSize.getValueType())
+    AllocSize = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, AllocSize);
+
+  AllocSize = DAG.getNode(ISD::MUL, IntPtr, AllocSize,
+                          getIntPtrConstant(TySize));
+
+  // Handle alignment.  If the requested alignment is less than the stack
+  // alignment, ignore it and round the size of the allocation up to the stack
+  // alignment size.  If the requested alignment is greater than or equal to
+  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
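+  // For example (illustrative): with a 16-byte stack alignment, a 20-byte
+  // request with a smaller alignment becomes (20 + 15) & ~15 = 32 bytes, and
+  // Align is dropped to 0 so the target does no further rounding.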
+  unsigned StackAlign =
+    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align < StackAlign) {
+    Align = 0;
+    // Add SA-1 to the size.
+    AllocSize = DAG.getNode(ISD::ADD, AllocSize.getValueType(), AllocSize,
+                            getIntPtrConstant(StackAlign-1));
+    // Mask out the low bits for alignment purposes.
+    AllocSize = DAG.getNode(ISD::AND, AllocSize.getValueType(), AllocSize,
+                            getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+  }
+
+  SDOperand Ops[] = { getRoot(), AllocSize, getIntPtrConstant(Align) };
+  const MVT::ValueType *VTs = DAG.getNodeValueTypes(AllocSize.getValueType(),
+                                                    MVT::Other);
+  SDOperand DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, VTs, 2, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGLowering::visitLoad(LoadInst &I) {
+  SDOperand Ptr = getValue(I.getOperand(0));
+
+  SDOperand Root;
+  if (I.isVolatile())
+    Root = getRoot();
+  else {
+    // Do not serialize non-volatile loads against each other.
+    Root = DAG.getRoot();
+  }
+
+  setValue(&I, getLoadFrom(I.getType(), Ptr, I.getOperand(0),
+                           Root, I.isVolatile(), I.getAlignment()));
+}
+
+SDOperand SelectionDAGLowering::getLoadFrom(const Type *Ty, SDOperand Ptr,
+                                            const Value *SV, SDOperand Root,
+                                            bool isVolatile, 
+                                            unsigned Alignment) {
+  SDOperand L =
+    DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, SV, 0, 
+                isVolatile, Alignment);
+
+  if (isVolatile)
+    DAG.setRoot(L.getValue(1));
+  else
+    PendingLoads.push_back(L.getValue(1));
+  
+  return L;
+}
+
+
+void SelectionDAGLowering::visitStore(StoreInst &I) {
+  Value *SrcV = I.getOperand(0);
+  SDOperand Src = getValue(SrcV);
+  SDOperand Ptr = getValue(I.getOperand(1));
+  DAG.setRoot(DAG.getStore(getRoot(), Src, Ptr, I.getOperand(1), 0,
+                           I.isVolatile(), I.getAlignment()));
+}
+
+/// IntrinsicCannotAccessMemory - Return true if the specified intrinsic cannot
+/// access memory and has no other side effects at all.
+static bool IntrinsicCannotAccessMemory(unsigned IntrinsicID) {
+#define GET_NO_MEMORY_INTRINSICS
+#include "llvm/Intrinsics.gen"
+#undef GET_NO_MEMORY_INTRINSICS
+  return false;
+}
+
+// IntrinsicOnlyReadsMemory - Return true if the specified intrinsic doesn't
+// have any side-effects or if it only reads memory.
+static bool IntrinsicOnlyReadsMemory(unsigned IntrinsicID) {
+#define GET_SIDE_EFFECT_INFO
+#include "llvm/Intrinsics.gen"
+#undef GET_SIDE_EFFECT_INFO
+  return false;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, 
+                                                unsigned Intrinsic) {
+  bool HasChain = !IntrinsicCannotAccessMemory(Intrinsic);
+  bool OnlyLoad = HasChain && IntrinsicOnlyReadsMemory(Intrinsic);
+  
+  // Build the operand list.
+  SmallVector<SDOperand, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else { 
+      Ops.push_back(getRoot());
+    }
+  }
+  
+  // Add the intrinsic ID as an integer operand.
+  Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+  // Add all operands of the call to the operand list.
+  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+    SDOperand Op = getValue(I.getOperand(i));
+    assert(TLI.isTypeLegal(Op.getValueType()) &&
+           "Intrinsic uses a non-legal type?");
+    Ops.push_back(Op);
+  }
+
+  std::vector<MVT::ValueType> VTs;
+  if (I.getType() != Type::VoidTy) {
+    MVT::ValueType VT = TLI.getValueType(I.getType());
+    if (MVT::isVector(VT)) {
+      const VectorType *DestTy = cast<VectorType>(I.getType());
+      MVT::ValueType EltVT = TLI.getValueType(DestTy->getElementType());
+      
+      VT = MVT::getVectorType(EltVT, DestTy->getNumElements());
+      assert(VT != MVT::Other && "Intrinsic uses a non-legal type?");
+    }
+    
+    assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?");
+    VTs.push_back(VT);
+  }
+  if (HasChain)
+    VTs.push_back(MVT::Other);
+
+  const MVT::ValueType *VTList = DAG.getNodeValueTypes(VTs);
+
+  // Create the node.
+  SDOperand Result;
+  if (!HasChain)
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+  else if (I.getType() != Type::VoidTy)
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+  else
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+
+  if (HasChain) {
+    SDOperand Chain = Result.getValue(Result.Val->getNumValues()-1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+  if (I.getType() != Type::VoidTy) {
+    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+      MVT::ValueType VT = TLI.getValueType(PTy);
+      Result = DAG.getNode(ISD::BIT_CONVERT, VT, Result);
+    } 
+    setValue(&I, Result);
+  }
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+static GlobalVariable *ExtractTypeInfo (Value *V) {
+  V = IntrinsicInst::StripPointerCasts(V);
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+  assert((GV || isa<ConstantPointerNull>(V)) &&
+         "TypeInfo must be a global variable or NULL");
+  return GV;
+}
+
+/// addCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+static void addCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+                         MachineBasicBlock *MBB) {
+  // Inform the MachineModuleInfo of the personality for this landing pad.
+  ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+  assert(CE->getOpcode() == Instruction::BitCast &&
+         isa<Function>(CE->getOperand(0)) &&
+         "Personality should be a function");
+  MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+  // Gather all the type infos for this landing pad and pass them along to
+  // MachineModuleInfo.
+  std::vector<GlobalVariable *> TyInfo;
+  unsigned N = I.getNumOperands();
+
+  for (unsigned i = N - 1; i > 2; --i) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+      unsigned FilterLength = CI->getZExtValue();
+      unsigned FirstCatch = i + FilterLength + 1;
+      assert (FirstCatch <= N && "Invalid filter length");
+
+      if (FirstCatch < N) {
+        TyInfo.reserve(N - FirstCatch);
+        for (unsigned j = FirstCatch; j < N; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addCatchTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      TyInfo.reserve(FilterLength);
+      for (unsigned j = i + 1; j < FirstCatch; ++j)
+        TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+      MMI->addFilterTypeInfo(MBB, TyInfo);
+      TyInfo.clear();
+
+      N = i;
+    }
+  }
+
+  if (N > 3) {
+    TyInfo.reserve(N - 3);
+    for (unsigned j = 3; j < N; ++j)
+      TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+    MMI->addCatchTypeInfo(MBB, TyInfo);
+  }
+}
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise lower it and return null.
+const char *
+SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+  switch (Intrinsic) {
+  default:
+    // By default, turn this into a target intrinsic node.
+    visitTargetIntrinsic(I, Intrinsic);
+    return 0;
+  case Intrinsic::vastart:  visitVAStart(I); return 0;
+  case Intrinsic::vaend:    visitVAEnd(I); return 0;
+  case Intrinsic::vacopy:   visitVACopy(I); return 0;
+  case Intrinsic::returnaddress:
+    setValue(&I, DAG.getNode(ISD::RETURNADDR, TLI.getPointerTy(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::frameaddress:
+    setValue(&I, DAG.getNode(ISD::FRAMEADDR, TLI.getPointerTy(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::setjmp:
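+    // The pointer arithmetic on the string literal skips the leading '_'
+    // when the target does not use an underscore-prefixed setjmp; the
+    // longjmp case below uses the same trick.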
+    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+    break;
+  case Intrinsic::longjmp:
+    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+    break;
+  case Intrinsic::memcpy_i32:
+  case Intrinsic::memcpy_i64:
+    visitMemIntrinsic(I, ISD::MEMCPY);
+    return 0;
+  case Intrinsic::memset_i32:
+  case Intrinsic::memset_i64:
+    visitMemIntrinsic(I, ISD::MEMSET);
+    return 0;
+  case Intrinsic::memmove_i32:
+  case Intrinsic::memmove_i64:
+    visitMemIntrinsic(I, ISD::MEMMOVE);
+    return 0;
+    
+  case Intrinsic::dbg_stoppoint: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+    if (MMI && SPI.getContext() && MMI->Verify(SPI.getContext())) {
+      SDOperand Ops[5];
+
+      Ops[0] = getRoot();
+      Ops[1] = getValue(SPI.getLineValue());
+      Ops[2] = getValue(SPI.getColumnValue());
+
+      DebugInfoDesc *DD = MMI->getDescFor(SPI.getContext());
+      assert(DD && "Not a debug information descriptor");
+      CompileUnitDesc *CompileUnit = cast<CompileUnitDesc>(DD);
+      
+      Ops[3] = DAG.getString(CompileUnit->getFileName());
+      Ops[4] = DAG.getString(CompileUnit->getDirectory());
+      
+      DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops, 5));
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_region_start: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
+    if (MMI && RSI.getContext() && MMI->Verify(RSI.getContext())) {
+      unsigned LabelID = MMI->RecordRegionStart(RSI.getContext());
+      DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+                              DAG.getConstant(LabelID, MVT::i32)));
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_region_end: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
+    if (MMI && REI.getContext() && MMI->Verify(REI.getContext())) {
+      unsigned LabelID = MMI->RecordRegionEnd(REI.getContext());
+      DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other,
+                              getRoot(), DAG.getConstant(LabelID, MVT::i32)));
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_func_start: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
+    if (MMI && FSI.getSubprogram() &&
+        MMI->Verify(FSI.getSubprogram())) {
+      unsigned LabelID = MMI->RecordRegionStart(FSI.getSubprogram());
+      DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other,
+                  getRoot(), DAG.getConstant(LabelID, MVT::i32)));
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_declare: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+    if (MMI && DI.getVariable() && MMI->Verify(DI.getVariable())) {
+      SDOperand AddressOp  = getValue(DI.getAddress());
+      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddressOp))
+        MMI->RecordVariable(DI.getVariable(), FI->getIndex());
+    }
+
+    return 0;
+  }
+    
+  case Intrinsic::eh_exception: {
+    if (ExceptionHandling) {
+      if (!CurMBB->isLandingPad()) {
+        // FIXME: Mark exception register as live in.  Hack for PR1508.
+        unsigned Reg = TLI.getExceptionAddressRegister();
+        if (Reg) CurMBB->addLiveIn(Reg);
+      }
+      // Insert the EXCEPTIONADDR instruction.
+      SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+      SDOperand Ops[1];
+      Ops[0] = DAG.getRoot();
+      SDOperand Op = DAG.getNode(ISD::EXCEPTIONADDR, VTs, Ops, 1);
+      setValue(&I, Op);
+      DAG.setRoot(Op.getValue(1));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+    return 0;
+  }
+
+  case Intrinsic::eh_selector:{
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+    if (ExceptionHandling && MMI) {
+      if (CurMBB->isLandingPad())
+        addCatchInfo(I, MMI, CurMBB);
+      else {
+#ifndef NDEBUG
+        FuncInfo.CatchInfoLost.insert(&I);
+#endif
+        // FIXME: Mark exception selector register as live in.  Hack for PR1508.
+        unsigned Reg = TLI.getExceptionSelectorRegister();
+        if (Reg) CurMBB->addLiveIn(Reg);
+      }
+
+      // Insert the EHSELECTION instruction.
+      SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+      SDOperand Ops[2];
+      Ops[0] = getValue(I.getOperand(1));
+      Ops[1] = getRoot();
+      SDOperand Op = DAG.getNode(ISD::EHSELECTION, VTs, Ops, 2);
+      setValue(&I, Op);
+      DAG.setRoot(Op.getValue(1));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+    
+    return 0;
+  }
+  
+  case Intrinsic::eh_typeid_for: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    
+    if (MMI) {
+      // Find the type id for the given typeinfo.
+      GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+
+      unsigned TypeID = MMI->getTypeIDFor(GV);
+      setValue(&I, DAG.getConstant(TypeID, MVT::i32));
+    } else {
+      // Return something different to eh_selector.
+      setValue(&I, DAG.getConstant(1, MVT::i32));
+    }
+
+    return 0;
+  }
+
+  case Intrinsic::eh_return: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+    if (MMI && ExceptionHandling) {
+      MMI->setCallsEHReturn(true);
+      DAG.setRoot(DAG.getNode(ISD::EH_RETURN,
+                              MVT::Other,
+                              getRoot(),
+                              getValue(I.getOperand(1)),
+                              getValue(I.getOperand(2))));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+
+    return 0;
+  }
+
+   case Intrinsic::eh_unwind_init: {    
+     if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+       MMI->setCallsUnwindInit(true);
+     }
+
+     return 0;
+   }
+
+   case Intrinsic::eh_dwarf_cfa: {
+     if (ExceptionHandling) {
+       MVT::ValueType VT = getValue(I.getOperand(1)).getValueType();
+       SDOperand Offset = DAG.getNode(ISD::ADD,
+                                      TLI.getPointerTy(),
+                                      DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET,
+                                                  VT),
+                                      getValue(I.getOperand(1)));
+       setValue(&I, DAG.getNode(ISD::ADD,
+                                TLI.getPointerTy(),
+                                DAG.getNode(ISD::FRAMEADDR,
+                                            TLI.getPointerTy(),
+                                            DAG.getConstant(0,
+                                                            TLI.getPointerTy())),
+                                Offset));
+     } else {
+       setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+     }
+
+     return 0;
+  }
+
+  case Intrinsic::sqrt_f32:
+  case Intrinsic::sqrt_f64:
+    setValue(&I, DAG.getNode(ISD::FSQRT,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::powi_f32:
+  case Intrinsic::powi_f64:
+    setValue(&I, DAG.getNode(ISD::FPOWI,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1)),
+                             getValue(I.getOperand(2))));
+    return 0;
+  case Intrinsic::pcmarker: {
+    SDOperand Tmp = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::readcyclecounter: {
+    SDOperand Op = getRoot();
+    SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER,
+                                DAG.getNodeValueTypes(MVT::i64, MVT::Other), 2,
+                                &Op, 1);
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));
+    return 0;
+  }
+  case Intrinsic::part_select: {
+    // Currently not implemented: just abort
+    assert(0 && "part_select intrinsic not implemented");
+    abort();
+  }
+  case Intrinsic::part_set: {
+    // Currently not implemented: just abort
+    assert(0 && "part_set intrinsic not implemented");
+    abort();
+  }
+  case Intrinsic::bswap:
+    setValue(&I, DAG.getNode(ISD::BSWAP,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::cttz: {
+    SDOperand Arg = getValue(I.getOperand(1));
+    MVT::ValueType Ty = Arg.getValueType();
+    SDOperand result = DAG.getNode(ISD::CTTZ, Ty, Arg);
+    if (Ty < MVT::i32)
+      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+    else if (Ty > MVT::i32)
+      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::ctlz: {
+    SDOperand Arg = getValue(I.getOperand(1));
+    MVT::ValueType Ty = Arg.getValueType();
+    SDOperand result = DAG.getNode(ISD::CTLZ, Ty, Arg);
+    if (Ty < MVT::i32)
+      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+    else if (Ty > MVT::i32)
+      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::ctpop: {
+    SDOperand Arg = getValue(I.getOperand(1));
+    MVT::ValueType Ty = Arg.getValueType();
+    SDOperand result = DAG.getNode(ISD::CTPOP, Ty, Arg);
+    if (Ty < MVT::i32)
+      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+    else if (Ty > MVT::i32)
+      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::stacksave: {
+    SDOperand Op = getRoot();
+    SDOperand Tmp = DAG.getNode(ISD::STACKSAVE,
+              DAG.getNodeValueTypes(TLI.getPointerTy(), MVT::Other), 2, &Op, 1);
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));
+    return 0;
+  }
+  case Intrinsic::stackrestore: {
+    SDOperand Tmp = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::prefetch:
+    // FIXME: Currently discarding prefetches.
+    return 0;
+  
+  case Intrinsic::var_annotation:
+    // Discard annotate attributes
+    return 0;
+  }
+}
+
+
+void SelectionDAGLowering::LowerCallTo(Instruction &I,
+                                       const Type *CalledValueTy,
+                                       unsigned CallingConv,
+                                       bool IsTailCall,
+                                       SDOperand Callee, unsigned OpIdx,
+                                       MachineBasicBlock *LandingPad) {
+  const PointerType *PT = cast<PointerType>(CalledValueTy);
+  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  const ParamAttrsList *Attrs = FTy->getParamAttrs();
+  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+  unsigned BeginLabel = 0, EndLabel = 0;
+    
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Args.reserve(I.getNumOperands());
+  for (unsigned i = OpIdx, e = I.getNumOperands(); i != e; ++i) {
+    Value *Arg = I.getOperand(i);
+    SDOperand ArgNode = getValue(Arg);
+    Entry.Node = ArgNode; Entry.Ty = Arg->getType();
+
+    unsigned attrInd = i - OpIdx + 1;
+    Entry.isSExt  = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::SExt);
+    Entry.isZExt  = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::ZExt);
+    Entry.isInReg = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::InReg);
+    Entry.isSRet  = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::StructRet);
+    Args.push_back(Entry);
+  }
+
+  if (ExceptionHandling && MMI) {
+    // Insert a label before the invoke call to mark the try range.  This can be
+    // used to detect deletion of the invoke via the MachineModuleInfo.
+    BeginLabel = MMI->NextLabelID();
+    DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+                            DAG.getConstant(BeginLabel, MVT::i32)));
+  }
+  
+  std::pair<SDOperand,SDOperand> Result =
+    TLI.LowerCallTo(getRoot(), I.getType(), 
+                    Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt),
+                    FTy->isVarArg(), CallingConv, IsTailCall, 
+                    Callee, Args, DAG);
+  if (I.getType() != Type::VoidTy)
+    setValue(&I, Result.first);
+  DAG.setRoot(Result.second);
+
+  if (ExceptionHandling && MMI) {
+    // Insert a label at the end of the invoke call to mark the try range.  This
+    // can be used to detect deletion of the invoke via the MachineModuleInfo.
+    EndLabel = MMI->NextLabelID();
+    DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+                            DAG.getConstant(EndLabel, MVT::i32)));
+
+    // Inform MachineModuleInfo of range.    
+    MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+  }
+}
+
+
+void SelectionDAGLowering::visitCall(CallInst &I) {
+  const char *RenameFn = 0;
+  if (Function *F = I.getCalledFunction()) {
+    if (F->isDeclaration())
+      if (unsigned IID = F->getIntrinsicID()) {
+        RenameFn = visitIntrinsicCall(I, IID);
+        if (!RenameFn)
+          return;
+      } else {    // Not an LLVM intrinsic.
+        const std::string &Name = F->getName();
+        if (Name[0] == 'c' && (Name == "copysign" || Name == "copysignf")) {
+          if (I.getNumOperands() == 3 &&   // Basic sanity checks.
+              I.getOperand(1)->getType()->isFloatingPoint() &&
+              I.getType() == I.getOperand(1)->getType() &&
+              I.getType() == I.getOperand(2)->getType()) {
+            SDOperand LHS = getValue(I.getOperand(1));
+            SDOperand RHS = getValue(I.getOperand(2));
+            setValue(&I, DAG.getNode(ISD::FCOPYSIGN, LHS.getValueType(),
+                                     LHS, RHS));
+            return;
+          }
+        } else if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
+          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+              I.getOperand(1)->getType()->isFloatingPoint() &&
+              I.getType() == I.getOperand(1)->getType()) {
+            SDOperand Tmp = getValue(I.getOperand(1));
+            setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
+            return;
+          }
+        } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
+          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+              I.getOperand(1)->getType()->isFloatingPoint() &&
+              I.getType() == I.getOperand(1)->getType()) {
+            SDOperand Tmp = getValue(I.getOperand(1));
+            setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
+            return;
+          }
+        } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
+          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+              I.getOperand(1)->getType()->isFloatingPoint() &&
+              I.getType() == I.getOperand(1)->getType()) {
+            SDOperand Tmp = getValue(I.getOperand(1));
+            setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
+            return;
+          }
+        }
+      }
+  } else if (isa<InlineAsm>(I.getOperand(0))) {
+    visitInlineAsm(I);
+    return;
+  }
+
+  SDOperand Callee;
+  if (!RenameFn)
+    Callee = getValue(I.getOperand(0));
+  else
+    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  LowerCallTo(I, I.getCalledValue()->getType(),
+              I.getCallingConv(),
+              I.isTailCall(),
+              Callee,
+              1);
+}
+
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy the value
+/// from the registers specified by this object, and return the result as a
+/// ValueVT value.  This uses Chain/Flag as the input and updates them for the
+/// output Chain/Flag.  If the Flag pointer is NULL, no flag is used.
+SDOperand RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+                                        SDOperand &Chain, SDOperand *Flag)const{
+  // Copy the legal parts from the registers.
+  unsigned NumParts = Regs.size();
+  SmallVector<SDOperand, 8> Parts(NumParts);
+  for (unsigned i = 0; i != NumParts; ++i) {
+    SDOperand Part = Flag ?
+                     DAG.getCopyFromReg(Chain, Regs[i], RegVT, *Flag) :
+                     DAG.getCopyFromReg(Chain, Regs[i], RegVT);
+    Chain = Part.getValue(1);
+    if (Flag)
+      *Flag = Part.getValue(2);
+    Parts[i] = Part;
+  }
+  
+  // Assemble the legal parts into the final value.
+  return getCopyFromParts(DAG, &Parts[0], NumParts, RegVT, ValueVT);
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copy the specified
+/// value into the registers specified by this object.  This uses 
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
+                                 SDOperand &Chain, SDOperand *Flag) const {
+  // Get the list of the value's legal parts.
+  unsigned NumParts = Regs.size();
+  SmallVector<SDOperand, 8> Parts(NumParts);
+  getCopyToParts(DAG, Val, &Parts[0], NumParts, RegVT);
+
+  // Copy the parts into the registers.
+  for (unsigned i = 0; i != NumParts; ++i) {
+    SDOperand Part = Flag ?
+                     DAG.getCopyToReg(Chain, Regs[i], Parts[i], *Flag) :
+                     DAG.getCopyToReg(Chain, Regs[i], Parts[i]);
+    Chain = Part.getValue(0);
+    if (Flag)
+      *Flag = Part.getValue(1);
+  }
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list.  This adds the code marker and includes the number of 
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
+                                        std::vector<SDOperand> &Ops) const {
+  MVT::ValueType IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
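+  // Each operand group is described by one target constant: the low 3 bits
+  // hold the operand code (e.g. 1=REGUSE, 2=REGDEF, 3=IMM, 4=MEM) and the
+  // remaining bits hold the number of operands that follow it.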
+  Ops.push_back(DAG.getTargetConstant(Code | (Regs.size() << 3), IntPtrTy));
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(Regs[i], RegVT));
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate, 
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register.  Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+                      const TargetLowering &TLI, const MRegisterInfo *MRI) {
+  MVT::ValueType FoundVT = MVT::Other;
+  const TargetRegisterClass *FoundRC = 0;
+  for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(),
+       E = MRI->regclass_end(); RCI != E; ++RCI) {
+    MVT::ValueType ThisVT = MVT::Other;
+
+    const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (TLI.isTypeLegal(*I)) {
+        // If we have already found this register in a different register class,
+        // choose the one with the largest VT specified.  For example, on
+        // PowerPC, we favor f64 register classes over f32.
+        if (FoundVT == MVT::Other || 
+            MVT::getSizeInBits(FoundVT) < MVT::getSizeInBits(*I)) {
+          ThisVT = *I;
+          break;
+        }
+      }
+    }
+    
+    if (ThisVT == MVT::Other) continue;
+    
+    // NOTE: This isn't ideal.  In particular, this might allocate the
+    // frame pointer in functions that need it (because the frame pointer is
+    // only removed from the allocation order once a variable-sized allocation
+    // has been seen).  This is a slight code pessimization, but should still
+    // work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class.  Keep looking at others in case
+        // we find one with larger registers that this physreg is also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}    
+
+
+namespace {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
+  /// ConstraintCode - This contains the actual string for the code, like "m".
+  std::string ConstraintCode;
+
+  /// ConstraintType - Information about the constraint code, e.g. Register,
+  /// RegisterClass, Memory, Other, Unknown.
+  TargetLowering::ConstraintType ConstraintType;
+  
+  /// CallOperand/CallOperandVal - If this is the result output operand or a
+  /// clobber, this is null, otherwise it is the incoming operand to the
+  /// CallInst.  This gets modified as the asm is processed.
+  SDOperand CallOperand;
+  Value *CallOperandVal;
+  
+  /// ConstraintVT - The ValueType for the operand value.
+  MVT::ValueType ConstraintVT;
+  
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+  
+  AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+    : InlineAsm::ConstraintInfo(info), 
+      ConstraintType(TargetLowering::C_Unknown),
+      CallOperand(0,0), CallOperandVal(0), ConstraintVT(MVT::Other) {
+  }
+  
+  void ComputeConstraintToUse(const TargetLowering &TLI);
+  
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+                         std::set<unsigned> &OutputRegs, 
+                         std::set<unsigned> &InputRegs) const {
+     if (isOutReg)
+       OutputRegs.insert(AssignedRegs.Regs.begin(), AssignedRegs.Regs.end());
+     if (isInReg)
+       InputRegs.insert(AssignedRegs.Regs.begin(), AssignedRegs.Regs.end());
+   }
+};
+} // end anon namespace.
+
+/// getConstraintGenerality - Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+  switch (CT) {
+    default: assert(0 && "Unknown constraint type!");
+    case TargetLowering::C_Other:
+    case TargetLowering::C_Unknown:
+      return 0;
+    case TargetLowering::C_Register:
+      return 1;
+    case TargetLowering::C_RegisterClass:
+      return 2;
+    case TargetLowering::C_Memory:
+      return 3;
+  }
+}
+
+void AsmOperandInfo::ComputeConstraintToUse(const TargetLowering &TLI) {
+  assert(!Codes.empty() && "Must have at least one constraint");
+  
+  std::string *Current = &Codes[0];
+  TargetLowering::ConstraintType CurType = TLI.getConstraintType(*Current);
+  if (Codes.size() == 1) {   // Single-letter constraints ('r') are very common.
+    ConstraintCode = *Current;
+    ConstraintType = CurType;
+    return;
+  }
+  
+  unsigned CurGenerality = getConstraintGenerality(CurType);
+  
+  // If we have multiple constraints, try to pick the most general one ahead
+  // of time.  This isn't a wonderful solution, but handles common cases.
+  for (unsigned j = 1, e = Codes.size(); j != e; ++j) {
+    TargetLowering::ConstraintType ThisType = TLI.getConstraintType(Codes[j]);
+    unsigned ThisGenerality = getConstraintGenerality(ThisType);
+    if (ThisGenerality > CurGenerality) {
+      // This constraint letter is more general than the previous one,
+      // use it.
+      CurType = ThisType;
+      Current = &Codes[j];
+      CurGenerality = ThisGenerality;
+    }
+  }
+  
+  ConstraintCode = *Current;
+  ConstraintType = CurType;
+}
+
+
+void SelectionDAGLowering::
+GetRegistersForValue(AsmOperandInfo &OpInfo, bool HasEarlyClobber,
+                     std::set<unsigned> &OutputRegs, 
+                     std::set<unsigned> &InputRegs) {
+  // Compute whether this value requires an input register, an output register,
+  // or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+    
+    // If this is an early-clobber output, or if there is an input
+    // constraint that matches this, we need to reserve the input register
+    // so no other inputs allocate to it.
+    isInReg = OpInfo.isEarlyClobber || OpInfo.hasMatchingInput;
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+  
+  
+  MachineFunction &MF = DAG.getMachineFunction();
+  std::vector<unsigned> Regs;
+  
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg = 
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other)
+    NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+  MVT::ValueType RegVT;
+  MVT::ValueType ValueVT = OpInfo.ConstraintVT;
+  
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (PhysReg.first) {
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *PhysReg.second->vt_begin();
+    
+    // Get the actual register value type.  This is important, because the user
+    // may have asked for (e.g.) the AX register in i32 type.  We need to
+    // remember that AX is actually i16 to get the right extension.
+    RegVT = *PhysReg.second->vt_begin();
+    
+    // This is an explicit reference to a physical register.
+    Regs.push_back(PhysReg.first);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = PhysReg.second->begin();
+      TargetRegisterClass::iterator E = PhysReg.second->end();
+      for (; *I != PhysReg.first; ++I)
+        assert(I != E && "Didn't find reg!"); 
+      
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != E && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+    return;
+  }
+  
+  // Otherwise, if this was a reference to an LLVM register class, create vregs
+  // for this reference.
+  std::vector<unsigned> RegClassRegs;
+  const TargetRegisterClass *RC = PhysReg.second;
+  if (RC) {
+    // If this is an early clobber or tied register, our regalloc doesn't know
+    // how to maintain the constraint.  If it isn't, go ahead and create vregs
+    // and let the regalloc do the right thing.
+    if (!OpInfo.hasMatchingInput && !OpInfo.isEarlyClobber &&
+        // If there is some other early clobber and this is an input register,
+        // then we are forced to pre-allocate the input reg so it doesn't
+        // conflict with the earlyclobber.
+        !(OpInfo.Type == InlineAsm::isInput && HasEarlyClobber)) {
+      RegVT = *PhysReg.second->vt_begin();
+      
+      if (OpInfo.ConstraintVT == MVT::Other)
+        ValueVT = RegVT;
+
+      // Create the appropriate number of virtual registers.
+      SSARegMap *RegMap = MF.getSSARegMap();
+      for (; NumRegs; --NumRegs)
+        Regs.push_back(RegMap->createVirtualRegister(PhysReg.second));
+      
+      OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+      return;
+    }
+    
+    // Otherwise, we can't allocate it.  Let the code below figure out how to
+    // maintain these constraints.
+    RegClassRegs.assign(PhysReg.second->begin(), PhysReg.second->end());
+    
+  } else {
+    // This is a reference to a register class that doesn't directly correspond
+    // to an LLVM register class.  Allocate NumRegs consecutive, available,
+    // registers from the class.
+    RegClassRegs = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                                         OpInfo.ConstraintVT);
+  }
+  
+  const MRegisterInfo *MRI = DAG.getTarget().getRegisterInfo();
+  unsigned NumAllocated = 0;
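+  // Scan for NumRegs consecutive registers in this list that are allocatable
+  // and not already claimed by another constraint.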
+  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+    unsigned Reg = RegClassRegs[i];
+    // See if this register is available.
+    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
+        (isInReg  && InputRegs.count(Reg))) {    // Already used.
+      // Make sure we find consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+    
+    // Check to see if this register is allocatable (i.e. don't give out the
+    // stack pointer).
+    if (RC == 0) {
+      RC = isAllocatableRegister(Reg, MF, TLI, MRI);
+      if (!RC) {        // Couldn't allocate this register.
+        // Reset NumAllocated to make sure we return consecutive registers.
+        NumAllocated = 0;
+        continue;
+      }
+    }
+    
+    // Okay, this register is good, we can use it.
+    ++NumAllocated;
+
+    // If we allocated enough consecutive registers, succeed.
+    if (NumAllocated == NumRegs) {
+      unsigned RegStart = (i-NumAllocated)+1;
+      unsigned RegEnd   = i+1;
+      // Mark all of the allocated registers used.
+      for (unsigned i = RegStart; i != RegEnd; ++i)
+        Regs.push_back(RegClassRegs[i]);
+      
+      OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), 
+                                         OpInfo.ConstraintVT);
+      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+      return;
+    }
+  }
+  
+  // Otherwise, we couldn't allocate enough registers for this.
+  return;
+}
+
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGLowering::visitInlineAsm(CallInst &I) {
+  InlineAsm *IA = cast<InlineAsm>(I.getOperand(0));
+
+  /// ConstraintOperands - Information about all of the constraints.
+  std::vector<AsmOperandInfo> ConstraintOperands;
+  
+  SDOperand Chain = getRoot();
+  SDOperand Flag;
+  
+  std::set<unsigned> OutputRegs, InputRegs;
+
+  // Do a prepass over the constraints, canonicalizing them, and building up the
+  // ConstraintOperands list.
+  std::vector<InlineAsm::ConstraintInfo>
+    ConstraintInfos = IA->ParseConstraints();
+
+  // SawEarlyClobber - Keep track of whether we saw an earlyclobber output
+  // constraint.  If so, we can't let the register allocator allocate any input
+  // registers, because it will not know to avoid the earlyclobbered output reg.
+  bool SawEarlyClobber = false;
+  
+  unsigned OpNo = 1;   // OpNo - The operand of the CallInst.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+    AsmOperandInfo &OpInfo = ConstraintOperands.back();
+    
+    MVT::ValueType OpVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      if (!OpInfo.isIndirect) {
+        // The return value of the call is this value.  As such, there is no
+        // corresponding argument.
+        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
+        OpVT = TLI.getValueType(I.getType());
+      } else {
+        OpInfo.CallOperandVal = I.getOperand(OpNo++);
+      }
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = I.getOperand(OpNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // If this is an input or an indirect output, process the call argument.
+    if (OpInfo.CallOperandVal) {
+      OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      const Type *OpTy = OpInfo.CallOperandVal->getType();
+      // If this is an indirect operand, the operand is a pointer to the
+      // accessed type.
+      if (OpInfo.isIndirect)
+        OpTy = cast<PointerType>(OpTy)->getElementType();
+      
+      // If OpTy is not a first-class value, it may be a struct/union that we
+      // can tile with integers.
+      if (!OpTy->isFirstClassType() && OpTy->isSized()) {
+        unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+        switch (BitSize) {
+        default: break;
+        case 1:
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+          OpTy = IntegerType::get(BitSize);
+          break;
+        }
+      }
+      
+      OpVT = TLI.getValueType(OpTy, true);
+    }
+    
+    OpInfo.ConstraintVT = OpVT;
+    
+    // Compute the constraint code and ConstraintType to use.
+    OpInfo.ComputeConstraintToUse(TLI);
+
+    // Keep track of whether we see an earlyclobber.
+    SawEarlyClobber |= OpInfo.isEarlyClobber;
+    
+    // If this is a memory input, and if the operand is not indirect, do what we
+    // need to do to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert(OpInfo.Type == InlineAsm::isInput &&
+             "Can only indirectify direct input operands!");
+      
+      // Memory operands really want the address of the value.  If we don't have
+      // an indirect input, put it in the constant pool if we can, otherwise
+      // spill it to a stack slot.
+      // it to a stack slot.
+      
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
+      Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
+        const Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+        SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, OpInfo.CallOperand, StackSlot, NULL, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+     
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+    
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs);
+  }
+  ConstraintInfos.clear();
+  
+  
+  // Second pass - Loop over all of the operands, assigning virtual or physregs
+  // to register class operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    AsmOperandInfo &OpInfo = ConstraintOperands[i];
+    
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs);
+  }    
+  
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDOperand> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDOperand());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+  
+  
+  // Loop over all of the inputs, copying the operand values into the
+  // appropriate registers and processing the output regs.
+  RegsForValue RetValRegs;
+  
+  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+  
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    AsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput: {
+      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+          OpInfo.ConstraintType != TargetLowering::C_Register) {
+        // Memory output, or 'other' output (e.g. 'X' constraint).
+        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+        // Add information to the INLINEASM node to know about this output.
+        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(OpInfo.CallOperand);
+        break;
+      }
+
+      // Otherwise, this is a register or register class output.
+
+      // Copy the output from the appropriate register.  Find a register that
+      // we can use.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        cerr << "Couldn't allocate output reg for contraint '"
+             << OpInfo.ConstraintCode << "'!\n";
+        exit(1);
+      }
+
+      if (!OpInfo.isIndirect) {
+        // This is the result value of the call.
+        assert(RetValRegs.Regs.empty() &&
+               "Cannot have multiple output constraints yet!");
+        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
+        RetValRegs = OpInfo.AssignedRegs;
+      } else {
+        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+                                                      OpInfo.CallOperandVal));
+      }
+      
+      // Add information to the INLINEASM node to know that this register is
+      // set.
+      OpInfo.AssignedRegs.AddInlineAsmOperands(2 /*REGDEF*/, DAG,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isInput: {
+      SDOperand InOperandVal = OpInfo.CallOperand;
+      
+      if (isdigit(OpInfo.ConstraintCode[0])) {    // Matching constraint?
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        unsigned OperandNo = atoi(OpInfo.ConstraintCode.c_str());
+        
+        // Scan until we find the definition we already emitted of this operand.
+        // When we find it, create a RegsForValue operand.
+        unsigned CurOp = 2;  // The first operand.
+        for (; OperandNo; --OperandNo) {
+          // Advance to the next operand.
+          unsigned NumOps = 
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
+          assert(((NumOps & 7) == 2 /*REGDEF*/ ||
+                  (NumOps & 7) == 4 /*MEM*/) &&
+                 "Skipped past definitions?");
+          CurOp += (NumOps>>3)+1;
+        }
+
+        unsigned NumOps = 
+          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
+        if ((NumOps & 7) == 2 /*REGDEF*/) {
+          // Add NumOps>>3 registers to MatchedRegs.
+          RegsForValue MatchedRegs;
+          MatchedRegs.ValueVT = InOperandVal.getValueType();
+          MatchedRegs.RegVT   = AsmNodeOperands[CurOp+1].getValueType();
+          for (unsigned i = 0, e = NumOps>>3; i != e; ++i) {
+            unsigned Reg =
+              cast<RegisterSDNode>(AsmNodeOperands[++CurOp])->getReg();
+            MatchedRegs.Regs.push_back(Reg);
+          }
+        
+          // Use the produced MatchedRegs object to copy the input value into
+          // the matched registers.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, DAG, AsmNodeOperands);
+          break;
+        } else {
+          assert((NumOps & 7) == 4/*MEM*/ && "Unknown matching constraint!");
+          assert(0 && "matching constraints for memory operands unimp");
+        }
+      }
+      
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        assert(!OpInfo.isIndirect && 
+               "Don't know how to handle indirect other inputs yet!");
+        
+        InOperandVal = TLI.isOperandValidForConstraint(InOperandVal,
+                                                       OpInfo.ConstraintCode[0],
+                                                       DAG);
+        if (!InOperandVal.Val) {
+          cerr << "Invalid operand for inline asm constraint '"
+               << OpInfo.ConstraintCode << "'!\n";
+          exit(1);
+        }
+        
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 3 /*IMM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+               
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+        
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect && 
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+      assert(!OpInfo.AssignedRegs.Regs.empty() &&
+             "Couldn't allocate input reg!");
+
+      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
+      
+      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, DAG,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isClobber: {
+      // Add the clobbered value to the operand list, so that the register
+      // allocator is aware that the physreg got clobbered.
+      if (!OpInfo.AssignedRegs.Regs.empty())
+        OpInfo.AssignedRegs.AddInlineAsmOperands(2/*REGDEF*/, DAG,
+                                                 AsmNodeOperands);
+      break;
+    }
+    }
+  }
+  
+  // Finish up input operands.
+  AsmNodeOperands[0] = Chain;
+  if (Flag.Val) AsmNodeOperands.push_back(Flag);
+  
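+  // Glue everything together into a single INLINEASM node, which produces a
+  // chain and a flag result.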
+  Chain = DAG.getNode(ISD::INLINEASM, 
+                      DAG.getNodeValueTypes(MVT::Other, MVT::Flag), 2,
+                      &AsmNodeOperands[0], AsmNodeOperands.size());
+  Flag = Chain.getValue(1);
+
+  // If this asm returns a register value, copy the result from that register
+  // and set it as the value of the call.
+  if (!RetValRegs.Regs.empty()) {
+    SDOperand Val = RetValRegs.getCopyFromRegs(DAG, Chain, &Flag);
+    
+    // If the result of the inline asm is a vector, it may have the wrong
+    // width/num elts.  Make sure to convert it to the right type with
+    // bit_convert.
+    if (MVT::isVector(Val.getValueType())) {
+      const VectorType *VTy = cast<VectorType>(I.getType());
+      MVT::ValueType DesiredVT = TLI.getValueType(VTy);
+      
+      Val = DAG.getNode(ISD::BIT_CONVERT, DesiredVT, Val);
+    }
+    
+    setValue(&I, Val);
+  }
+  
+  std::vector<std::pair<SDOperand, Value*> > StoresToEmit;
+  
+  // Process indirect outputs: first, emit all of the flagged copies out of
+  // physregs.
+  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+    Value *Ptr = IndirectStoresToEmit[i].second;
+    SDOperand OutVal = OutRegs.getCopyFromRegs(DAG, Chain, &Flag);
+    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+  }
+  
+  // Emit the non-flagged stores from the physregs.
+  SmallVector<SDOperand, 8> OutChains;
+  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
+    OutChains.push_back(DAG.getStore(Chain, StoresToEmit[i].first,
+                                    getValue(StoresToEmit[i].second),
+                                    StoresToEmit[i].second, 0));
+  if (!OutChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                        &OutChains[0], OutChains.size());
+  DAG.setRoot(Chain);
+}
+
+
+void SelectionDAGLowering::visitMalloc(MallocInst &I) {
+  SDOperand Src = getValue(I.getOperand(0));
+
+  MVT::ValueType IntPtr = TLI.getPointerTy();
+
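+  // The size operand may be narrower or wider than a pointer; normalize it to
+  // the target's intptr type first.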
+  if (IntPtr < Src.getValueType())
+    Src = DAG.getNode(ISD::TRUNCATE, IntPtr, Src);
+  else if (IntPtr > Src.getValueType())
+    Src = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, Src);
+
+  // Scale the source by the type size.
+  uint64_t ElementSize = TD->getTypeSize(I.getType()->getElementType());
+  Src = DAG.getNode(ISD::MUL, Src.getValueType(),
+                    Src, getIntPtrConstant(ElementSize));
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Src;
+  Entry.Ty = TLI.getTargetData()->getIntPtrType();
+  Args.push_back(Entry);
+
+  std::pair<SDOperand,SDOperand> Result =
+    TLI.LowerCallTo(getRoot(), I.getType(), false, false, CallingConv::C, true,
+                    DAG.getExternalSymbol("malloc", IntPtr),
+                    Args, DAG);
+  setValue(&I, Result.first);  // Pointers always fit in registers
+  DAG.setRoot(Result.second);
+}
+
+void SelectionDAGLowering::visitFree(FreeInst &I) {
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = getValue(I.getOperand(0));
+  Entry.Ty = TLI.getTargetData()->getIntPtrType();
+  Args.push_back(Entry);
+  MVT::ValueType IntPtr = TLI.getPointerTy();
+  std::pair<SDOperand,SDOperand> Result =
+    TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, CallingConv::C, true,
+                    DAG.getExternalSymbol("free", IntPtr), Args, DAG);
+  DAG.setRoot(Result.second);
+}
+
+// InsertAtEndOfBasicBlock - This method should be implemented by targets that
+// mark instructions with the 'usesCustomDAGSchedInserter' flag.  These
+// instructions are special in various ways, which require special support to
+// insert.  The specified MachineInstr is created but not inserted into any
+// basic blocks, and the scheduler passes ownership of it to this method.
+MachineBasicBlock *TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+                                                       MachineBasicBlock *MBB) {
+  cerr << "If a target marks an instruction with "
+       << "'usesCustomDAGSchedInserter', it must implement "
+       << "TargetLowering::InsertAtEndOfBasicBlock!\n";
+  abort();
+  return 0;  
+}
+
+void SelectionDAGLowering::visitVAStart(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VASTART, MVT::Other, getRoot(), 
+                          getValue(I.getOperand(1)), 
+                          DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
+  SDOperand V = DAG.getVAArg(TLI.getValueType(I.getType()), getRoot(),
+                             getValue(I.getOperand(0)),
+                             DAG.getSrcValue(I.getOperand(0)));
+  setValue(&I, V);
+  DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGLowering::visitVAEnd(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VAEND, MVT::Other, getRoot(),
+                          getValue(I.getOperand(1)), 
+                          DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVACopy(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VACOPY, MVT::Other, getRoot(), 
+                          getValue(I.getOperand(1)), 
+                          getValue(I.getOperand(2)),
+                          DAG.getSrcValue(I.getOperand(1)),
+                          DAG.getSrcValue(I.getOperand(2))));
+}
+
+/// TargetLowering::LowerArguments - This is the default LowerArguments
+/// implementation, which just inserts a FORMAL_ARGUMENTS node.  FIXME: When all
+/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be 
+/// integrated into SDISel.
+std::vector<SDOperand> 
+TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
+  const FunctionType *FTy = F.getFunctionType();
+  const ParamAttrsList *Attrs = FTy->getParamAttrs();
+  // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node.
+  std::vector<SDOperand> Ops;
+  Ops.push_back(DAG.getRoot());
+  Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy()));
+  Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy()));
+
+  // Add one result value for each formal argument.
+  std::vector<MVT::ValueType> RetVals;
+  unsigned j = 1;
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+       I != E; ++I, ++j) {
+    MVT::ValueType VT = getValueType(I->getType());
+    unsigned Flags = ISD::ParamFlags::NoFlagSet;
+    unsigned OriginalAlignment =
+      getTargetData()->getABITypeAlignment(I->getType());
+
+    // FIXME: Distinguish a formal with no [sz]ext attribute from one
+    // that is zero extended!
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::ZExt))
+      Flags &= ~(ISD::ParamFlags::SExt);
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::SExt))
+      Flags |= ISD::ParamFlags::SExt;
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::InReg))
+      Flags |= ISD::ParamFlags::InReg;
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::StructRet))
+      Flags |= ISD::ParamFlags::StructReturn;
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::ByVal))
+      Flags |= ISD::ParamFlags::ByVal;
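+    // Record the argument's original ABI alignment in the upper bits of the
+    // flags word.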
+    Flags |= (OriginalAlignment << ISD::ParamFlags::OrigAlignmentOffs);
+    
+    switch (getTypeAction(VT)) {
+    default: assert(0 && "Unknown type action!");
+    case Legal: 
+      RetVals.push_back(VT);
+      Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      break;
+    case Promote:
+      RetVals.push_back(getTypeToTransformTo(VT));
+      Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      break;
+    case Expand: {
+      // If this is an illegal type, it needs to be broken up to fit into 
+      // registers.
+      MVT::ValueType RegisterVT = getRegisterType(VT);
+      unsigned NumRegs = getNumRegisters(VT);
+      for (unsigned i = 0; i != NumRegs; ++i) {
+        RetVals.push_back(RegisterVT);
+        // If it isn't the first piece, the alignment must be 1.
+        if (i > 0)
+          Flags = (Flags & (~ISD::ParamFlags::OrigAlignment)) |
+            (1 << ISD::ParamFlags::OrigAlignmentOffs);
+        Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      }
+      break;
+    }
+    }
+  }
+
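+  // The last result value of FORMAL_ARGUMENTS is the output chain.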
+  RetVals.push_back(MVT::Other);
+  
+  // Create the node.
+  SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS,
+                               DAG.getNodeValueTypes(RetVals), RetVals.size(),
+                               &Ops[0], Ops.size()).Val;
+  unsigned NumArgRegs = Result->getNumValues() - 1;
+  DAG.setRoot(SDOperand(Result, NumArgRegs));
+
+  // Set up the return result vector.
+  Ops.clear();
+  unsigned i = 0;
+  unsigned Idx = 1;
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; 
+      ++I, ++Idx) {
+    MVT::ValueType VT = getValueType(I->getType());
+    
+    switch (getTypeAction(VT)) {
+    default: assert(0 && "Unknown type action!");
+    case Legal: 
+      Ops.push_back(SDOperand(Result, i++));
+      break;
+    case Promote: {
+      SDOperand Op(Result, i++);
+      if (MVT::isInteger(VT)) {
+        if (Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt))
+          Op = DAG.getNode(ISD::AssertSext, Op.getValueType(), Op,
+                           DAG.getValueType(VT));
+        else if (Attrs && Attrs->paramHasAttr(Idx, ParamAttr::ZExt))
+          Op = DAG.getNode(ISD::AssertZext, Op.getValueType(), Op,
+                           DAG.getValueType(VT));
+        Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+      } else {
+        assert(MVT::isFloatingPoint(VT) && "Not int or FP?");
+        Op = DAG.getNode(ISD::FP_ROUND, VT, Op);
+      }
+      Ops.push_back(Op);
+      break;
+    }
+    case Expand: {
+      MVT::ValueType PartVT = getRegisterType(VT);
+      unsigned NumParts = getNumRegisters(VT);
+      SmallVector<SDOperand, 4> Parts(NumParts);
+      for (unsigned j = 0; j != NumParts; ++j)
+        Parts[j] = SDOperand(Result, i++);
+      Ops.push_back(getCopyFromParts(DAG, &Parts[0], NumParts, PartVT, VT));
+      break;
+    }
+    }
+  }
+  assert(i == NumArgRegs && "Argument register count mismatch!");
+  return Ops;
+}
+
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just inserts an ISD::CALL node, which is later custom
+/// lowered by the target to something concrete.  FIXME: When all targets are
+/// migrated to using ISD::CALL, this hook should be integrated into SDISel.
+std::pair<SDOperand, SDOperand>
+TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, 
+                            bool RetTyIsSigned, bool isVarArg,
+                            unsigned CallingConv, bool isTailCall, 
+                            SDOperand Callee,
+                            ArgListTy &Args, SelectionDAG &DAG) {
+  SmallVector<SDOperand, 32> Ops;
+  Ops.push_back(Chain);   // Op#0 - Chain
+  Ops.push_back(DAG.getConstant(CallingConv, getPointerTy())); // Op#1 - CC
+  Ops.push_back(DAG.getConstant(isVarArg, getPointerTy()));    // Op#2 - VarArg
+  Ops.push_back(DAG.getConstant(isTailCall, getPointerTy()));  // Op#3 - Tail
+  Ops.push_back(Callee);
+  
+  // Handle all of the outgoing arguments.
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    MVT::ValueType VT = getValueType(Args[i].Ty);
+    SDOperand Op = Args[i].Node;
+    unsigned Flags = ISD::ParamFlags::NoFlagSet;
+    unsigned OriginalAlignment =
+      getTargetData()->getABITypeAlignment(Args[i].Ty);
+    
+    if (Args[i].isSExt)
+      Flags |= ISD::ParamFlags::SExt;
+    if (Args[i].isZExt)
+      Flags |= ISD::ParamFlags::ZExt;
+    if (Args[i].isInReg)
+      Flags |= ISD::ParamFlags::InReg;
+    if (Args[i].isSRet)
+      Flags |= ISD::ParamFlags::StructReturn;
+    Flags |= OriginalAlignment << ISD::ParamFlags::OrigAlignmentOffs;
+    
+    switch (getTypeAction(VT)) {
+    default: assert(0 && "Unknown type action!");
+    case Legal:
+      Ops.push_back(Op);
+      Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      break;
+    case Promote:
+      if (MVT::isInteger(VT)) {
+        unsigned ExtOp;
+        if (Args[i].isSExt)
+          ExtOp = ISD::SIGN_EXTEND;
+        else if (Args[i].isZExt)
+          ExtOp = ISD::ZERO_EXTEND;
+        else
+          ExtOp = ISD::ANY_EXTEND;
+        Op = DAG.getNode(ExtOp, getTypeToTransformTo(VT), Op);
+      } else {
+        assert(MVT::isFloatingPoint(VT) && "Not int or FP?");
+        Op = DAG.getNode(ISD::FP_EXTEND, getTypeToTransformTo(VT), Op);
+      }
+      Ops.push_back(Op);
+      Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      break;
+    case Expand: {
+      MVT::ValueType PartVT = getRegisterType(VT);
+      unsigned NumParts = getNumRegisters(VT);
+      SmallVector<SDOperand, 4> Parts(NumParts);
+      getCopyToParts(DAG, Op, &Parts[0], NumParts, PartVT);
+      for (unsigned i = 0; i != NumParts; ++i) {
+        // If it isn't the first piece, the alignment must be 1.
+        unsigned MyFlags = Flags;
+        if (i != 0)
+          MyFlags = (MyFlags & (~ISD::ParamFlags::OrigAlignment)) |
+            (1 << ISD::ParamFlags::OrigAlignmentOffs);
+
+        Ops.push_back(Parts[i]);
+        Ops.push_back(DAG.getConstant(MyFlags, MVT::i32));
+      }
+      break;
+    }
+    }
+  }
+  
+  // Figure out the result value types.
+  MVT::ValueType VT = getValueType(RetTy);
+  MVT::ValueType RegisterVT = getRegisterType(VT);
+  unsigned NumRegs = getNumRegisters(VT);
+  SmallVector<MVT::ValueType, 4> RetTys(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i)
+    RetTys[i] = RegisterVT;
+  
+  RetTys.push_back(MVT::Other);  // Always has a chain.
+  
+  // Create the CALL node.
+  SDOperand Res = DAG.getNode(ISD::CALL,
+                              DAG.getVTList(&RetTys[0], NumRegs + 1),
+                              &Ops[0], Ops.size());
+  SDOperand Chain = Res.getValue(NumRegs);
+
+  // Gather up the call result into a single value.
+  if (RetTy != Type::VoidTy) {
+    ISD::NodeType AssertOp = ISD::AssertSext;
+    if (!RetTyIsSigned)
+      AssertOp = ISD::AssertZext;
+    SmallVector<SDOperand, 4> Results(NumRegs);
+    for (unsigned i = 0; i != NumRegs; ++i)
+      Results[i] = Res.getValue(i);
+    Res = getCopyFromParts(DAG, &Results[0], NumRegs, RegisterVT, VT, AssertOp);
+  }
+
+  return std::make_pair(Res, Chain);
+}
+
+SDOperand TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+  assert(0 && "LowerOperation not implemented for this target!");
+  abort();
+  return SDOperand();
+}
+
+SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op,
+                                                 SelectionDAG &DAG) {
+  assert(0 && "CustomPromoteOperation not implemented for this target!");
+  abort();
+  return SDOperand();
+}
+
+/// getMemsetValue - Return the memset fill value replicated to fill a value
+/// of type VT.
+static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
+                                SelectionDAG &DAG) {
+  MVT::ValueType CurVT = VT;
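+  // Replicate the low byte of the fill value across the full width of VT.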
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+    uint64_t Val   = C->getValue() & 255;
+    unsigned Shift = 8;
+    while (CurVT != MVT::i8) {
+      Val = (Val << Shift) | Val;
+      Shift <<= 1;
+      CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+    }
+    return DAG.getConstant(Val, VT);
+  } else {
+    Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
+    unsigned Shift = 8;
+    while (CurVT != MVT::i8) {
+      Value =
+        DAG.getNode(ISD::OR, VT,
+                    DAG.getNode(ISD::SHL, VT, Value,
+                                DAG.getConstant(Shift, MVT::i8)), Value);
+      Shift <<= 1;
+      CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+    }
+
+    return Value;
+  }
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is lowered as a series of stores because the source is a
+/// constant string ptr.
+static SDOperand getMemsetStringVal(MVT::ValueType VT,
+                                    SelectionDAG &DAG, TargetLowering &TLI,
+                                    std::string &Str, unsigned Offset) {
+  uint64_t Val = 0;
+  unsigned MSB = MVT::getSizeInBits(VT) / 8;
+  if (TLI.isLittleEndian())
+    Offset = Offset + MSB - 1;
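+  // Assemble the bytes most-significant first so that, when stored with the
+  // target's endianness, the string bytes land in memory in order.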
+  for (unsigned i = 0; i != MSB; ++i) {
+    Val = (Val << 8) | (unsigned char)Str[Offset];
+    Offset += TLI.isLittleEndian() ? -1 : 1;
+  }
+  return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns a node computing Base plus the given Offset.
+static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
+                                      SelectionDAG &DAG, TargetLowering &TLI) {
+  MVT::ValueType VT = Base.getValueType();
+  return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform the memset / memcpy.
+static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
+                                     unsigned Limit, uint64_t Size,
+                                     unsigned Align, TargetLowering &TLI) {
+  MVT::ValueType VT;
+
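+  // Pick the widest store type the alignment allows (i64 when unaligned
+  // accesses are legal), then clamp it to the largest legal integer type.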
+  if (TLI.allowsUnalignedMemoryAccesses()) {
+    VT = MVT::i64;
+  } else {
+    switch (Align & 7) {
+    case 0:
+      VT = MVT::i64;
+      break;
+    case 4:
+      VT = MVT::i32;
+      break;
+    case 2:
+      VT = MVT::i16;
+      break;
+    default:
+      VT = MVT::i8;
+      break;
+    }
+  }
+
+  MVT::ValueType LVT = MVT::i64;
+  while (!TLI.isTypeLegal(LVT))
+    LVT = (MVT::ValueType)((unsigned)LVT - 1);
+  assert(MVT::isInteger(LVT));
+
+  if (VT > LVT)
+    VT = LVT;
+
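+  // Greedily cover the memory region with the largest stores that fit,
+  // failing if more than Limit operations would be needed.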
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+    while (VTSize > Size) {
+      VT = (MVT::ValueType)((unsigned)VT - 1);
+      VTSize >>= 1;
+    }
+    assert(MVT::isInteger(VT));
+
+    if (++NumMemOps > Limit)
+      return false;
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
+void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
+  SDOperand Op1 = getValue(I.getOperand(1));
+  SDOperand Op2 = getValue(I.getOperand(2));
+  SDOperand Op3 = getValue(I.getOperand(3));
+  SDOperand Op4 = getValue(I.getOperand(4));
+  unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
+  if (Align == 0) Align = 1;
+
+  if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
+    std::vector<MVT::ValueType> MemOps;
+
+    // Expand memset / memcpy to a series of load / store ops
+    // if the size operand falls below a certain threshold.
+    SmallVector<SDOperand, 8> OutChains;
+    switch (Op) {
+    default: break;  // Do nothing for now.
+    case ISD::MEMSET: {
+      if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
+                                   Size->getValue(), Align, TLI)) {
+        unsigned NumMemOps = MemOps.size();
+        unsigned Offset = 0;
+        for (unsigned i = 0; i < NumMemOps; i++) {
+          MVT::ValueType VT = MemOps[i];
+          unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+          SDOperand Value = getMemsetValue(Op2, VT, DAG);
+          SDOperand Store = DAG.getStore(getRoot(), Value,
+                                    getMemBasePlusOffset(Op1, Offset, DAG, TLI),
+                                         I.getOperand(1), Offset);
+          OutChains.push_back(Store);
+          Offset += VTSize;
+        }
+      }
+      break;
+    }
+    case ISD::MEMCPY: {
+      if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(),
+                                   Size->getValue(), Align, TLI)) {
+        unsigned NumMemOps = MemOps.size();
+        unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0;
+        GlobalAddressSDNode *G = NULL;
+        std::string Str;
+        bool CopyFromStr = false;
+
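+        // If the memcpy source is a constant global string (possibly at a
+        // constant offset), the bytes can be materialized directly instead of
+        // being loaded.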
+        if (Op2.getOpcode() == ISD::GlobalAddress)
+          G = cast<GlobalAddressSDNode>(Op2);
+        else if (Op2.getOpcode() == ISD::ADD &&
+                 Op2.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+                 Op2.getOperand(1).getOpcode() == ISD::Constant) {
+          G = cast<GlobalAddressSDNode>(Op2.getOperand(0));
+          SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue();
+        }
+        if (G) {
+          GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+          if (GV && GV->isConstant()) {
+            Str = GV->getStringValue(false);
+            if (!Str.empty()) {
+              CopyFromStr = true;
+              SrcOff += SrcDelta;
+            }
+          }
+        }
+
+        for (unsigned i = 0; i < NumMemOps; i++) {
+          MVT::ValueType VT = MemOps[i];
+          unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+          SDOperand Value, Chain, Store;
+
+          if (CopyFromStr) {
+            Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
+            Chain = getRoot();
+            Store =
+              DAG.getStore(Chain, Value,
+                           getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
+                           I.getOperand(1), DstOff);
+          } else {
+            Value = DAG.getLoad(VT, getRoot(),
+                        getMemBasePlusOffset(Op2, SrcOff, DAG, TLI),
+                        I.getOperand(2), SrcOff);
+            Chain = Value.getValue(1);
+            Store =
+              DAG.getStore(Chain, Value,
+                           getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
+                           I.getOperand(1), DstOff);
+          }
+          OutChains.push_back(Store);
+          SrcOff += VTSize;
+          DstOff += VTSize;
+        }
+      }
+      break;
+    }
+    }
+
+    if (!OutChains.empty()) {
+      DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
+                  &OutChains[0], OutChains.size()));
+      return;
+    }
+  }
+
+  DAG.setRoot(DAG.getNode(Op, MVT::Other, getRoot(), Op1, Op2, Op3, Op4));
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+unsigned SelectionDAGISel::MakeReg(MVT::ValueType VT) {
+  return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AliasAnalysis>();
+  AU.setPreservesAll();
+}
+
+
+
+bool SelectionDAGISel::runOnFunction(Function &Fn) {
+  MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine());
+  RegMap = MF.getSSARegMap();
+  DOUT << "\n\n\n=== " << Fn.getName() << "\n";
+
+  FunctionLoweringInfo FuncInfo(TLI, Fn, MF);
+
+  if (ExceptionHandling)
+    for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
+        // Mark landing pad.
+        FuncInfo.MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+
+  for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+    SelectBasicBlock(I, MF, FuncInfo);
+
+  // Add function live-ins to entry block live-in set.
+  BasicBlock *EntryBB = &Fn.getEntryBlock();
+  BB = FuncInfo.MBBMap[EntryBB];
+  if (!MF.livein_empty())
+    for (MachineFunction::livein_iterator I = MF.livein_begin(),
+           E = MF.livein_end(); I != E; ++I)
+      BB->addLiveIn(I->first);
+
+#ifndef NDEBUG
+  assert(FuncInfo.CatchInfoFound.size() == FuncInfo.CatchInfoLost.size() &&
+         "Not all catch info was assigned to a landing pad!");
+#endif
+
+  return true;
+}
+
+SDOperand SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, 
+                                                           unsigned Reg) {
+  SDOperand Op = getValue(V);
+  assert((Op.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+  
+  MVT::ValueType SrcVT = Op.getValueType();
+  MVT::ValueType RegisterVT = TLI.getRegisterType(SrcVT);
+  unsigned NumRegs = TLI.getNumRegisters(SrcVT);
+  SmallVector<SDOperand, 8> Regs(NumRegs);
+  SmallVector<SDOperand, 8> Chains(NumRegs);
+
+  // Copy the value by legal parts into sequential virtual registers.
+  getCopyToParts(DAG, Op, &Regs[0], NumRegs, RegisterVT);
+  for (unsigned i = 0; i != NumRegs; ++i)
+    Chains[i] = DAG.getCopyToReg(getRoot(), Reg + i, Regs[i]);
+  return DAG.getNode(ISD::TokenFactor, MVT::Other, &Chains[0], NumRegs);
+}
+
+void SelectionDAGISel::
+LowerArguments(BasicBlock *LLVMBB, SelectionDAGLowering &SDL,
+               std::vector<SDOperand> &UnorderedChains) {
+  // If this is the entry block, emit arguments.
+  Function &F = *LLVMBB->getParent();
+  FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
+  SDOperand OldRoot = SDL.DAG.getRoot();
+  std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
+
+  unsigned a = 0;
+  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+       AI != E; ++AI, ++a)
+    if (!AI->use_empty()) {
+      SDL.setValue(AI, Args[a]);
+
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BBs will reference it as.
+      DenseMap<const Value*, unsigned>::iterator VMI=FuncInfo.ValueMap.find(AI);
+      if (VMI != FuncInfo.ValueMap.end()) {
+        SDOperand Copy = SDL.CopyValueToVirtualRegister(AI, VMI->second);
+        UnorderedChains.push_back(Copy);
+      }
+    }
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
+}
+
+static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+                          MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+  assert(!FLI.MBBMap[SrcBB]->isLandingPad() &&
+         "Copying catch info out of a landing pad!");
+  for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+    if (isSelector(I)) {
+      // Apply the catch info to DestBB.
+      addCatchInfo(cast<CallInst>(*I), MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+      FLI.CatchInfoFound.insert(I);
+#endif
+    }
+}
+
+void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
+       std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
+                                         FunctionLoweringInfo &FuncInfo) {
+  SelectionDAGLowering SDL(DAG, TLI, FuncInfo);
+
+  std::vector<SDOperand> UnorderedChains;
+
+  // Lower any arguments needed in this block if this is the entry block.
+  if (LLVMBB == &LLVMBB->getParent()->getEntryBlock())
+    LowerArguments(LLVMBB, SDL, UnorderedChains);
+
+  BB = FuncInfo.MBBMap[LLVMBB];
+  SDL.setCurrentBasicBlock(BB);
+
+  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+  if (ExceptionHandling && MMI && BB->isLandingPad()) {
+    // Add a label to mark the beginning of the landing pad.  Deletion of the
+    // landing pad can thus be detected via the MachineModuleInfo.
+    unsigned LabelID = MMI->addLandingPad(BB);
+    DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, DAG.getEntryNode(),
+                            DAG.getConstant(LabelID, MVT::i32)));
+
+    // Mark exception register as live in.
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    if (Reg) BB->addLiveIn(Reg);
+
+    // Mark exception selector register as live in.
+    Reg = TLI.getExceptionSelectorRegister();
+    if (Reg) BB->addLiveIn(Reg);
+
+    // FIXME: Hack around an exception handling flaw (PR1508): the personality
+    // function and list of typeids logically belong to the invoke (or, if you
+    // like, the basic block containing the invoke), and need to be associated
+    // with it in the dwarf exception handling tables.  Currently however the
+    // information is provided by an intrinsic (eh.selector) that can be moved
+    // to unexpected places by the optimizers: if the unwind edge is critical,
+    // then breaking it can result in the intrinsics being in the successor of
+    // the landing pad, not the landing pad itself.  This results in exceptions
+    // not being caught because no typeids are associated with the invoke.
+    // This may not be the only way things can go wrong, but it is the only one
+    // we try to work around for the moment.
+    BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+    if (Br && Br->isUnconditional()) { // Critical edge?
+      BasicBlock::iterator I, E;
+      for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+        if (isSelector(I))
+          break;
+
+      if (I == E)
+        // No catch info found - try to extract some from the successor.
+        copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, FuncInfo);
+    }
+  }
+
+  // Lower all of the non-terminator instructions.
+  for (BasicBlock::iterator I = LLVMBB->begin(), E = --LLVMBB->end();
+       I != E; ++I)
+    SDL.visit(*I);
+
+  // Ensure that all instructions which are used outside of their defining
+  // blocks are available as virtual registers.  Invoke is handled elsewhere.
+  for (BasicBlock::iterator I = LLVMBB->begin(), E = LLVMBB->end(); I != E;++I)
+    if (!I->use_empty() && !isa<PHINode>(I) && !isa<InvokeInst>(I)) {
+      DenseMap<const Value*, unsigned>::iterator VMI =
+        FuncInfo.ValueMap.find(I);
+      if (VMI != FuncInfo.ValueMap.end())
+        UnorderedChains.push_back(
+                                SDL.CopyValueToVirtualRegister(I, VMI->second));
+    }
+
+  // Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
+  // ensure constants are generated when needed.  Remember the virtual registers
+  // that need to be added to the Machine PHI nodes as input.  We cannot just
+  // directly add them, because expansion might result in multiple MBB's for one
+  // BB.  As such, the start of the BB might correspond to a different MBB than
+  // the end.
+  //
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  // Emit constants only once even if used by multiple PHI nodes.
+  std::map<Constant*, unsigned> ConstantsOut;
+  
+  // Vector bool would be better, but vector<bool> is really slow.
+  std::vector<unsigned char> SuccsHandled;
+  if (TI->getNumSuccessors())
+    SuccsHandled.resize(BB->getParent()->getNumBlockIDs());
+    
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+    
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    unsigned SuccMBBNo = SuccMBB->getNumber();
+    if (SuccsHandled[SuccMBBNo]) continue;
+    SuccsHandled[SuccMBBNo] = true;
+    
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead PHIs.
+      if (PN->use_empty()) continue;
+      
+      unsigned Reg;
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+      
+      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo.CreateRegForValue(C);
+          UnorderedChains.push_back(
+                           SDL.CopyValueToVirtualRegister(C, RegOut));
+        }
+        Reg = RegOut;
+      } else {
+        Reg = FuncInfo.ValueMap[PHIOp];
+        if (Reg == 0) {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo.CreateRegForValue(PHIOp);
+          UnorderedChains.push_back(
+                           SDL.CopyValueToVirtualRegister(PHIOp, Reg));
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+      MVT::ValueType VT = TLI.getValueType(PN->getType());
+      unsigned NumRegisters = TLI.getNumRegisters(VT);
+      for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+        PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+    }
+  }
+  ConstantsOut.clear();
+
+  // Turn all of the unordered chains into one factored node.
+  if (!UnorderedChains.empty()) {
+    SDOperand Root = SDL.getRoot();
+    if (Root.getOpcode() != ISD::EntryToken) {
+      unsigned i = 0, e = UnorderedChains.size();
+      for (; i != e; ++i) {
+        assert(UnorderedChains[i].Val->getNumOperands() > 1);
+        if (UnorderedChains[i].Val->getOperand(0) == Root)
+          break;  // Don't add the root if we already indirectly depend on it.
+      }
+        
+      if (i == e)
+        UnorderedChains.push_back(Root);
+    }
+    DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
+                            &UnorderedChains[0], UnorderedChains.size()));
+  }
+
+  // Lower the terminator after the copies are emitted.
+  SDL.visit(*LLVMBB->getTerminator());
+
+  // Copy over any CaseBlock records that may now exist due to SwitchInst
+  // lowering, as well as any jump table information.
+  SwitchCases.clear();
+  SwitchCases = SDL.SwitchCases;
+  JTCases.clear();
+  JTCases = SDL.JTCases;
+  BitTestCases.clear();
+  BitTestCases = SDL.BitTestCases;
+    
+  // Make sure the root of the DAG is up-to-date.
+  DAG.setRoot(SDL.getRoot());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) {
+  // Get alias analysis for load/store combining.
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // Run the DAG combiner in pre-legalize mode.
+  DAG.Combine(false, AA);
+  
+  DOUT << "Lowered selection DAG:\n";
+  DEBUG(DAG.dump());
+  
+  // Second step, hack on the DAG until it only uses operations and types that
+  // the target supports.
+  DAG.Legalize();
+  
+  DOUT << "Legalized selection DAG:\n";
+  DEBUG(DAG.dump());
+  
+  // Run the DAG combiner in post-legalize mode.
+  DAG.Combine(true, AA);
+  
+  if (ViewISelDAGs) DAG.viewGraph();
+
+  // Third, instruction select all of the operations to machine code, adding the
+  // code to the MachineBasicBlock.
+  InstructionSelectBasicBlock(DAG);
+  
+  DOUT << "Selected machine code:\n";
+  DEBUG(BB->dump());
+}  
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF,
+                                        FunctionLoweringInfo &FuncInfo) {
+  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+  {
+    SelectionDAG DAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+    CurDAG = &DAG;
+  
+    // First step, lower LLVM code to some DAG.  This DAG may use operations and
+    // types that are not supported by the target.
+    BuildSelectionDAG(DAG, LLVMBB, PHINodesToUpdate, FuncInfo);
+
+    // Second step, emit the lowered DAG as machine code.
+    CodeGenAndEmitDAG(DAG);
+  }
+
+  DOUT << "Total amount of phi nodes to update: "
+       << PHINodesToUpdate.size() << "\n";
+  DEBUG(for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i)
+          DOUT << "Node " << i << " : (" << PHINodesToUpdate[i].first
+               << ", " << PHINodesToUpdate[i].second << ")\n";);
+  
+  // Next, now that we know which MBB is the last one the LLVM BB expanded
+  // into, update the PHI nodes in its successors.
+  if (SwitchCases.empty() && JTCases.empty() && BitTestCases.empty()) {
+    for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) {
+      MachineInstr *PHI = PHINodesToUpdate[i].first;
+      assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+             "This is not a machine PHI node that we are updating!");
+      PHI->addRegOperand(PHINodesToUpdate[i].second, false);
+      PHI->addMachineBasicBlockOperand(BB);
+    }
+    return;
+  }
+
+  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) {
+    // Lower header first, if it wasn't already lowered
+    if (!BitTestCases[i].Emitted) {
+      SelectionDAG HSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+      CurDAG = &HSDAG;
+      SelectionDAGLowering HSDL(HSDAG, TLI, FuncInfo);    
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = BitTestCases[i].Parent;
+      HSDL.setCurrentBasicBlock(BB);
+      // Emit the code
+      HSDL.visitBitTestHeader(BitTestCases[i]);
+      HSDAG.setRoot(HSDL.getRoot());
+      CodeGenAndEmitDAG(HSDAG);
+    }    
+
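+    // Lower each bit test block in its own DAG.  On a miss each block
+    // transfers control to the next test, and the last one transfers to the
+    // default block instead.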
+    for (unsigned j = 0, ej = BitTestCases[i].Cases.size(); j != ej; ++j) {
+      SelectionDAG BSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+      CurDAG = &BSDAG;
+      SelectionDAGLowering BSDL(BSDAG, TLI, FuncInfo);
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = BitTestCases[i].Cases[j].ThisBB;
+      BSDL.setCurrentBasicBlock(BB);
+      // Emit the code
+      if (j+1 != ej)
+        BSDL.visitBitTestCase(BitTestCases[i].Cases[j+1].ThisBB,
+                              BitTestCases[i].Reg,
+                              BitTestCases[i].Cases[j]);
+      else
+        BSDL.visitBitTestCase(BitTestCases[i].Default,
+                              BitTestCases[i].Reg,
+                              BitTestCases[i].Cases[j]);
+
+      BSDAG.setRoot(BSDL.getRoot());
+      CodeGenAndEmitDAG(BSDAG);
+    }
+
+    // Update PHI Nodes
+    for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) {
+      MachineInstr *PHI = PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+             "This is not a machine PHI node that we are updating!");
+      // This is "default" BB. We have two jumps to it. From "header" BB and
+      // from last "case" BB.
+      if (PHIBB == BitTestCases[i].Default) {
+        PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+        PHI->addMachineBasicBlockOperand(BitTestCases[i].Parent);
+        PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+        PHI->addMachineBasicBlockOperand(BitTestCases[i].Cases.back().ThisBB);
+      }
+      // One of "cases" BB.
+      for (unsigned j = 0, ej = BitTestCases[i].Cases.size(); j != ej; ++j) {
+        MachineBasicBlock* cBB = BitTestCases[i].Cases[j].ThisBB;
+        if (cBB->succ_end() !=
+            std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
+          PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+          PHI->addMachineBasicBlockOperand(cBB);
+        }
+      }
+    }
+  }
+
+  // If the JumpTable record is filled in, then we need to emit a jump table.
+  // Updating the PHI nodes is tricky in this case, since we need to determine
+  // whether the PHI is a successor of the range check MBB or of the jump
+  // table MBB.
+  for (unsigned i = 0, e = JTCases.size(); i != e; ++i) {
+    // Lower header first, if it wasn't already lowered
+    if (!JTCases[i].first.Emitted) {
+      SelectionDAG HSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+      CurDAG = &HSDAG;
+      SelectionDAGLowering HSDL(HSDAG, TLI, FuncInfo);    
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = JTCases[i].first.HeaderBB;
+      HSDL.setCurrentBasicBlock(BB);
+      // Emit the code
+      HSDL.visitJumpTableHeader(JTCases[i].second, JTCases[i].first);
+      HSDAG.setRoot(HSDL.getRoot());
+      CodeGenAndEmitDAG(HSDAG);
+    }
+    
+    SelectionDAG JSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+    CurDAG = &JSDAG;
+    SelectionDAGLowering JSDL(JSDAG, TLI, FuncInfo);
+    // Set the current basic block to the mbb we wish to insert the code into
+    BB = JTCases[i].second.MBB;
+    JSDL.setCurrentBasicBlock(BB);
+    // Emit the code
+    JSDL.visitJumpTable(JTCases[i].second);
+    JSDAG.setRoot(JSDL.getRoot());
+    CodeGenAndEmitDAG(JSDAG);
+    
+    // Update PHI Nodes
+    for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) {
+      MachineInstr *PHI = PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+             "This is not a machine PHI node that we are updating!");
+      // "default" BB. We can go there only from header BB.
+      if (PHIBB == JTCases[i].second.Default) {
+        PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+        PHI->addMachineBasicBlockOperand(JTCases[i].first.HeaderBB);
+      }
+      // The jump table BB.  Just iterate over its successors here.
+      if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
+        PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+        PHI->addMachineBasicBlockOperand(BB);
+      }
+    }
+  }
+  
+  // If the switch block involved a branch to one of the actual successors, we
+  // need to update PHI nodes in that block.
+  for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) {
+    MachineInstr *PHI = PHINodesToUpdate[i].first;
+    assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+           "This is not a machine PHI node that we are updating!");
+    if (BB->isSuccessor(PHI->getParent())) {
+      PHI->addRegOperand(PHINodesToUpdate[i].second, false);
+      PHI->addMachineBasicBlockOperand(BB);
+    }
+  }
+  
+  // If we generated any switch lowering information, build and codegen any
+  // additional DAGs necessary.
+  for (unsigned i = 0, e = SwitchCases.size(); i != e; ++i) {
+    SelectionDAG SDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+    CurDAG = &SDAG;
+    SelectionDAGLowering SDL(SDAG, TLI, FuncInfo);
+    
+    // Set the current basic block to the mbb we wish to insert the code into
+    BB = SwitchCases[i].ThisBB;
+    SDL.setCurrentBasicBlock(BB);
+    
+    // Emit the code
+    SDL.visitSwitchCase(SwitchCases[i]);
+    SDAG.setRoot(SDL.getRoot());
+    CodeGenAndEmitDAG(SDAG);
+    
+    // Handle any PHI nodes in successors of this chunk, as if we were coming
+    // from the original BB before switch expansion.  Note that PHI nodes can
+    // occur multiple times in PHINodesToUpdate.  We have to be very careful to
+    // handle them the right number of times.
+    while ((BB = SwitchCases[i].TrueBB)) {  // Handle LHS and RHS.
+      for (MachineBasicBlock::iterator Phi = BB->begin();
+           Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
+        // This value for this PHI node is recorded in PHINodesToUpdate, get it.
+        for (unsigned pn = 0; ; ++pn) {
+          assert(pn != PHINodesToUpdate.size() && "Didn't find PHI entry!");
+          if (PHINodesToUpdate[pn].first == Phi) {
+            Phi->addRegOperand(PHINodesToUpdate[pn].second, false);
+            Phi->addMachineBasicBlockOperand(SwitchCases[i].ThisBB);
+            break;
+          }
+        }
+      }
+      
+      // Don't process RHS if same block as LHS.
+      if (BB == SwitchCases[i].FalseBB)
+        SwitchCases[i].FalseBB = 0;
+      
+      // If we haven't handled the RHS, do so now.  Otherwise, we're done.
+      SwitchCases[i].TrueBB = SwitchCases[i].FalseBB;
+      SwitchCases[i].FalseBB = 0;
+    }
+    assert(SwitchCases[i].TrueBB == 0 && SwitchCases[i].FalseBB == 0);
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+/// ScheduleAndEmitDAG - Pick a safe ordering and emit instructions for each
+/// target node in the graph.
+void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) {
+  if (ViewSchedDAGs) DAG.viewGraph();
+
+  RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+  
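+  // No default scheduler ctor has been registered; fall back to the
+  // ISHeuristic ctor and remember it as the default from now on.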
+  if (!Ctor) {
+    Ctor = ISHeuristic;
+    RegisterScheduler::setDefault(Ctor);
+  }
+  
+  ScheduleDAG *SL = Ctor(this, &DAG, BB);
+  BB = SL->Run();
+  delete SL;
+}
+
+
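+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use when
+/// scheduling this function.  The base implementation just returns a plain
+/// HazardRecognizer; targets with pipeline hazards override this hook.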
+HazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
+  return new HazardRecognizer();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255).  If
+/// the dag combiner simplified the 255, we still want to match.  RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckAndMask(SDOperand LHS, ConstantSDNode *RHS, 
+                                    int64_t DesiredMaskS) {
+  uint64_t ActualMask = RHS->getValue();
+  uint64_t DesiredMask =
+    DesiredMaskS & MVT::getIntVTBitMask(LHS.getValueType());
+  
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+  
+  // If the actual AND mask is allowing disallowed bits, this doesn't match.
+  if (ActualMask & ~DesiredMask)
+    return false;
+  
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  uint64_t NeededMask = DesiredMask & ~ActualMask;
+  if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+    return true;
+  
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255).  If
+/// the dag combiner simplified the 255, we still want to match.  RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDOperand LHS, ConstantSDNode *RHS, 
+                                    int64_t DesiredMaskS) {
+  uint64_t ActualMask = RHS->getValue();
+  uint64_t DesiredMask =
+    DesiredMaskS & MVT::getIntVTBitMask(LHS.getValueType());
+  
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+  
+  // If the actual OR mask is allowing disallowed bits, this doesn't match.
+  if (ActualMask & ~DesiredMask)
+    return false;
+  
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  uint64_t NeededMask = DesiredMask & ~ActualMask;
+  
+  uint64_t KnownZero, KnownOne;
+  CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+  
+  // If all the missing bits in the or are already known to be set, match!
+  if ((NeededMask & KnownOne) == NeededMask)
+    return true;
+  
+  // TODO: check to see if missing bits are just not demanded.
+  
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen.  Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDOperand> &Ops, SelectionDAG &DAG) {
+  std::vector<SDOperand> InOps;
+  std::swap(InOps, Ops);
+
+  Ops.push_back(InOps[0]);  // input chain.
+  Ops.push_back(InOps[1]);  // input asm string.
+
+  unsigned i = 2, e = InOps.size();
+  if (InOps[e-1].getValueType() == MVT::Flag)
+    --e;  // Don't process a flag operand if it is here.
+  
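+  // Walk the operand list.  Each group of operands is preceded by a flag word
+  // whose low three bits encode the operand kind and whose remaining bits give
+  // the number of SDOperands in the group.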
+  while (i != e) {
+    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getValue();
+    if ((Flags & 7) != 4 /*MEM*/) {
+      // Just skip over this operand, copying the operands verbatim.
+      Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+(Flags >> 3) + 1);
+      i += (Flags >> 3) + 1;
+    } else {
+      assert((Flags >> 3) == 1 && "Memory operand with multiple values?");
+      // Otherwise, this is a memory operand.  Ask the target to select it.
+      std::vector<SDOperand> SelOps;
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps, DAG)) {
+        cerr << "Could not match memory address.  Inline asm failure!\n";
+        exit(1);
+      }
+      
+      // Add this to the output node.
+      MVT::ValueType IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+      Ops.push_back(DAG.getTargetConstant(4/*MEM*/ | (SelOps.size() << 3),
+                                          IntPtrTy));
+      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+      i += 2;
+    }
+  }
+  
+  // Add the flag input back if present.
+  if (e != InOps.size())
+    Ops.push_back(InOps.back());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000..12b5682
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,245 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+#include <sstream>
+using namespace llvm;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+    static std::string getGraphName(const SelectionDAG *G) {
+      return G->getMachineFunction().getFunction()->getName();
+    }
+
+    static bool renderGraphFromBottomUp() {
+      return true;
+    }
+    
+    static bool hasNodeAddressLabel(const SDNode *Node,
+                                    const SelectionDAG *Graph) {
+      return true;
+    }
+    
+    /// If you want to override the dot attributes printed for a particular
+    /// edge, override this method.
+    template<typename EdgeIter>
+    static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+      SDOperand Op = EI.getNode()->getOperand(EI.getOperand());
+      MVT::ValueType VT = Op.getValueType();
+      if (VT == MVT::Flag)
+        return "color=red,style=bold";
+      else if (VT == MVT::Other)
+        return "color=blue,style=dashed";
+      return "";
+    }
+    
+
+    static std::string getNodeLabel(const SDNode *Node,
+                                    const SelectionDAG *Graph);
+    static std::string getNodeAttributes(const SDNode *N,
+                                         const SelectionDAG *Graph) {
+#ifndef NDEBUG
+      const std::string &Attrs = Graph->getGraphAttrs(N);
+      if (!Attrs.empty()) {
+        if (Attrs.find("shape=") == std::string::npos)
+          return std::string("shape=Mrecord,") + Attrs;
+        else
+          return Attrs;
+      }
+#endif
+      return "shape=Mrecord";
+    }
+
+    static void addCustomGraphFeatures(SelectionDAG *G,
+                                       GraphWriter<SelectionDAG*> &GW) {
+      GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+      if (G->getRoot().Val)
+        GW.emitEdge(0, -1, G->getRoot().Val, -1, "");
+    }
+  };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+                                                        const SelectionDAG *G) {
+  std::string Op = Node->getOperationName(G);
+
+  for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+    if (Node->getValueType(i) == MVT::Other)
+      Op += ":ch";
+    else
+      Op = Op + ":" + MVT::getValueTypeString(Node->getValueType(i));
+    
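+  // Append node-specific details (constant values, symbol names, register
+  // numbers, etc.) to the label for the common node kinds.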
+  if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) {
+    Op += ": " + utostr(CSDN->getValue());
+  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) {
+    Op += ": " + ftostr(CSDN->getValue());
+  } else if (const GlobalAddressSDNode *GADN =
+             dyn_cast<GlobalAddressSDNode>(Node)) {
+    int offset = GADN->getOffset();
+    Op += ": " + GADN->getGlobal()->getName();
+    if (offset > 0)
+      Op += "+" + itostr(offset);
+    else
+      Op += itostr(offset);
+  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) {
+    Op += " " + itostr(FIDN->getIndex());
+  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) {
+    Op += " " + itostr(JTDN->getIndex());
+  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){
+    if (CP->isMachineConstantPoolEntry()) {
+      std::ostringstream SS;
+      CP->getMachineCPVal()->print(SS);
+      Op += "<" + SS.str() + ">";
+    } else {
+      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+        Op += "<" + ftostr(CFP->getValue()) + ">";
+      else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
+        Op += "<" + utostr(CI->getZExtValue()) + ">";
+      else {
+        std::ostringstream SS;
+        WriteAsOperand(SS, CP->getConstVal(), false);
+        Op += "<" + SS.str() + ">";
+      }
+    }
+  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
+    Op = "BB: ";
+    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+    if (LBB)
+      Op += LBB->getName();
+    //Op += " " + (const void*)BBDN->getBasicBlock();
+  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) {
+    if (G && R->getReg() != 0 &&
+        MRegisterInfo::isPhysicalRegister(R->getReg())) {
+      Op = Op + " " + G->getTarget().getRegisterInfo()->getName(R->getReg());
+    } else {
+      Op += " #" + utostr(R->getReg());
+    }
+  } else if (const ExternalSymbolSDNode *ES =
+             dyn_cast<ExternalSymbolSDNode>(Node)) {
+    Op += "'" + std::string(ES->getSymbol()) + "'";
+  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) {
+    if (M->getValue())
+      Op += "<" + M->getValue()->getName() + ":" + itostr(M->getOffset()) + ">";
+    else
+      Op += "<null:" + itostr(M->getOffset()) + ">";
+  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) {
+    Op = Op + " VT=" + MVT::getValueTypeString(N->getVT());
+  } else if (const StringSDNode *N = dyn_cast<StringSDNode>(Node)) {
+    Op = Op + "\"" + N->getValue() + "\"";
+  } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) {
+    bool doExt = true;
+    switch (LD->getExtensionType()) {
+    default: doExt = false; break;
+    case ISD::EXTLOAD:
+      Op = Op + "<anyext ";
+      break;
+    case ISD::SEXTLOAD:
+      Op = Op + " <sext ";
+      break;
+    case ISD::ZEXTLOAD:
+      Op = Op + " <zext ";
+      break;
+    }
+    if (doExt)
+      Op = Op + MVT::getValueTypeString(LD->getLoadedVT()) + ">";
+
+    Op += LD->getIndexedModeName(LD->getAddressingMode());
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) {
+    if (ST->isTruncatingStore())
+      Op = Op + "<trunc " + MVT::getValueTypeString(ST->getStoredVT()) + ">";
+    Op += ST->getIndexedModeName(ST->getAddressingMode());
+  }
+  
+  return Op;
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName());
+#else
+  cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
+
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (e.g. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+  NodeGraphAttrs.clear();
+#else
+  cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node (e.g. "color=red").
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = Attrs;
+#else
+  cerr << "SelectionDAG::setGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node (e.g. "color=red").
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+  std::map<const SDNode *, std::string>::const_iterator I =
+    NodeGraphAttrs.find(N);
+    
+  if (I != NodeGraphAttrs.end())
+    return I->second;
+  else
+    return "";
+#else
+  cerr << "SelectionDAG::getGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+  return std::string("");
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+  cerr << "SelectionDAG::setGraphColor is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000..1b7b436
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,1753 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+  Names[RTLIB::SHL_I32] = "__ashlsi3";
+  Names[RTLIB::SHL_I64] = "__ashldi3";
+  Names[RTLIB::SRL_I32] = "__lshrsi3";
+  Names[RTLIB::SRL_I64] = "__lshrdi3";
+  Names[RTLIB::SRA_I32] = "__ashrsi3";
+  Names[RTLIB::SRA_I64] = "__ashrdi3";
+  Names[RTLIB::MUL_I32] = "__mulsi3";
+  Names[RTLIB::MUL_I64] = "__muldi3";
+  Names[RTLIB::SDIV_I32] = "__divsi3";
+  Names[RTLIB::SDIV_I64] = "__divdi3";
+  Names[RTLIB::UDIV_I32] = "__udivsi3";
+  Names[RTLIB::UDIV_I64] = "__udivdi3";
+  Names[RTLIB::SREM_I32] = "__modsi3";
+  Names[RTLIB::SREM_I64] = "__moddi3";
+  Names[RTLIB::UREM_I32] = "__umodsi3";
+  Names[RTLIB::UREM_I64] = "__umoddi3";
+  Names[RTLIB::NEG_I32] = "__negsi2";
+  Names[RTLIB::NEG_I64] = "__negdi2";
+  Names[RTLIB::ADD_F32] = "__addsf3";
+  Names[RTLIB::ADD_F64] = "__adddf3";
+  Names[RTLIB::SUB_F32] = "__subsf3";
+  Names[RTLIB::SUB_F64] = "__subdf3";
+  Names[RTLIB::MUL_F32] = "__mulsf3";
+  Names[RTLIB::MUL_F64] = "__muldf3";
+  Names[RTLIB::DIV_F32] = "__divsf3";
+  Names[RTLIB::DIV_F64] = "__divdf3";
+  Names[RTLIB::REM_F32] = "fmodf";
+  Names[RTLIB::REM_F64] = "fmod";
+  Names[RTLIB::NEG_F32] = "__negsf2";
+  Names[RTLIB::NEG_F64] = "__negdf2";
+  Names[RTLIB::POWI_F32] = "__powisf2";
+  Names[RTLIB::POWI_F64] = "__powidf2";
+  Names[RTLIB::SQRT_F32] = "sqrtf";
+  Names[RTLIB::SQRT_F64] = "sqrt";
+  Names[RTLIB::SIN_F32] = "sinf";
+  Names[RTLIB::SIN_F64] = "sin";
+  Names[RTLIB::COS_F32] = "cosf";
+  Names[RTLIB::COS_F64] = "cos";
+  Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+  Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+  Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+  Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+  Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+  Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+  Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+  Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+  Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+  Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+  Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+  Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+  Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+  Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+  Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+  Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+  Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+  Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+  Names[RTLIB::OEQ_F32] = "__eqsf2";
+  Names[RTLIB::OEQ_F64] = "__eqdf2";
+  Names[RTLIB::UNE_F32] = "__nesf2";
+  Names[RTLIB::UNE_F64] = "__nedf2";
+  Names[RTLIB::OGE_F32] = "__gesf2";
+  Names[RTLIB::OGE_F64] = "__gedf2";
+  Names[RTLIB::OLT_F32] = "__ltsf2";
+  Names[RTLIB::OLT_F64] = "__ltdf2";
+  Names[RTLIB::OLE_F32] = "__lesf2";
+  Names[RTLIB::OLE_F64] = "__ledf2";
+  Names[RTLIB::OGT_F32] = "__gtsf2";
+  Names[RTLIB::OGT_F64] = "__gtdf2";
+  Names[RTLIB::UO_F32] = "__unordsf2";
+  Names[RTLIB::UO_F64] = "__unorddf2";
+  Names[RTLIB::O_F32] = "__unordsf2";
+  Names[RTLIB::O_F64] = "__unorddf2";
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+  memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+  CCs[RTLIB::UNE_F32] = ISD::SETNE;
+  CCs[RTLIB::UNE_F64] = ISD::SETNE;
+  CCs[RTLIB::OGE_F32] = ISD::SETGE;
+  CCs[RTLIB::OGE_F64] = ISD::SETGE;
+  CCs[RTLIB::OLT_F32] = ISD::SETLT;
+  CCs[RTLIB::OLT_F64] = ISD::SETLT;
+  CCs[RTLIB::OLE_F32] = ISD::SETLE;
+  CCs[RTLIB::OLE_F64] = ISD::SETLE;
+  CCs[RTLIB::OGT_F32] = ISD::SETGT;
+  CCs[RTLIB::OGT_F64] = ISD::SETGT;
+  CCs[RTLIB::UO_F32] = ISD::SETNE;
+  CCs[RTLIB::UO_F64] = ISD::SETNE;
+  CCs[RTLIB::O_F32] = ISD::SETEQ;
+  CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
+
+TargetLowering::TargetLowering(TargetMachine &tm)
+  : TM(tm), TD(TM.getTargetData()) {
+  assert(ISD::BUILTIN_OP_END <= 156 &&
+         "Fixed size array in TargetLowering is not large enough!");
+  // All operations default to being supported.
+  memset(OpActions, 0, sizeof(OpActions));
+  memset(LoadXActions, 0, sizeof(LoadXActions));
+  memset(&StoreXActions, 0, sizeof(StoreXActions));
+  memset(&IndexedModeActions, 0, sizeof(IndexedModeActions));
+
+  // Set all indexed load / store to expand.
+  for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+    for (unsigned IM = (unsigned)ISD::PRE_INC;
+         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+      setIndexedLoadAction(IM, (MVT::ValueType)VT, Expand);
+      setIndexedStoreAction(IM, (MVT::ValueType)VT, Expand);
+    }
+  }
+
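+  // Initialize the remaining members to their defaults.  Target constructors
+  // run after this and override these values as needed.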
+  IsLittleEndian = TD->isLittleEndian();
+  UsesGlobalOffsetTable = false;
+  ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType());
+  ShiftAmtHandling = Undefined;
+  memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+  memset(TargetDAGCombineArray, 0, 
+         sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0]));
+  maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+  allowUnalignedMemoryAccesses = false;
+  UseUnderscoreSetJmp = false;
+  UseUnderscoreLongJmp = false;
+  SelectIsExpensive = false;
+  IntDivIsCheap = false;
+  Pow2DivIsCheap = false;
+  StackPointerRegisterToSaveRestore = 0;
+  ExceptionPointerRegister = 0;
+  ExceptionSelectorRegister = 0;
+  SchedPreferenceInfo = SchedulingForLatency;
+  JumpBufSize = 0;
+  JumpBufAlignment = 0;
+  IfCvtBlockSizeLimit = 2;
+
+  InitLibcallNames(LibcallRoutineNames);
+  InitCmpLibcallCCs(CmpLibcallCCs);
+}
+
+TargetLowering::~TargetLowering() {}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLowering::computeRegisterProperties() {
+  assert(MVT::LAST_VALUETYPE <= 32 &&
+         "Too many value types for ValueTypeActions to hold!");
+
+  // Everything defaults to needing one register.
+  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+    NumRegistersForVT[i] = 1;
+    RegisterTypeForVT[i] = TransformToType[i] = i;
+  }
+  // ...except isVoid, which doesn't need any registers.
+  NumRegistersForVT[MVT::isVoid] = 0;
+
+  // Find the largest integer register class.
+  unsigned LargestIntReg = MVT::i128;
+  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+  // Every integer value type larger than this largest register takes twice as
+  // many registers to represent as the previous ValueType.
+  for (MVT::ValueType ExpandedReg = LargestIntReg + 1;
+       MVT::isInteger(ExpandedReg); ++ExpandedReg) {
+    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+    RegisterTypeForVT[ExpandedReg] = LargestIntReg;
+    TransformToType[ExpandedReg] = ExpandedReg - 1;
+    ValueTypeActions.setTypeAction(ExpandedReg, Expand);
+  }
+
+  // Inspect all of the ValueType's smaller than the largest integer
+  // register to see which ones need promotion.
+  MVT::ValueType LegalIntReg = LargestIntReg;
+  for (MVT::ValueType IntReg = LargestIntReg - 1;
+       IntReg >= MVT::i1; --IntReg) {
+    if (isTypeLegal(IntReg)) {
+      LegalIntReg = IntReg;
+    } else {
+      RegisterTypeForVT[IntReg] = TransformToType[IntReg] = LegalIntReg;
+      ValueTypeActions.setTypeAction(IntReg, Promote);
+    }
+  }
+
+  // Decide how to handle f64. If the target does not have native f64 support,
+  // expand it to i64 and we will be generating soft float library calls.
+  if (!isTypeLegal(MVT::f64)) {
+    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+    TransformToType[MVT::f64] = MVT::i64;
+    ValueTypeActions.setTypeAction(MVT::f64, Expand);
+  }
+
+  // Decide how to handle f32. If the target does not have native support for
+  // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+  if (!isTypeLegal(MVT::f32)) {
+    if (isTypeLegal(MVT::f64)) {
+      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+      TransformToType[MVT::f32] = MVT::f64;
+      ValueTypeActions.setTypeAction(MVT::f32, Promote);
+    } else {
+      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+      TransformToType[MVT::f32] = MVT::i32;
+      ValueTypeActions.setTypeAction(MVT::f32, Expand);
+    }
+  }
+  
+  // Loop over all of the vector value types to see which need transformations.
+  for (MVT::ValueType i = MVT::FIRST_VECTOR_VALUETYPE;
+       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+    if (!isTypeLegal(i)) {
+      MVT::ValueType IntermediateVT, RegisterVT;
+      unsigned NumIntermediates;
+      NumRegistersForVT[i] =
+        getVectorTypeBreakdown(i,
+                               IntermediateVT, NumIntermediates,
+                               RegisterVT);
+      RegisterTypeForVT[i] = RegisterVT;
+      TransformToType[i] = MVT::Other; // this isn't actually used
+      ValueTypeActions.setTypeAction(i, Expand);
+    }
+  }
+}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+  return NULL;
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register.  It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(MVT::ValueType VT, 
+                                                MVT::ValueType &IntermediateVT,
+                                                unsigned &NumIntermediates,
+                                      MVT::ValueType &RegisterVT) const {
+  // Figure out the right, legal destination reg to copy into.
+  unsigned NumElts = MVT::getVectorNumElements(VT);
+  MVT::ValueType EltTy = MVT::getVectorElementType(VT);
+  
+  unsigned NumVectorRegs = 1;
+  
+  // Divide the input until we get to a supported size.  This will always
+  // end with a scalar if the target doesn't support vectors.
+  while (NumElts > 1 &&
+         !isTypeLegal(MVT::getVectorType(EltTy, NumElts))) {
+    NumElts >>= 1;
+    NumVectorRegs <<= 1;
+  }
+
+  NumIntermediates = NumVectorRegs;
+  
+  MVT::ValueType NewVT = MVT::getVectorType(EltTy, NumElts);
+  if (!isTypeLegal(NewVT))
+    NewVT = EltTy;
+  IntermediateVT = NewVT;
+
+  MVT::ValueType DestVT = getTypeToTransformTo(NewVT);
+  RegisterVT = DestVT;
+  if (DestVT < NewVT) {
+    // Value is expanded, e.g. i64 -> i16.
+    return NumVectorRegs*(MVT::getSizeInBits(NewVT)/MVT::getSizeInBits(DestVT));
+  }
+
+  // Otherwise, promotion or legal types use the same number of registers as
+  // the vector decimated to the appropriate level.
+  return NumVectorRegs;
+}
+
+//===----------------------------------------------------------------------===//
+//  Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the 
+/// specified instruction is a constant integer.  If so, check to see if there
+/// are any bits set in the constant that are not demanded.  If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op, 
+                                                            uint64_t Demanded) {
+  // FIXME: ISD::SELECT, ISD::SELECT_CC
+  switch(Op.getOpcode()) {
+  default: break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+      if ((~Demanded & C->getValue()) != 0) {
+        MVT::ValueType VT = Op.getValueType();
+        SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0),
+                                    DAG.getConstant(Demanded & C->getValue(), 
+                                                    VT));
+        return CombineTo(Op, New);
+      }
+    break;
+  }
+  return false;
+}
+
+/// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream.  If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, recording the replacement in TLO.  Otherwise, analyze the
+/// expression and return a mask of KnownOne and KnownZero bits for the
+/// expression (used to simplify the caller).  The KnownZero/One bits may only
+/// be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask, 
+                                          uint64_t &KnownZero,
+                                          uint64_t &KnownOne,
+                                          TargetLoweringOpt &TLO,
+                                          unsigned Depth) const {
+  KnownZero = KnownOne = 0;   // Don't know anything.
+
+  // The masks are not wide enough to represent this type!  Should use APInt.
+  if (Op.getValueType() == MVT::i128)
+    return false;
+  
+  // Other users may use these bits.
+  if (!Op.Val->hasOneUse()) { 
+    if (Depth != 0) {
+      // If not at the root, just compute the KnownZero/KnownOne bits to
+      // simplify things downstream.
+      TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+      return false;
+    }
+    // If this is the root being simplified, allow it to have multiple uses,
+    // just set the DemandedMask to all bits.
+    DemandedMask = MVT::getIntVTBitMask(Op.getValueType());
+  } else if (DemandedMask == 0) {   
+    // Not demanding any bits from Op.
+    if (Op.getOpcode() != ISD::UNDEF)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType()));
+    return false;
+  } else if (Depth == 6) {        // Limit search depth.
+    return false;
+  }
+
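+  // Dispatch on the opcode: each case computes the known bits of its result
+  // and may replace Op with a simpler node via TLO.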
+  uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask;
+    KnownZero = ~KnownOne & DemandedMask;
+    return false;   // Don't fall through, will infinitely loop.
+  case ISD::AND:
+    // If the RHS is a constant, check to see if the LHS would be zero without
+    // using the bits from the RHS.  Below, we use knowledge about the RHS to
+    // simplify the LHS, here we're using information from the LHS to simplify
+    // the RHS.
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      uint64_t LHSZero, LHSOne;
+      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), DemandedMask,
+                                LHSZero, LHSOne, Depth+1);
+      // If the LHS already has zeros where RHSC does, this 'and' is dead.
+      if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask))
+        return TLO.CombineTo(Op, Op.getOperand(0));
+      // If any of the set bits in the RHS are known zero on the LHS, shrink
+      // the constant.
+      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask))
+        return true;
+    }
+    
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero,
+                             KnownZero2, KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+      
+    // If all of the demanded bits are known one on one side, return the other.
+    // These bits cannot contribute to the result of the 'and'.
+    if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If all of the demanded bits in the inputs are known zeros, return zero.
+    if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask)
+      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+    // If the RHS is a constant, see if we can simplify it.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2))
+      return true;
+      
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    break;
+  case ISD::OR:
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne, 
+                             KnownZero2, KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'or'.
+    if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If all of the potentially set bits on one side are known to be set on
+    // the other side, just use the 'other' side.
+    if ((DemandedMask & (~KnownZero) & KnownOne2) == 
+        (DemandedMask & (~KnownZero)))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((DemandedMask & (~KnownZero2) & KnownOne) == 
+        (DemandedMask & (~KnownZero2)))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If the RHS is a constant, see if we can simplify it.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+      return true;
+          
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 bits are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    break;
+  case ISD::XOR:
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'xor'.
+    if ((DemandedMask & KnownZero) == DemandedMask)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((DemandedMask & KnownZero2) == DemandedMask)
+      return TLO.CombineTo(Op, Op.getOperand(1));
+      
+    // If all of the unknown bits are known to be zero on one side or the other
+    // (but not both) turn this into an *inclusive* or.
+    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+    if ((DemandedMask & ~KnownZero & ~KnownZero2) == 0)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(),
+                                               Op.getOperand(0),
+                                               Op.getOperand(1)));
+    
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+    KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    
+    // If all of the demanded bits on one side are known, and all of the set
+    // bits on that side are also known to be set on the other side, turn this
+    // into an AND, as we know the bits will be cleared.
+    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+    if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known
+      if ((KnownOne & KnownOne2) == KnownOne) {
+        MVT::ValueType VT = Op.getValueType();
+        SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT);
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0),
+                                                 ANDC));
+      }
+    }
+    
+    // If the RHS is a constant, see if we can simplify it.
+    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+      return true;
+    
+    KnownZero = KnownZeroOut;
+    KnownOne  = KnownOneOut;
+    break;
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
+      KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
+    break;
+  case ISD::SELECT:
+    if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the operands are constants, see if we can simplify them.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+      return true;
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  case ISD::SELECT_CC:
+    if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the operands are constants, see if we can simplify them.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+      return true;
+      
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  case ISD::SHL:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getValue();
+      SDOperand InOp = Op.getOperand(0);
+
+      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+      // single shift.  We can do this if the bottom bits (which are shifted
+      // out) are never demanded.
+      if (InOp.getOpcode() == ISD::SRL &&
+          isa<ConstantSDNode>(InOp.getOperand(1))) {
+        if (ShAmt && (DemandedMask & ((1ULL << ShAmt)-1)) == 0) {
+          unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue();
+          unsigned Opc = ISD::SHL;
+          int Diff = ShAmt-C1;
+          if (Diff < 0) {
+            Diff = -Diff;
+            Opc = ISD::SRL;
+          }          
+          
+          SDOperand NewSA = 
+            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+          MVT::ValueType VT = Op.getValueType();
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT,
+                                                   InOp.getOperand(0), NewSA));
+        }
+      }      
+      
+      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> ShAmt,
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      KnownZero <<= SA->getValue();
+      KnownOne  <<= SA->getValue();
+      KnownZero |= (1ULL << SA->getValue())-1;  // low bits known zero.
+    }
+    break;
+  case ISD::SRL:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+      unsigned VTSize = MVT::getSizeInBits(VT);
+      SDOperand InOp = Op.getOperand(0);
+      
+      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+      // single shift.  We can do this if the top bits (which are shifted out)
+      // are never demanded.
+      if (InOp.getOpcode() == ISD::SHL &&
+          isa<ConstantSDNode>(InOp.getOperand(1))) {
+        if (ShAmt && (DemandedMask & (~0ULL << (VTSize-ShAmt))) == 0) {
+          unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue();
+          unsigned Opc = ISD::SRL;
+          int Diff = ShAmt-C1;
+          if (Diff < 0) {
+            Diff = -Diff;
+            Opc = ISD::SHL;
+          }          
+          
+          SDOperand NewSA =
+            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT,
+                                                   InOp.getOperand(0), NewSA));
+        }
+      }      
+      
+      // Compute the new bits that are at the top now.
+      if (SimplifyDemandedBits(InOp, (DemandedMask << ShAmt) & TypeMask,
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= VTSize - ShAmt;
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    break;
+  case ISD::SRA:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+      
+      // Compute the new bits that are at the top now.
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+      
+      uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask;
+
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
+      if (HighBits & DemandedMask)
+        InDemandedMask |= MVT::getIntVTSignBit(VT);
+      
+      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+      
+      // Handle the sign bits.
+      uint64_t SignBit = MVT::getIntVTSignBit(VT);
+      SignBit >>= ShAmt;  // Adjust to where it is now in the mask.
+      
+      // If the input sign bit is known to be zero, or if none of the top bits
+      // are demanded, turn this into an unsigned shift right.
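+      // e.g. (X >>s C) where the sign bit of X is known zero becomes (X >>u C).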
+      if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) {
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0),
+                                                 Op.getOperand(1)));
+      } else if (KnownOne & SignBit) { // New bits are known one.
+        KnownOne |= HighBits;
+      }
+    }
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+    // Sign extension.  Compute the demanded bits in the result that are not 
+    // present in the input.
+    uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask;
+    
+    // If none of the extended bits are demanded, eliminate the sextinreg.
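+    // e.g. sext_inreg(X, i8) where only the low eight bits are demanded
+    // folds to X.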
+    if (NewBits == 0)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+
+    uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
+    int64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT);
+    
+    // Since the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    InputDemandedBits |= InSignBit;
+
+    if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    
+    // If the input sign bit is known zero, convert this into a zero extension.
+    if (KnownZero & InSignBit)
+      return TLO.CombineTo(Op, 
+                           TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT));
+    
+    if (KnownOne & InSignBit) {    // Input sign bit known set
+      KnownOne |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                       // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne &= ~NewBits;
+    }
+    break;
+  }
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP: {
+    MVT::ValueType VT = Op.getValueType();
+    unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
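+    // e.g. a cttz/ctlz/ctpop of an i32 value is at most 32, so all result
+    // bits above bit 5 are known to be zero.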
+    KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
+    KnownOne  = 0;
+    break;
+  }
+  case ISD::LOAD: {
+    if (ISD::isZEXTLoad(Op.Val)) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op);
+      MVT::ValueType VT = LD->getLoadedVT();
+      KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask;
+    }
+    break;
+  }
+  case ISD::ZERO_EXTEND: {
+    uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+    
+    // If none of the top bits are demanded, convert this into an any_extend.
+    uint64_t NewBits = (~InMask) & DemandedMask;
+    if (NewBits == 0)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, 
+                                               Op.getValueType(), 
+                                               Op.getOperand(0)));
+    
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    KnownZero |= NewBits;
+    break;
+  }
+  case ISD::SIGN_EXTEND: {
+    MVT::ValueType InVT = Op.getOperand(0).getValueType();
+    uint64_t InMask    = MVT::getIntVTBitMask(InVT);
+    uint64_t InSignBit = MVT::getIntVTSignBit(InVT);
+    uint64_t NewBits   = (~InMask) & DemandedMask;
+    
+    // If none of the top bits are demanded, convert this into an any_extend.
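+    // e.g. (sext i16 X to i32) where only the low 16 bits of the result are
+    // demanded becomes (any_extend X).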
+    if (NewBits == 0)
+      return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(),
+                                           Op.getOperand(0)));
+    
+    // Since some of the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    uint64_t InDemandedBits = DemandedMask & InMask;
+    InDemandedBits |= InSignBit;
+    
+    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    
+    // If the sign bit is known zero, convert this to a zero extend.
+    if (KnownZero & InSignBit)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, 
+                                               Op.getValueType(), 
+                                               Op.getOperand(0)));
+    
+    // If the sign bit is known one, the top bits match.
+    if (KnownOne & InSignBit) {
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {   // Otherwise, top bits aren't known.
+      KnownOne  &= ~NewBits;
+      KnownZero &= ~NewBits;
+    }
+    break;
+  }
+  case ISD::ANY_EXTEND: {
+    uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    break;
+  }
+  case ISD::TRUNCATE: {
+    // Simplify the input, using demanded bit information, and compute the known
+    // zero/one bits live out.
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    
+    // If the input is only used by this truncate, see if we can shrink it based
+    // on the known demanded bits.
+    if (Op.getOperand(0).Val->hasOneUse()) {
+      SDOperand In = Op.getOperand(0);
+      switch (In.getOpcode()) {
+      default: break;
+      case ISD::SRL:
+        // Shrink SRL by a constant if none of the high bits shifted in are
+        // demanded.
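+        // e.g. (trunc (X >>u 8) to i32) with X of type i64 becomes
+        // ((trunc X to i32) >>u 8) when result bits 24-31 are not demanded.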
+        if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+          uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType());
+          HighBits &= ~MVT::getIntVTBitMask(Op.getValueType());
+          HighBits >>= ShAmt->getValue();
+          
+          if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) &&
+              (DemandedMask & HighBits) == 0) {
+            // None of the shifted in bits are needed.  Add a truncate of the
+            // shift input, then shift it.
+            SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, 
+                                                 Op.getValueType(), 
+                                                 In.getOperand(0));
+            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(),
+                                                   NewTrunc, In.getOperand(1)));
+          }
+        }
+        break;
+      }
+    }
+    
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+    KnownZero &= OutMask;
+    KnownOne &= OutMask;
+    break;
+  }
+  case ISD::AssertZext: {
+    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    uint64_t InMask = MVT::getIntVTBitMask(VT);
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    KnownZero |= ~InMask & DemandedMask;
+    break;
+  }
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_VOID:
+    // Just use ComputeMaskedBits to compute output bits.
+    TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+    break;
+  }
+  
+  // If we know the value of all of the demanded bits, return this as a
+  // constant.
+  if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+  
+  return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified 
+/// in Mask are known to be either zero or one and return them in the 
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 
+                                                    uint64_t Mask,
+                                                    uint64_t &KnownZero, 
+                                                    uint64_t &KnownOne,
+                                                    const SelectionDAG &DAG,
+                                                    unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use MaskedValueIsZero if you don't know whether Op"
+         " is a target node!");
+  KnownZero = 0;
+  KnownOne = 0;
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op,
+                                                         unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use ComputeNumSignBits if you don't know whether Op"
+         " is a target node!");
+  return 1;
+}
+
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands 
+/// and cc. If it is unable to simplify it, return a null SDOperand.
+SDOperand
+TargetLowering::SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
+                              ISD::CondCode Cond, bool foldBooleans,
+                              DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
+  }
+
+  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) {
+    uint64_t C1 = N1C->getValue();
+    if (isa<ConstantSDNode>(N0.Val)) {
+      return DAG.FoldSetCC(VT, N0, N1, Cond);
+    } else {
+      // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+      // equality comparison, then we're just comparing whether X itself is
+      // zero.
+      if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+          N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+          N0.getOperand(1).getOpcode() == ISD::Constant) {
+        unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+            ShAmt == Log2_32(MVT::getSizeInBits(N0.getValueType()))) {
+          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+            // (srl (ctlz x), 5) == 0  -> X != 0
+            // (srl (ctlz x), 5) != 1  -> X != 0
+            Cond = ISD::SETNE;
+          } else {
+            // (srl (ctlz x), 5) != 0  -> X == 0
+            // (srl (ctlz x), 5) == 1  -> X == 0
+            Cond = ISD::SETEQ;
+          }
+          SDOperand Zero = DAG.getConstant(0, N0.getValueType());
+          return DAG.getSetCC(VT, N0.getOperand(0).getOperand(0),
+                              Zero, Cond);
+        }
+      }
+      
+      // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+      if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+        unsigned InSize = MVT::getSizeInBits(N0.getOperand(0).getValueType());
+
+        // If the comparison constant has bits in the upper part, the
+        // zero-extended value could never match.
+        if (C1 & (~0ULL << InSize)) {
+          unsigned VSize = MVT::getSizeInBits(N0.getValueType());
+          switch (Cond) {
+          case ISD::SETUGT:
+          case ISD::SETUGE:
+          case ISD::SETEQ: return DAG.getConstant(0, VT);
+          case ISD::SETULT:
+          case ISD::SETULE:
+          case ISD::SETNE: return DAG.getConstant(1, VT);
+          case ISD::SETGT:
+          case ISD::SETGE:
+            // True if the sign bit of C1 is set.
+            return DAG.getConstant((C1 & (1ULL << (VSize-1))) != 0, VT);
+          case ISD::SETLT:
+          case ISD::SETLE:
+            // True if the sign bit of C1 isn't set.
+            return DAG.getConstant((C1 & (1ULL << (VSize-1))) == 0, VT);
+          default:
+            break;
+          }
+        }
+
+        // Otherwise, we can perform the comparison with the low bits.
+        switch (Cond) {
+        case ISD::SETEQ:
+        case ISD::SETNE:
+        case ISD::SETUGT:
+        case ISD::SETUGE:
+        case ISD::SETULT:
+        case ISD::SETULE:
+          return DAG.getSetCC(VT, N0.getOperand(0),
+                          DAG.getConstant(C1, N0.getOperand(0).getValueType()),
+                          Cond);
+        default:
+          break;   // todo, be more careful with signed comparisons
+        }
+      } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+        MVT::ValueType ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+        unsigned ExtSrcTyBits = MVT::getSizeInBits(ExtSrcTy);
+        MVT::ValueType ExtDstTy = N0.getValueType();
+        unsigned ExtDstTyBits = MVT::getSizeInBits(ExtDstTy);
+
+        // If the extended part has any inconsistent bits, it cannot ever
+        // compare equal.  In other words, they have to be all ones or all
+        // zeros.
+        uint64_t ExtBits =
+          (~0ULL >> (64-ExtSrcTyBits)) & (~0ULL << (ExtDstTyBits-1));
+        if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+          return DAG.getConstant(Cond == ISD::SETNE, VT);
+        
+        SDOperand ZextOp;
+        MVT::ValueType Op0Ty = N0.getOperand(0).getValueType();
+        if (Op0Ty == ExtSrcTy) {
+          ZextOp = N0.getOperand(0);
+        } else {
+          int64_t Imm = ~0ULL >> (64-ExtSrcTyBits);
+          ZextOp = DAG.getNode(ISD::AND, Op0Ty, N0.getOperand(0),
+                               DAG.getConstant(Imm, Op0Ty));
+        }
+        if (!DCI.isCalledByLegalizer())
+          DCI.AddToWorklist(ZextOp.Val);
+        // Otherwise, make this a use of a zext.
+        return DAG.getSetCC(VT, ZextOp, 
+                            DAG.getConstant(C1 & (~0ULL>>(64-ExtSrcTyBits)), 
+                                            ExtDstTy),
+                            Cond);
+      } else if ((N1C->getValue() == 0 || N1C->getValue() == 1) &&
+                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+        
+        // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
+        if (N0.getOpcode() == ISD::SETCC) {
+          bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getValue() != 1);
+          if (TrueWhenTrue)
+            return N0;
+          
+          // Invert the condition.
+          ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+          CC = ISD::getSetCCInverse(CC, 
+                               MVT::isInteger(N0.getOperand(0).getValueType()));
+          return DAG.getSetCC(VT, N0.getOperand(0), N0.getOperand(1), CC);
+        }
+        
+        if ((N0.getOpcode() == ISD::XOR ||
+             (N0.getOpcode() == ISD::AND && 
+              N0.getOperand(0).getOpcode() == ISD::XOR &&
+              N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+            isa<ConstantSDNode>(N0.getOperand(1)) &&
+            cast<ConstantSDNode>(N0.getOperand(1))->getValue() == 1) {
+          // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
+          // can only do this if the top bits are known zero.
+          if (DAG.MaskedValueIsZero(N0,
+                                    MVT::getIntVTBitMask(N0.getValueType())-1)){
+            // Okay, get the un-inverted input value.
+            SDOperand Val;
+            if (N0.getOpcode() == ISD::XOR)
+              Val = N0.getOperand(0);
+            else {
+              assert(N0.getOpcode() == ISD::AND && 
+                     N0.getOperand(0).getOpcode() == ISD::XOR);
+              // ((X^1)&1)^1 -> X & 1
+              Val = DAG.getNode(ISD::AND, N0.getValueType(),
+                                N0.getOperand(0).getOperand(0),
+                                N0.getOperand(1));
+            }
+            return DAG.getSetCC(VT, Val, N1,
+                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+          }
+        }
+      }
+      
+      uint64_t MinVal, MaxVal;
+      unsigned OperandBitSize = MVT::getSizeInBits(N1C->getValueType(0));
+      if (ISD::isSignedIntSetCC(Cond)) {
+        MinVal = 1ULL << (OperandBitSize-1);
+        if (OperandBitSize != 1)   // Avoid X >> 64, which is undefined.
+          MaxVal = ~0ULL >> (65-OperandBitSize);
+        else
+          MaxVal = 0;
+      } else {
+        MinVal = 0;
+        MaxVal = ~0ULL >> (64-OperandBitSize);
+      }
+
+      // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+      if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+        if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
+        --C1;                                          // X >= C0 --> X > (C0-1)
+        return DAG.getSetCC(VT, N0, DAG.getConstant(C1, N1.getValueType()),
+                        (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+      }
+
+      if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+        if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
+        ++C1;                                          // X <= C0 --> X < (C0+1)
+        return DAG.getSetCC(VT, N0, DAG.getConstant(C1, N1.getValueType()),
+                        (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+      }
+
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+        return DAG.getConstant(0, VT);      // X < MIN --> false
+      if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+        return DAG.getConstant(1, VT);      // X >= MIN --> true
+      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+        return DAG.getConstant(0, VT);      // X > MAX --> false
+      if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+        return DAG.getConstant(1, VT);      // X <= MAX --> true
+
+      // Canonicalize setgt X, Min --> setne X, Min
+      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+        return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
+      // Canonicalize setlt X, Max --> setne X, Max
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+        return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
+
+      // If we have setult X, 1, turn it into seteq X, 0
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+        return DAG.getSetCC(VT, N0, DAG.getConstant(MinVal, N0.getValueType()),
+                        ISD::SETEQ);
+      // If we have setugt X, Max-1, turn it into seteq X, Max
+      else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+        return DAG.getSetCC(VT, N0, DAG.getConstant(MaxVal, N0.getValueType()),
+                        ISD::SETEQ);
+
+      // If we have "setcc X, C0", check to see if we can shrink the immediate
+      // by changing cc.
+
+      // SETUGT X, SINTMAX  -> SETLT X, 0
+      if (Cond == ISD::SETUGT && OperandBitSize != 1 &&
+          C1 == (~0ULL >> (65-OperandBitSize)))
+        return DAG.getSetCC(VT, N0, DAG.getConstant(0, N1.getValueType()),
+                            ISD::SETLT);
+
+      // FIXME: Implement the rest of these.
+
+      // Fold bit comparisons when we can.
+      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+          VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+        if (ConstantSDNode *AndRHS =
+                    dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
+            // Perform the xform if the AND RHS is a single bit.
+            if (isPowerOf2_64(AndRHS->getValue())) {
+              return DAG.getNode(ISD::SRL, VT, N0,
+                             DAG.getConstant(Log2_64(AndRHS->getValue()),
+                                             getShiftAmountTy()));
+            }
+          } else if (Cond == ISD::SETEQ && C1 == AndRHS->getValue()) {
+            // (X & 8) == 8  -->  (X & 8) >> 3
+            // Perform the xform if C1 is a single bit.
+            if (isPowerOf2_64(C1)) {
+              return DAG.getNode(ISD::SRL, VT, N0,
+                          DAG.getConstant(Log2_64(C1), getShiftAmountTy()));
+            }
+          }
+        }
+    }
+  } else if (isa<ConstantSDNode>(N0.Val)) {
+    // Ensure that the constant occurs on the RHS.
+    return DAG.getSetCC(VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+  }
+
+  if (isa<ConstantFPSDNode>(N0.Val)) {
+    // Constant fold or commute setcc.
+    SDOperand O = DAG.FoldSetCC(VT, N0, N1, Cond);    
+    if (O.Val) return O;
+  }
+
+  if (N0 == N1) {
+    // We can always fold X == X for integer setcc's.
+    if (MVT::isInteger(N0.getValueType()))
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    unsigned UOF = ISD::getUnorderedFlavor(Cond);
+    if (UOF == 2)   // FP operators that are undefined on NaNs.
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+      return DAG.getConstant(UOF, VT);
+    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
+    // if it is not already.
+    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+    if (NewCond != Cond)
+      return DAG.getSetCC(VT, N0, N1, NewCond);
+  }
+
+  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+      MVT::isInteger(N0.getValueType())) {
+    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+        N0.getOpcode() == ISD::XOR) {
+      // Simplify (X+Y) == (X+Z) -->  Y == Z
+      if (N0.getOpcode() == N1.getOpcode()) {
+        if (N0.getOperand(0) == N1.getOperand(0))
+          return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(1), Cond);
+        if (N0.getOperand(1) == N1.getOperand(1))
+          return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(0), Cond);
+        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+          // If X op Y == Y op X, try other combinations.
+          if (N0.getOperand(0) == N1.getOperand(1))
+            return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(0), Cond);
+          if (N0.getOperand(1) == N1.getOperand(0))
+            return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(1), Cond);
+        }
+      }
+      
+      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+        if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          // Turn (X+C1) == C2 --> X == C2-C1
+          if (N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse()) {
+            return DAG.getSetCC(VT, N0.getOperand(0),
+                              DAG.getConstant(RHSC->getValue()-LHSR->getValue(),
+                                N0.getValueType()), Cond);
+          }
+          
+          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+          if (N0.getOpcode() == ISD::XOR)
+            // If we know that all of the inverted bits are zero, don't bother
+            // performing the inversion.
+            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getValue()))
+              return DAG.getSetCC(VT, N0.getOperand(0),
+                              DAG.getConstant(LHSR->getValue()^RHSC->getValue(),
+                                              N0.getValueType()), Cond);
+        }
+        
+        // Turn (C1-X) == C2 --> X == C1-C2
+        if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+          if (N0.getOpcode() == ISD::SUB && N0.Val->hasOneUse()) {
+            return DAG.getSetCC(VT, N0.getOperand(1),
+                             DAG.getConstant(SUBC->getValue()-RHSC->getValue(),
+                                             N0.getValueType()), Cond);
+          }
+        }          
+      }
+
+      // Simplify (X+Z) == X -->  Z == 0
+      if (N0.getOperand(0) == N1)
+        return DAG.getSetCC(VT, N0.getOperand(1),
+                        DAG.getConstant(0, N0.getValueType()), Cond);
+      if (N0.getOperand(1) == N1) {
+        if (DAG.isCommutativeBinOp(N0.getOpcode()))
+          return DAG.getSetCC(VT, N0.getOperand(0),
+                          DAG.getConstant(0, N0.getValueType()), Cond);
+        else if (N0.Val->hasOneUse()) {
+          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // (Z-X) == X  --> Z == X<<1
+          SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(),
+                                     N1, 
+                                     DAG.getConstant(1, getShiftAmountTy()));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.Val);
+          return DAG.getSetCC(VT, N0.getOperand(0), SH, Cond);
+        }
+      }
+    }
+
+    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+        N1.getOpcode() == ISD::XOR) {
+      // Simplify  X == (X+Z) -->  Z == 0
+      if (N1.getOperand(0) == N0) {
+        return DAG.getSetCC(VT, N1.getOperand(1),
+                        DAG.getConstant(0, N1.getValueType()), Cond);
+      } else if (N1.getOperand(1) == N0) {
+        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+          return DAG.getSetCC(VT, N1.getOperand(0),
+                          DAG.getConstant(0, N1.getValueType()), Cond);
+        } else if (N1.Val->hasOneUse()) {
+          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // X == (Z-X)  --> X<<1 == Z
+          SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(), N0, 
+                                     DAG.getConstant(1, getShiftAmountTy()));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.Val);
+          return DAG.getSetCC(VT, SH, N1.getOperand(0), Cond);
+        }
+      }
+    }
+  }
+
+  // Fold away ALL boolean setcc's.
+  SDOperand Temp;
+  if (N0.getValueType() == MVT::i1 && foldBooleans) {
+    switch (Cond) {
+    default: assert(0 && "Unknown integer setcc!");
+    case ISD::SETEQ:  // X == Y  -> (X^Y)^1
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, N1);
+      N0 = DAG.getNode(ISD::XOR, MVT::i1, Temp, DAG.getConstant(1, MVT::i1));
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.Val);
+      break;
+    case ISD::SETNE:  // X != Y   -->  (X^Y)
+      N0 = DAG.getNode(ISD::XOR, MVT::i1, N0, N1);
+      break;
+    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  X^1 & Y
+    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  X^1 & Y
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1));
+      N0 = DAG.getNode(ISD::AND, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.Val);
+      break;
+    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  Y^1 & X
+    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  Y^1 & X
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1));
+      N0 = DAG.getNode(ISD::AND, MVT::i1, N0, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.Val);
+      break;
+    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  X^1 | Y
+    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  X^1 | Y
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1));
+      N0 = DAG.getNode(ISD::OR, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.Val);
+      break;
+    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  Y^1 | X
+    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  Y^1 | X
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1));
+      N0 = DAG.getNode(ISD::OR, MVT::i1, N0, Temp);
+      break;
+    }
+    if (VT != MVT::i1) {
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(N0.Val);
+      // FIXME: If running after legalize, we probably can't do this.
+      N0 = DAG.getNode(ISD::ZERO_EXTEND, VT, N0);
+    }
+    return N0;
+  }
+
+  // Could not fold it.
+  return SDOperand();
+}
+
+SDOperand TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  // Default implementation: no optimization.
+  return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+//  Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+  // FIXME: lots more standard ones to handle.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default: break;
+    case 'r': return C_RegisterClass;
+    case 'm':    // memory
+    case 'o':    // offsetable
+    case 'V':    // not offsetable
+      return C_Memory;
+    case 'i':    // Simple Integer or Relocatable Constant
+    case 'n':    // Simple Integer
+    case 's':    // Relocatable Constant
+    case 'X':    // Allow ANY value.
+    case 'I':    // Target registers.
+    case 'J':
+    case 'K':
+    case 'L':
+    case 'M':
+    case 'N':
+    case 'O':
+    case 'P':
+      return C_Other;
+    }
+  }
+  
+  if (Constraint.size() > 1 && Constraint[0] == '{' && 
+      Constraint[Constraint.size()-1] == '}')
+    return C_Register;
+  return C_Unknown;
+}
+
+/// isOperandValidForConstraint - Return the specified operand (possibly
+/// modified) if the specified SDOperand is valid for the specified target
+/// constraint letter, otherwise return null.
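+/// For example (illustrative), for the 'i' constraint an (add GV, 4) operand
+/// is mapped to a TargetGlobalAddress of GV with offset 4.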
+SDOperand TargetLowering::isOperandValidForConstraint(SDOperand Op,
+                                                      char ConstraintLetter,
+                                                      SelectionDAG &DAG) {
+  switch (ConstraintLetter) {
+  default: break;
+  case 'i':    // Simple Integer or Relocatable Constant
+  case 'n':    // Simple Integer
+  case 's':    // Relocatable Constant
+  case 'X': {  // Allows any operand.
+    // These operands are interested in values of the form (GV+C), where C may
+    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
+    // is possible and fine if either GV or C are missing.
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+    
+    // If we have "(add GV, C)", pull out GV/C
+    if (Op.getOpcode() == ISD::ADD) {
+      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+      if (C == 0 || GA == 0) {
+        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+      }
+      if (C == 0 || GA == 0)
+        C = 0, GA = 0;
+    }
+    
+    // If we find a valid operand, map to the TargetXXX version so that the
+    // value itself doesn't get selected.
+    if (GA) {   // Either &GV   or   &GV+C
+      if (ConstraintLetter != 'n') {
+        int64_t Offs = GA->getOffset();
+        if (C) Offs += C->getValue();
+        return DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getValueType(),
+                                          Offs);
+      }
+    }
+    if (C) {   // just C, no GV.
+      // Simple constants are not allowed for 's'.
+      if (ConstraintLetter != 's')
+        return DAG.getTargetConstant(C->getValue(), Op.getValueType());
+    }
+    break;
+  }
+  }
+  return SDOperand(0,0);
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  MVT::ValueType VT) const {
+  return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+                             MVT::ValueType VT) const {
+  if (Constraint[0] != '{')
+    return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+  // Remove the braces from around the name.
+  std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+
+  // Figure out which register class contains this reg.
+  const MRegisterInfo *RI = TM.getRegisterInfo();
+  for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+       E = RI->regclass_end(); RCI != E; ++RCI) {
+    const TargetRegisterClass *RC = *RCI;
+    
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    bool isLegal = false;
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (isTypeLegal(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+    
+    if (!isLegal) continue;
+    
+    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); 
+         I != E; ++I) {
+      if (StringsEqualNoCase(RegName, RI->get(*I).Name))
+        return std::make_pair(*I, RC);
+    }
+  }
+  
+  return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+//  Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, 
+                                           const Type *Ty) const {
+  // The default implementation supports a conservative RISC-style r+r and
+  // r+i addressing mode.
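+  // In other words: imm, r+imm, r+r, and 2*r are accepted; r+r+imm, 2*r+r,
+  // and 2*r+imm are rejected.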
+
+  // Allows a sign-extended 16-bit immediate field.
+  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+    return false;
+  
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+  
+  // Only support r+r and a few simple scaled forms.
+  switch (AM.Scale) {
+  case 0:  // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
+      return false;
+    // Otherwise we have r+r or r+i.
+    break;
+  case 2:
+    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
+      return false;
+    // Allow 2*r as r+r.
+    break;
+  }
+  
+  return true;
+}
+
+// Magic for divide replacement
+
+struct ms {
+  int64_t m;  // magic number
+  int64_t s;  // shift amount
+};
+
+struct mu {
+  uint64_t m; // magic number
+  int64_t a;  // add indicator
+  int64_t s;  // shift amount
+};
+
+/// magic - calculate the magic numbers required to codegen an integer sdiv as
+/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
+/// or -1.
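+/// For example (illustrative), d == 7 yields the well-known constants
+/// m == 0x92492493 and s == 2.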
+static ms magic32(int32_t d) {
+  int32_t p;
+  uint32_t ad, anc, delta, q1, r1, q2, r2, t;
+  const uint32_t two31 = 0x80000000U;
+  struct ms mag;
+  
+  ad = abs(d);
+  t = two31 + ((uint32_t)d >> 31);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 31;               // initialize p
+  q1 = two31/anc;       // initialize q1 = 2p/abs(nc)
+  r1 = two31 - q1*anc;  // initialize r1 = rem(2p,abs(nc))
+  q2 = two31/ad;        // initialize q2 = 2p/abs(d)
+  r2 = two31 - q2*ad;   // initialize r2 = rem(2p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2p,abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2p,abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+  
+  mag.m = (int32_t)(q2 + 1); // make sure to sign extend
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 32;            // resulting shift
+  return mag;
+}
+
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
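+/// For example (illustrative), d == 7 yields m == 0x24924925 with the add
+/// indicator set and s == 3.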
+static mu magicu32(uint32_t d) {
+  int32_t p;
+  uint32_t nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  nc = - 1 - (-d)%d;
+  p = 31;                   // initialize p
+  q1 = 0x80000000/nc;       // initialize q1 = 2p/nc
+  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2p,nc)
+  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2p-1)/d
+  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2p-1),d)
+  do {
+    p = p + 1;
+    if (r1 >= nc - r1 ) {
+      q1 = 2*q1 + 1;  // update q1
+      r1 = 2*r1 - nc; // update r1
+    }
+    else {
+      q1 = 2*q1; // update q1
+      r1 = 2*r1; // update r1
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= 0x7FFFFFFF) magu.a = 1;
+      q2 = 2*q2 + 1;     // update q2
+      r2 = 2*r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2 >= 0x80000000) magu.a = 1;
+      q2 = 2*q2;     // update q2
+      r2 = 2*r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - 32;  // resulting shift
+  return magu;
+}
+
+/// magic - calculate the magic numbers required to codegen an integer sdiv as
+/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
+/// or -1.
+static ms magic64(int64_t d) {
+  int64_t p;
+  uint64_t ad, anc, delta, q1, r1, q2, r2, t;
+  const uint64_t two63 = 9223372036854775808ULL; // 2^63
+  struct ms mag;
+  
+  ad = d >= 0 ? d : -d;
+  t = two63 + ((uint64_t)d >> 63);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 63;               // initialize p
+  q1 = two63/anc;       // initialize q1 = 2p/abs(nc)
+  r1 = two63 - q1*anc;  // initialize r1 = rem(2p,abs(nc))
+  q2 = two63/ad;        // initialize q2 = 2p/abs(d)
+  r2 = two63 - q2*ad;   // initialize r2 = rem(2p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2p,abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2p,abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+  
+  mag.m = q2 + 1;
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 64;            // resulting shift
+  return mag;
+}
+
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
+static mu magicu64(uint64_t d)
+{
+  int64_t p;
+  uint64_t nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  nc = - 1 - (-d)%d;
+  p = 63;                   // initialize p
+  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2p/nc
+  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2p,nc)
+  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2p-1)/d
+  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2p-1),d)
+  do {
+    p = p + 1;
+    if (r1 >= nc - r1 ) {
+      q1 = 2*q1 + 1;  // update q1
+      r1 = 2*r1 - nc; // update r1
+    }
+    else {
+      q1 = 2*q1; // update q1
+      r1 = 2*r1; // update r1
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
+      q2 = 2*q2 + 1;     // update q2
+      r2 = 2*r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2 >= 0x8000000000000000ull) magu.a = 1;
+      q2 = 2*q2;     // update q2
+      r2 = 2*r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - 64;  // resulting shift
+  return magu;
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
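+/// The emitted sequence is roughly: mulhs by the magic constant, an optional
+/// add or subtract of the numerator, an optional arithmetic shift right, and
+/// finally the sign bit is shifted out and added back in.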
+SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, 
+                                    std::vector<SDNode*>* Created) const {
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // Check to see if we can do this.
+  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+    return SDOperand();       // BuildSDIV only operates on i32 or i64
+  if (!isOperationLegal(ISD::MULHS, VT))
+    return SDOperand();       // Make sure the target supports MULHS.
+  
+  int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
+  ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
+  
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
+                            DAG.getConstant(magics.m, VT));
+  // If d > 0 and m < 0, add the numerator
+  if (d > 0 && magics.m < 0) { 
+    Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.Val);
+  }
+  // If d < 0 and m > 0, subtract the numerator.
+  if (d < 0 && magics.m > 0) {
+    Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.Val);
+  }
+  // Shift right algebraic if shift value is nonzero
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, VT, Q, 
+                    DAG.getConstant(magics.s, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(Q.Val);
+  }
+  // Extract the sign bit and add it to the quotient
+  SDOperand T =
+    DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                                 getShiftAmountTy()));
+  if (Created)
+    Created->push_back(T.Val);
+  return DAG.getNode(ISD::ADD, VT, Q, T);
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
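+/// When the magic 'add' indicator is set, the emitted sequence is roughly:
+/// q = mulhu(n, m); t = ((n - q) >> 1) + q; result = t >> (s-1).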
+SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                                    std::vector<SDNode*>* Created) const {
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // Check to see if we can do this.
+  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+    return SDOperand();       // BuildUDIV only operates on i32 or i64
+  if (!isOperationLegal(ISD::MULHU, VT))
+    return SDOperand();       // Make sure the target supports MULHU.
+  
+  uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
+  mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
+  
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
+                            DAG.getConstant(magics.m, VT));
+  if (Created)
+    Created->push_back(Q.Val);
+
+  if (magics.a == 0) {
+    return DAG.getNode(ISD::SRL, VT, Q, 
+                       DAG.getConstant(magics.s, getShiftAmountTy()));
+  } else {
+    SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
+    if (Created)
+      Created->push_back(NPQ.Val);
+    NPQ = DAG.getNode(ISD::SRL, VT, NPQ, 
+                      DAG.getConstant(1, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(NPQ.Val);
+    NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
+    if (Created)
+      Created->push_back(NPQ.Val);
+    return DAG.getNode(ISD::SRL, VT, NPQ, 
+                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
+  }
+}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
new file mode 100644
index 0000000..3d8618f
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -0,0 +1,1138 @@
+//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register coalescing pass that attempts to
+// aggressively coalesce every register copy that it can.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simpleregistercoalescing"
+#include "llvm/CodeGen/SimpleRegisterCoalescing.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins    , "Number of interval joins performed");
+STATISTIC(numPeep     , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts   , "Number of times interval joining aborted");
+
+char SimpleRegisterCoalescing::ID = 0;
+namespace {
+  static cl::opt<bool>
+  EnableJoining("join-liveintervals",
+                cl::desc("Coalesce copies (default=true)"),
+                cl::init(true));
+
+  RegisterPass<SimpleRegisterCoalescing> 
+  X("simple-register-coalescing",
+    "Simple register coalescing to eliminate all possible register copies");
+}
+
+const PassInfo *llvm::SimpleRegisterCoalescingID = X.getPassInfo();
+
+void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
+   //AU.addPreserved<LiveVariables>();
+  AU.addPreserved<LiveIntervals>();
+  AU.addPreservedID(PHIEliminationID);
+  AU.addPreservedID(TwoAddressInstructionPassID);
+  AU.addRequired<LiveVariables>();
+  AU.addRequired<LiveIntervals>();
+  AU.addRequired<LoopInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB.  If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy.  For example:
+///
+///  A3 = B0
+///    ...
+///  B1 = A3      <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
+                                         MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+  // BValNo is a value number in B that is defined by a copy from A.  'B1' in
+  // the example above.
+  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+  unsigned BValNo = BLR->ValId;
+  
+  // Get the location that B is defined at.  Two options: either this value has
+  // an unknown definition point or it is defined at CopyIdx.  If unknown, we 
+  // can't process it.
+  unsigned BValNoDefIdx = IntB.getInstForValNum(BValNo);
+  if (BValNoDefIdx == ~0U) return false;
+  assert(BValNoDefIdx == CopyIdx &&
+         "Copy doesn't define the value?");
+  
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  LiveInterval::iterator AValLR = IntA.FindLiveRangeContaining(CopyIdx-1);
+  unsigned AValNo = AValLR->ValId;
+  
+  // If AValNo is defined as a copy from IntB, we can potentially process this.
+  
+  // Get the instruction that defines this value number.
+  unsigned SrcReg = IntA.getSrcRegForValNum(AValNo);
+  if (!SrcReg) return false;  // Not defined by a copy.
+    
+  // If the value number is not defined by a copy instruction, ignore it.
+    
+  // If the source register comes from an interval other than IntB, we can't
+  // handle this.
+  if (rep(SrcReg) != IntB.reg) return false;
+  
+  // Get the LiveRange in IntB that this value number starts with.
+  unsigned AValNoInstIdx = IntA.getInstForValNum(AValNo);
+  LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNoInstIdx-1);
+  
+  // Make sure that the end of the live range is inside the same block as
+  // CopyMI.
+  MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1);
+  if (!ValLREndInst || 
+      ValLREndInst->getParent() != CopyMI->getParent()) return false;
+
+  // Okay, we now know that ValLR ends in the same block that the CopyMI
+  // live-range starts.  If there are no intervening live ranges between them in
+  // IntB, we can merge them.
+  if (ValLR+1 != BLR) return false;
+  
+  DOUT << "\nExtending: "; IntB.print(DOUT, mri_);
+  
+  // We are about to delete CopyMI, so need to remove it as the 'instruction
+  // that defines this value #'.
+  IntB.setValueNumberInfo(BValNo, std::make_pair(~0U, 0));
+  
+  // Okay, we can merge them.  We need to insert a new liverange:
+  // [ValLR.end, BLR.begin) of either value number, then we merge the
+  // two value numbers.
+  unsigned FillerStart = ValLR->end, FillerEnd = BLR->start;
+  IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+  // If the IntB live range is assigned to a physical register, and if that
+  // physreg has sub-registers, update their live intervals as well.
+  if (MRegisterInfo::isPhysicalRegister(IntB.reg)) {
+    // Update the liveintervals of sub-registers.
+    for (const unsigned *AS = mri_->getSubRegisters(IntB.reg); *AS; ++AS) {
+      LiveInterval &AliasLI = li_->getInterval(*AS);
+      AliasLI.addRange(LiveRange(FillerStart, FillerEnd,
+                                 AliasLI.getNextValue(~0U, 0)));
+    }
+  }
+
+  // Okay, merge "B1" into the same value number as "B0".
+  if (BValNo != ValLR->ValId)
+    IntB.MergeValueNumberInto(BValNo, ValLR->ValId);
+  DOUT << "   result = "; IntB.print(DOUT, mri_);
+  DOUT << "\n";
+
+  // If the source instruction was killing the source register before the
+  // merge, unset the isKill marker given the live range has been extended.
+  int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+  if (UIdx != -1)
+    ValLREndInst->getOperand(UIdx).unsetIsKill();
+  
+  // Finally, delete the copy instruction.
+  li_->RemoveMachineInstrFromMaps(CopyMI);
+  CopyMI->eraseFromParent();
+  ++numPeep;
+  return true;
+}
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI.  This returns true
+/// if the copy was successfully coalesced away, or if it is never possible
+/// to coalesce this copy, due to register constraints.  It returns
+/// false if it is not currently possible to coalesce this interval, but
+/// it may be possible if other things get coalesced.
+bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
+                             unsigned SrcReg, unsigned DstReg, bool PhysOnly) {
+  DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
+
+  // Get representative registers.
+  unsigned repSrcReg = rep(SrcReg);
+  unsigned repDstReg = rep(DstReg);
+  
+  // If they are already joined we continue.
+  if (repSrcReg == repDstReg) {
+    DOUT << "\tCopy already coalesced.\n";
+    return true;  // Already coalesced; nothing more to do.
+  }
+  
+  bool SrcIsPhys = MRegisterInfo::isPhysicalRegister(repSrcReg);
+  bool DstIsPhys = MRegisterInfo::isPhysicalRegister(repDstReg);
+  if (PhysOnly && !SrcIsPhys && !DstIsPhys)
+    // Only joining physical registers with virtual registers in this round.
+    return true;
+
+  // If they are both physical registers, we cannot join them.
+  if (SrcIsPhys && DstIsPhys) {
+    DOUT << "\tCan not coalesce physregs.\n";
+    return true;  // Not coalescable.
+  }
+  
+  // We only join virtual registers with allocatable physical registers.
+  if (SrcIsPhys && !allocatableRegs_[repSrcReg]) {
+    DOUT << "\tSrc reg is unallocatable physreg.\n";
+    return true;  // Not coalescable.
+  }
+  if (DstIsPhys && !allocatableRegs_[repDstReg]) {
+    DOUT << "\tDst reg is unallocatable physreg.\n";
+    return true;  // Not coalescable.
+  }
+  
+  // If they are not of the same register class, we cannot join them.
+  if (differingRegisterClasses(repSrcReg, repDstReg)) {
+    DOUT << "\tSrc/Dest are different register classes.\n";
+    return true;  // Not coalescable.
+  }
+  
+  LiveInterval &SrcInt = li_->getInterval(repSrcReg);
+  LiveInterval &DstInt = li_->getInterval(repDstReg);
+  assert(SrcInt.reg == repSrcReg && DstInt.reg == repDstReg &&
+         "Register mapping is horribly broken!");
+
+  DOUT << "\t\tInspecting "; SrcInt.print(DOUT, mri_);
+  DOUT << " and "; DstInt.print(DOUT, mri_);
+  DOUT << ": ";
+
+  // Check if it is necessary to propagate "isDead" property before intervals
+  // are joined.
+  MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg);
+  bool isDead = mopd->isDead();
+  bool isShorten = false;
+  unsigned SrcStart = 0, RemoveStart = 0;
+  unsigned SrcEnd = 0, RemoveEnd = 0;
+  if (isDead) {
+    unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+    LiveInterval::iterator SrcLR =
+      SrcInt.FindLiveRangeContaining(li_->getUseIndex(CopyIdx));
+    RemoveStart = SrcStart = SrcLR->start;
+    RemoveEnd   = SrcEnd   = SrcLR->end;
+    // The instruction which defines the src is only truly dead if there are
+    // no intermediate uses and there isn't a use beyond the copy.
+    // FIXME: find the last use, mark it as a kill and shorten the live range.
+    if (SrcEnd > li_->getDefIndex(CopyIdx)) {
+      isDead = false;
+    } else {
+      MachineOperand *MOU;
+      MachineInstr *LastUse= lastRegisterUse(SrcStart, CopyIdx, repSrcReg, MOU);
+      if (LastUse) {
+        // Shorten the liveinterval to the end of last use.
+        MOU->setIsKill();
+        isDead = false;
+        isShorten = true;
+        RemoveStart = li_->getDefIndex(li_->getInstructionIndex(LastUse));
+        RemoveEnd   = SrcEnd;
+      } else {
+        MachineInstr *SrcMI = li_->getInstructionFromIndex(SrcStart);
+        if (SrcMI) {
+          MachineOperand *mops = findDefOperand(SrcMI, repSrcReg);
+          if (mops)
+            // A dead def should have a single cycle interval.
+            ++RemoveStart;
+        }
+      }
+    }
+  }
+
+  // We need to be careful about coalescing a source physical register with a
+  // virtual register. Once the coalescing is done, it cannot be broken and
+  // these are not spillable! If the destination interval uses are far away,
+  // think twice about coalescing them!
+  if (!mopd->isDead() && (SrcIsPhys || DstIsPhys)) {
+    LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+    unsigned JoinVReg = SrcIsPhys ? repDstReg : repSrcReg;
+    unsigned JoinPReg = SrcIsPhys ? repSrcReg : repDstReg;
+    const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(JoinVReg);
+    unsigned Threshold = allocatableRCRegs_[RC].count();
+
+    // If the virtual register live interval is long and it has a low use
+    // density, do not join them; instead mark the physical register as its
+    // allocation preference.
+    unsigned Length = JoinVInt.getSize() / InstrSlots::NUM;
+    LiveVariables::VarInfo &vi = lv_->getVarInfo(JoinVReg);
+    if (Length > Threshold &&
+        (((float)vi.NumUses / Length) < (1.0 / Threshold))) {
+      JoinVInt.preference = JoinPReg;
+      ++numAborts;
+      DOUT << "\tMay tie down a physical register, abort!\n";
+      return false;
+    }
+  }
+
+  // Okay, attempt to join these two intervals.  On failure, this returns false.
+  // Otherwise, if one of the intervals being joined is a physreg, this method
+  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
+  // been modified, so we can use this information below to update aliases.
+  if (JoinIntervals(DstInt, SrcInt)) {
+    if (isDead) {
+      // Result of the copy is dead. Propagate this property.
+      if (SrcStart == 0) {
+        assert(MRegisterInfo::isPhysicalRegister(repSrcReg) &&
+               "Live-in must be a physical register!");
+        // Live-in to the function but dead. Remove it from entry live-in set.
+        // JoinIntervals may end up swapping the two intervals.
+        mf_->begin()->removeLiveIn(repSrcReg);
+      } else {
+        MachineInstr *SrcMI = li_->getInstructionFromIndex(SrcStart);
+        if (SrcMI) {
+          MachineOperand *mops = findDefOperand(SrcMI, repSrcReg);
+          if (mops)
+            mops->setIsDead();
+        }
+      }
+    }
+
+    if (isShorten || isDead) {
+      // Shorten the live interval.
+      LiveInterval &LiveInInt = (repSrcReg == DstInt.reg) ? DstInt : SrcInt;
+      LiveInInt.removeRange(RemoveStart, RemoveEnd);
+    }
+  } else {
+    // Coalescing failed.
+    
+    // If we can eliminate the copy without merging the live ranges, do so now.
+    if (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI))
+      return true;
+
+    // Otherwise, we are unable to join the intervals.
+    DOUT << "Interference!\n";
+    return false;
+  }
+
+  bool Swapped = repSrcReg == DstInt.reg;
+  if (Swapped)
+    std::swap(repSrcReg, repDstReg);
+  assert(MRegisterInfo::isVirtualRegister(repSrcReg) &&
+         "LiveInterval::join didn't work right!");
+                               
+  // If we're about to merge live ranges into a physical register live range,
+  // we have to update any aliased register's live ranges to indicate that they
+  // have clobbered values for this range.
+  if (MRegisterInfo::isPhysicalRegister(repDstReg)) {
+    // Unset unnecessary kills.
+    if (!DstInt.containsOneValue()) {
+      for (LiveInterval::Ranges::const_iterator I = SrcInt.begin(),
+             E = SrcInt.end(); I != E; ++I)
+        unsetRegisterKills(I->start, I->end, repDstReg);
+    }
+
+    // Update the liveintervals of sub-registers.
+    for (const unsigned *AS = mri_->getSubRegisters(repDstReg); *AS; ++AS)
+      li_->getInterval(*AS).MergeInClobberRanges(SrcInt);
+  } else {
+    // Merge use info if the destination is a virtual register.
+    LiveVariables::VarInfo& dVI = lv_->getVarInfo(repDstReg);
+    LiveVariables::VarInfo& sVI = lv_->getVarInfo(repSrcReg);
+    dVI.NumUses += sVI.NumUses;
+  }
+
+  DOUT << "\n\t\tJoined.  Result = "; DstInt.print(DOUT, mri_);
+  DOUT << "\n";
+
+  // Remember these liveintervals have been joined.
+  JoinedLIs.set(repSrcReg - MRegisterInfo::FirstVirtualRegister);
+  if (MRegisterInfo::isVirtualRegister(repDstReg))
+    JoinedLIs.set(repDstReg - MRegisterInfo::FirstVirtualRegister);
+
+  // If the intervals were swapped by Join, swap them back so that the register
+  // mapping (in the r2i map) is correct.
+  if (Swapped) SrcInt.swap(DstInt);
+  li_->removeInterval(repSrcReg);
+  r2rMap_[repSrcReg] = repDstReg;
+
+  // Finally, delete the copy instruction.
+  li_->RemoveMachineInstrFromMaps(CopyMI);
+  CopyMI->eraseFromParent();
+  ++numPeep;
+  ++numJoins;
+  return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be.  This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve.  InstDefiningValue
+/// keeps track of the new InstDefiningValue assignment for the result
+/// LiveInterval.  ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(unsigned VN,
+                                  SmallVector<std::pair<unsigned,
+                                                unsigned>, 16> &ValueNumberInfo,
+                                  SmallVector<int, 16> &ThisFromOther,
+                                  SmallVector<int, 16> &OtherFromThis,
+                                  SmallVector<int, 16> &ThisValNoAssignments,
+                                  SmallVector<int, 16> &OtherValNoAssignments,
+                                  LiveInterval &ThisLI, LiveInterval &OtherLI) {
+  // If the VN has already been computed, just return it.
+  if (ThisValNoAssignments[VN] >= 0)
+    return ThisValNoAssignments[VN];
+//  assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+  
+  // If this val is not a copy from the other val, then it must be a new value
+  // number in the destination.
+  int OtherValNo = ThisFromOther[VN];
+  if (OtherValNo == -1) {
+    ValueNumberInfo.push_back(ThisLI.getValNumInfo(VN));
+    return ThisValNoAssignments[VN] = ValueNumberInfo.size()-1;
+  }
+
+  // Otherwise, this *is* a copy from the RHS.  If the other side has already
+  // been computed, return it.
+  if (OtherValNoAssignments[OtherValNo] >= 0)
+    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo];
+  
+  // Mark this value number as currently being computed, then ask what the
+  // ultimate value # of the other value is.
+  ThisValNoAssignments[VN] = -2;
+  unsigned UltimateVN =
+    ComputeUltimateVN(OtherValNo, ValueNumberInfo,
+                      OtherFromThis, ThisFromOther,
+                      OtherValNoAssignments, ThisValNoAssignments,
+                      OtherLI, ThisLI);
+  return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+static bool InVector(unsigned Val, const SmallVector<unsigned, 8> &V) {
+  return std::find(V.begin(), V.end(), Val) != V.end();
+}
+
+/// SimpleJoin - Attempt to join the specified interval into this one. The
+/// caller of this method must guarantee that the RHS only contains a single
+/// value number and that the RHS is not defined by a copy from this
+/// interval.  This returns false if the intervals are not joinable, or it
+/// joins them and returns true.
+bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS) {
+  assert(RHS.containsOneValue());
+  
+  // Some of the value numbers in the current interval (potentially more than
+  // one) may be defined as copies from the RHS.  Scan the overlapping
+  // portions of the LHS and RHS, keeping track of this and looking for
+  // overlapping live ranges that are NOT defined as copies.  If these exist, we
+  // cannot coalesce.
+  
+  LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
+  LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
+  
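+  // Advance whichever interval starts earlier up to the other's start point,
+  // so the overlap scan below begins at the first place of potential sharing.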
+  if (LHSIt->start < RHSIt->start) {
+    LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
+    if (LHSIt != LHS.begin()) --LHSIt;
+  } else if (RHSIt->start < LHSIt->start) {
+    RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
+    if (RHSIt != RHS.begin()) --RHSIt;
+  }
+  
+  SmallVector<unsigned, 8> EliminatedLHSVals;
+  
+  while (1) {
+    // Determine if these live intervals overlap.
+    bool Overlaps = false;
+    if (LHSIt->start <= RHSIt->start)
+      Overlaps = LHSIt->end > RHSIt->start;
+    else
+      Overlaps = RHSIt->end > LHSIt->start;
+    
+    // If the live intervals overlap, there are two interesting cases: if the
+    // LHS interval is defined by a copy from the RHS, it's ok and we record
+    // that the LHS value # is the same as the RHS.  If it's not, then we cannot
+    // coalesce these live ranges and we bail out.
+    if (Overlaps) {
+      // If we haven't already recorded that this value # is safe, check it.
+      if (!InVector(LHSIt->ValId, EliminatedLHSVals)) {
+        // Copy from the RHS?
+        unsigned SrcReg = LHS.getSrcRegForValNum(LHSIt->ValId);
+        if (rep(SrcReg) != RHS.reg)
+          return false;    // Nope, bail out.
+        
+        EliminatedLHSVals.push_back(LHSIt->ValId);
+      }
+      
+      // We know this entire LHS live range is okay, so skip it now.
+      if (++LHSIt == LHSEnd) break;
+      continue;
+    }
+    
+    if (LHSIt->end < RHSIt->end) {
+      if (++LHSIt == LHSEnd) break;
+    } else {
+      // One interesting case to check here.  It's possible that we have
+      // something like "X3 = Y" which defines a new value number in the LHS,
+      // and is the last use of this liverange of the RHS.  In this case, we
+      // want to notice this copy (so that it gets coalesced away) even though
+      // the live ranges don't actually overlap.
+      if (LHSIt->start == RHSIt->end) {
+        if (InVector(LHSIt->ValId, EliminatedLHSVals)) {
+          // We already know that this value number is going to be merged in
+          // if coalescing succeeds.  Just skip the liverange.
+          if (++LHSIt == LHSEnd) break;
+        } else {
+          // Otherwise, if this is a copy from the RHS, mark it as being merged
+          // in.
+          if (rep(LHS.getSrcRegForValNum(LHSIt->ValId)) == RHS.reg) {
+            EliminatedLHSVals.push_back(LHSIt->ValId);
+
+            // We know this entire LHS live range is okay, so skip it now.
+            if (++LHSIt == LHSEnd) break;
+          }
+        }
+      }
+      
+      if (++RHSIt == RHSEnd) break;
+    }
+  }
+  
+  // If we got here, we know that the coalescing will be successful and that
+  // the value numbers in EliminatedLHSVals will all be merged together.  Since
+  // the most common case is that EliminatedLHSVals has a single number, we
+  // optimize for it: if there is more than one value, we merge them all into
+  // the lowest numbered one, then handle the interval as if we were merging
+  // with one value number.
+  unsigned LHSValNo;
+  if (EliminatedLHSVals.size() > 1) {
+    // Loop through all the equal value numbers merging them into the smallest
+    // one.
+    unsigned Smallest = EliminatedLHSVals[0];
+    for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) {
+      if (EliminatedLHSVals[i] < Smallest) {
+        // Merge the current notion of the smallest into the smaller one.
+        LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]);
+        Smallest = EliminatedLHSVals[i];
+      } else {
+        // Merge into the smallest.
+        LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest);
+      }
+    }
+    LHSValNo = Smallest;
+  } else {
+    assert(!EliminatedLHSVals.empty() && "No copies from the RHS?");
+    LHSValNo = EliminatedLHSVals[0];
+  }
+  
+  // Okay, now that there is a single LHS value number that we're merging the
+  // RHS into, update the value number info for the LHS to indicate that the
+  // value number is defined where the RHS value number was.
+  LHS.setValueNumberInfo(LHSValNo, RHS.getValNumInfo(0));
+  
+  // Okay, the final step is to loop over the RHS live intervals, adding them to
+  // the LHS.
+  LHS.MergeRangesInAsValue(RHS, LHSValNo);
+  LHS.weight += RHS.weight;
+  if (RHS.preference && !LHS.preference)
+    LHS.preference = RHS.preference;
+  
+  return true;
+}
+
+/// JoinIntervals - Attempt to join these two intervals.  On failure, this
+/// returns false.  Otherwise, if one of the intervals being joined is a
+/// physreg, this method always canonicalizes LHS to be it.  The output
+/// "RHS" will not have been modified, so we can use this information
+/// below to update aliases.
+bool SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS) {
+  // Compute the final value assignment, assuming that the live ranges can be
+  // coalesced.
+  SmallVector<int, 16> LHSValNoAssignments;
+  SmallVector<int, 16> RHSValNoAssignments;
+  SmallVector<std::pair<unsigned,unsigned>, 16> ValueNumberInfo;
+                          
+  // If a live interval is a physical register, conservatively check if any
+  // of its sub-registers is overlapping the live interval of the virtual
+  // register. If so, do not coalesce.
+  if (MRegisterInfo::isPhysicalRegister(LHS.reg) &&
+      *mri_->getSubRegisters(LHS.reg)) {
+    for (const unsigned* SR = mri_->getSubRegisters(LHS.reg); *SR; ++SR)
+      if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+        DOUT << "Interfere with sub-register ";
+        DEBUG(li_->getInterval(*SR).print(DOUT, mri_));
+        return false;
+      }
+  } else if (MRegisterInfo::isPhysicalRegister(RHS.reg) &&
+             *mri_->getSubRegisters(RHS.reg)) {
+    for (const unsigned* SR = mri_->getSubRegisters(RHS.reg); *SR; ++SR)
+      if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
+        DOUT << "Interfere with sub-register ";
+        DEBUG(li_->getInterval(*SR).print(DOUT, mri_));
+        return false;
+      }
+  }
+                          
+  // Compute ultimate value numbers for the LHS and RHS values.
+  if (RHS.containsOneValue()) {
+    // Copies from a liveinterval with a single value are simple to handle and
+    // very common, handle the special case here.  This is important, because
+    // often RHS is small and LHS is large (e.g. a physreg).
+    
+    // Find out if the RHS is defined as a copy from some value in the LHS.
+    int RHSValID = -1;
+    std::pair<unsigned,unsigned> RHSValNoInfo;
+    unsigned RHSSrcReg = RHS.getSrcRegForValNum(0);
+    if ((RHSSrcReg == 0 || rep(RHSSrcReg) != LHS.reg)) {
+      // If RHS is not defined as a copy from the LHS, we can use simpler and
+      // faster checks to see if the live ranges are coalescable.  This joiner
+      // can't swap the LHS/RHS intervals though.
+      if (!MRegisterInfo::isPhysicalRegister(RHS.reg)) {
+        return SimpleJoin(LHS, RHS);
+      } else {
+        RHSValNoInfo = RHS.getValNumInfo(0);
+      }
+    } else {
+      // It was defined as a copy from the LHS, find out what value # it is.
+      unsigned ValInst = RHS.getInstForValNum(0);
+      RHSValID = LHS.getLiveRangeContaining(ValInst-1)->ValId;
+      RHSValNoInfo = LHS.getValNumInfo(RHSValID);
+    }
+    
+    LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+    RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+    ValueNumberInfo.resize(LHS.getNumValNums());
+    
+    // Okay, *all* of the values in LHS that are defined as a copy from RHS
+    // should now get updated.
+    for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) {
+      if (unsigned LHSSrcReg = LHS.getSrcRegForValNum(VN)) {
+        if (rep(LHSSrcReg) != RHS.reg) {
+          // If this is not a copy from the RHS, its value number will be
+          // unmodified by the coalescing.
+          ValueNumberInfo[VN] = LHS.getValNumInfo(VN);
+          LHSValNoAssignments[VN] = VN;
+        } else if (RHSValID == -1) {
+          // Otherwise, it is a copy from the RHS, and we don't already have a
+          // value# for it.  Keep the current value number, but remember it.
+          LHSValNoAssignments[VN] = RHSValID = VN;
+          ValueNumberInfo[VN] = RHSValNoInfo;
+        } else {
+          // Otherwise, use the specified value #.
+          LHSValNoAssignments[VN] = RHSValID;
+          if (VN != (unsigned)RHSValID)
+            ValueNumberInfo[VN].first = ~1U;
+          else
+            ValueNumberInfo[VN] = RHSValNoInfo;
+        }
+      } else {
+        ValueNumberInfo[VN] = LHS.getValNumInfo(VN);
+        LHSValNoAssignments[VN] = VN;
+      }
+    }
+    
+    assert(RHSValID != -1 && "Didn't find value #?");
+    RHSValNoAssignments[0] = RHSValID;
+    
+  } else {
+    // Loop over the value numbers of the LHS, seeing if any are defined from
+    // the RHS.
+    SmallVector<int, 16> LHSValsDefinedFromRHS;
+    LHSValsDefinedFromRHS.resize(LHS.getNumValNums(), -1);
+    for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) {
+      unsigned ValSrcReg = LHS.getSrcRegForValNum(VN);
+      if (ValSrcReg == 0)  // Src not defined by a copy?
+        continue;
+      
+      // DstReg is known to be a register in the LHS interval.  If the src is
+      // from the RHS interval, we can use its value #.
+      if (rep(ValSrcReg) != RHS.reg)
+        continue;
+      
+      // Figure out the value # from the RHS.
+      unsigned ValInst = LHS.getInstForValNum(VN);
+      LHSValsDefinedFromRHS[VN] = RHS.getLiveRangeContaining(ValInst-1)->ValId;
+    }
+    
+    // Loop over the value numbers of the RHS, seeing if any are defined from
+    // the LHS.
+    SmallVector<int, 16> RHSValsDefinedFromLHS;
+    RHSValsDefinedFromLHS.resize(RHS.getNumValNums(), -1);
+    for (unsigned VN = 0, e = RHS.getNumValNums(); VN != e; ++VN) {
+      unsigned ValSrcReg = RHS.getSrcRegForValNum(VN);
+      if (ValSrcReg == 0)  // Src not defined by a copy?
+        continue;
+      
+      // DstReg is known to be a register in the RHS interval.  If the src is
+      // from the LHS interval, we can use its value #.
+      if (rep(ValSrcReg) != LHS.reg)
+        continue;
+      
+      // Figure out the value # from the LHS.
+      unsigned ValInst = RHS.getInstForValNum(VN);
+      RHSValsDefinedFromLHS[VN] = LHS.getLiveRangeContaining(ValInst-1)->ValId;
+    }
+    
+    LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+    RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+    ValueNumberInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+    
+    for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) {
+      if (LHSValNoAssignments[VN] >= 0 || LHS.getInstForValNum(VN) == ~2U) 
+        continue;
+      ComputeUltimateVN(VN, ValueNumberInfo,
+                        LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+                        LHSValNoAssignments, RHSValNoAssignments, LHS, RHS);
+    }
+    for (unsigned VN = 0, e = RHS.getNumValNums(); VN != e; ++VN) {
+      if (RHSValNoAssignments[VN] >= 0 || RHS.getInstForValNum(VN) == ~2U)
+        continue;
+      // If this value number isn't a copy from the LHS, it's a new number.
+      if (RHSValsDefinedFromLHS[VN] == -1) {
+        ValueNumberInfo.push_back(RHS.getValNumInfo(VN));
+        RHSValNoAssignments[VN] = ValueNumberInfo.size()-1;
+        continue;
+      }
+      
+      ComputeUltimateVN(VN, ValueNumberInfo,
+                        RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+                        RHSValNoAssignments, LHSValNoAssignments, RHS, LHS);
+    }
+  }
+  
+  // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+  // interval lists to see if these intervals are coalescable.
+  LiveInterval::const_iterator I = LHS.begin();
+  LiveInterval::const_iterator IE = LHS.end();
+  LiveInterval::const_iterator J = RHS.begin();
+  LiveInterval::const_iterator JE = RHS.end();
+  
+  // Skip ahead until the first place of potential sharing.
+  if (I->start < J->start) {
+    I = std::upper_bound(I, IE, J->start);
+    if (I != LHS.begin()) --I;
+  } else if (J->start < I->start) {
+    J = std::upper_bound(J, JE, I->start);
+    if (J != RHS.begin()) --J;
+  }
+  
+  while (1) {
+    // Determine if these two live ranges overlap.
+    bool Overlaps;
+    if (I->start < J->start) {
+      Overlaps = I->end > J->start;
+    } else {
+      Overlaps = J->end > I->start;
+    }
+
+    // If so, check value # info to determine if they are really different.
+    if (Overlaps) {
+      // If the live range overlap will map to the same value number in the
+      // result liverange, we can still coalesce them.  If not, we can't.
+      if (LHSValNoAssignments[I->ValId] != RHSValNoAssignments[J->ValId])
+        return false;
+    }
+    
+    if (I->end < J->end) {
+      ++I;
+      if (I == IE) break;
+    } else {
+      ++J;
+      if (J == JE) break;
+    }
+  }
+
+  // If we get here, we know that we can coalesce the live ranges.  Ask the
+  // intervals to coalesce themselves now.
+  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0],
+           ValueNumberInfo);
+  return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+  // depth of the basic block (the unsigned), and then on the MBB number.
+  struct DepthMBBCompare {
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+      if (LHS.first > RHS.first) return true;   // Deeper loops first
+      return LHS.first == RHS.first &&
+        LHS.second->getNumber() < RHS.second->getNumber();
+    }
+  };
+}
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                                std::vector<CopyRec> *TryAgain, bool PhysOnly) {
+  DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+  
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E;) {
+    MachineInstr *Inst = MII++;
+    
+    // If this isn't a copy, we can't join intervals.
+    unsigned SrcReg, DstReg;
+    if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg)) continue;
+    
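+    // If the copy cannot be coalesced yet, remember it so that a later round
+    // can retry it (only when a try-again list was provided).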
+    if (TryAgain && !JoinCopy(Inst, SrcReg, DstReg, PhysOnly))
+      TryAgain->push_back(getCopyRec(Inst, SrcReg, DstReg));
+  }
+}
+
+void SimpleRegisterCoalescing::joinIntervals() {
+  DOUT << "********** JOINING INTERVALS ***********\n";
+
+  JoinedLIs.resize(li_->getNumIntervals());
+  JoinedLIs.reset();
+
+  std::vector<CopyRec> TryAgainList;
+  const LoopInfo &LI = getAnalysis<LoopInfo>();
+  if (LI.begin() == LI.end()) {
+    // If there are no loops in the function, join intervals in function order.
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+         I != E; ++I)
+      CopyCoalesceInMBB(I, &TryAgainList);
+  } else {
+    // Otherwise, join intervals in inner loops before other intervals.
+    // Unfortunately we can't just iterate over loop hierarchy here because
+    // there may be more MBB's than BB's.  Collect MBB's for sorting.
+
+    // Join intervals in the function prolog first. We want to join physical
+    // registers with virtual registers before the intervals get too long.
+    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); I != E;++I)
+      MBBs.push_back(std::make_pair(LI.getLoopDepth(I->getBasicBlock()), I));
+
+    // Sort by loop depth.
+    std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+    // Finally, join intervals in loop nest order.
+    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+      CopyCoalesceInMBB(MBBs[i].second, NULL, true);
+    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+      CopyCoalesceInMBB(MBBs[i].second, &TryAgainList, false);
+  }
+  
+  // Joining intervals can allow other intervals to be joined.  Iteratively join
+  // until we make no progress.
+  bool ProgressMade = true;
+  while (ProgressMade) {
+    ProgressMade = false;
+
+    for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+      CopyRec &TheCopy = TryAgainList[i];
+      if (TheCopy.MI &&
+          JoinCopy(TheCopy.MI, TheCopy.SrcReg, TheCopy.DstReg)) {
+        TheCopy.MI = 0;   // Mark this one as done.
+        ProgressMade = true;
+      }
+    }
+  }
+
+  // Some live ranges may have been lengthened due to coalescing; eliminate the
+  // unnecessary kills.
+  int RegNum = JoinedLIs.find_first();
+  while (RegNum != -1) {
+    unsigned Reg = RegNum + MRegisterInfo::FirstVirtualRegister;
+    unsigned repReg = rep(Reg);
+    LiveInterval &LI = li_->getInterval(repReg);
+    LiveVariables::VarInfo& svi = lv_->getVarInfo(Reg);
+    for (unsigned i = 0, e = svi.Kills.size(); i != e; ++i) {
+      MachineInstr *Kill = svi.Kills[i];
+      // Suppose vr1 = op vr2, x
+      // and vr1 and vr2 are coalesced. vr2 should still be marked kill
+      // unless it is a two-address operand.
+      if (li_->isRemoved(Kill) || hasRegisterDef(Kill, repReg))
+        continue;
+      if (LI.liveAt(li_->getInstructionIndex(Kill) + InstrSlots::NUM))
+        unsetRegisterKill(Kill, repReg);
+    }
+    RegNum = JoinedLIs.find_next(RegNum);
+  }
+  
+  DOUT << "*** Register mapping ***\n";
+  for (int i = 0, e = r2rMap_.size(); i != e; ++i)
+    if (r2rMap_[i]) {
+      DOUT << "  reg " << i << " -> ";
+      DEBUG(printRegName(r2rMap_[i]));
+      DOUT << "\n";
+    }
+}
+
+/// Return true if the two specified registers belong to different register
+/// classes.  The registers may be either phys or virt regs.
+bool SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
+                                             unsigned RegB) const {
+
+  // Get the register classes for the first reg.
+  if (MRegisterInfo::isPhysicalRegister(RegA)) {
+    assert(MRegisterInfo::isVirtualRegister(RegB) &&
+           "Shouldn't consider two physregs!");
+    return !mf_->getSSARegMap()->getRegClass(RegB)->contains(RegA);
+  }
+
+  // Compare against the regclass for the second reg.
+  const TargetRegisterClass *RegClass = mf_->getSSARegMap()->getRegClass(RegA);
+  if (MRegisterInfo::isVirtualRegister(RegB))
+    return RegClass != mf_->getSSARegMap()->getRegClass(RegB);
+  else
+    return !RegClass->contains(RegB);
+}
+
+/// lastRegisterUse - Returns the last use of the specified register between
+/// cycles Start and End. It also returns the use operand by reference. It
+/// returns NULL if there are no uses.
+MachineInstr *
+SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, unsigned Reg,
+                               MachineOperand *&MOU) {
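+  // Round End-1 down to an instruction slot boundary so that the backward
+  // scan below visits whole instructions.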
+  int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM;
+  int s = Start;
+  while (e >= s) {
+    // Skip deleted instructions
+    MachineInstr *MI = li_->getInstructionFromIndex(e);
+    while ((e - InstrSlots::NUM) >= s && !MI) {
+      e -= InstrSlots::NUM;
+      MI = li_->getInstructionFromIndex(e);
+    }
+    if (e < s || MI == NULL)
+      return NULL;
+
+    for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isUse() && MO.getReg() &&
+          mri_->regsOverlap(rep(MO.getReg()), Reg)) {
+        MOU = &MO;
+        return MI;
+      }
+    }
+
+    e -= InstrSlots::NUM;
+  }
+
+  return NULL;
+}
+
+
+/// findDefOperand - Returns the MachineOperand that is a def of the specified
+/// register. It returns NULL if the def is not found.
+MachineOperand *SimpleRegisterCoalescing::findDefOperand(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef() &&
+        mri_->regsOverlap(rep(MO.getReg()), Reg))
+      return &MO;
+  }
+  return NULL;
+}
+
+/// unsetRegisterKill - Unset the IsKill property of all uses of the specified
+/// register in the specified instruction.
+void SimpleRegisterCoalescing::unsetRegisterKill(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isUse() && MO.isKill() && MO.getReg() &&
+        mri_->regsOverlap(rep(MO.getReg()), Reg))
+      MO.unsetIsKill();
+  }
+}
+
+/// unsetRegisterKills - Unset the IsKill property of all uses of the specified
+/// register between cycles Start and End.
+void SimpleRegisterCoalescing::unsetRegisterKills(unsigned Start, unsigned End,
+                                       unsigned Reg) {
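+  // Round End-1 down to an instruction slot boundary, as in lastRegisterUse.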
+  int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM;
+  int s = Start;
+  while (e >= s) {
+    // Skip deleted instructions
+    MachineInstr *MI = li_->getInstructionFromIndex(e);
+    while ((e - InstrSlots::NUM) >= s && !MI) {
+      e -= InstrSlots::NUM;
+      MI = li_->getInstructionFromIndex(e);
+    }
+    if (e < s || MI == NULL)
+      return;
+
+    for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isUse() && MO.isKill() && MO.getReg() &&
+          mri_->regsOverlap(rep(MO.getReg()), Reg)) {
+        MO.unsetIsKill();
+      }
+    }
+
+    e -= InstrSlots::NUM;
+  }
+}
+
+/// hasRegisterDef - True if the instruction defines the specified register.
+///
+bool SimpleRegisterCoalescing::hasRegisterDef(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef() &&
+        mri_->regsOverlap(rep(MO.getReg()), Reg))
+      return true;
+  }
+  return false;
+}
+
+void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
+  if (MRegisterInfo::isPhysicalRegister(reg))
+    cerr << mri_->getName(reg);
+  else
+    cerr << "%reg" << reg;
+}
+
+void SimpleRegisterCoalescing::releaseMemory() {
+   r2rMap_.clear();
+   JoinedLIs.clear();
+}
+
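+/// isZeroLengthInterval - Return true if every live range in the interval
+/// spans no more than a single instruction, i.e. each use immediately follows
+/// its def.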
+static bool isZeroLengthInterval(LiveInterval *li) {
+  for (LiveInterval::Ranges::const_iterator
+         i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
+    if (i->end - i->start > LiveIntervals::InstrSlots::NUM)
+      return false;
+  return true;
+}
+
+bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  tm_ = &fn.getTarget();
+  mri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  li_ = &getAnalysis<LiveIntervals>();
+  lv_ = &getAnalysis<LiveVariables>();
+
+  DOUT << "********** SIMPLE REGISTER COALESCING **********\n"
+       << "********** Function: "
+       << ((Value*)mf_->getFunction())->getName() << '\n';
+
+  allocatableRegs_ = mri_->getAllocatableSet(fn);
+  for (MRegisterInfo::regclass_iterator I = mri_->regclass_begin(),
+         E = mri_->regclass_end(); I != E; ++I)
+    allocatableRCRegs_.insert(std::make_pair(*I,mri_->getAllocatableSet(fn, *I)));
+
+  r2rMap_.grow(mf_->getSSARegMap()->getLastVirtReg());
+
+  // Join (coalesce) intervals if requested.
+  if (EnableJoining) {
+    joinIntervals();
+    DOUT << "********** INTERVALS POST JOINING **********\n";
+    for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+      I->second.print(DOUT, mri_);
+      DOUT << "\n";
+    }
+  }
+
+  // Perform a final pass over the instructions: compute spill weights,
+  // coalesce virtual registers, and remove identity moves.
+  const LoopInfo &loopInfo = getAnalysis<LoopInfo>();
+
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    MachineBasicBlock* mbb = mbbi;
+    unsigned loopDepth = loopInfo.getLoopDepth(mbb->getBasicBlock());
+
+    for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+         mii != mie; ) {
+      // If the move will be an identity move, delete it.
+      unsigned srcReg, dstReg, RegRep;
+      if (tii_->isMoveInstr(*mii, srcReg, dstReg) &&
+          (RegRep = rep(srcReg)) == rep(dstReg)) {
+        // remove from def list
+        LiveInterval &RegInt = li_->getOrCreateInterval(RegRep);
+        MachineOperand *MO = mii->findRegisterDefOperand(dstReg);
+        // If the def of this move instruction is dead, remove its live range
+        // from the destination register's live interval.
+        if (MO->isDead()) {
+          unsigned MoveIdx = li_->getDefIndex(li_->getInstructionIndex(mii));
+          LiveInterval::iterator MLR = RegInt.FindLiveRangeContaining(MoveIdx);
+          RegInt.removeRange(MLR->start, MoveIdx+1);
+          if (RegInt.empty())
+            li_->removeInterval(RegRep);
+        }
+        li_->RemoveMachineInstrFromMaps(mii);
+        mii = mbbi->erase(mii);
+        ++numPeep;
+      } else {
+        SmallSet<unsigned, 4> UniqueUses;
+        for (unsigned i = 0, e = mii->getNumOperands(); i != e; ++i) {
+          const MachineOperand &mop = mii->getOperand(i);
+          if (mop.isRegister() && mop.getReg() &&
+              MRegisterInfo::isVirtualRegister(mop.getReg())) {
+            // replace register with representative register
+            unsigned reg = rep(mop.getReg());
+            mii->getOperand(i).setReg(reg);
+
+            // Multiple uses of reg by the same instruction. It should not
+            // contribute to spill weight again.
+            if (UniqueUses.count(reg) != 0)
+              continue;
+            LiveInterval &RegInt = li_->getInterval(reg);
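+            // Weight each use/def by 10^loopDepth so that references in
+            // deeper loops contribute more to the spill weight.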
+            float w = (mop.isUse()+mop.isDef()) * powf(10.0F, (float)loopDepth);
+            // If the definition instruction is re-materializable, its spill
+            // weight is half of what it would have been normally unless it's
+            // a load from fixed stack slot.
+            int Dummy;
+            if (RegInt.remat && !tii_->isLoadFromStackSlot(RegInt.remat, Dummy))
+              w /= 2;
+            RegInt.weight += w;
+            UniqueUses.insert(reg);
+          }
+        }
+        ++mii;
+      }
+    }
+  }
+
+  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+    LiveInterval &LI = I->second;
+    if (MRegisterInfo::isVirtualRegister(LI.reg)) {
+      // If the live interval length is essentially zero, i.e. in every live
+      // range the use follows def immediately, it doesn't make sense to spill
+      // it and hope it will be easier to allocate for this li.
+      if (isZeroLengthInterval(&LI))
+        LI.weight = HUGE_VALF;
+
+      // Slightly prefer live interval that has been assigned a preferred reg.
+      if (LI.preference)
+        LI.weight *= 1.01F;
+
+      // Divide the weight of the interval by its size.  This encourages 
+      // spilling of intervals that are large and have few uses, and
+      // discourages spilling of small intervals with many uses.
+      LI.weight /= LI.getSize();
+    }
+  }
+
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const {
+   li_->print(O, m);
+}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 0000000..372b1b3
--- /dev/null
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,228 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+//     A = B op C
+//
+// to:
+//
+//     A = B
+//     A op= C
+//
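+// For example (using illustrative register and opcode names), a two-address
+// add such as
+//
+//     %reg1 = ADD %reg2, %reg3
+//
+// would be rewritten to:
+//
+//     %reg1 = MOV %reg2
+//     %reg1 = ADD %reg1, %reg3
+//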
+// Note that if a register allocator chooses to use this pass, it has to be
+// capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted        , "Number of instructions commuted to coalesce");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+
+namespace {
+  struct VISIBILITY_HIDDEN TwoAddressInstructionPass
+   : public MachineFunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    TwoAddressInstructionPass() : MachineFunctionPass((intptr_t)&ID) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    /// runOnMachineFunction - pass entry point
+    bool runOnMachineFunction(MachineFunction&);
+  };
+
+  char TwoAddressInstructionPass::ID = 0;
+  RegisterPass<TwoAddressInstructionPass>
+  X("twoaddressinstruction", "Two-Address instruction pass");
+}
+
+const PassInfo *llvm::TwoAddressInstructionPassID = X.getPassInfo();
+
+void TwoAddressInstructionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<LiveVariables>();
+  AU.addPreserved<LiveVariables>();
+  AU.addPreservedID(PHIEliminationID);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two
+/// operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+  DOUT << "Machine Function\n";
+  const TargetMachine &TM = MF.getTarget();
+  const TargetInstrInfo &TII = *TM.getInstrInfo();
+  const MRegisterInfo &MRI = *TM.getRegisterInfo();
+  LiveVariables &LV = getAnalysis<LiveVariables>();
+
+  bool MadeChange = false;
+
+  DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n";
+  DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+
+  for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+       mbbi != mbbe; ++mbbi) {
+    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+         mi != me; ++mi) {
+      const TargetInstrDescriptor *TID = mi->getInstrDescriptor();
+
+      bool FirstTied = true;
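+      // Scan the operands for a use that is constrained to be tied to an
+      // earlier def operand.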
+      for (unsigned si = 1, e = TID->numOperands; si < e; ++si) {
+        int ti = TID->getOperandConstraint(si, TOI::TIED_TO);
+        if (ti == -1)
+          continue;
+
+        if (FirstTied) {
+          ++NumTwoAddressInstrs;
+          DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM));
+        }
+        FirstTied = false;
+
+        assert(mi->getOperand(si).isRegister() && mi->getOperand(si).getReg() &&
+               mi->getOperand(si).isUse() && "two address instruction invalid");
+
+        // If the two operands are the same, we just remove the use and mark
+        // the def as def&use; otherwise we have to insert a copy.
+        if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) {
+          // rewrite:
+          //     a = b op c
+          // to:
+          //     a = b
+          //     a = a op c
+          unsigned regA = mi->getOperand(ti).getReg();
+          unsigned regB = mi->getOperand(si).getReg();
+
+          assert(MRegisterInfo::isVirtualRegister(regA) &&
+                 MRegisterInfo::isVirtualRegister(regB) &&
+                 "cannot update physical register live information");
+
+#ifndef NDEBUG
+          // First, verify that we don't have a use of a in the instruction (a =
+          // b + a for example) because our transformation will not work. This
+          // should never occur because we are in SSA form.
+          for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+            assert((int)i == ti ||
+                   !mi->getOperand(i).isRegister() ||
+                   mi->getOperand(i).getReg() != regA);
+#endif
+
+          // If this instruction is not the killing user of B, see if we can
+          // rearrange the code to make it so.  Making it the killing user will
+          // allow us to coalesce A and B together, eliminating the copy we are
+          // about to insert.
+          if (!LV.KillsRegister(mi, regB)) {
+            // If this instruction is commutative, check to see if C dies.  If
+            // so, swap the B and C operands.  This makes the live ranges of A
+            // and C joinable.
+            // FIXME: This code also works for A := B op C instructions.
+            if ((TID->Flags & M_COMMUTABLE) && mi->getNumOperands() == 3) {
+              assert(mi->getOperand(3-si).isRegister() &&
+                     "Not a proper commutative instruction!");
+              unsigned regC = mi->getOperand(3-si).getReg();
+              if (LV.KillsRegister(mi, regC)) {
+                DOUT << "2addr: COMMUTING  : " << *mi;
+                MachineInstr *NewMI = TII.commuteInstruction(mi);
+                if (NewMI == 0) {
+                  DOUT << "2addr: COMMUTING FAILED!\n";
+                } else {
+                  DOUT << "2addr: COMMUTED TO: " << *NewMI;
+                  // If the instruction changed to commute it, update livevar.
+                  if (NewMI != mi) {
+                    LV.instructionChanged(mi, NewMI);  // Update live variables
+                    mbbi->insert(mi, NewMI);           // Insert the new inst
+                    mbbi->erase(mi);                   // Nuke the old inst.
+                    mi = NewMI;
+                  }
+
+                  ++NumCommuted;
+                  regB = regC;
+                  goto InstructionRearranged;
+                }
+              }
+            }
+
+            // If this instruction is potentially convertible to a true
+            // three-address instruction,
+            if (TID->Flags & M_CONVERTIBLE_TO_3_ADDR) {
+              // FIXME: This assumes there are no more operands which are tied
+              // to another register.
+#ifndef NDEBUG
+              for (unsigned i = si+1, e = TID->numOperands; i < e; ++i)
+                assert(TID->getOperandConstraint(i, TOI::TIED_TO) == -1);
+#endif
+
+              if (MachineInstr *New = TII.convertToThreeAddress(mbbi, mi, LV)) {
+                DOUT << "2addr: CONVERTING 2-ADDR: " << *mi;
+                DOUT << "2addr:         TO 3-ADDR: " << *New;
+                mbbi->erase(mi);                 // Nuke the old inst.
+                mi = New;
+                ++NumConvertedTo3Addr;
+                // Done with this instruction.
+                break;
+              }
+            }
+          }
+
+        InstructionRearranged:
+          const TargetRegisterClass* rc = MF.getSSARegMap()->getRegClass(regA);
+          MRI.copyRegToReg(*mbbi, mi, regA, regB, rc);
+
+          MachineBasicBlock::iterator prevMi = prior(mi);
+          DOUT << "\t\tprepend:\t"; DEBUG(prevMi->print(*cerr.stream(), &TM));
+
+          // Update live variables for regA
+          LiveVariables::VarInfo& varInfo = LV.getVarInfo(regA);
+          varInfo.DefInst = prevMi;
+
+          if (LV.removeVirtualRegisterKilled(regB, mbbi, mi))
+            LV.addVirtualRegisterKilled(regB, prevMi);
+
+          if (LV.removeVirtualRegisterDead(regB, mbbi, mi))
+            LV.addVirtualRegisterDead(regB, prevMi);
+
+          // Replace all occurrences of regB with regA.
+          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+            if (mi->getOperand(i).isRegister() &&
+                mi->getOperand(i).getReg() == regB)
+              mi->getOperand(i).setReg(regA);
+          }
+        }
+
+        assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse());
+        mi->getOperand(ti).setReg(mi->getOperand(si).getReg());
+        MadeChange = true;
+
+        DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM));
+      }
+    }
+  }
+
+  return MadeChange;
+}
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..0a611ab
--- /dev/null
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,81 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass.  Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node.  To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack.  In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks.  Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+using namespace llvm;
+
+namespace {
+  class VISIBILITY_HIDDEN UnreachableBlockElim : public FunctionPass {
+    virtual bool runOnFunction(Function &F);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    UnreachableBlockElim() : FunctionPass((intptr_t)&ID) {}
+  };
+  char UnreachableBlockElim::ID = 0;
+  RegisterPass<UnreachableBlockElim>
+  X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+}
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+  return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+  std::set<BasicBlock*> Reachable;
+
+  // Mark all reachable blocks.
+  for (df_ext_iterator<Function*> I = df_ext_begin(&F, Reachable),
+         E = df_ext_end(&F, Reachable); I != E; ++I)
+    /* Mark all reachable blocks */;
+
+  // Loop over all dead blocks, remembering them and deleting all instructions
+  // in them.
+  std::vector<BasicBlock*> DeadBlocks;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+    if (!Reachable.count(I)) {
+      BasicBlock *BB = I;
+      DeadBlocks.push_back(BB);
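+      // Replace any PHI results with null so that uses in other (also dead)
+      // blocks stay well-formed while the blocks are torn down.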
+      while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+        PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+        BB->getInstList().pop_front();
+      }
+      for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+        (*SI)->removePredecessor(BB);
+      BB->dropAllReferences();
+    }
+
+  if (DeadBlocks.empty()) return false;
+
+  // Actually remove the blocks now.
+  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+    F.getBasicBlockList().erase(DeadBlocks[i]);
+
+  return true;
+}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 0000000..196e849
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,1118 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpills, "Number of register spills");
+STATISTIC(NumReMats, "Number of re-materialization");
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumReused, "Number of values reused");
+STATISTIC(NumDSE   , "Number of dead stores elided");
+STATISTIC(NumDCE   , "Number of copies elided");
+
+namespace {
+  enum SpillerName { simple, local };
+
+  static cl::opt<SpillerName>
+  SpillerOpt("spiller",
+             cl::desc("Spiller to use: (default: local)"),
+             cl::Prefix,
+             cl::values(clEnumVal(simple, "  simple spiller"),
+                        clEnumVal(local,  "  local spiller"),
+                        clEnumValEnd),
+             cl::init(local));
+}
+
+//===----------------------------------------------------------------------===//
+//  VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+VirtRegMap::VirtRegMap(MachineFunction &mf)
+  : TII(*mf.getTarget().getInstrInfo()), MF(mf), 
+    Virt2PhysMap(NO_PHYS_REG), Virt2StackSlotMap(NO_STACK_SLOT),
+    ReMatId(MAX_STACK_SLOT+1) {
+  grow();
+}
+
+void VirtRegMap::grow() {
+  Virt2PhysMap.grow(MF.getSSARegMap()->getLastVirtReg());
+  Virt2StackSlotMap.grow(MF.getSSARegMap()->getLastVirtReg());
+}
+
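+/// assignVirt2StackSlot - Create a stack slot sized and aligned for the
+/// register class of the given virtual register and record the mapping.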
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+  assert(MRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign stack slot to already spilled register");
+  const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(virtReg);
+  int frameIndex = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                        RC->getAlignment());
+  Virt2StackSlotMap[virtReg] = frameIndex;
+  ++NumSpills;
+  return frameIndex;
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int frameIndex) {
+  assert(MRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign stack slot to already spilled register");
+  assert((frameIndex >= 0 ||
+          (frameIndex >= MF.getFrameInfo()->getObjectIndexBegin())) &&
+         "illegal fixed frame index");
+  Virt2StackSlotMap[virtReg] = frameIndex;
+}
+
+int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
+  assert(MRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign re-mat id to already spilled register");
+  const MachineInstr *DefMI = getReMaterializedMI(virtReg);
+  int FrameIdx;
+  if (TII.isLoadFromStackSlot((MachineInstr*)DefMI, FrameIdx)) {
+    // A load from a stack slot is re-materialized as a reload from that slot!
+    Virt2StackSlotMap[virtReg] = FrameIdx;
+    return FrameIdx;
+  }
+  Virt2StackSlotMap[virtReg] = ReMatId;
+  return ReMatId++;
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
+                            unsigned OpNo, MachineInstr *NewMI) {
+  // Move previous memory references folded to new instruction.
+  MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
+  for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
+         E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
+    MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
+    MI2VirtMap.erase(I++);
+  }
+
+  ModRef MRInfo;
+  const TargetInstrDescriptor *TID = OldMI->getInstrDescriptor();
+  if (TID->getOperandConstraint(OpNo, TOI::TIED_TO) != -1 ||
+      TID->findTiedToSrcOperand(OpNo) != -1) {
+    // Folded a two-address operand.
+    MRInfo = isModRef;
+  } else if (OldMI->getOperand(OpNo).isDef()) {
+    MRInfo = isMod;
+  } else {
+    MRInfo = isRef;
+  }
+
+  // Add the new memory reference.
+  MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::print(std::ostream &OS) const {
+  const MRegisterInfo* MRI = MF.getTarget().getRegisterInfo();
+
+  OS << "********** REGISTER MAP **********\n";
+  for (unsigned i = MRegisterInfo::FirstVirtualRegister,
+         e = MF.getSSARegMap()->getLastVirtReg(); i <= e; ++i) {
+    if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
+      OS << "[reg" << i << " -> " << MRI->getName(Virt2PhysMap[i]) << "]\n";
+
+  }
+
+  for (unsigned i = MRegisterInfo::FirstVirtualRegister,
+         e = MF.getSSARegMap()->getLastVirtReg(); i <= e; ++i)
+    if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
+      OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n";
+  OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+  print(DOUT);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Simple Spiller Implementation
+//===----------------------------------------------------------------------===//
+
+Spiller::~Spiller() {}
+
+namespace {
+  struct VISIBILITY_HIDDEN SimpleSpiller : public Spiller {
+    bool runOnMachineFunction(MachineFunction& mf, VirtRegMap &VRM);
+  };
+}
+
+bool SimpleSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
+  DOUT << "********** REWRITE MACHINE CODE **********\n";
+  DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+  const TargetMachine &TM = MF.getTarget();
+  const MRegisterInfo &MRI = *TM.getRegisterInfo();
+
+  // LoadedRegs - Keep track of which vregs are loaded, so that we only load
+  // each vreg once (in the case where a spilled vreg is used by multiple
+  // operands).  This is always smaller than the number of operands to the
+  // current machine instr, so it should be small.
+  std::vector<unsigned> LoadedRegs;
+
+  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+       MBBI != E; ++MBBI) {
+    DOUT << MBBI->getBasicBlock()->getName() << ":\n";
+    MachineBasicBlock &MBB = *MBBI;
+    for (MachineBasicBlock::iterator MII = MBB.begin(),
+           E = MBB.end(); MII != E; ++MII) {
+      MachineInstr &MI = *MII;
+      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI.getOperand(i);
+        if (MO.isRegister() && MO.getReg())
+          if (MRegisterInfo::isVirtualRegister(MO.getReg())) {
+            unsigned VirtReg = MO.getReg();
+            unsigned PhysReg = VRM.getPhys(VirtReg);
+            if (VRM.hasStackSlot(VirtReg)) {
+              int StackSlot = VRM.getStackSlot(VirtReg);
+              const TargetRegisterClass* RC =
+                MF.getSSARegMap()->getRegClass(VirtReg);
+
+              if (MO.isUse() &&
+                  std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg)
+                  == LoadedRegs.end()) {
+                MRI.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+                LoadedRegs.push_back(VirtReg);
+                ++NumLoads;
+                DOUT << '\t' << *prior(MII);
+              }
+
+              if (MO.isDef()) {
+                MRI.storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
+                ++NumStores;
+              }
+            }
+            MF.setPhysRegUsed(PhysReg);
+            MI.getOperand(i).setReg(PhysReg);
+          } else {
+            MF.setPhysRegUsed(MO.getReg());
+          }
+      }
+
+      DOUT << '\t' << MI;
+      LoadedRegs.clear();
+    }
+  }
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+//  Local Spiller Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// LocalSpiller - This spiller does a simple pass over the machine basic
+  /// block to attempt to keep spills in registers as much as possible for
+  /// blocks that have low register pressure (the vreg may be spilled due to
+  /// register pressure in other blocks).
+  class VISIBILITY_HIDDEN LocalSpiller : public Spiller {
+    const MRegisterInfo *MRI;
+    const TargetInstrInfo *TII;
+  public:
+    bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
+      MRI = MF.getTarget().getRegisterInfo();
+      TII = MF.getTarget().getInstrInfo();
+      DOUT << "\n**** Local spiller rewriting function '"
+           << MF.getFunction()->getName() << "':\n";
+
+      std::vector<MachineInstr *> ReMatedMIs;
+      for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+           MBB != E; ++MBB)
+        RewriteMBB(*MBB, VRM, ReMatedMIs);
+      for (unsigned i = 0, e = ReMatedMIs.size(); i != e; ++i)
+        delete ReMatedMIs[i];
+      return true;
+    }
+  private:
+    void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+                    std::vector<MachineInstr*> &ReMatedMIs);
+  };
+}
+
+/// AvailableSpills - As the local spiller is scanning and rewriting an MBB from
+/// top down, this keeps track of which spill slots are available in each
+/// physical register.
+///
+/// Note that not all physregs are created equal here.  In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs are values that the register-allocated program is using that
+/// we cannot CHANGE, but we can read if we like.  We keep track of this on a
+/// per-stack-slot basis as the low bit in the value of the SpillSlotsAvailable
+/// entries.  The predicate 'canClobberPhysReg()' checks this bit and
+/// addAvailable sets it when its CanClobber argument is true.
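+///
+/// For example, if the value of spill slot #3 is available in some physreg R
+/// and may be clobbered, SpillSlotsAvailable[3] == (R << 1) | 1, and
+/// getSpillSlotPhysReg() strips the flag again with a right shift.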
+namespace {
+class VISIBILITY_HIDDEN AvailableSpills {
+  const MRegisterInfo *MRI;
+  const TargetInstrInfo *TII;
+
+  // SpillSlotsAvailable - This map keeps track of all of the spilled virtual
+  // register values that are still available, due to being loaded or stored to,
+  // but not invalidated yet.
+  std::map<int, unsigned> SpillSlotsAvailable;
+    
+  // PhysRegsAvailable - This is the inverse of SpillSlotsAvailable, indicating
+  // which stack slot values are currently held by a physreg.  This is used to
+  // invalidate entries in SpillSlotsAvailable when a physreg is modified.
+  std::multimap<unsigned, int> PhysRegsAvailable;
+  
+  void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+  void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+  AvailableSpills(const MRegisterInfo *mri, const TargetInstrInfo *tii)
+    : MRI(mri), TII(tii) {
+  }
+  
+  const MRegisterInfo *getRegInfo() const { return MRI; }
+
+  /// getSpillSlotPhysReg - If the specified stack slot is available in a 
+  /// physical register, return that PhysReg, otherwise return 0.
+  unsigned getSpillSlotPhysReg(int Slot) const {
+    std::map<int, unsigned>::const_iterator I = SpillSlotsAvailable.find(Slot);
+    if (I != SpillSlotsAvailable.end()) {
+      return I->second >> 1;  // Remove the CanClobber bit.
+    }
+    return 0;
+  }
+
+  /// addAvailable - Mark that the specified stack slot is available in the
+  /// specified physreg.  If CanClobber is true, the physreg can be modified at
+  /// any time without changing the semantics of the program.
+  void addAvailable(int Slot, MachineInstr *MI, unsigned Reg,
+                    bool CanClobber = true) {
+    // If this stack slot is thought to be available in some other physreg, 
+    // remove its record.
+    ModifyStackSlot(Slot);
+    
+    PhysRegsAvailable.insert(std::make_pair(Reg, Slot));
+    SpillSlotsAvailable[Slot] = (Reg << 1) | (unsigned)CanClobber;
+  
+    if (Slot > VirtRegMap::MAX_STACK_SLOT)
+      DOUT << "Remembering RM#" << Slot-VirtRegMap::MAX_STACK_SLOT-1;
+    else
+      DOUT << "Remembering SS#" << Slot;
+    DOUT << " in physreg " << MRI->getName(Reg) << "\n";
+  }
+
+  /// canClobberPhysReg - Return true if the spiller is allowed to change the 
+  /// value of the specified stackslot register if it desires.  The specified
+  /// stack slot must be available in a physreg for this query to make sense.
+  bool canClobberPhysReg(int Slot) const {
+    assert(SpillSlotsAvailable.count(Slot) && "Slot not available!");
+    return SpillSlotsAvailable.find(Slot)->second & 1;
+  }
+  
+  /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+  /// stackslot register. The register is still available but is no longer
+  /// allowed to be modified.
+  void disallowClobberPhysReg(unsigned PhysReg);
+  
+  /// ClobberPhysReg - This is called when the specified physreg changes
+  /// value.  We use this to invalidate any info about stuff we think lives in
+  /// it and any of its aliases.
+  void ClobberPhysReg(unsigned PhysReg);
+
+  /// ModifyStackSlot - This method is called when the value in a stack slot
+  /// changes.  This removes information about which register the previous value
+  /// for this slot lives in (as the previous value is dead now).
+  void ModifyStackSlot(int Slot);
+};
+}
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+/// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(PhysReg);
+  while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+    int Slot = I->second;
+    I++;
+    assert((SpillSlotsAvailable[Slot] >> 1) == PhysReg &&
+           "Bidirectional map mismatch!");
+    SpillSlotsAvailable[Slot] &= ~1;
+    DOUT << "PhysReg " << MRI->getName(PhysReg)
+         << " copied, it is available for use but can no longer be modified\n";
+  }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+  for (const unsigned *AS = MRI->getAliasSet(PhysReg); *AS; ++AS)
+    disallowClobberPhysRegOnly(*AS);
+  disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value.  We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(PhysReg);
+  while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+    int Slot = I->second;
+    PhysRegsAvailable.erase(I++);
+    assert((SpillSlotsAvailable[Slot] >> 1) == PhysReg &&
+           "Bidirectional map mismatch!");
+    SpillSlotsAvailable.erase(Slot);
+    DOUT << "PhysReg " << MRI->getName(PhysReg)
+         << " clobbered, invalidating ";
+    if (Slot > VirtRegMap::MAX_STACK_SLOT)
+      DOUT << "RM#" << Slot-VirtRegMap::MAX_STACK_SLOT-1 << "\n";
+    else
+      DOUT << "SS#" << Slot << "\n";
+  }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value.  We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+  for (const unsigned *AS = MRI->getAliasSet(PhysReg); *AS; ++AS)
+    ClobberPhysRegOnly(*AS);
+  ClobberPhysRegOnly(PhysReg);
+}
+
+/// ModifyStackSlot - This method is called when the value in a stack slot
+/// changes.  This removes information about which register the previous value
+/// for this slot lives in (as the previous value is dead now).
+void AvailableSpills::ModifyStackSlot(int Slot) {
+  std::map<int, unsigned>::iterator It = SpillSlotsAvailable.find(Slot);
+  if (It == SpillSlotsAvailable.end()) return;
+  unsigned Reg = It->second >> 1;
+  SpillSlotsAvailable.erase(It);
+  
+  // This register may hold the value of multiple stack slots; only remove this
+  // stack slot from the set of values the register contains.
+  std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+  for (; ; ++I) {
+    assert(I != PhysRegsAvailable.end() && I->first == Reg &&
+           "Map inverse broken!");
+    if (I->second == Slot) break;
+  }
+  PhysRegsAvailable.erase(I);
+}
+
+
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI, BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps) {
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (KillOps[Reg] == &MO) {
+      RegKills.reset(Reg);
+      KillOps[Reg] = NULL;
+    }
+  }
+}
+
+/// UpdateKills - Track and update kill info. If an MI reads a register that is
+/// marked kill, then it must be due to register reuse. Transfer the kill info
+/// over.
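+///
+/// For example, if an earlier use of a register was marked <kill> but a reused
+/// reload makes this instruction read that register again, the stale kill flag
+/// is cleared and this use becomes the new kill (unless it is a tied
+/// two-address operand).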
+static void UpdateKills(MachineInstr &MI, BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps) {
+  const TargetInstrDescriptor *TID = MI.getInstrDescriptor();
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+    
+    if (RegKills[Reg]) {
+      // That can't be right. Register is killed but not re-defined and it's
+      // being reused. Let's fix that.
+      KillOps[Reg]->unsetIsKill();
+      if (i < TID->numOperands &&
+          TID->getOperandConstraint(i, TOI::TIED_TO) == -1)
+        // Unless it's a two-address operand, this is the new kill.
+        MO.setIsKill();
+    }
+
+    if (MO.isKill()) {
+      RegKills.set(Reg);
+      KillOps[Reg] = &MO;
+    }
+  }
+
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    RegKills.reset(Reg);
+    KillOps[Reg] = NULL;
+  }
+}
+
+
+// ReusedOp - For each reused operand, we keep track of a bit of information, in
+// case we need to roll back upon processing a new operand.  See comments below.
+namespace {
+  struct ReusedOp {
+    // The MachineInstr operand that reused an available value.
+    unsigned Operand;
+
+    // StackSlot - The spill slot of the value being reused.
+    unsigned StackSlot;
+
+    // PhysRegReused - The physical register the value was available in.
+    unsigned PhysRegReused;
+
+    // AssignedPhysReg - The physreg that was assigned for use by the reload.
+    unsigned AssignedPhysReg;
+    
+    // VirtReg - The virtual register itself.
+    unsigned VirtReg;
+
+    ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+             unsigned vreg)
+      : Operand(o), StackSlot(ss), PhysRegReused(prr), AssignedPhysReg(apr),
+      VirtReg(vreg) {}
+  };
+  
+  /// ReuseInfo - This maintains a collection of ReusedOp records for each
+  /// operand that is reused instead of reloaded.
+  class VISIBILITY_HIDDEN ReuseInfo {
+    MachineInstr &MI;
+    std::vector<ReusedOp> Reuses;
+    BitVector PhysRegsClobbered;
+  public:
+    ReuseInfo(MachineInstr &mi, const MRegisterInfo *mri) : MI(mi) {
+      PhysRegsClobbered.resize(mri->getNumRegs());
+    }
+    
+    bool hasReuses() const {
+      return !Reuses.empty();
+    }
+    
+    /// addReuse - If we choose to reuse a virtual register that is already
+    /// available instead of reloading it, remember that we did so.
+    void addReuse(unsigned OpNo, unsigned StackSlot,
+                  unsigned PhysRegReused, unsigned AssignedPhysReg,
+                  unsigned VirtReg) {
+      // If the reload is to the assigned register anyway, no undo will be
+      // required.
+      if (PhysRegReused == AssignedPhysReg) return;
+      
+      // Otherwise, remember this.
+      Reuses.push_back(ReusedOp(OpNo, StackSlot, PhysRegReused, 
+                                AssignedPhysReg, VirtReg));
+    }
+
+    void markClobbered(unsigned PhysReg) {
+      PhysRegsClobbered.set(PhysReg);
+    }
+
+    bool isClobbered(unsigned PhysReg) const {
+      return PhysRegsClobbered.test(PhysReg);
+    }
+    
+    /// GetRegForReload - We are about to emit a reload into PhysReg.  If there
+    /// is some other operand that is using the specified register, either pick
+    /// a new register to use, or evict the previous reload and use this reg. 
+    unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+                             AvailableSpills &Spills,
+                             std::map<int, MachineInstr*> &MaybeDeadStores,
+                             SmallSet<unsigned, 8> &Rejected,
+                             BitVector &RegKills,
+                             std::vector<MachineOperand*> &KillOps) {
+      if (Reuses.empty()) return PhysReg;  // This is most often empty.
+
+      for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+        ReusedOp &Op = Reuses[ro];
+        // If we find some other reuse that was supposed to use this register
+        // exactly for its reload, we can change this reload to use ITS reload
+        // register. That is, unless its reload register has already been
+        // considered and subsequently rejected because it has also been reused
+        // by another operand.
+        if (Op.PhysRegReused == PhysReg &&
+            Rejected.count(Op.AssignedPhysReg) == 0) {
+          // Yup, use the reload register that we didn't use before.
+          unsigned NewReg = Op.AssignedPhysReg;
+          Rejected.insert(PhysReg);
+          return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected,
+                                 RegKills, KillOps);
+        } else {
+          // Otherwise, we might also have a problem if a previously reused
+          // value aliases the new register.  If so, codegen the previous reload
+          // and use this one.          
+          unsigned PRRU = Op.PhysRegReused;
+          const MRegisterInfo *MRI = Spills.getRegInfo();
+          if (MRI->areAliases(PRRU, PhysReg)) {
+            // Okay, we found out that an alias of a reused register
+            // was used.  This isn't good because it means we have
+            // to undo a previous reuse.
+            MachineBasicBlock *MBB = MI->getParent();
+            const TargetRegisterClass *AliasRC =
+              MBB->getParent()->getSSARegMap()->getRegClass(Op.VirtReg);
+
+            // Copy Op out of the vector and remove it, we're going to insert an
+            // explicit load for it.
+            ReusedOp NewOp = Op;
+            Reuses.erase(Reuses.begin()+ro);
+
+            // Ok, we're going to try to reload the assigned physreg into the
+            // slot that we were supposed to in the first place.  However, that
+            // register could hold a reuse.  Check to see if it conflicts or
+            // would prefer us to use a different register.
+            unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg,
+                                                  MI, Spills, MaybeDeadStores,
+                                                  Rejected, RegKills, KillOps);
+            
+            MRI->loadRegFromStackSlot(*MBB, MI, NewPhysReg,
+                                      NewOp.StackSlot, AliasRC);
+            Spills.ClobberPhysReg(NewPhysReg);
+            Spills.ClobberPhysReg(NewOp.PhysRegReused);
+            
+            // Any stores to this stack slot are not dead anymore.
+            MaybeDeadStores.erase(NewOp.StackSlot);
+            
+            MI->getOperand(NewOp.Operand).setReg(NewPhysReg);
+            
+            Spills.addAvailable(NewOp.StackSlot, MI, NewPhysReg);
+            ++NumLoads;
+            MachineBasicBlock::iterator MII = MI;
+            --MII;
+            UpdateKills(*MII, RegKills, KillOps);
+            DOUT << '\t' << *MII;
+            
+            DOUT << "Reuse undone!\n";
+            --NumReused;
+            
+            // Finally, PhysReg is now available, go ahead and use it.
+            return PhysReg;
+          }
+        }
+      }
+      return PhysReg;
+    }
+
+    /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+    /// 'Rejected' set to remember which registers have been considered and
+    /// rejected for the reload. This avoids infinite looping in cases like
+    /// this:
+    /// t1 := op t2, t3
+    /// t2 <- assigned r0 for use by the reload but ended up reusing r1
+    /// t3 <- assigned r1 for use by the reload but ended up reusing r0
+    /// t1 <- desires r1
+    ///       sees r1 is taken by t2, tries t2's reload register r0
+    ///       sees r0 is taken by t3, tries t3's reload register r1
+    ///       sees r1 is taken by t2, tries t2's reload register r0 ...
+    unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+                             AvailableSpills &Spills,
+                             std::map<int, MachineInstr*> &MaybeDeadStores,
+                             BitVector &RegKills,
+                             std::vector<MachineOperand*> &KillOps) {
+      SmallSet<unsigned, 8> Rejected;
+      return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected,
+                             RegKills, KillOps);
+    }
+  };
+}
+
+
+/// RewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them.  If possible, avoid reloading vregs.
+void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+                              std::vector<MachineInstr*> &ReMatedMIs) {
+  DOUT << MBB.getBasicBlock()->getName() << ":\n";
+
+  // Spills - Keep track of which spilled values are available in physregs so
+  // that we can choose to reuse the physregs instead of emitting reloads.
+  AvailableSpills Spills(MRI, TII);
+  
+  // MaybeDeadStores - When we need to write a value back into a stack slot,
+  // keep track of the inserted store.  If the stack slot value is never read
+  // (because the value was used from some available register, for example), and
+  // subsequently stored to, the original store is dead.  This map keeps track
+  // of inserted stores that are not used.  If we see a subsequent store to the
+  // same stack slot, the original store is deleted.
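+  //
+  // For example:
+  //     store R0 -> [SS#4]        ; recorded here as a maybe-dead store
+  //     ... no instruction reads SS#4 ...
+  //     store R1 -> [SS#4]        ; the first store is now dead and deleted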
+  std::map<int, MachineInstr*> MaybeDeadStores;
+
+  // Keep track of kill information.
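+  // RegKills[Reg] is set when the last use of Reg seen so far carried a kill
+  // marker; KillOps[Reg] points at that operand so the marker can be moved or
+  // cleared if the register is reused or the killing instruction is deleted.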
+  BitVector RegKills(MRI->getNumRegs());
+  std::vector<MachineOperand*>  KillOps;
+  KillOps.resize(MRI->getNumRegs(), NULL);
+
+  MachineFunction &MF = *MBB.getParent();
+  for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+       MII != E; ) {
+    MachineInstr &MI = *MII;
+    MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+    VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+
+    bool Erased = false;
+    bool BackTracked = false;
+
+    /// ReusedOperands - Keep track of operand reuse in case we need to undo
+    /// reuse.
+    ReuseInfo ReusedOperands(MI, MRI);
+
+    // Loop over all of the implicit defs, clearing them from our available
+    // sets.
+    const TargetInstrDescriptor *TID = MI.getInstrDescriptor();
+
+    // If this instruction is being rematerialized, just remove it!
+    int FrameIdx;
+    if (TII->isTriviallyReMaterializable(&MI) ||
+        TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+      Erased = true;
+      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI.getOperand(i);
+        if (!MO.isRegister() || MO.getReg() == 0)
+          continue;   // Ignore non-register operands.
+        if (MO.isDef() && !VRM.isReMaterialized(MO.getReg())) {
+          Erased = false;
+          break;
+        }
+      }
+      if (Erased) {
+        VRM.RemoveFromFoldedVirtMap(&MI);
+        ReMatedMIs.push_back(MI.removeFromParent());
+        goto ProcessNextInst;
+      }
+    }
+
+    if (TID->ImplicitDefs) {
+      const unsigned *ImpDef = TID->ImplicitDefs;
+      for ( ; *ImpDef; ++ImpDef) {
+        MF.setPhysRegUsed(*ImpDef);
+        ReusedOperands.markClobbered(*ImpDef);
+        Spills.ClobberPhysReg(*ImpDef);
+      }
+    }
+
+    // Process all of the spilled uses and all non spilled reg references.
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (!MO.isRegister() || MO.getReg() == 0)
+        continue;   // Ignore non-register operands.
+      
+      if (MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        // Ignore physregs for spilling, but remember that it is used by this
+        // function.
+        MF.setPhysRegUsed(MO.getReg());
+        ReusedOperands.markClobbered(MO.getReg());
+        continue;
+      }
+      
+      assert(MRegisterInfo::isVirtualRegister(MO.getReg()) &&
+             "Not a virtual or a physical register?");
+      
+      unsigned VirtReg = MO.getReg();
+      if (!VRM.hasStackSlot(VirtReg)) {
+        // This virtual register was assigned a physreg!
+        unsigned Phys = VRM.getPhys(VirtReg);
+        MF.setPhysRegUsed(Phys);
+        if (MO.isDef())
+          ReusedOperands.markClobbered(Phys);
+        MI.getOperand(i).setReg(Phys);
+        continue;
+      }
+      
+      // This virtual register is now known to be a spilled value.
+      if (!MO.isUse())
+        continue;  // Handle defs in the loop below (handle use&def here though)
+
+      bool doReMat = VRM.isReMaterialized(VirtReg);
+      int StackSlot = VRM.getStackSlot(VirtReg);
+      unsigned PhysReg;
+
+      // Check to see if this stack slot is available.
+      if ((PhysReg = Spills.getSpillSlotPhysReg(StackSlot))) {
+        // This spilled operand might be part of a two-address operand.  If this
+        // is the case, then changing it will necessarily require changing the 
+        // def part of the instruction as well.  However, in some cases, we
+        // aren't allowed to modify the reused register.  If none of these cases
+        // apply, reuse it.
+        bool CanReuse = true;
+        int ti = TID->getOperandConstraint(i, TOI::TIED_TO);
+        if (ti != -1 &&
+            MI.getOperand(ti).isReg() && 
+            MI.getOperand(ti).getReg() == VirtReg) {
+          // Okay, we have a two address operand.  We can reuse this physreg as
+          // long as we are allowed to clobber the value and there isn't an
+          // earlier def that has already clobbered the physreg.
+          CanReuse = Spills.canClobberPhysReg(StackSlot) &&
+            !ReusedOperands.isClobbered(PhysReg);
+        }
+        
+        if (CanReuse) {
+          // If this stack slot value is already available, reuse it!
+          if (StackSlot > VirtRegMap::MAX_STACK_SLOT)
+            DOUT << "Reusing RM#" << StackSlot-VirtRegMap::MAX_STACK_SLOT-1;
+          else
+            DOUT << "Reusing SS#" << StackSlot;
+          DOUT << " from physreg "
+               << MRI->getName(PhysReg) << " for vreg"
+               << VirtReg <<" instead of reloading into physreg "
+               << MRI->getName(VRM.getPhys(VirtReg)) << "\n";
+          MI.getOperand(i).setReg(PhysReg);
+
+          // The only technical detail we have is that we don't know that
+          // PhysReg won't be clobbered by a reloaded stack slot that occurs
+          // later in the instruction.  In particular, consider 'op V1, V2'.
+          // If V1 is available in physreg R0, we would choose to reuse it
+          // here, instead of reloading it into the register the allocator
+          // indicated (say R1).  However, V2 might have to be reloaded
+          // later, and it might indicate that it needs to live in R0.  When
+          // this occurs, we need to have information available that
+          // indicates it is safe to use R1 for the reload instead of R0.
+          //
+          // To further complicate matters, we might conflict with an alias,
+          // or R0 and R1 might not be compatible with each other.  In this
+          // case, we actually insert a reload for V1 in R1, ensuring that
+          // we can get at R0 or its alias.
+          ReusedOperands.addReuse(i, StackSlot, PhysReg,
+                                  VRM.getPhys(VirtReg), VirtReg);
+          if (ti != -1)
+            // Only mark it clobbered if this is a use&def operand.
+            ReusedOperands.markClobbered(PhysReg);
+          ++NumReused;
+          continue;
+        }
+        
+        // Otherwise we have a situation where we have a two-address instruction
+        // whose mod/ref operand needs to be reloaded.  This reload is already
+        // available in some register "PhysReg", but if we used PhysReg as the
+        // operand to our 2-addr instruction, the instruction would modify
+        // PhysReg.  This isn't cool if something later uses PhysReg and expects
+        // to get its initial value.
+        //
+        // To avoid this problem, and to avoid doing a load right after a store,
+        // we emit a copy from PhysReg into the designated register for this
+        // operand.
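+        //
+        // For example, if the tied operand of "r1 = add r1, x" is available in
+        // r0 but r0 must be preserved, we emit "r1 = copy r0" and let the add
+        // modify r1 instead of reloading the value from its stack slot.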
+        unsigned DesignatedReg = VRM.getPhys(VirtReg);
+        assert(DesignatedReg && "Must map virtreg to physreg!");
+
+        // Note that, if we reused a register for a previous operand, the
+        // register we want to reload into might not actually be
+        // available.  If this occurs, use the register indicated by the
+        // reuser.
+        if (ReusedOperands.hasReuses())
+          DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI, 
+                                    Spills, MaybeDeadStores, RegKills, KillOps);
+        
+        // If the mapped designated register is actually the physreg we have
+        // incoming, we don't need to insert a dead copy.
+        if (DesignatedReg == PhysReg) {
+          // If this stack slot value is already available, reuse it!
+          if (StackSlot > VirtRegMap::MAX_STACK_SLOT)
+            DOUT << "Reusing RM#" << StackSlot-VirtRegMap::MAX_STACK_SLOT-1;
+          else
+            DOUT << "Reusing SS#" << StackSlot;
+          DOUT << " from physreg " << MRI->getName(PhysReg) << " for vreg"
+               << VirtReg
+               << " instead of reloading into same physreg.\n";
+          MI.getOperand(i).setReg(PhysReg);
+          ReusedOperands.markClobbered(PhysReg);
+          ++NumReused;
+          continue;
+        }
+        
+        const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(VirtReg);
+        MF.setPhysRegUsed(DesignatedReg);
+        ReusedOperands.markClobbered(DesignatedReg);
+        MRI->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC);
+
+        MachineInstr *CopyMI = prior(MII);
+        UpdateKills(*CopyMI, RegKills, KillOps);
+
+        // This invalidates DesignatedReg.
+        Spills.ClobberPhysReg(DesignatedReg);
+        
+        Spills.addAvailable(StackSlot, &MI, DesignatedReg);
+        MI.getOperand(i).setReg(DesignatedReg);
+        DOUT << '\t' << *prior(MII);
+        ++NumReused;
+        continue;
+      }
+      
+      // Otherwise, reload it and remember that we have it.
+      PhysReg = VRM.getPhys(VirtReg);
+      assert(PhysReg && "Must map virtreg to physreg!");
+      const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(VirtReg);
+
+      // Note that, if we reused a register for a previous operand, the
+      // register we want to reload into might not actually be
+      // available.  If this occurs, use the register indicated by the
+      // reuser.
+      if (ReusedOperands.hasReuses())
+        PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, 
+                                    Spills, MaybeDeadStores, RegKills, KillOps);
+      
+      MF.setPhysRegUsed(PhysReg);
+      ReusedOperands.markClobbered(PhysReg);
+      if (doReMat) {
+        MRI->reMaterialize(MBB, &MI, PhysReg, VRM.getReMaterializedMI(VirtReg));
+        ++NumReMats;
+      } else {
+        MRI->loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+        ++NumLoads;
+      }
+      // This invalidates PhysReg.
+      Spills.ClobberPhysReg(PhysReg);
+
+      // Any stores to this stack slot are not dead anymore.
+      if (!doReMat)
+        MaybeDeadStores.erase(StackSlot);
+      Spills.addAvailable(StackSlot, &MI, PhysReg);
+      // Assumes this is the last use. IsKill will be unset if reg is reused
+      // unless it's a two-address operand.
+      if (TID->getOperandConstraint(i, TOI::TIED_TO) == -1)
+        MI.getOperand(i).setIsKill();
+      MI.getOperand(i).setReg(PhysReg);
+      UpdateKills(*prior(MII), RegKills, KillOps);
+      DOUT << '\t' << *prior(MII);
+    }
+
+    DOUT << '\t' << MI;
+
+    // If we have folded references to memory operands, make sure we clear all
+    // physical registers that may contain the value of the spilled virtual
+    // register.
+    for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+      DOUT << "Folded vreg: " << I->second.first << "  MR: "
+           << I->second.second;
+      unsigned VirtReg = I->second.first;
+      VirtRegMap::ModRef MR = I->second.second;
+      if (!VRM.hasStackSlot(VirtReg)) {
+        DOUT << ": No stack slot!\n";
+        continue;
+      }
+      int SS = VRM.getStackSlot(VirtReg);
+      DOUT << " - StackSlot: " << SS << "\n";
+      
+      // If this folded instruction is just a use, check to see if it's a
+      // straight load from the virt reg slot.
+      if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+        int FrameIdx;
+        if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+          if (FrameIdx == SS) {
+            // If this spill slot is available, turn it into a copy (or nothing)
+            // instead of leaving it as a load!
+            if (unsigned InReg = Spills.getSpillSlotPhysReg(SS)) {
+              DOUT << "Promoted Load To Copy: " << MI;
+              if (DestReg != InReg) {
+                MRI->copyRegToReg(MBB, &MI, DestReg, InReg,
+                                  MF.getSSARegMap()->getRegClass(VirtReg));
+                // Revisit the copy so we make sure to notice the effects of the
+                // operation on the destreg (either needing to RA it if it's 
+                // virtual or needing to clobber any values if it's physical).
+                NextMII = &MI;
+                --NextMII;  // backtrack to the copy.
+                BackTracked = true;
+              } else
+                DOUT << "Removing now-noop copy: " << MI;
+
+              VRM.RemoveFromFoldedVirtMap(&MI);
+              MBB.erase(&MI);
+              Erased = true;
+              goto ProcessNextInst;
+            }
+          }
+        }
+      }
+
+      // If this reference is not a use, any previous store is now dead.
+      // Otherwise, the store to this stack slot is not dead anymore.
+      std::map<int, MachineInstr*>::iterator MDSI = MaybeDeadStores.find(SS);
+      if (MDSI != MaybeDeadStores.end()) {
+        if (MR & VirtRegMap::isRef)   // Previous store is not dead.
+          MaybeDeadStores.erase(MDSI);
+        else {
+          // If we get here, the store is dead, nuke it now.
+          assert((MR & VirtRegMap::isMod) && "Can't be modref!");
+          DOUT << "Removed dead store:\t" << *MDSI->second;
+          InvalidateKills(*MDSI->second, RegKills, KillOps);
+          MBB.erase(MDSI->second);
+          VRM.RemoveFromFoldedVirtMap(MDSI->second);
+          MaybeDeadStores.erase(MDSI);
+          ++NumDSE;
+        }
+      }
+
+      // If the spill slot value is available, and this is a new definition of
+      // the value, the value is not available anymore.
+      if (MR & VirtRegMap::isMod) {
+        // Notice that the value in this stack slot has been modified.
+        Spills.ModifyStackSlot(SS);
+        
+        // If this is *just* a mod of the value, check to see if this is just a
+        // store to the spill slot (i.e. the spill got merged into the copy). If
+        // so, realize that the vreg is available now, and add the store to the
+        // MaybeDeadStore info.
+        int StackSlot;
+        if (!(MR & VirtRegMap::isRef)) {
+          if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+            assert(MRegisterInfo::isPhysicalRegister(SrcReg) &&
+                   "Src hasn't been allocated yet?");
+            // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
+            // this as a potentially dead store in case there is a subsequent
+            // store into the stack slot without a read from it.
+            MaybeDeadStores[StackSlot] = &MI;
+
+            // If the stack slot value was previously available in some other
+            // register, change it now.  Otherwise, make the value available
+            // in SrcReg.
+            Spills.addAvailable(StackSlot, &MI, SrcReg, false/*don't clobber*/);
+          }
+        }
+      }
+    }
+
+    // Process all of the spilled defs.
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (MO.isRegister() && MO.getReg() && MO.isDef()) {
+        unsigned VirtReg = MO.getReg();
+
+        if (!MRegisterInfo::isVirtualRegister(VirtReg)) {
+          // Check to see if this is a noop copy.  If so, eliminate the
+          // instruction before considering the dest reg to be changed.
+          unsigned Src, Dst;
+          if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
+            ++NumDCE;
+            DOUT << "Removing now-noop copy: " << MI;
+            MBB.erase(&MI);
+            Erased = true;
+            VRM.RemoveFromFoldedVirtMap(&MI);
+            Spills.disallowClobberPhysReg(VirtReg);
+            goto ProcessNextInst;
+          }
+          
+          // If it's not a no-op copy, it clobbers the value in the destreg.
+          Spills.ClobberPhysReg(VirtReg);
+          ReusedOperands.markClobbered(VirtReg);
+ 
+          // Check to see if this instruction is a load from a stack slot into
+          // a register.  If so, this provides the stack slot value in the reg.
+          int FrameIdx;
+          if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+            assert(DestReg == VirtReg && "Unknown load situation!");
+            
+            // Otherwise, if it wasn't available, remember that it is now!
+            Spills.addAvailable(FrameIdx, &MI, DestReg);
+            goto ProcessNextInst;
+          }
+            
+          continue;
+        }
+
+        // The only vregs left are stack slot definitions.
+        int StackSlot = VRM.getStackSlot(VirtReg);
+        const TargetRegisterClass *RC = MF.getSSARegMap()->getRegClass(VirtReg);
+
+        // If this def is part of a two-address operand, make sure to execute
+        // the store from the correct physical register.
+        unsigned PhysReg;
+        int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i);
+        if (TiedOp != -1)
+          PhysReg = MI.getOperand(TiedOp).getReg();
+        else {
+          PhysReg = VRM.getPhys(VirtReg);
+          if (ReusedOperands.isClobbered(PhysReg)) {
+            // Another def has taken the assigned physreg. It must have been a
+            // use&def which got it due to reuse. Undo the reuse!
+            PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, 
+                                    Spills, MaybeDeadStores, RegKills, KillOps);
+          }
+        }
+
+        MF.setPhysRegUsed(PhysReg);
+        ReusedOperands.markClobbered(PhysReg);
+        MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
+        DOUT << "Store:\t" << *next(MII);
+        MI.getOperand(i).setReg(PhysReg);
+
+        // If there is a dead store to this stack slot, nuke it now.
+        MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+        if (LastStore) {
+          DOUT << "Removed dead store:\t" << *LastStore;
+          ++NumDSE;
+          InvalidateKills(*LastStore, RegKills, KillOps);
+          MBB.erase(LastStore);
+          VRM.RemoveFromFoldedVirtMap(LastStore);
+        }
+        LastStore = next(MII);
+
+        // If the stack slot value was previously available in some other
+        // register, change it now.  Otherwise, make the value available
+        // in PhysReg.
+        Spills.ModifyStackSlot(StackSlot);
+        Spills.ClobberPhysReg(PhysReg);
+        Spills.addAvailable(StackSlot, LastStore, PhysReg);
+        ++NumStores;
+
+        // Check to see if this is a noop copy.  If so, eliminate the
+        // instruction before considering the dest reg to be changed.
+        {
+          unsigned Src, Dst;
+          if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
+            ++NumDCE;
+            DOUT << "Removing now-noop copy: " << MI;
+            MBB.erase(&MI);
+            Erased = true;
+            VRM.RemoveFromFoldedVirtMap(&MI);
+            UpdateKills(*LastStore, RegKills, KillOps);
+            goto ProcessNextInst;
+          }
+        }        
+      }
+    }
+  ProcessNextInst:
+    if (!Erased && !BackTracked)
+      for (MachineBasicBlock::iterator II = MI; II != NextMII; ++II)
+        UpdateKills(*II, RegKills, KillOps);
+    MII = NextMII;
+  }
+}
+
+
+llvm::Spiller* llvm::createSpiller() {
+  switch (SpillerOpt) {
+  default: assert(0 && "Unreachable!");
+  case local:
+    return new LocalSpiller();
+  case simple:
+    return new SimpleSpiller();
+  }
+}
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
new file mode 100644
index 0000000..b7cbe51c
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.h
@@ -0,0 +1,211 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map. This maps virtual registers to
+// physical registers and virtual registers to stack slots. It is created and
+// updated by a register allocator and then used by a machine code rewriter that
+// adds spill code and rewrites virtual into physical register references.
+//
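+// A rough usage sketch (names below are placeholders; the real allocators
+// differ in detail):
+//
+//   VirtRegMap VRM(MF);
+//   VRM.assignVirt2Phys(SomeVirtReg, SomePhysReg);      // vreg got a physreg
+//   int Slot = VRM.assignVirt2StackSlot(OtherVirtReg);  // vreg got spilled
+//   createSpiller()->runOnMachineFunction(MF, VRM);     // insert spill code
+//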
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Support/Streams.h"
+#include <map>
+
+namespace llvm {
+  class MachineInstr;
+  class TargetInstrInfo;
+
+  class VirtRegMap {
+  public:
+    enum {
+      NO_PHYS_REG = 0,
+      NO_STACK_SLOT = (1L << 30)-1,
+      MAX_STACK_SLOT = (1L << 18)-1
+    };
+
+    enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+    typedef std::multimap<MachineInstr*,
+                          std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+  private:
+    const TargetInstrInfo &TII;
+
+    MachineFunction &MF;
+    /// Virt2PhysMap - This is a virtual to physical register
+    /// mapping. Each virtual register is required to have an entry in
+    /// it, even spilled virtual registers (the register mapped to a
+    /// spilled virtual register is the temporary used to load it from
+    /// the stack).
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+    /// Virt2StackSlotMap - This is a virtual register to stack slot
+    /// mapping. Each spilled virtual register has an entry in it
+    /// which corresponds to the stack slot this register is spilled
+    /// at.
+    IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+    /// MI2VirtMap - This is a MachineInstr to virtual register
+    /// mapping. In the case of memory spill code being folded into
+    /// instructions, we need to know which virtual register was
+    /// read/written by this instruction.
+    MI2VirtMapTy MI2VirtMap;
+
+    /// ReMatMap - This is a virtual register to re-materialized instruction
+    /// mapping. Each virtual register whose definition is going to be
+    /// re-materialized has an entry in it.
+    std::map<unsigned, const MachineInstr*> ReMatMap;
+
+    /// ReMatId - Instead of assigning a stack slot to a virtual register that
+    /// is going to be rematerialized, a unique id is assigned. This keeps
+    /// track of the highest id used so far. Note that ids start at (1<<18) to
+    /// avoid conflicts with stack slot numbers.
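+    /// (The spiller prints these ids as "RM#n", with n = id - MAX_STACK_SLOT - 1.)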
+    int ReMatId;
+
+    VirtRegMap(const VirtRegMap&);     // DO NOT IMPLEMENT
+    void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+  public:
+    VirtRegMap(MachineFunction &mf);
+
+    void grow();
+
+    /// @brief returns true if the specified virtual register is
+    /// mapped to a physical register
+    bool hasPhys(unsigned virtReg) const {
+      return getPhys(virtReg) != NO_PHYS_REG;
+    }
+
+    /// @brief returns the physical register mapped to the specified
+    /// virtual register
+    unsigned getPhys(unsigned virtReg) const {
+      assert(MRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2PhysMap[virtReg];
+    }
+
+    /// @brief creates a mapping for the specified virtual register to
+    /// the specified physical register
+    void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+      assert(MRegisterInfo::isVirtualRegister(virtReg) &&
+             MRegisterInfo::isPhysicalRegister(physReg));
+      assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+             "attempt to assign physical register to already mapped "
+             "virtual register");
+      Virt2PhysMap[virtReg] = physReg;
+    }
+
+    /// @brief clears the specified virtual register's physical
+    /// register mapping
+    void clearVirt(unsigned virtReg) {
+      assert(MRegisterInfo::isVirtualRegister(virtReg));
+      assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+             "attempt to clear a not assigned virtual register");
+      Virt2PhysMap[virtReg] = NO_PHYS_REG;
+    }
+
+    /// @brief clears all virtual to physical register mappings
+    void clearAllVirt() {
+      Virt2PhysMap.clear();
+      grow();
+    }
+
+    /// @brief returns true if the specified virtual register is
+    /// mapped to a stack slot
+    bool hasStackSlot(unsigned virtReg) const {
+      return getStackSlot(virtReg) != NO_STACK_SLOT;
+    }
+
+    /// @brief returns the stack slot mapped to the specified virtual
+    /// register
+    int getStackSlot(unsigned virtReg) const {
+      assert(MRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2StackSlotMap[virtReg];
+    }
+
+    /// @brief create a mapping for the specified virtual register to
+    /// the next available stack slot
+    int assignVirt2StackSlot(unsigned virtReg);
+    /// @brief create a mapping for the specified virtual register to
+    /// the specified stack slot
+    void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+    /// @brief assign a unique re-materialization id to the specified
+    /// virtual register.
+    int assignVirtReMatId(unsigned virtReg);
+
+    /// @brief returns true if the specified virtual register is being
+    /// re-materialized.
+    bool isReMaterialized(unsigned virtReg) const {
+      return ReMatMap.count(virtReg) != 0;
+    }
+
+    /// @brief returns the original machine instruction being re-issued
+    /// to re-materialize the specified virtual register.
+    const MachineInstr *getReMaterializedMI(unsigned virtReg) {
+      return ReMatMap[virtReg];
+    }
+
+    /// @brief records that the specified virtual register will be
+    /// re-materialized and the original instruction which will be re-issued
+    /// for this purpose.
+    void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+      ReMatMap[virtReg] = def;
+    }
+
+    /// @brief Updates information about the specified virtual register's value
+    /// folded into the newMI machine instruction. The OpNum argument indicates
+    /// the operand number of OldMI that is folded.
+    void virtFolded(unsigned VirtReg, MachineInstr *OldMI, unsigned OpNum,
+                    MachineInstr *NewMI);
+
+    /// @brief returns the virtual registers' values folded in memory
+    /// operands of this instruction
+    std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+    getFoldedVirts(MachineInstr* MI) const {
+      return MI2VirtMap.equal_range(MI);
+    }
+    
+    /// RemoveFromFoldedVirtMap - If the specified machine instruction is in
+    /// the folded instruction map, remove its entry from the map.
+    void RemoveFromFoldedVirtMap(MachineInstr *MI) {
+      MI2VirtMap.erase(MI);
+    }
+
+    void print(std::ostream &OS) const;
+    void print(std::ostream *OS) const { if (OS) print(*OS); }
+    void dump() const;
+  };
+
+  inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+  inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+
+  /// Spiller interface: Implementations of this interface assign spilled
+  /// virtual registers to stack slots, rewriting the code.
+  struct Spiller {
+    virtual ~Spiller();
+    virtual bool runOnMachineFunction(MachineFunction &MF,
+                                      VirtRegMap &VRM) = 0;
+  };
+
+  /// createSpiller - Create and return a spiller object, as specified on the
+  /// command line.
+  Spiller* createSpiller();
+
+} // End llvm namespace
+
+#endif