diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1c93477..8bfa574 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -620,7 +620,7 @@
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
-    setOperationAction(ISD::VSETCC,             MVT::v4f32, Legal);
+    setOperationAction(ISD::VSETCC,             MVT::v4f32, Custom);
   }
 
   if (Subtarget->hasSSE2()) {
@@ -646,11 +646,10 @@
     setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
     setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
 
-    setOperationAction(ISD::VSETCC,             MVT::v2f64, Legal);
-    setOperationAction(ISD::VSETCC,             MVT::v16i8, Legal);
-    setOperationAction(ISD::VSETCC,             MVT::v8i16, Legal);
-    setOperationAction(ISD::VSETCC,             MVT::v4i32, Legal);
-    setOperationAction(ISD::VSETCC,             MVT::v2i64, Legal);
+    setOperationAction(ISD::VSETCC,             MVT::v2f64, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v16i8, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v8i16, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v4i32, Custom);
 
     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
@@ -728,6 +727,10 @@
     }
   }
 
+  if (Subtarget->hasSSE42()) {
+    setOperationAction(ISD::VSETCC,             MVT::v2i64, Custom);
+  }
+  
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
@@ -4685,6 +4688,138 @@
   }
 }
 
+/// LowerVSETCC - Custom lower a vector comparison (ISD::VSETCC) node.
+///
+/// Operands 0 and 1 of Op are the values being compared and operand 2 is the
+/// condition code.  Floating point comparisons are emitted as
+/// X86ISD::CMPPS/CMPPD with the SSE comparison predicate as an i8 constant
+/// operand.  Integer comparisons are emitted as X86ISD::PCMPEQ*/PCMPGT*,
+/// combined with an operand swap, a sign bit flip of both inputs and/or a
+/// logical-not of the result as required.  All nodes are created with the
+/// value type of Op.
+SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) {
+  SDOperand Op0 = Op.getOperand(0);
+  SDOperand Op1 = Op.getOperand(1);
+  SDOperand CC = Op.getOperand(2);
+  MVT VT = Op.getValueType();
+  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+  bool isFP = Op1.getValueType().isFloatingPoint();
+
+  if (isFP) {
+    // 8 is not an SSE predicate; it marks condition codes that need two
+    // comparisons.
+    unsigned SSECC = 8;
+    unsigned Opc = Op0.getValueType() == MVT::v4f32 ? X86ISD::CMPPS :
+                                                      X86ISD::CMPPD;
+    bool Swap = false;
+
+    // SSE predicates: 0 EQ, 1 LT, 2 LE, 3 UNORD, 4 NEQ, 5 NLT, 6 NLE, 7 ORD.
+    // EQ/LT/LE are false and NEQ/NLT/NLE are true when an input is a NaN.
+    switch (SetCCOpcode) {
+    default: break;
+    case ISD::SETOEQ:
+    case ISD::SETEQ:  SSECC = 0; break;
+    case ISD::SETOGT:
+    case ISD::SETGT:  Swap = true; // Fallthrough
+    case ISD::SETLT:
+    case ISD::SETOLT: SSECC = 1; break;
+    case ISD::SETOGE:
+    case ISD::SETGE:  Swap = true; // Fallthrough
+    case ISD::SETLE:
+    case ISD::SETOLE: SSECC = 2; break;
+    case ISD::SETUO:  SSECC = 3; break;
+    case ISD::SETUNE:
+    case ISD::SETNE:  SSECC = 4; break;
+    case ISD::SETULE: Swap = true; // Fallthrough
+    case ISD::SETUGE: SSECC = 5; break;
+    case ISD::SETULT: Swap = true; // Fallthrough
+    case ISD::SETUGT: SSECC = 6; break;
+    case ISD::SETO:   SSECC = 7; break;
+    }
+    if (Swap)
+      std::swap(Op0, Op1);
+
+    // In the two special cases we can't handle, emit two comparisons.
+    if (SSECC == 8) {
+      if (SetCCOpcode == ISD::SETUEQ) {
+        // Unordered or equal: UNORD | EQ.
+        SDOperand UNORD = DAG.getNode(Opc, VT, Op0, Op1,
+                                      DAG.getConstant(3, MVT::i8));
+        SDOperand EQ = DAG.getNode(Opc, VT, Op0, Op1,
+                                   DAG.getConstant(0, MVT::i8));
+        return DAG.getNode(ISD::OR, VT, UNORD, EQ);
+      }
+
+      assert(SetCCOpcode == ISD::SETONE && "Illegal FP comparison");
+
+      // Ordered and not equal: ORD & NEQ.  NEQ alone is also true for NaNs.
+      SDOperand ORD = DAG.getNode(Opc, VT, Op0, Op1,
+                                  DAG.getConstant(7, MVT::i8));
+      SDOperand NEQ = DAG.getNode(Opc, VT, Op0, Op1,
+                                  DAG.getConstant(4, MVT::i8));
+      return DAG.getNode(ISD::AND, VT, ORD, NEQ);
+    }
+    // Handle all other FP comparisons here.
+    return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+  }
+
+  // We are handling one of the integer comparisons here.  Since SSE only has
+  // GT and EQ comparisons for integer, swapping operands and multiple
+  // operations may be required for some comparisons.
+  unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+  bool Swap = false, Invert = false, FlipSigns = false;
+
+  switch (VT.getSimpleVT()) {
+  default: break;
+  case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+  case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+  case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
+  case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
+  }
+
+  switch (SetCCOpcode) {
+  default: break;
+  case ISD::SETNE:  Invert = true; // Fallthrough
+  case ISD::SETEQ:  Opc = EQOpc; break;
+  case ISD::SETLT:  Swap = true; // Fallthrough
+  case ISD::SETGT:  Opc = GTOpc; break;
+  case ISD::SETGE:  Swap = true; // Fallthrough
+  case ISD::SETLE:  Opc = GTOpc; Invert = true; break;
+  case ISD::SETULT: Swap = true; // Fallthrough
+  case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+  case ISD::SETUGE: Swap = true; // Fallthrough
+  case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+  }
+  // Opc stays 0 for an unexpected vector type or condition code.
+  assert(Opc && "Illegal integer vector comparison");
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // Since SSE has no unsigned integer comparisons, we need to flip the sign
+  // bits of the inputs before performing those operations.
+  if (FlipSigns) {
+    MVT EltVT = VT.getVectorElementType();
+    SDOperand SignBit = DAG.getConstant(EltVT.getIntegerVTSignBit(), EltVT);
+    std::vector<SDOperand> SignBits(VT.getVectorNumElements(), SignBit);
+    SDOperand SignVec = DAG.getNode(ISD::BUILD_VECTOR, VT, &SignBits[0],
+                                    SignBits.size());
+    Op0 = DAG.getNode(ISD::XOR, VT, Op0, SignVec);
+    Op1 = DAG.getNode(ISD::XOR, VT, Op1, SignVec);
+  }
+
+  SDOperand Result = DAG.getNode(Opc, VT, Op0, Op1);
+
+  // If the logical-not of the result is required, perform that now.
+  if (Invert) {
+    MVT EltVT = VT.getVectorElementType();
+    SDOperand NegOne = DAG.getConstant(EltVT.getIntegerVTBitMask(), EltVT);
+    std::vector<SDOperand> NegOnes(VT.getVectorNumElements(), NegOne);
+    SDOperand NegOneV = DAG.getNode(ISD::BUILD_VECTOR, VT, &NegOnes[0],
+                                    NegOnes.size());
+    Result = DAG.getNode(ISD::XOR, VT, Result, NegOneV);
+  }
+  return Result;
+}
 
 SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
   bool addTest = true;
@@ -5728,6 +5838,7 @@
   case ISD::FNEG:               return LowerFNEG(Op, DAG);
   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
   case ISD::SETCC:              return LowerSETCC(Op, DAG);
+  case ISD::VSETCC:             return LowerVSETCC(Op, DAG);
   case ISD::SELECT:             return LowerSELECT(Op, DAG);
   case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
@@ -5819,6 +5930,16 @@
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
   case X86ISD::VSRL:               return "X86ISD::VSRL";
+  case X86ISD::CMPPD:              return "X86ISD::CMPPD";
+  case X86ISD::CMPPS:              return "X86ISD::CMPPS";
+  case X86ISD::PCMPEQB:            return "X86ISD::PCMPEQB";
+  case X86ISD::PCMPEQW:            return "X86ISD::PCMPEQW";
+  case X86ISD::PCMPEQD:            return "X86ISD::PCMPEQD";
+  case X86ISD::PCMPEQQ:            return "X86ISD::PCMPEQQ";
+  case X86ISD::PCMPGTB:            return "X86ISD::PCMPGTB";
+  case X86ISD::PCMPGTW:            return "X86ISD::PCMPGTW";
+  case X86ISD::PCMPGTD:            return "X86ISD::PCMPGTD";
+  case X86ISD::PCMPGTQ:            return "X86ISD::PCMPGTQ";
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 648fe3b..125a30d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -208,7 +208,14 @@
       VZEXT_LOAD,
 
       // VSHL, VSRL - Vector logical left / right shift.
-      VSHL, VSRL
+      VSHL, VSRL,
+      
+      // CMPPD, CMPPS - Vector double/float comparison.
+      CMPPD, CMPPS,
+      
+      // PCMP* - Vector integer comparisons.
+      PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
+      PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ
     };
   }
 
@@ -521,6 +528,7 @@
     SDOperand LowerFNEG(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG);
+    SDOperand LowerVSETCC(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 8d098f1..d5f0efb 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -215,6 +215,12 @@
             list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
 
+// SSE4.2 Instruction Templates:
+//
+//   SS428I - SSE 4.2 instructions with T8 prefix; requires HasSSE42.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+             list<dag> pattern>
+      : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
 
 // X86-64 Instruction templates...
 //
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 16a5202..e11967d 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -20,6 +20,8 @@
 
 def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
                                             SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+                                       SDTCisFP<1>, SDTCisVT<3, i8>]>;
 
 def X86fmin    : SDNode<"X86ISD::FMIN",      SDTFPBinOp>;
 def X86fmax    : SDNode<"X86ISD::FMAX",      SDTFPBinOp>;
@@ -53,6 +55,16 @@
                         [SDNPHasChain, SDNPMayLoad]>;
 def X86vshl    : SDNode<"X86ISD::VSHL",      SDTIntShiftOp>;
 def X86vshr    : SDNode<"X86ISD::VSRL",      SDTIntShiftOp>;
+def X86cmpps   : SDNode<"X86ISD::CMPPS",     SDTX86VFCMP>;
+def X86cmppd   : SDNode<"X86ISD::CMPPD",     SDTX86VFCMP>;
+def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
 
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
@@ -163,22 +175,6 @@
   return getI32Imm(N->getValue() >> 3);
 }]>;
 
-def SSE_CC_imm  : SDNodeXForm<cond, [{
-  unsigned Val;
-  switch (N->get()) {
-  default: Val = 0; assert(0 && "Unexpected CondCode"); break;
-  case ISD::SETOEQ: Val = 0; break;
-  case ISD::SETOLT: Val = 1; break;
-  case ISD::SETOLE: Val = 2; break;
-  case ISD::SETUO:  Val = 3; break;
-  case ISD::SETONE: Val = 4; break;
-  case ISD::SETOGE: Val = 5; break;
-  case ISD::SETOGT: Val = 6; break;
-  case ISD::SETO:   Val = 7; break;
-  }
-  return getI8Imm(Val);
-}]>;
-
 // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
 // SHUFP* etc. imm.
 def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
@@ -896,10 +892,10 @@
                   [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                             (memop addr:$src), imm:$cc))]>;
 }
-def : Pat<(v4i32 (vsetcc (v4f32 VR128:$src1), VR128:$src2, cond:$cc)),
-          (CMPPSrri VR128:$src1, VR128:$src2, (SSE_CC_imm cond:$cc))>;
-def : Pat<(v4i32 (vsetcc (v4f32 VR128:$src1), (memop addr:$src2), cond:$cc)),
-          (CMPPSrmi VR128:$src1, addr:$src2, (SSE_CC_imm cond:$cc))>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+          (CMPPSrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+          (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
 
 // Shuffle and unpack instructions
 let Constraints = "$src1 = $dst" in {
@@ -1725,10 +1721,10 @@
                   [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
                                                  (memop addr:$src), imm:$cc))]>;
 }
-def : Pat<(v2i64 (vsetcc (v2f64 VR128:$src1), VR128:$src2, cond:$cc)),
-          (CMPPDrri VR128:$src1, VR128:$src2, (SSE_CC_imm cond:$cc))>;
-def : Pat<(v2i64 (vsetcc (v2f64 VR128:$src1), (memop addr:$src2), cond:$cc)),
-          (CMPPDrmi VR128:$src1, addr:$src2, (SSE_CC_imm cond:$cc))>;
+def : Pat<(v2i64 (X86cmppd VR128:$src1, VR128:$src2, imm:$cc)),
+          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd VR128:$src1, (memop addr:$src2), imm:$cc)),
+          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
 
 // Shuffle and unpack instructions
 let Constraints = "$src1 = $dst" in {
@@ -1994,30 +1990,30 @@
 defm PCMPGTW  : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
 defm PCMPGTD  : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
 
-def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), VR128:$src2, SETEQ)),
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
           (PCMPEQBrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), (memop addr:$src2), SETEQ)),
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
           (PCMPEQBrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), VR128:$src2, SETEQ)),
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
           (PCMPEQWrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), (memop addr:$src2), SETEQ)),
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
           (PCMPEQWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), VR128:$src2, SETEQ)),
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
           (PCMPEQDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), (memop addr:$src2), SETEQ)),
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
           (PCMPEQDrm VR128:$src1, addr:$src2)>;
 
-def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), VR128:$src2, SETGT)),
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
           (PCMPGTBrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), (memop addr:$src2), SETGT)),
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
           (PCMPGTBrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), VR128:$src2, SETGT)),
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
           (PCMPGTWrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), (memop addr:$src2), SETGT)),
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
           (PCMPGTWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), VR128:$src2, SETGT)),
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
           (PCMPGTDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), (memop addr:$src2), SETGT)),
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
           (PCMPGTDrm VR128:$src1, addr:$src2)>;
 
 
@@ -3258,6 +3254,11 @@
 defm PMAXUW       : SS41I_binop_rm_int<0x3E, "pmaxuw",
                                        int_x86_sse41_pmaxuw, 1>;
 
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+          (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+          (PCMPEQQrm VR128:$src1, addr:$src2)>;
+
 
 /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
 let Constraints = "$src1 = $dst" in {
@@ -3555,3 +3556,32 @@
 def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "movntdqa\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+
+/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
+let Constraints = "$src1 = $dst" in {
+  multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+                                Intrinsic IntId128, bit Commutable = 0> {
+    def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
+                   (ins VR128:$src1, VR128:$src2),
+                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                   [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+                   OpSize {
+      let isCommutable = Commutable;
+    }
+    def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
+                   (ins VR128:$src1, i128mem:$src2),
+                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                   [(set VR128:$dst,
+                     (IntId128 VR128:$src1,
+                      (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+  }
+}
+
+// PCMPGTQ is an SSE 4.2 instruction: instantiate it from the SS42I multiclass
+// so that it is built on SS428I and therefore predicated on HasSSE42.
+defm PCMPGTQ      : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+          (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+          (PCMPGTQrm VR128:$src1, addr:$src2)>;
