[X86] Add k-mask conversion and load/store instrinsics to match gcc and icc.

This adds:
_cvtmask8_u32, _cvtmask16_u32, _cvtmask32_u32, _cvtmask64_u64
_cvtu32_mask8, _cvtu32_mask16, _cvtu32_mask32, _cvtu64_mask64
_load_mask8, _load_mask16, _load_mask32, _load_mask64
_store_mask8, _store_mask16, _store_mask32, _store_mask64

These are currently missing from the Intel Intrinsics Guide webpage.

llvm-svn: 341251
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 50e9b5e..58f8b7d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10137,6 +10137,17 @@
     return Builder.CreateBitCast(Builder.CreateNot(Res),
                                  Ops[0]->getType());
   }
+  case X86::BI__builtin_ia32_kmovb:
+  case X86::BI__builtin_ia32_kmovw:
+  case X86::BI__builtin_ia32_kmovd:
+  case X86::BI__builtin_ia32_kmovq: {
+    // Bitcast to vXi1 type and then back to integer. This gets the mask
+    // register type into the IR, but might be optimized out depending on
+    // what's around it.
+    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+    return Builder.CreateBitCast(Res, Ops[0]->getType());
+  }
 
   case X86::BI__builtin_ia32_kunpckdi:
   case X86::BI__builtin_ia32_kunpcksi: