Use movss to implement insertelement when the vector has 4 elements and the index is 0.

This avoids the pair of shufps instructions that the previous lowering
emitted.  Instead, we use movss to copy the element to be inserted
into the lower 32 bits of the destination.

Define InstX8632Movss as a Binop, the class to which it properly
belongs.

BUG=none
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/412353005
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 6760057..ddea6b5 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -552,7 +552,6 @@
 typedef InstX8632Unaryop<InstX8632::Bsr> InstX8632Bsr;
 typedef InstX8632Unaryop<InstX8632::Lea> InstX8632Lea;
 typedef InstX8632Unaryop<InstX8632::Movd> InstX8632Movd;
-typedef InstX8632Unaryop<InstX8632::Movss> InstX8632Movss;
 typedef InstX8632Unaryop<InstX8632::Sqrtss> InstX8632Sqrtss;
 typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
 typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
@@ -586,6 +585,13 @@
 typedef InstX8632Binop<InstX8632::Psra> InstX8632Psra;
 typedef InstX8632Binop<InstX8632::Pcmpeq> InstX8632Pcmpeq;
 typedef InstX8632Binop<InstX8632::Pcmpgt> InstX8632Pcmpgt;
+// TODO: movss is only a binary operation when the source and dest
+// operands are both registers.  In other cases, it behaves like a copy
+// (mov-like) operation.  Eventually, InstX8632Movss should assert that
+// both its source and dest operands are registers, and the lowering
+// code should use _mov instead of _movss in cases where a copy
+// operation is intended.
+typedef InstX8632Binop<InstX8632::Movss> InstX8632Movss;
 typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
 typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
 typedef InstX8632Ternop<InstX8632::Pinsrw> InstX8632Pinsrw;