Removes references to ah.

AH is a thorn in the flesh for our X86-64 backend. The assembler was designed to always encode the low 8-bit registers, so %ah would become %spl. While it is true we **could** force %spl to always be encoded as %ah, that would not work if the instruction has a rex prefix.

This CL removes references to %ah from TargetX86Base. There used to be 2 uses of ah in the target lowering:

1) To zero-extend %al before an unsigned div:

    mov <<src0>>, %al
    mov $0, %ah
    div <<src1>>

This pattern has been changed to

    xor %eax, %eax
    mov <<src0>>, %al
    div <<src1>>

2) To access the 8-bit remainder for 8-bit division:

    mov %ah, <<dest>>

This pattern has been changed to

    shr $8, %eax
    mov %al, <<dest>>

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1260163003.

commit: 448c16f0f6905460a3d27e728ed0f14a1c08ff69 [log] [tgz]
author: John Porto <jpp@chromium.org> Tue Jul 28 16:56:29 2015 -0700
committer: John Porto <jpp@chromium.org> Tue Jul 28 16:56:29 2015 -0700
tree: ccf870b112136ce897482d95ac50ef4f9f440f46
parent: 2fea26cae5a6b140c67e18aec3dcf645a16694d5 [diff] [blame]
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 77048b0..d9cc5e4 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h

@@ -1850,12 +1850,21 @@
     // immediates as the operand.
     Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
     if (isByteSizedArithType(Dest->getType())) {
-      Variable *T_ah = nullptr;
-      Constant *Zero = Ctx->getConstantZero(IceType_i8);
+      // For 8-bit unsigned division we need to zero-extend al into ah. A mov
+      // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
+      // assembler refuses to encode %ah (encoding %spl with a REX prefix
+      // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
+      // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
+      // %d[lh], which means the X86 target lowering (and the register
+      // allocator) would have to be aware of this restriction. For now, we
+      // simply zero %eax completely, and move the dividend into %al.
+      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+      Context.insert(InstFakeDef::create(Func, T_eax));
+      _xor(T_eax, T_eax);
       _mov(T, Src0, Traits::RegisterSet::Reg_eax);
-      _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);
-      _div(T, Src1, T_ah);
+      _div(T, Src1, T);
       _mov(Dest, T);
+      Context.insert(InstFakeUse::create(Func, T_eax));
     } else {
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
       _mov(T, Src0, Traits::RegisterSet::Reg_eax);
@@ -1917,12 +1926,21 @@
   case InstArithmetic::Urem:
     Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
     if (isByteSizedArithType(Dest->getType())) {
-      Variable *T_ah = nullptr;
-      Constant *Zero = Ctx->getConstantZero(IceType_i8);
+      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+      Context.insert(InstFakeDef::create(Func, T_eax));
+      _xor(T_eax, T_eax);
       _mov(T, Src0, Traits::RegisterSet::Reg_eax);
-      _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);
-      _div(T_ah, Src1, T);
-      _mov(Dest, T_ah);
+      Variable *T_al = makeReg(IceType_i8, Traits::RegisterSet::Reg_eax);
+      _div(T_al, Src1, T);
+      // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
+      // mov %ah, %al because it would make x86-64 codegen more complicated. If
+      // this ever becomes a problem we can introduce a pseudo rem instruction
+      // that returns the remainder in %al directly (and uses a mov for copying
+      // %ah to %al.)
+      static constexpr uint8_t AlSizeInBits = 8;
+      _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
+      _mov(Dest, T_al);
+      Context.insert(InstFakeUse::create(Func, T_eax));
     } else {
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
       _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
@@ -1974,12 +1992,21 @@
     }
     Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
     if (isByteSizedArithType(Dest->getType())) {
-      Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah);
       _mov(T, Src0, Traits::RegisterSet::Reg_eax);
+      // T is %al.
       _cbwdq(T, T);
-      Context.insert(InstFakeDef::create(Func, T_ah));
-      _idiv(T_ah, Src1, T);
-      _mov(Dest, T_ah);
+      _idiv(T, Src1, T);
+      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+      Context.insert(InstFakeDef::create(Func, T_eax));
+      // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
+      // mov %ah, %al because it would make x86-64 codegen more complicated. If
+      // this ever becomes a problem we can introduce a pseudo rem instruction
+      // that returns the remainder in %al directly (and uses a mov for copying
+      // %ah to %al.)
+      static constexpr uint8_t AlSizeInBits = 8;
+      _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
+      _mov(Dest, T);
+      Context.insert(InstFakeUse::create(Func, T_eax));
     } else {
       T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
       _mov(T, Src0, Traits::RegisterSet::Reg_eax);
commit	448c16f0f6905460a3d27e728ed0f14a1c08ff69	[log] [tgz]
author	John Porto <jpp@chromium.org>	Tue Jul 28 16:56:29 2015 -0700
committer	John Porto <jpp@chromium.org>	Tue Jul 28 16:56:29 2015 -0700
tree	ccf870b112136ce897482d95ac50ef4f9f440f46
parent	2fea26cae5a6b140c67e18aec3dcf645a16694d5 [diff] [blame]