| /* |
| * Copyright (c) 2016, Intel Corporation. |
| * Intel Math Library (LIBM) Source Code |
| * |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| * |
| */ |
| |
| #include "precompiled.hpp" |
| #include "asm/assembler.hpp" |
| #include "asm/assembler.inline.hpp" |
| #include "runtime/stubRoutines.hpp" |
| #include "macroAssembler_x86.hpp" |
| |
| #ifdef _MSC_VER |
| #define ALIGNED_(x) __declspec(align(x)) |
| #else |
| #define ALIGNED_(x) __attribute__ ((aligned(x))) |
| #endif |
| |
| /******************************************************************************/ |
| // ALGORITHM DESCRIPTION - COS() |
| // --------------------- |
| // |
| // 1. RANGE REDUCTION |
| // |
| // We perform an initial range reduction from X to r with |
| // |
| // X =~= N * pi/32 + r |
| // |
| // so that |r| <= pi/64 + epsilon. We restrict inputs to those |
| // where |N| <= 932560. Beyond this, the range reduction is |
| // insufficiently accurate. For extremely small inputs, |
| // denormalization can occur internally, impacting performance. |
| // This means that the main path is actually only taken for |
| // 2^-252 <= |X| < 90112. |
| // |
| // To avoid branches, we perform the range reduction to full |
| // accuracy each time. |
| // |
| // X - N * (P_1 + P_2 + P_3) |
| // |
| // where P_1 and P_2 are 32-bit numbers (so multiplication by N |
| // is exact) and P_3 is a 53-bit number. Together, these |
| // approximate pi well enough for all cases in the restricted |
| // range. |
| // |
| // The main reduction sequence is: |
| // |
| // y = 32/pi * x |
| // N = integer(y) |
| // (computed by adding and subtracting off SHIFTER) |
| // |
| // m_1 = N * P_1 |
| // m_2 = N * P_2 |
| // r_1 = x - m_1 |
| // r = r_1 - m_2 |
| // (this r can be used for most of the calculation) |
| // |
| // c_1 = r_1 - r |
| // m_3 = N * P_3 |
| // c_2 = c_1 - m_2 |
| // c = c_2 - m_3 |
| // |
| // 2. MAIN ALGORITHM |
| // |
| // The algorithm uses a table lookup based on B = M * pi / 32 |
| // where M = N mod 64. The stored values are: |
| // sigma closest power of 2 to cos(B) |
| // C_hl 53-bit cos(B) - sigma |
| // S_hi + S_lo 2 * 53-bit sin(B) |
| // |
| // The computation is organized as follows: |
| // |
| // sin(B + r + c) = [sin(B) + sigma * r] + |
| // r * (cos(B) - sigma) + |
| // sin(B) * [cos(r + c) - 1] + |
| // cos(B) * [sin(r + c) - r] |
| // |
| // which is approximately: |
| // |
| // [S_hi + sigma * r] + |
| // C_hl * r + |
| // S_lo + S_hi * [(cos(r) - 1) - r * c] + |
| // (C_hl + sigma) * [(sin(r) - r) + c] |
| // |
| // and this is what is actually computed. We separate this sum |
| // into four parts: |
| // |
| // hi + med + pols + corr |
| // |
| // where |
| // |
| // hi = S_hi + sigma r |
| // med = C_hl * r |
| // pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) |
| // corr = S_lo + c * ((C_hl + sigma) - S_hi * r) |
| // |
| // 3. POLYNOMIAL |
| // |
| // The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * |
| // (sin(r) - r) can be rearranged freely, since it is quite |
| // small, so we exploit parallelism to the fullest. |
| // |
| // psc4 = SC_4 * r_1 |
| // msc4 = psc4 * r |
| // r2 = r * r |
| // msc2 = SC_2 * r2 |
| // r4 = r2 * r2 |
| // psc3 = SC_3 + msc4 |
| // psc1 = SC_1 + msc2 |
| // msc3 = r4 * psc3 |
| // sincospols = psc1 + msc3 |
| // pols = sincospols * |
| // <S_hi * r^2 | (C_hl + sigma) * r^3> |
| // |
| // 4. CORRECTION TERM |
| // |
| // This is where the "c" component of the range reduction is |
| // taken into account; recall that just "r" is used for most of |
| // the calculation. |
| // |
| // -c = m_3 - c_2 |
| // -d = S_hi * r - (C_hl + sigma) |
| // corr = -c * -d + S_lo |
| // |
| // 5. COMPENSATED SUMMATIONS |
| // |
| // The two successive compensated summations add up the high |
| // and medium parts, leaving just the low parts to add up at |
| // the end. |
| // |
| // rs = sigma * r |
| // res_int = S_hi + rs |
| // k_0 = S_hi - res_int |
| // k_2 = k_0 + rs |
| // med = C_hl * r |
| // res_hi = res_int + med |
| // k_1 = res_int - res_hi |
| // k_3 = k_1 + med |
| // |
| // 6. FINAL SUMMATION |
| // |
| // We now add up all the small parts: |
| // |
| // res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 |
| // |
| // Now the overall result is just: |
| // |
| // res_hi + res_lo |
| // |
| // 7. SMALL ARGUMENTS |
| // |
| // Inputs with |X| < 2^-252 are treated specially as |
| // 1 - |x|. |
| // |
| // Special cases: |
| // cos(NaN) = quiet NaN, and raise invalid exception |
| // cos(INF) = NaN and raise invalid exception |
| // cos(0) = 1 |
| // |
| /******************************************************************************/ |
| |
| #ifdef _LP64 |
| // The 64 bit code is at most SSE2 compliant |
| ALIGNED_(8) juint _ONE[] = |
| { |
| 0x00000000UL, 0x3ff00000UL |
| }; |
| void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) { |
| |
| Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; |
| Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; |
| Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; |
| Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start; |
| |
| assert_different_registers(r8, r9, r10, r11, eax, ecx, edx); |
| |
| address ONEHALF = StubRoutines::x86::_ONEHALF_addr(); |
| address P_2 = StubRoutines::x86::_P_2_addr(); |
| address SC_4 = StubRoutines::x86::_SC_4_addr(); |
| address Ctable = StubRoutines::x86::_Ctable_addr(); |
| address SC_2 = StubRoutines::x86::_SC_2_addr(); |
| address SC_3 = StubRoutines::x86::_SC_3_addr(); |
| address SC_1 = StubRoutines::x86::_SC_1_addr(); |
| address PI_INV_TABLE = StubRoutines::x86::_PI_INV_TABLE_addr(); |
| address PI_4 = (address)StubRoutines::x86::_PI_4_addr(); |
| address PI32INV = (address)StubRoutines::x86::_PI32INV_addr(); |
| address SIGN_MASK = (address)StubRoutines::x86::_SIGN_MASK_addr(); |
| address P_1 = (address)StubRoutines::x86::_P_1_addr(); |
| address P_3 = (address)StubRoutines::x86::_P_3_addr(); |
| address ONE = (address)_ONE; |
| address NEG_ZERO = (address)StubRoutines::x86::_NEG_ZERO_addr(); |
| |
| bind(start); |
| push(rbx); |
| subq(rsp, 16); |
| movsd(Address(rsp, 8), xmm0); |
| |
| bind(B1_2); |
| movl(eax, Address(rsp, 12)); |
| movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL |
| andl(eax, 2147418112); |
| subl(eax, 808452096); |
| cmpl(eax, 281346048); |
| jcc(Assembler::above, L_2TAG_PACKET_0_0_1); |
| mulsd(xmm1, xmm0); |
| movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL |
| movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL |
| pand(xmm4, xmm0); |
| por(xmm5, xmm4); |
| addpd(xmm1, xmm5); |
| cvttsd2sil(edx, xmm1); |
| cvtsi2sdl(xmm1, edx); |
| movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL |
| movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL |
| mulsd(xmm3, xmm1); |
| unpcklpd(xmm1, xmm1); |
| addq(rdx, 1865232); |
| movdqu(xmm4, xmm0); |
| andq(rdx, 63); |
| movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL |
| lea(rax, ExternalAddress(Ctable)); |
| shlq(rdx, 5); |
| addq(rax, rdx); |
| mulpd(xmm2, xmm1); |
| subsd(xmm0, xmm3); |
| mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL |
| subsd(xmm4, xmm3); |
| movq(xmm7, Address(rax, 8)); |
| unpcklpd(xmm0, xmm0); |
| movdqu(xmm3, xmm4); |
| subsd(xmm4, xmm2); |
| mulpd(xmm5, xmm0); |
| subpd(xmm0, xmm2); |
| movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL |
| mulsd(xmm7, xmm4); |
| subsd(xmm3, xmm4); |
| mulpd(xmm5, xmm0); |
| mulpd(xmm0, xmm0); |
| subsd(xmm3, xmm2); |
| movdqu(xmm2, Address(rax, 0)); |
| subsd(xmm1, xmm3); |
| movq(xmm3, Address(rax, 24)); |
| addsd(xmm2, xmm3); |
| subsd(xmm7, xmm2); |
| mulsd(xmm2, xmm4); |
| mulpd(xmm6, xmm0); |
| mulsd(xmm3, xmm4); |
| mulpd(xmm2, xmm0); |
| mulpd(xmm0, xmm0); |
| addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL |
| mulsd(xmm4, Address(rax, 0)); |
| addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL |
| mulpd(xmm5, xmm0); |
| movdqu(xmm0, xmm3); |
| addsd(xmm3, Address(rax, 8)); |
| mulpd(xmm1, xmm7); |
| movdqu(xmm7, xmm4); |
| addsd(xmm4, xmm3); |
| addpd(xmm6, xmm5); |
| movq(xmm5, Address(rax, 8)); |
| subsd(xmm5, xmm3); |
| subsd(xmm3, xmm4); |
| addsd(xmm1, Address(rax, 16)); |
| mulpd(xmm6, xmm2); |
| addsd(xmm0, xmm5); |
| addsd(xmm3, xmm7); |
| addsd(xmm0, xmm1); |
| addsd(xmm0, xmm3); |
| addsd(xmm0, xmm6); |
| unpckhpd(xmm6, xmm6); |
| addsd(xmm0, xmm6); |
| addsd(xmm0, xmm4); |
| jmp(B1_4); |
| |
| bind(L_2TAG_PACKET_0_0_1); |
| jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); |
| pextrw(eax, xmm0, 3); |
| andl(eax, 32767); |
| pinsrw(xmm0, eax, 3); |
| movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL |
| subsd(xmm1, xmm0); |
| movdqu(xmm0, xmm1); |
| jmp(B1_4); |
| |
| bind(L_2TAG_PACKET_1_0_1); |
| pextrw(eax, xmm0, 3); |
| andl(eax, 32752); |
| cmpl(eax, 32752); |
| jcc(Assembler::equal, L_2TAG_PACKET_2_0_1); |
| pextrw(ecx, xmm0, 3); |
| andl(ecx, 32752); |
| subl(ecx, 16224); |
| shrl(ecx, 7); |
| andl(ecx, 65532); |
| lea(r11, ExternalAddress(PI_INV_TABLE)); |
| addq(rcx, r11); |
| movdq(rax, xmm0); |
| movl(r10, Address(rcx, 20)); |
| movl(r8, Address(rcx, 24)); |
| movl(edx, eax); |
| shrq(rax, 21); |
| orl(eax, INT_MIN); |
| shrl(eax, 11); |
| movl(r9, r10); |
| imulq(r10, rdx); |
| imulq(r9, rax); |
| imulq(r8, rax); |
| movl(rsi, Address(rcx, 16)); |
| movl(rdi, Address(rcx, 12)); |
| movl(r11, r10); |
| shrq(r10, 32); |
| addq(r9, r10); |
| addq(r11, r8); |
| movl(r8, r11); |
| shrq(r11, 32); |
| addq(r9, r11); |
| movl(r10, rsi); |
| imulq(rsi, rdx); |
| imulq(r10, rax); |
| movl(r11, rdi); |
| imulq(rdi, rdx); |
| movl(rbx, rsi); |
| shrq(rsi, 32); |
| addq(r9, rbx); |
| movl(rbx, r9); |
| shrq(r9, 32); |
| addq(r10, rsi); |
| addq(r10, r9); |
| shlq(rbx, 32); |
| orq(r8, rbx); |
| imulq(r11, rax); |
| movl(r9, Address(rcx, 8)); |
| movl(rsi, Address(rcx, 4)); |
| movl(rbx, rdi); |
| shrq(rdi, 32); |
| addq(r10, rbx); |
| movl(rbx, r10); |
| shrq(r10, 32); |
| addq(r11, rdi); |
| addq(r11, r10); |
| movq(rdi, r9); |
| imulq(r9, rdx); |
| imulq(rdi, rax); |
| movl(r10, r9); |
| shrq(r9, 32); |
| addq(r11, r10); |
| movl(r10, r11); |
| shrq(r11, 32); |
| addq(rdi, r9); |
| addq(rdi, r11); |
| movq(r9, rsi); |
| imulq(rsi, rdx); |
| imulq(r9, rax); |
| shlq(r10, 32); |
| orq(r10, rbx); |
| movl(eax, Address(rcx, 0)); |
| movl(r11, rsi); |
| shrq(rsi, 32); |
| addq(rdi, r11); |
| movl(r11, rdi); |
| shrq(rdi, 32); |
| addq(r9, rsi); |
| addq(r9, rdi); |
| imulq(rdx, rax); |
| pextrw(rbx, xmm0, 3); |
| lea(rdi, ExternalAddress(PI_INV_TABLE)); |
| subq(rcx, rdi); |
| addl(ecx, ecx); |
| addl(ecx, ecx); |
| addl(ecx, ecx); |
| addl(ecx, 19); |
| movl(rsi, 32768); |
| andl(rsi, rbx); |
| shrl(rbx, 4); |
| andl(rbx, 2047); |
| subl(rbx, 1023); |
| subl(ecx, rbx); |
| addq(r9, rdx); |
| movl(edx, ecx); |
| addl(edx, 32); |
| cmpl(ecx, 1); |
| jcc(Assembler::less, L_2TAG_PACKET_3_0_1); |
| negl(ecx); |
| addl(ecx, 29); |
| shll(r9); |
| movl(rdi, r9); |
| andl(r9, 536870911); |
| testl(r9, 268435456); |
| jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1); |
| shrl(r9); |
| movl(rbx, 0); |
| shlq(r9, 32); |
| orq(r9, r11); |
| |
| bind(L_2TAG_PACKET_5_0_1); |
| |
| bind(L_2TAG_PACKET_6_0_1); |
| cmpq(r9, 0); |
| jcc(Assembler::equal, L_2TAG_PACKET_7_0_1); |
| |
| bind(L_2TAG_PACKET_8_0_1); |
| bsrq(r11, r9); |
| movl(ecx, 29); |
| subl(ecx, r11); |
| jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1); |
| shlq(r9); |
| movq(rax, r10); |
| shlq(r10); |
| addl(edx, ecx); |
| negl(ecx); |
| addl(ecx, 64); |
| shrq(rax); |
| shrq(r8); |
| orq(r9, rax); |
| orq(r10, r8); |
| |
| bind(L_2TAG_PACKET_10_0_1); |
| cvtsi2sdq(xmm0, r9); |
| shrq(r10, 1); |
| cvtsi2sdq(xmm3, r10); |
| xorpd(xmm4, xmm4); |
| shll(edx, 4); |
| negl(edx); |
| addl(edx, 16368); |
| orl(edx, rsi); |
| xorl(edx, rbx); |
| pinsrw(xmm4, edx, 3); |
| movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL |
| movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL |
| xorpd(xmm5, xmm5); |
| subl(edx, 1008); |
| pinsrw(xmm5, edx, 3); |
| mulsd(xmm0, xmm4); |
| shll(rsi, 16); |
| sarl(rsi, 31); |
| mulsd(xmm3, xmm5); |
| movdqu(xmm1, xmm0); |
| mulsd(xmm0, xmm2); |
| shrl(rdi, 29); |
| addsd(xmm1, xmm3); |
| mulsd(xmm3, xmm2); |
| addl(rdi, rsi); |
| xorl(rdi, rsi); |
| mulsd(xmm6, xmm1); |
| movl(eax, rdi); |
| addsd(xmm6, xmm3); |
| movdqu(xmm2, xmm0); |
| addsd(xmm0, xmm6); |
| subsd(xmm2, xmm0); |
| addsd(xmm6, xmm2); |
| |
| bind(L_2TAG_PACKET_11_0_1); |
| movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL |
| mulsd(xmm1, xmm0); |
| movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL |
| movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL |
| pand(xmm4, xmm0); |
| por(xmm5, xmm4); |
| addpd(xmm1, xmm5); |
| cvttsd2siq(rdx, xmm1); |
| cvtsi2sdq(xmm1, rdx); |
| movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL |
| movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL |
| mulsd(xmm3, xmm1); |
| unpcklpd(xmm1, xmm1); |
| shll(eax, 3); |
| addl(edx, 1865232); |
| movdqu(xmm4, xmm0); |
| addl(edx, eax); |
| andl(edx, 63); |
| movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL |
| lea(rax, ExternalAddress(Ctable)); |
| shll(edx, 5); |
| addq(rax, rdx); |
| mulpd(xmm2, xmm1); |
| subsd(xmm0, xmm3); |
| mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL |
| subsd(xmm4, xmm3); |
| movq(xmm7, Address(rax, 8)); |
| unpcklpd(xmm0, xmm0); |
| movdqu(xmm3, xmm4); |
| subsd(xmm4, xmm2); |
| mulpd(xmm5, xmm0); |
| subpd(xmm0, xmm2); |
| mulsd(xmm7, xmm4); |
| subsd(xmm3, xmm4); |
| mulpd(xmm5, xmm0); |
| mulpd(xmm0, xmm0); |
| subsd(xmm3, xmm2); |
| movdqu(xmm2, Address(rax, 0)); |
| subsd(xmm1, xmm3); |
| movq(xmm3, Address(rax, 24)); |
| addsd(xmm2, xmm3); |
| subsd(xmm7, xmm2); |
| subsd(xmm1, xmm6); |
| movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL |
| mulsd(xmm2, xmm4); |
| mulpd(xmm6, xmm0); |
| mulsd(xmm3, xmm4); |
| mulpd(xmm2, xmm0); |
| mulpd(xmm0, xmm0); |
| addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL |
| mulsd(xmm4, Address(rax, 0)); |
| addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL |
| mulpd(xmm5, xmm0); |
| movdqu(xmm0, xmm3); |
| addsd(xmm3, Address(rax, 8)); |
| mulpd(xmm1, xmm7); |
| movdqu(xmm7, xmm4); |
| addsd(xmm4, xmm3); |
| addpd(xmm6, xmm5); |
| movq(xmm5, Address(rax, 8)); |
| subsd(xmm5, xmm3); |
| subsd(xmm3, xmm4); |
| addsd(xmm1, Address(rax, 16)); |
| mulpd(xmm6, xmm2); |
| addsd(xmm5, xmm0); |
| addsd(xmm3, xmm7); |
| addsd(xmm1, xmm5); |
| addsd(xmm1, xmm3); |
| addsd(xmm1, xmm6); |
| unpckhpd(xmm6, xmm6); |
| movdqu(xmm0, xmm4); |
| addsd(xmm1, xmm6); |
| addsd(xmm0, xmm1); |
| jmp(B1_4); |
| |
| bind(L_2TAG_PACKET_7_0_1); |
| addl(edx, 64); |
| movq(r9, r10); |
| movq(r10, r8); |
| movl(r8, 0); |
| cmpq(r9, 0); |
| jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1); |
| addl(edx, 64); |
| movq(r9, r10); |
| movq(r10, r8); |
| cmpq(r9, 0); |
| jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1); |
| xorpd(xmm0, xmm0); |
| xorpd(xmm6, xmm6); |
| jmp(L_2TAG_PACKET_11_0_1); |
| |
| bind(L_2TAG_PACKET_9_0_1); |
| jcc(Assembler::equal, L_2TAG_PACKET_10_0_1); |
| negl(ecx); |
| shrq(r10); |
| movq(rax, r9); |
| shrq(r9); |
| subl(edx, ecx); |
| negl(ecx); |
| addl(ecx, 64); |
| shlq(rax); |
| orq(r10, rax); |
| jmp(L_2TAG_PACKET_10_0_1); |
| bind(L_2TAG_PACKET_3_0_1); |
| negl(ecx); |
| shlq(r9, 32); |
| orq(r9, r11); |
| shlq(r9); |
| movq(rdi, r9); |
| testl(r9, INT_MIN); |
| jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1); |
| shrl(r9); |
| movl(rbx, 0); |
| shrq(rdi, 3); |
| jmp(L_2TAG_PACKET_6_0_1); |
| |
| bind(L_2TAG_PACKET_4_0_1); |
| shrl(r9); |
| movl(rbx, 536870912); |
| shrl(rbx); |
| shlq(r9, 32); |
| orq(r9, r11); |
| shlq(rbx, 32); |
| addl(rdi, 536870912); |
| movl(rcx, 0); |
| movl(r11, 0); |
| subq(rcx, r8); |
| sbbq(r11, r10); |
| sbbq(rbx, r9); |
| movq(r8, rcx); |
| movq(r10, r11); |
| movq(r9, rbx); |
| movl(rbx, 32768); |
| jmp(L_2TAG_PACKET_5_0_1); |
| |
| bind(L_2TAG_PACKET_12_0_1); |
| shrl(r9); |
| mov64(rbx, 0x100000000); |
| shrq(rbx); |
| movl(rcx, 0); |
| movl(r11, 0); |
| subq(rcx, r8); |
| sbbq(r11, r10); |
| sbbq(rbx, r9); |
| movq(r8, rcx); |
| movq(r10, r11); |
| movq(r9, rbx); |
| movl(rbx, 32768); |
| shrq(rdi, 3); |
| addl(rdi, 536870912); |
| jmp(L_2TAG_PACKET_6_0_1); |
| |
| bind(L_2TAG_PACKET_2_0_1); |
| movsd(xmm0, Address(rsp, 8)); |
| mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL |
| movq(Address(rsp, 0), xmm0); |
| |
| bind(L_2TAG_PACKET_13_0_1); |
| |
| bind(B1_4); |
| addq(rsp, 16); |
| pop(rbx); |
| } |
| #else |
| // The 32 bit code is at most SSE2 compliant |
| |
| ALIGNED_(16) juint _static_const_table_cos[] = |
| { |
| 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
| 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, |
| 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, |
| 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, |
| 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, |
| 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, |
| 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, |
| 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, |
| 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, |
| 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, |
| 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, |
| 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, |
| 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, |
| 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, |
| 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, |
| 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, |
| 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, |
| 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, |
| 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, |
| 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, |
| 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, |
| 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, |
| 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, |
| 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, |
| 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, |
| 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, |
| 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
| 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, |
| 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, |
| 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, |
| 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, |
| 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, |
| 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, |
| 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, |
| 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, |
| 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, |
| 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, |
| 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, |
| 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, |
| 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, |
| 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, |
| 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, |
| 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, |
| 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, |
| 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, |
| 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, |
| 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, |
| 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, |
| 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, |
| 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, |
| 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, |
| 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
| 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, |
| 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, |
| 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, |
| 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, |
| 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, |
| 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, |
| 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, |
| 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, |
| 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, |
| 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, |
| 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, |
| 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, |
| 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, |
| 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, |
| 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, |
| 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, |
| 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, |
| 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, |
| 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, |
| 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, |
| 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, |
| 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, |
| 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, |
| 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, |
| 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, |
| 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, |
| 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, |
| 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, |
| 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, |
| 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, |
| 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, |
| 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, |
| 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, |
| 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, |
| 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, |
| 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, |
| 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, |
| 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, |
| 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, |
| 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, |
| 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, |
| 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, |
| 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, |
| 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, |
| 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, |
| 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, |
| 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, |
| 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, |
| 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, |
| 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, |
| 0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL, |
| 0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL, |
| 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL, |
| 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL, |
| 0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL, |
| 0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL, |
| 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
| 0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, |
| 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, |
| 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL, |
| 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL |
| }; |
| //registers, |
| // input: (rbp + 8) |
| // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
| // rax, rdx, rcx, rbx (tmp) |
| |
| // Code generated by Intel C compiler for LIBM library |
| |
| void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { |
| Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
| Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
| Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; |
| Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; |
| |
| assert_different_registers(tmp, eax, ecx, edx); |
| |
| address static_const_table_cos = (address)_static_const_table_cos; |
| |
| bind(start); |
| subl(rsp, 120); |
| movl(Address(rsp, 56), tmp); |
| lea(tmp, ExternalAddress(static_const_table_cos)); |
| movsd(xmm0, Address(rsp, 128)); |
| pextrw(eax, xmm0, 3); |
| andl(eax, 32767); |
| subl(eax, 12336); |
| cmpl(eax, 4293); |
| jcc(Assembler::above, L_2TAG_PACKET_0_0_2); |
| movsd(xmm1, Address(tmp, 2160)); |
| mulsd(xmm1, xmm0); |
| movdqu(xmm5, Address(tmp, 2240)); |
| movsd(xmm4, Address(tmp, 2224)); |
| pand(xmm4, xmm0); |
| por(xmm5, xmm4); |
| movsd(xmm3, Address(tmp, 2128)); |
| movdqu(xmm2, Address(tmp, 2112)); |
| addpd(xmm1, xmm5); |
| cvttsd2sil(edx, xmm1); |
| cvtsi2sdl(xmm1, edx); |
| mulsd(xmm3, xmm1); |
| unpcklpd(xmm1, xmm1); |
| addl(edx, 1865232); |
| movdqu(xmm4, xmm0); |
| andl(edx, 63); |
| movdqu(xmm5, Address(tmp, 2096)); |
| lea(eax, Address(tmp, 0)); |
| shll(edx, 5); |
| addl(eax, edx); |
| mulpd(xmm2, xmm1); |
| subsd(xmm0, xmm3); |
| mulsd(xmm1, Address(tmp, 2144)); |
| subsd(xmm4, xmm3); |
| movsd(xmm7, Address(eax, 8)); |
| unpcklpd(xmm0, xmm0); |
| movapd(xmm3, xmm4); |
| subsd(xmm4, xmm2); |
| mulpd(xmm5, xmm0); |
| subpd(xmm0, xmm2); |
| movdqu(xmm6, Address(tmp, 2064)); |
| mulsd(xmm7, xmm4); |
| subsd(xmm3, xmm4); |
| mulpd(xmm5, xmm0); |
| mulpd(xmm0, xmm0); |
| subsd(xmm3, xmm2); |
| movdqu(xmm2, Address(eax, 0)); |
| subsd(xmm1, xmm3); |
| movsd(xmm3, Address(eax, 24)); |
| addsd(xmm2, xmm3); |
| subsd(xmm7, xmm2); |
| mulsd(xmm2, xmm4); |
| mulpd(xmm6, xmm0); |
| mulsd(xmm3, xmm4); |
| mulpd(xmm2, xmm0); |
| mulpd(xmm0, xmm0); |
| addpd(xmm5, Address(tmp, 2080)); |
| mulsd(xmm4, Address(eax, 0)); |
| addpd(xmm6, Address(tmp, 2048)); |
| mulpd(xmm5, xmm0); |
| movapd(xmm0, xmm3); |
| addsd(xmm3, Address(eax, 8)); |
| mulpd(xmm1, xmm7); |
| movapd(xmm7, xmm4); |
| addsd(xmm4, xmm3); |
| addpd(xmm6, xmm5); |
| movsd(xmm5, Address(eax, 8)); |
| subsd(xmm5, xmm3); |
| subsd(xmm3, xmm4); |
| addsd(xmm1, Address(eax, 16)); |
| mulpd(xmm6, xmm2); |
| addsd(xmm5, xmm0); |
| addsd(xmm3, xmm7); |
| addsd(xmm1, xmm5); |
| addsd(xmm1, xmm3); |
| addsd(xmm1, xmm6); |
| unpckhpd(xmm6, xmm6); |
| addsd(xmm1, xmm6); |
| addsd(xmm4, xmm1); |
| movsd(Address(rsp, 0), xmm4); |
| fld_d(Address(rsp, 0)); |
| jmp(L_2TAG_PACKET_1_0_2); |
| |
| bind(L_2TAG_PACKET_0_0_2); |
| jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); |
| pextrw(eax, xmm0, 3); |
| andl(eax, 32767); |
| pinsrw(xmm0, eax, 3); |
| movsd(xmm1, Address(tmp, 2192)); |
| subsd(xmm1, xmm0); |
| movsd(Address(rsp, 0), xmm1); |
| fld_d(Address(rsp, 0)); |
| jmp(L_2TAG_PACKET_1_0_2); |
| |
| bind(L_2TAG_PACKET_2_0_2); |
| movl(eax, Address(rsp, 132)); |
| andl(eax, 2146435072); |
| cmpl(eax, 2146435072); |
| jcc(Assembler::equal, L_2TAG_PACKET_3_0_2); |
| subl(rsp, 32); |
| movsd(Address(rsp, 0), xmm0); |
| lea(eax, Address(rsp, 40)); |
| movl(Address(rsp, 8), eax); |
| movl(eax, 1); |
| movl(Address(rsp, 12), eax); |
| call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge()))); |
| addl(rsp, 32); |
| fld_d(Address(rsp, 8)); |
| jmp(L_2TAG_PACKET_1_0_2); |
| |
| bind(L_2TAG_PACKET_3_0_2); |
| fld_d(Address(rsp, 128)); |
| fmul_d(Address(tmp, 2208)); |
| |
| bind(L_2TAG_PACKET_1_0_2); |
| movl(tmp, Address(rsp, 56)); |
| } |
| #endif |