| /* |
| * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| * |
| */ |
| |
| #include "precompiled.hpp" |
| #include "asm/assembler.hpp" |
| #include "assembler_arm.inline.hpp" |
| #include "code/debugInfoRec.hpp" |
| #include "code/icBuffer.hpp" |
| #include "code/vtableStubs.hpp" |
| #include "interpreter/interpreter.hpp" |
| #include "logging/log.hpp" |
| #include "memory/resourceArea.hpp" |
| #include "oops/compiledICHolder.hpp" |
| #include "runtime/sharedRuntime.hpp" |
| #include "runtime/vframeArray.hpp" |
| #include "utilities/align.hpp" |
| #include "vmreg_arm.inline.hpp" |
| #ifdef COMPILER1 |
| #include "c1/c1_Runtime1.hpp" |
| #endif |
| #ifdef COMPILER2 |
| #include "opto/runtime.hpp" |
| #endif |
| #ifdef SHARK |
| #include "compiler/compileBroker.hpp" |
| #include "shark/sharkCompiler.hpp" |
| #endif |
| |
| #define __ masm-> |
| |
| class RegisterSaver { |
| public: |
| |
| // Special registers: |
| // 32-bit ARM 64-bit ARM |
| // Rthread: R10 R28 |
| // LR: R14 R30 |
| |
| // Rthread is callee saved in the C ABI and never changed by compiled code: |
| // no need to save it. |
| |
| // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset. |
| // The one at LR_offset is a return address that is needed by stack walking. |
| // A c2 method uses LR as a standard register so it may be live when we |
| // branch to the runtime. The slot at R14/R30_offset is for the value of LR |
| // in case it's live in the method we are coming from. |
| |
| #ifdef AARCH64 |
| |
| // |
| // On AArch64 the register save area has the following layout: |
| // |
| // |---------------------| |
| // | return address (LR) | |
| // | FP | |
| // |---------------------| |
| // | V31 | |
| // | ... | |
| // | V0 | |
| // |---------------------| |
| // | padding | |
| // | R30 (LR live value) | |
| // |---------------------| |
| // | R27 | |
| // | ... | |
| // | R0 | |
| // |---------------------| <-- SP |
| // |
| |
| enum RegisterLayout { |
| number_of_saved_gprs = 28, |
| number_of_saved_fprs = FloatRegisterImpl::number_of_registers, |
| words_per_fpr = ConcreteRegisterImpl::words_per_fpr, |
| |
| R0_offset = 0, |
| R30_offset = R0_offset + number_of_saved_gprs, |
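| // One slot for R30 plus one padding slot (see the layout above) separate the GPRs from the FPRs. |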
| D0_offset = R30_offset + 2, |
| FP_offset = D0_offset + number_of_saved_fprs * words_per_fpr, |
| LR_offset = FP_offset + 1, |
| |
| reg_save_size = LR_offset + 1, |
| }; |
| |
| static const int Rmethod_offset; |
| static const int Rtemp_offset; |
| |
| #else |
| |
| enum RegisterLayout { |
| fpu_save_size = FloatRegisterImpl::number_of_registers, |
| #ifndef __SOFTFP__ |
| D0_offset = 0, |
| #endif |
| R0_offset = fpu_save_size, |
| R1_offset, |
| R2_offset, |
| R3_offset, |
| R4_offset, |
| R5_offset, |
| R6_offset, |
| #if (FP_REG_NUM != 7) |
| // if not saved as FP |
| R7_offset, |
| #endif |
| R8_offset, |
| R9_offset, |
| #if (FP_REG_NUM != 11) |
| // if not saved as FP |
| R11_offset, |
| #endif |
| R12_offset, |
| R14_offset, |
| FP_offset, |
| LR_offset, |
| reg_save_size, |
| |
| Rmethod_offset = R9_offset, |
| Rtemp_offset = R12_offset, |
| }; |
| |
| // all regs but Rthread (R10), FP (R7 or R11), SP and PC |
| // (altFP_7_11 is the one among R7 and R11 which is not FP) |
| #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11) |
| |
| #endif // AARCH64 |
| |
| // When LR may be live in the nmethod from which we are coming, lr_saved |
| // is true: the caller saves the return address before the call to |
| // save_live_registers, and LR contains the live value. |
| |
| static OopMap* save_live_registers(MacroAssembler* masm, |
| int* total_frame_words, |
| bool lr_saved = false); |
| static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true); |
| |
| }; |
| |
| |
| #ifdef AARCH64 |
| const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding(); |
| const int RegisterSaver::Rtemp_offset = RegisterSaver::R0_offset + Rtemp->encoding(); |
| #endif // AARCH64 |
| |
| |
| OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, |
| int* total_frame_words, |
| bool lr_saved) { |
| *total_frame_words = reg_save_size; |
| |
| OopMapSet *oop_maps = new OopMapSet(); |
| OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0); |
| |
| #ifdef AARCH64 |
| assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned"); |
| |
| if (lr_saved) { |
| // LR was stashed here, so that jump could use it as a scratch reg |
| __ ldr(LR, Address(SP, 0)); |
| // There are two words on the stack top: |
| // [SP + 0]: placeholder for FP |
| // [SP + wordSize]: saved return address |
| __ str(FP, Address(SP, 0)); |
| } else { |
| __ raw_push(FP, LR); |
| } |
| |
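| // The two top slots now hold FP and the return address; allocate the rest of the save area below them. |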
| __ sub(SP, SP, (reg_save_size - 2) * wordSize); |
| |
| for (int i = 0; i < number_of_saved_gprs; i += 2) { |
| int offset = R0_offset + i; |
| __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize)); |
| map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg()); |
| map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg()); |
| } |
| |
| __ str(R30, Address(SP, R30_offset * wordSize)); |
| map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg()); |
| |
| for (int i = 0; i < number_of_saved_fprs; i += 2) { |
| int offset1 = D0_offset + i * words_per_fpr; |
| int offset2 = offset1 + words_per_fpr; |
| Address base(SP, offset1 * wordSize); |
| if (words_per_fpr == 2) { |
| // pair of "wide" quad vector registers |
| __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base); |
| } else { |
| // pair of double vector registers |
| __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base); |
| } |
| map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg()); |
| map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg()); |
| } |
| #else |
| if (lr_saved) { |
| __ push(RegisterSet(FP)); |
| } else { |
| __ push(RegisterSet(FP) | RegisterSet(LR)); |
| } |
| __ push(SAVED_BASE_REGS); |
| if (HaveVFP) { |
| if (VM_Version::has_vfp3_32()) { |
| __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback); |
| } else { |
| if (FloatRegisterImpl::number_of_registers > 32) { |
| assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64"); |
| __ sub(SP, SP, 32 * wordSize); |
| } |
| } |
| __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback); |
| } else { |
| __ sub(SP, SP, fpu_save_size * wordSize); |
| } |
| |
| int i; |
| int j=0; |
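| // Record oop-map entries for the saved registers R0..R9, skipping the one used as FP. |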
| for (i = R0_offset; i <= R9_offset; i++) { |
| if (j == FP_REG_NUM) { |
| // skip the FP register, managed below. |
| j++; |
| } |
| map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg()); |
| j++; |
| } |
| assert(j == R10->encoding(), "must be"); |
| #if (FP_REG_NUM != 11) |
| // add R11, if not managed as FP |
| map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg()); |
| #endif |
| map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg()); |
| map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg()); |
| if (HaveVFP) { |
| for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) { |
| map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg()); |
| map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next()); |
| } |
| } |
| #endif // AARCH64 |
| |
| return map; |
| } |
| |
| void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) { |
| #ifdef AARCH64 |
| for (int i = 0; i < number_of_saved_gprs; i += 2) { |
| __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize)); |
| } |
| |
| __ ldr(R30, Address(SP, R30_offset * wordSize)); |
| |
| for (int i = 0; i < number_of_saved_fprs; i += 2) { |
| Address base(SP, (D0_offset + i * words_per_fpr) * wordSize); |
| if (words_per_fpr == 2) { |
| // pair of "wide" quad vector registers |
| __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base); |
| } else { |
| // pair of double vector registers |
| __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base); |
| } |
| } |
| |
| __ add(SP, SP, (reg_save_size - 2) * wordSize); |
| |
| if (restore_lr) { |
| __ raw_pop(FP, LR); |
| } else { |
| __ ldr(FP, Address(SP, 0)); |
| } |
| #else |
| if (HaveVFP) { |
| __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback); |
| if (VM_Version::has_vfp3_32()) { |
| __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback); |
| } else { |
| if (FloatRegisterImpl::number_of_registers > 32) { |
| assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64"); |
| __ add(SP, SP, 32 * wordSize); |
| } |
| } |
| } else { |
| __ add(SP, SP, fpu_save_size * wordSize); |
| } |
| __ pop(SAVED_BASE_REGS); |
| if (restore_lr) { |
| __ pop(RegisterSet(FP) | RegisterSet(LR)); |
| } else { |
| __ pop(RegisterSet(FP)); |
| } |
| #endif // AARCH64 |
| } |
| |
| #ifdef AARCH64 |
| |
| static void push_result_registers(MacroAssembler* masm, BasicType ret_type) { |
| if (ret_type == T_DOUBLE || ret_type == T_FLOAT) { |
| __ str_d(D0, Address(SP, -2*wordSize, pre_indexed)); |
| } else { |
| __ raw_push(R0, ZR); |
| } |
| } |
| |
| static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) { |
| if (ret_type == T_DOUBLE || ret_type == T_FLOAT) { |
| __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed)); |
| } else { |
| __ raw_pop(R0, ZR); |
| } |
| } |
| |
| static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) { |
| __ raw_push(R0, R1); |
| __ raw_push(R2, R3); |
| __ raw_push(R4, R5); |
| __ raw_push(R6, R7); |
| |
| assert(FPR_PARAMS == 8, "adjust this code"); |
| assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be"); |
| |
| if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed)); |
| if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed)); |
| if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed)); |
| if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed)); |
| } |
| |
| static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) { |
| assert(FPR_PARAMS == 8, "adjust this code"); |
| assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be"); |
| |
| if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed)); |
| if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed)); |
| if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed)); |
| if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed)); |
| |
| __ raw_pop(R6, R7); |
| __ raw_pop(R4, R5); |
| __ raw_pop(R2, R3); |
| __ raw_pop(R0, R1); |
| } |
| |
| #else // AARCH64 |
| |
| static void push_result_registers(MacroAssembler* masm, BasicType ret_type) { |
| #ifdef __ABI_HARD__ |
| if (ret_type == T_DOUBLE || ret_type == T_FLOAT) { |
| __ sub(SP, SP, 8); |
| __ fstd(D0, Address(SP)); |
| return; |
| } |
| #endif // __ABI_HARD__ |
| __ raw_push(R0, R1); |
| } |
| |
| static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) { |
| #ifdef __ABI_HARD__ |
| if (ret_type == T_DOUBLE || ret_type == T_FLOAT) { |
| __ fldd(D0, Address(SP)); |
| __ add(SP, SP, 8); |
| return; |
| } |
| #endif // __ABI_HARD__ |
| __ raw_pop(R0, R1); |
| } |
| |
| static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) { |
| // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment |
| __ push(RegisterSet(R0, R3)); |
| |
| #ifdef __ABI_HARD__ |
| // preserve arguments |
| // Probably not needed, as the locking code is unlikely to modify the volatile |
| // FP registers, but there is no way to guarantee that. |
| if (fp_regs_in_arguments) { |
| // convert fp_regs_in_arguments to a number of double registers |
| int double_regs_num = (fp_regs_in_arguments + 1) >> 1; |
| __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback); |
| } |
| #endif // __ABI_HARD__ |
| } |
| |
| static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) { |
| #ifdef __ABI_HARD__ |
| if (fp_regs_in_arguments) { |
| int double_regs_num = (fp_regs_in_arguments + 1) >> 1; |
| __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback); |
| } |
| #endif // __ABI_HARD__ |
| |
| __ pop(RegisterSet(R0, R3)); |
| } |
| |
| #endif // AARCH64 |
| |
| |
| // Is the vector's size (in bytes) bigger than the size saved by default? |
| // All vector registers are saved by default on ARM. |
| bool SharedRuntime::is_wide_vector(int size) { |
| return false; |
| } |
| |
| size_t SharedRuntime::trampoline_size() { |
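| // Large enough for the code emitted by generate_trampoline below: |
| // an indirect jump (with Rtemp as scratch) plus the inlined destination address. |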
| return 16; |
| } |
| |
| void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { |
| InlinedAddress dest(destination); |
| __ indirect_jump(dest, Rtemp); |
| __ bind_literal(dest); |
| } |
| |
| int SharedRuntime::c_calling_convention(const BasicType *sig_bt, |
| VMRegPair *regs, |
| VMRegPair *regs2, |
| int total_args_passed) { |
| assert(regs2 == NULL, "not needed on arm"); |
| #ifdef AARCH64 |
| int slot = 0; // counted in 32-bit VMReg slots |
| int reg = 0; |
| int fp_reg = 0; |
| for (int i = 0; i < total_args_passed; i++) { |
| switch (sig_bt[i]) { |
| case T_SHORT: |
| case T_CHAR: |
| case T_BYTE: |
| case T_BOOLEAN: |
| case T_INT: |
| if (reg < GPR_PARAMS) { |
| Register r = as_Register(reg); |
| regs[i].set1(r->as_VMReg()); |
| reg++; |
| } else { |
| regs[i].set1(VMRegImpl::stack2reg(slot)); |
| slot+=2; |
| } |
| break; |
| case T_LONG: |
| assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); |
| // fall through |
| case T_ARRAY: |
| case T_OBJECT: |
| case T_ADDRESS: |
| if (reg < GPR_PARAMS) { |
| Register r = as_Register(reg); |
| regs[i].set2(r->as_VMReg()); |
| reg++; |
| } else { |
| regs[i].set2(VMRegImpl::stack2reg(slot)); |
| slot+=2; |
| } |
| break; |
| case T_FLOAT: |
| if (fp_reg < FPR_PARAMS) { |
| FloatRegister r = as_FloatRegister(fp_reg); |
| regs[i].set1(r->as_VMReg()); |
| fp_reg++; |
| } else { |
| regs[i].set1(VMRegImpl::stack2reg(slot)); |
| slot+=2; |
| } |
| break; |
| case T_DOUBLE: |
| assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); |
| if (fp_reg < FPR_PARAMS) { |
| FloatRegister r = as_FloatRegister(fp_reg); |
| regs[i].set2(r->as_VMReg()); |
| fp_reg++; |
| } else { |
| regs[i].set2(VMRegImpl::stack2reg(slot)); |
| slot+=2; |
| } |
| break; |
| case T_VOID: |
| assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); |
| regs[i].set_bad(); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } |
| return slot; |
| |
| #else // AARCH64 |
| |
| int slot = 0; |
| int ireg = 0; |
| #ifdef __ABI_HARD__ |
| int fp_slot = 0; |
| int single_fpr_slot = 0; |
| #endif // __ABI_HARD__ |
| for (int i = 0; i < total_args_passed; i++) { |
| switch (sig_bt[i]) { |
| case T_SHORT: |
| case T_CHAR: |
| case T_BYTE: |
| case T_BOOLEAN: |
| case T_INT: |
| case T_ARRAY: |
| case T_OBJECT: |
| case T_ADDRESS: |
| #ifndef __ABI_HARD__ |
| case T_FLOAT: |
| #endif // !__ABI_HARD__ |
| if (ireg < 4) { |
| Register r = as_Register(ireg); |
| regs[i].set1(r->as_VMReg()); |
| ireg++; |
| } else { |
| regs[i].set1(VMRegImpl::stack2reg(slot)); |
| slot++; |
| } |
| break; |
| case T_LONG: |
| #ifndef __ABI_HARD__ |
| case T_DOUBLE: |
| #endif // !__ABI_HARD__ |
| assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); |
| if (ireg <= 2) { |
| #if (ALIGN_WIDE_ARGUMENTS == 1) |
| if(ireg & 1) ireg++; // Aligned location required |
| #endif |
| Register r1 = as_Register(ireg); |
| Register r2 = as_Register(ireg + 1); |
| regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg()); |
| ireg += 2; |
| #if (ALIGN_WIDE_ARGUMENTS == 0) |
| } else if (ireg == 3) { |
| // uses R3 + one stack slot |
| Register r = as_Register(ireg); |
| regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg()); |
| ireg += 1; |
| slot += 1; |
| #endif |
| } else { |
| if (slot & 1) slot++; // Aligned location required |
| regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot)); |
| slot += 2; |
| ireg = 4; |
| } |
| break; |
| case T_VOID: |
| regs[i].set_bad(); |
| break; |
| #ifdef __ABI_HARD__ |
| case T_FLOAT: |
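| // Hard-float: singles are allocated in pairs of S registers; the first float |
| // claims an even slot and the next float back-fills the odd half. |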
| if ((fp_slot < 16)||(single_fpr_slot & 1)) { |
| if ((single_fpr_slot & 1) == 0) { |
| single_fpr_slot = fp_slot; |
| fp_slot += 2; |
| } |
| FloatRegister r = as_FloatRegister(single_fpr_slot); |
| single_fpr_slot++; |
| regs[i].set1(r->as_VMReg()); |
| } else { |
| regs[i].set1(VMRegImpl::stack2reg(slot)); |
| slot++; |
| } |
| break; |
| case T_DOUBLE: |
| assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments"); |
| if (fp_slot <= 14) { |
| FloatRegister r1 = as_FloatRegister(fp_slot); |
| FloatRegister r2 = as_FloatRegister(fp_slot+1); |
| regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg()); |
| fp_slot += 2; |
| } else { |
| if(slot & 1) slot++; |
| regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot)); |
| slot += 2; |
| single_fpr_slot = 16; |
| } |
| break; |
| #endif // __ABI_HARD__ |
| default: |
| ShouldNotReachHere(); |
| } |
| } |
| return slot; |
| #endif // AARCH64 |
| } |
| |
| int SharedRuntime::java_calling_convention(const BasicType *sig_bt, |
| VMRegPair *regs, |
| int total_args_passed, |
| int is_outgoing) { |
| #ifdef AARCH64 |
| // C calling convention on AArch64 is good enough. |
| return c_calling_convention(sig_bt, regs, NULL, total_args_passed); |
| #else |
| #ifdef __SOFTFP__ |
| // soft float is the same as the C calling convention. |
| return c_calling_convention(sig_bt, regs, NULL, total_args_passed); |
| #endif // __SOFTFP__ |
| (void) is_outgoing; |
| int slot = 0; |
| int ireg = 0; |
| int freg = 0; |
| int single_fpr = 0; |
| |
| for (int i = 0; i < total_args_passed; i++) { |
| switch (sig_bt[i]) { |
| case T_SHORT: |
| case T_CHAR: |
| case T_BYTE: |
| case T_BOOLEAN: |
| case T_INT: |
| case T_ARRAY: |
| case T_OBJECT: |
| case T_ADDRESS: |
| if (ireg < 4) { |
| Register r = as_Register(ireg++); |
| regs[i].set1(r->as_VMReg()); |
| } else { |
| regs[i].set1(VMRegImpl::stack2reg(slot++)); |
| } |
| break; |
| case T_FLOAT: |
| // C2 utilizes S14/S15 for mem-mem moves |
| if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) { |
| if ((single_fpr & 1) == 0) { |
| single_fpr = freg; |
| freg += 2; |
| } |
| FloatRegister r = as_FloatRegister(single_fpr++); |
| regs[i].set1(r->as_VMReg()); |
| } else { |
| regs[i].set1(VMRegImpl::stack2reg(slot++)); |
| } |
| break; |
| case T_DOUBLE: |
| // C2 utilizes S14/S15 for mem-mem moves |
| if (freg <= 14 COMPILER2_PRESENT(-2)) { |
| FloatRegister r1 = as_FloatRegister(freg); |
| FloatRegister r2 = as_FloatRegister(freg + 1); |
| regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg()); |
| freg += 2; |
| } else { |
| // Internally keep the aligned calling convention, |
| // ignoring ALIGN_WIDE_ARGUMENTS |
| if (slot & 1) slot++; |
| regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot)); |
| slot += 2; |
| single_fpr = 16; |
| } |
| break; |
| case T_LONG: |
| // Internally keep the aligned calling convention, |
| // ignoring ALIGN_WIDE_ARGUMENTS |
| if (ireg <= 2) { |
| if (ireg & 1) ireg++; |
| Register r1 = as_Register(ireg); |
| Register r2 = as_Register(ireg + 1); |
| regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg()); |
| ireg += 2; |
| } else { |
| if (slot & 1) slot++; |
| regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot)); |
| slot += 2; |
| ireg = 4; |
| } |
| break; |
| case T_VOID: |
| regs[i].set_bad(); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } |
| |
| if (slot & 1) slot++; |
| return slot; |
| #endif // AARCH64 |
| } |
| |
| static void patch_callers_callsite(MacroAssembler *masm) { |
| Label skip; |
| |
| __ ldr(Rtemp, Address(Rmethod, Method::code_offset())); |
| __ cbz(Rtemp, skip); |
| |
| #ifdef AARCH64 |
| push_param_registers(masm, FPR_PARAMS); |
| __ raw_push(LR, ZR); |
| #else |
| // Push an even number of registers for stack alignment. |
| // R9 is selected because it had to be saved anyway on some platforms. |
| __ push(RegisterSet(R0, R3) | R9 | LR); |
| #endif // AARCH64 |
| |
| __ mov(R0, Rmethod); |
| __ mov(R1, LR); |
| __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)); |
| |
| #ifdef AARCH64 |
| __ raw_pop(LR, ZR); |
| pop_param_registers(masm, FPR_PARAMS); |
| #else |
| __ pop(RegisterSet(R0, R3) | R9 | LR); |
| #endif // AARCH64 |
| |
| __ bind(skip); |
| } |
| |
| void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, |
| int total_args_passed, int comp_args_on_stack, |
| const BasicType *sig_bt, const VMRegPair *regs) { |
| // TODO: ARM - maybe we can use ldm to load arguments |
| const Register tmp = Rtemp; // avoid erasing R5_mh |
| |
| // The next assert may not be needed, but it is safer. Extra analysis is required |
| // if there are not enough free registers and we need to use R5 here. |
| assert_different_registers(tmp, R5_mh); |
| |
| // 6243940 We might end up in handle_wrong_method if |
| // the callee is deoptimized as we race thru here. If that |
| // happens we don't want to take a safepoint because the |
| // caller frame will look interpreted and arguments are now |
| // "compiled" so it is much better to make this transition |
| // invisible to the stack walking code. Unfortunately if |
| // we try and find the callee by normal means a safepoint |
| // is possible. So we stash the desired callee in the thread |
| // and the VM will find it there should this case occur. |
| Address callee_target_addr(Rthread, JavaThread::callee_target_offset()); |
| __ str(Rmethod, callee_target_addr); |
| |
| #ifdef AARCH64 |
| |
| assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod); |
| assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams); |
| |
| if (comp_args_on_stack) { |
| __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes)); |
| } |
| |
| for (int i = 0; i < total_args_passed; i++) { |
| if (sig_bt[i] == T_VOID) { |
| assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); |
| continue; |
| } |
| assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered"); |
| |
| int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1; |
| Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i)); |
| |
| VMReg r = regs[i].first(); |
| bool full_word = regs[i].second()->is_valid(); |
| |
| if (r->is_stack()) { |
| if (full_word) { |
| __ ldr(tmp, source_addr); |
| __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); |
| } else { |
| __ ldr_w(tmp, source_addr); |
| __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); |
| } |
| } else if (r->is_Register()) { |
| if (full_word) { |
| __ ldr(r->as_Register(), source_addr); |
| } else { |
| __ ldr_w(r->as_Register(), source_addr); |
| } |
| } else if (r->is_FloatRegister()) { |
| if (sig_bt[i] == T_DOUBLE) { |
| __ ldr_d(r->as_FloatRegister(), source_addr); |
| } else { |
| __ ldr_s(r->as_FloatRegister(), source_addr); |
| } |
| } else { |
| assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be"); |
| } |
| } |
| |
| __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset())); |
| __ br(tmp); |
| |
| #else |
| |
| assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod); |
| |
| const Register initial_sp = Rmethod; // temporarily scratched |
| |
| // Old code was modifying R4 but this looks unsafe (particularly with JSR292) |
| assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp); |
| |
| __ mov(initial_sp, SP); |
| |
| if (comp_args_on_stack) { |
| __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size); |
| } |
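| // Round SP down to the required stack alignment for the compiled code. |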
| __ bic(SP, SP, StackAlignmentInBytes - 1); |
| |
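| // Copy each Java argument from the interpreter's stack (addressed via initial_sp) |
| // into the register or stack slot assigned by the compiled calling convention. |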
| for (int i = 0; i < total_args_passed; i++) { |
| if (sig_bt[i] == T_VOID) { |
| assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); |
| continue; |
| } |
| assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered"); |
| int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i); |
| |
| VMReg r_1 = regs[i].first(); |
| VMReg r_2 = regs[i].second(); |
| if (r_1->is_stack()) { |
| int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size; |
| if (!r_2->is_valid()) { |
| __ ldr(tmp, Address(initial_sp, arg_offset)); |
| __ str(tmp, Address(SP, stack_offset)); |
| } else { |
| __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize)); |
| __ str(tmp, Address(SP, stack_offset)); |
| __ ldr(tmp, Address(initial_sp, arg_offset)); |
| __ str(tmp, Address(SP, stack_offset + wordSize)); |
| } |
| } else if (r_1->is_Register()) { |
| if (!r_2->is_valid()) { |
| __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset)); |
| } else { |
| __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize)); |
| __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset)); |
| } |
| } else if (r_1->is_FloatRegister()) { |
| #ifdef __SOFTFP__ |
| ShouldNotReachHere(); |
| #endif // __SOFTFP__ |
| if (!r_2->is_valid()) { |
| __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset)); |
| } else { |
| __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize)); |
| } |
| } else { |
| assert(!r_1->is_valid() && !r_2->is_valid(), "must be"); |
| } |
| } |
| |
| // restore Rmethod (scratched for initial_sp) |
| __ ldr(Rmethod, callee_target_addr); |
| __ ldr(PC, Address(Rmethod, Method::from_compiled_offset())); |
| |
| #endif // AARCH64 |
| } |
| |
| static void gen_c2i_adapter(MacroAssembler *masm, |
| int total_args_passed, int comp_args_on_stack, |
| const BasicType *sig_bt, const VMRegPair *regs, |
| Label& skip_fixup) { |
| // TODO: ARM - maybe we can use stm to deoptimize arguments |
| const Register tmp = Rtemp; |
| |
| patch_callers_callsite(masm); |
| __ bind(skip_fixup); |
| |
| __ mov(Rsender_sp, SP); // not yet saved |
| |
| #ifdef AARCH64 |
| |
| int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes); |
| if (extraspace) { |
| __ sub(SP, SP, extraspace); |
| } |
| |
| for (int i = 0; i < total_args_passed; i++) { |
| if (sig_bt[i] == T_VOID) { |
| assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); |
| continue; |
| } |
| |
| int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1; |
| Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i)); |
| |
| VMReg r = regs[i].first(); |
| bool full_word = regs[i].second()->is_valid(); |
| |
| if (r->is_stack()) { |
| if (full_word) { |
| __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace)); |
| __ str(tmp, dest_addr); |
| } else { |
| __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace)); |
| __ str_w(tmp, dest_addr); |
| } |
| } else if (r->is_Register()) { |
| if (full_word) { |
| __ str(r->as_Register(), dest_addr); |
| } else { |
| __ str_w(r->as_Register(), dest_addr); |
| } |
| } else if (r->is_FloatRegister()) { |
| if (sig_bt[i] == T_DOUBLE) { |
| __ str_d(r->as_FloatRegister(), dest_addr); |
| } else { |
| __ str_s(r->as_FloatRegister(), dest_addr); |
| } |
| } else { |
| assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be"); |
| } |
| } |
| |
| __ mov(Rparams, SP); |
| |
| __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset())); |
| __ br(tmp); |
| |
| #else |
| |
| int extraspace = total_args_passed * Interpreter::stackElementSize; |
| if (extraspace) { |
| __ sub_slow(SP, SP, extraspace); |
| } |
| |
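| // Spill each argument from its compiled-convention location into the |
| // interpreter's argument area just allocated below the original SP. |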
| for (int i = 0; i < total_args_passed; i++) { |
| if (sig_bt[i] == T_VOID) { |
| assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); |
| continue; |
| } |
| int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize; |
| |
| VMReg r_1 = regs[i].first(); |
| VMReg r_2 = regs[i].second(); |
| if (r_1->is_stack()) { |
| int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; |
| if (!r_2->is_valid()) { |
| __ ldr(tmp, Address(SP, arg_offset)); |
| __ str(tmp, Address(SP, stack_offset)); |
| } else { |
| __ ldr(tmp, Address(SP, arg_offset)); |
| __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize)); |
| __ ldr(tmp, Address(SP, arg_offset + wordSize)); |
| __ str(tmp, Address(SP, stack_offset)); |
| } |
| } else if (r_1->is_Register()) { |
| if (!r_2->is_valid()) { |
| __ str(r_1->as_Register(), Address(SP, stack_offset)); |
| } else { |
| __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize)); |
| __ str(r_2->as_Register(), Address(SP, stack_offset)); |
| } |
| } else if (r_1->is_FloatRegister()) { |
| #ifdef __SOFTFP__ |
| ShouldNotReachHere(); |
| #endif // __SOFTFP__ |
| if (!r_2->is_valid()) { |
| __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset)); |
| } else { |
| __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize)); |
| } |
| } else { |
| assert(!r_1->is_valid() && !r_2->is_valid(), "must be"); |
| } |
| } |
| |
| __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset())); |
| |
| #endif // AARCH64 |
| } |
| |
| AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, |
| int total_args_passed, |
| int comp_args_on_stack, |
| const BasicType *sig_bt, |
| const VMRegPair *regs, |
| AdapterFingerPrint* fingerprint) { |
| address i2c_entry = __ pc(); |
| gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); |
| |
| address c2i_unverified_entry = __ pc(); |
| Label skip_fixup; |
| const Register receiver = R0; |
| const Register holder_klass = Rtemp; // XXX should be OK for C2 but not 100% sure |
| const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4); |
| |
| __ load_klass(receiver_klass, receiver); |
| __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset())); |
| __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_method_offset())); |
| __ cmp(receiver_klass, holder_klass); |
| |
| #ifdef AARCH64 |
| Label ic_miss; |
| __ b(ic_miss, ne); |
| __ ldr(Rtemp, Address(Rmethod, Method::code_offset())); |
| __ cbz(Rtemp, skip_fixup); |
| __ bind(ic_miss); |
| __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); |
| #else |
| __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq); |
| __ cmp(Rtemp, 0, eq); |
| __ b(skip_fixup, eq); |
| __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne); |
| #endif // AARCH64 |
| |
| address c2i_entry = __ pc(); |
| gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); |
| |
| __ flush(); |
| return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); |
| } |
| |
| |
| static int reg2offset_in(VMReg r) { |
| // Account for saved FP and LR |
| return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize; |
| } |
| |
| static int reg2offset_out(VMReg r) { |
| return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; |
| } |
| |
| |
| static void verify_oop_args(MacroAssembler* masm, |
| const methodHandle& method, |
| const BasicType* sig_bt, |
| const VMRegPair* regs) { |
| Register temp_reg = Rmethod; // not part of any compiled calling seq |
| if (VerifyOops) { |
| for (int i = 0; i < method->size_of_parameters(); i++) { |
| if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { |
| VMReg r = regs[i].first(); |
| assert(r->is_valid(), "bad oop arg"); |
| if (r->is_stack()) { |
| __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); |
| __ verify_oop(temp_reg); |
| } else { |
| __ verify_oop(r->as_Register()); |
| } |
| } |
| } |
| } |
| } |
| |
| static void gen_special_dispatch(MacroAssembler* masm, |
| const methodHandle& method, |
| const BasicType* sig_bt, |
| const VMRegPair* regs) { |
| verify_oop_args(masm, method, sig_bt, regs); |
| vmIntrinsics::ID iid = method->intrinsic_id(); |
| |
| // Now write the args into the outgoing interpreter space |
| bool has_receiver = false; |
| Register receiver_reg = noreg; |
| int member_arg_pos = -1; |
| Register member_reg = noreg; |
| int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); |
| if (ref_kind != 0) { |
| member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument |
| member_reg = Rmethod; // known to be free at this point |
| has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); |
| } else if (iid == vmIntrinsics::_invokeBasic) { |
| has_receiver = true; |
| } else { |
| fatal("unexpected intrinsic id %d", iid); |
| } |
| |
| if (member_reg != noreg) { |
| // Load the member_arg into register, if necessary. |
| SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); |
| VMReg r = regs[member_arg_pos].first(); |
| if (r->is_stack()) { |
| __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); |
| } else { |
| // no data motion is needed |
| member_reg = r->as_Register(); |
| } |
| } |
| |
| if (has_receiver) { |
| // Make sure the receiver is loaded into a register. |
| assert(method->size_of_parameters() > 0, "oob"); |
| assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); |
| VMReg r = regs[0].first(); |
| assert(r->is_valid(), "bad receiver arg"); |
| if (r->is_stack()) { |
| // Porting note: This assumes that compiled calling conventions always |
| // pass the receiver oop in a register. If this is not true on some |
| // platform, pick a temp and load the receiver from stack. |
| assert(false, "receiver always in a register"); |
| receiver_reg = j_rarg0; // known to be free at this point |
| __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); |
| } else { |
| // no data motion is needed |
| receiver_reg = r->as_Register(); |
| } |
| } |
| |
| // Figure out which address we are really jumping to: |
| MethodHandles::generate_method_handle_dispatch(masm, iid, |
| receiver_reg, member_reg, /*for_compiler_entry:*/ true); |
| } |
| |
| // --------------------------------------------------------------------------- |
| // Generate a native wrapper for a given method. The method takes arguments |
| // in the Java compiled code convention, marshals them to the native |
| // convention (handlizes oops, etc), transitions to native, makes the call, |
| // returns to java state (possibly blocking), unhandlizes any result and |
| // returns. |
| nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, |
| const methodHandle& method, |
| int compile_id, |
| BasicType* in_sig_bt, |
| VMRegPair* in_regs, |
| BasicType ret_type) { |
| if (method->is_method_handle_intrinsic()) { |
| vmIntrinsics::ID iid = method->intrinsic_id(); |
| intptr_t start = (intptr_t)__ pc(); |
| int vep_offset = ((intptr_t)__ pc()) - start; |
| gen_special_dispatch(masm, |
| method, |
| in_sig_bt, |
| in_regs); |
| int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period |
| __ flush(); |
| int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually |
| return nmethod::new_native_nmethod(method, |
| compile_id, |
| masm->code(), |
| vep_offset, |
| frame_complete, |
| stack_slots / VMRegImpl::slots_per_word, |
| in_ByteSize(-1), |
| in_ByteSize(-1), |
| (OopMapSet*)NULL); |
| } |
| // Arguments for JNI method include JNIEnv and Class if static |
| |
| // Usage of Rtemp should be OK since it is scratched by the native call |
| |
| bool is_static = method->is_static(); |
| |
| const int total_in_args = method->size_of_parameters(); |
| int total_c_args = total_in_args + 1; |
| if (is_static) { |
| total_c_args++; |
| } |
| |
| BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); |
| VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); |
| |
| int argc = 0; |
| out_sig_bt[argc++] = T_ADDRESS; |
| if (is_static) { |
| out_sig_bt[argc++] = T_OBJECT; |
| } |
| |
| int i; |
| for (i = 0; i < total_in_args; i++) { |
| out_sig_bt[argc++] = in_sig_bt[i]; |
| } |
| |
| int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); |
| int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; |
| // Since object arguments need to be wrapped, we must preserve space |
| // for those object arguments which come in registers (at most GPR_PARAMS) |
| // plus one more slot for the Klass handle (for static methods) |
| int oop_handle_offset = stack_slots; |
| stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word; |
| |
| // Plus a lock if needed |
| int lock_slot_offset = 0; |
| if (method->is_synchronized()) { |
| lock_slot_offset = stack_slots; |
| assert(sizeof(BasicLock) == wordSize, "adjust this code"); |
| stack_slots += VMRegImpl::slots_per_word; |
| } |
| |
| // Space to save return address and FP |
| stack_slots += 2 * VMRegImpl::slots_per_word; |
| |
| // Calculate the final stack size taking account of alignment |
| stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size); |
| int stack_size = stack_slots * VMRegImpl::stack_slot_size; |
| int lock_slot_fp_offset = stack_size - 2 * wordSize - |
| lock_slot_offset * VMRegImpl::stack_slot_size; |
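| // Distance from FP down to the lock slot; used below as sub(disp_hdr, FP, lock_slot_fp_offset). |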
| |
| // Unverified entry point |
| address start = __ pc(); |
| |
| // Inline cache check, same as in C1_MacroAssembler::inline_cache_check() |
| const Register receiver = R0; // see receiverOpr() |
| __ load_klass(Rtemp, receiver); |
| __ cmp(Rtemp, Ricklass); |
| Label verified; |
| |
| __ b(verified, eq); // jump over alignment no-ops too |
| __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); |
| __ align(CodeEntryAlignment); |
| |
| // Verified entry point |
| __ bind(verified); |
| int vep_offset = __ pc() - start; |
| |
| #ifdef AARCH64 |
| // Extra nop for MT-safe patching in NativeJump::patch_verified_entry |
| __ nop(); |
| #endif // AARCH64 |
| |
| if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { |
| // Object.hashCode, System.identityHashCode can pull the hashCode from the header word |
| // instead of doing a full VM transition once it's been computed. |
| Label slow_case; |
| const Register obj_reg = R0; |
| |
| // Unlike Object.hashCode, System.identityHashCode is a static method and |
| // gets the object as an argument instead of as the receiver. |
| if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) { |
| assert(method->is_static(), "method should be static"); |
| // return 0 for null reference input, return val = R0 = obj_reg = 0 |
| #ifdef AARCH64 |
| Label Continue; |
| __ cbnz(obj_reg, Continue); |
| __ ret(); |
| __ bind(Continue); |
| #else |
| __ cmp(obj_reg, 0); |
| __ bx(LR, eq); |
| #endif |
| } |
| |
| __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes())); |
| |
| assert(markOopDesc::unlocked_value == 1, "adjust this code"); |
| __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case); |
| |
| if (UseBiasedLocking) { |
| assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code"); |
| __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case); |
| } |
| |
| #ifdef AARCH64 |
| __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place); |
| __ b(slow_case, eq); |
| __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift); |
| __ ret(); |
| #else |
| __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place); |
| __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne); |
| __ bx(LR, ne); |
| #endif // AARCH64 |
| |
| __ bind(slow_case); |
| } |
| |
| // Bang stack pages |
| __ arm_stack_overflow_check(stack_size, Rtemp); |
| |
| // Setup frame linkage |
| __ raw_push(FP, LR); |
| __ mov(FP, SP); |
| __ sub_slow(SP, SP, stack_size - 2*wordSize); |
| |
| int frame_complete = __ pc() - start; |
| |
| OopMapSet* oop_maps = new OopMapSet(); |
| OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); |
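| // JNIEnv* (and the class mirror for static methods) precede the Java arguments in the native signature. |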
| const int extra_args = is_static ? 2 : 1; |
| int receiver_offset = -1; |
| int fp_regs_in_arguments = 0; |
| |
| for (i = total_in_args; --i >= 0; ) { |
| switch (in_sig_bt[i]) { |
| case T_ARRAY: |
| case T_OBJECT: { |
| VMReg src = in_regs[i].first(); |
| VMReg dst = out_regs[i + extra_args].first(); |
| if (src->is_stack()) { |
| assert(dst->is_stack(), "must be"); |
| assert(i != 0, "Incoming receiver is always in a register"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src))); |
| __ cmp(Rtemp, 0); |
| #ifdef AARCH64 |
| __ add(Rtemp, FP, reg2offset_in(src)); |
| __ csel(Rtemp, ZR, Rtemp, eq); |
| #else |
| __ add(Rtemp, FP, reg2offset_in(src), ne); |
| #endif // AARCH64 |
| __ str(Rtemp, Address(SP, reg2offset_out(dst))); |
| int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots(); |
| map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); |
| } else { |
| int offset = oop_handle_offset * VMRegImpl::stack_slot_size; |
| __ str(src->as_Register(), Address(SP, offset)); |
| map->set_oop(VMRegImpl::stack2reg(oop_handle_offset)); |
| if ((i == 0) && (!is_static)) { |
| receiver_offset = offset; |
| } |
| oop_handle_offset += VMRegImpl::slots_per_word; |
| |
| #ifdef AARCH64 |
| __ cmp(src->as_Register(), 0); |
| __ add(Rtemp, SP, offset); |
| __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq); |
| if (dst->is_stack()) { |
| __ str(Rtemp, Address(SP, reg2offset_out(dst))); |
| } |
| #else |
| if (dst->is_stack()) { |
| __ movs(Rtemp, src->as_Register()); |
| __ add(Rtemp, SP, offset, ne); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst))); |
| } else { |
| __ movs(dst->as_Register(), src->as_Register()); |
| __ add(dst->as_Register(), SP, offset, ne); |
| } |
| #endif // AARCH64 |
| } |
| } |
| |
| case T_VOID: |
| break; |
| |
| #ifdef AARCH64 |
| case T_FLOAT: |
| case T_DOUBLE: { |
| VMReg src = in_regs[i].first(); |
| VMReg dst = out_regs[i + extra_args].first(); |
| if (src->is_stack()) { |
| assert(dst->is_stack(), "must be"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst))); |
| } else { |
| assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be"); |
| assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be"); |
| fp_regs_in_arguments++; |
| } |
| break; |
| } |
| #else // AARCH64 |
| |
| #ifdef __SOFTFP__ |
| case T_DOUBLE: |
| #endif |
| case T_LONG: { |
| VMReg src_1 = in_regs[i].first(); |
| VMReg src_2 = in_regs[i].second(); |
| VMReg dst_1 = out_regs[i + extra_args].first(); |
| VMReg dst_2 = out_regs[i + extra_args].second(); |
| #if (ALIGN_WIDE_ARGUMENTS == 0) |
| // The C convention can mix a register and a stack slot for a |
| // 64-bit native argument. |
| |
| // Note: the following code should work independently of whether |
| // the Java calling convention follows the C convention or whether |
| // it aligns 64-bit values. |
| if (dst_2->is_Register()) { |
| if (src_1->as_Register() != dst_1->as_Register()) { |
| assert(src_1->as_Register() != dst_2->as_Register() && |
| src_2->as_Register() != dst_2->as_Register(), "must be"); |
| __ mov(dst_2->as_Register(), src_2->as_Register()); |
| __ mov(dst_1->as_Register(), src_1->as_Register()); |
| } else { |
| assert(src_2->as_Register() == dst_2->as_Register(), "must be"); |
| } |
| } else if (src_2->is_Register()) { |
| if (dst_1->is_Register()) { |
| // dst mixes a register and a stack slot |
| assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be"); |
| assert(src_1->as_Register() != dst_1->as_Register(), "must be"); |
| __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2))); |
| __ mov(dst_1->as_Register(), src_1->as_Register()); |
| } else { |
| // registers to stack slots |
| assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be"); |
| __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1))); |
| __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2))); |
| } |
| } else if (src_1->is_Register()) { |
| if (dst_1->is_Register()) { |
| // src and dst must be R3 + stack slot |
| assert(dst_1->as_Register() == src_1->as_Register(), "must be"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src_2))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst_2))); |
| } else { |
| // <R3,stack> -> <stack,stack> |
| assert(dst_2->is_stack() && src_2->is_stack(), "must be"); |
| __ ldr(LR, Address(FP, reg2offset_in(src_2))); |
| __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1))); |
| __ str(LR, Address(SP, reg2offset_out(dst_2))); |
| } |
| } else { |
| assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src_1))); |
| __ ldr(LR, Address(FP, reg2offset_in(src_2))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst_1))); |
| __ str(LR, Address(SP, reg2offset_out(dst_2))); |
| } |
| #else // ALIGN_WIDE_ARGUMENTS |
| if (src_1->is_stack()) { |
| assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src_1))); |
| __ ldr(LR, Address(FP, reg2offset_in(src_2))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst_1))); |
| __ str(LR, Address(SP, reg2offset_out(dst_2))); |
| } else if (dst_1->is_stack()) { |
| assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be"); |
| __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1))); |
| __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2))); |
| } else if (src_1->as_Register() == dst_1->as_Register()) { |
| assert(src_2->as_Register() == dst_2->as_Register(), "must be"); |
| } else { |
| assert(src_1->as_Register() != dst_2->as_Register() && |
| src_2->as_Register() != dst_2->as_Register(), "must be"); |
| __ mov(dst_2->as_Register(), src_2->as_Register()); |
| __ mov(dst_1->as_Register(), src_1->as_Register()); |
| } |
| #endif // ALIGN_WIDE_ARGUMENTS |
| break; |
| } |
| |
| #if (!defined __SOFTFP__ && !defined __ABI_HARD__) |
| case T_FLOAT: { |
| VMReg src = in_regs[i].first(); |
| VMReg dst = out_regs[i + extra_args].first(); |
| if (src->is_stack()) { |
| assert(dst->is_stack(), "must be"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst))); |
| } else if (dst->is_stack()) { |
| __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst))); |
| } else { |
| assert(src->is_FloatRegister() && dst->is_Register(), "must be"); |
| __ fmrs(dst->as_Register(), src->as_FloatRegister()); |
| } |
| break; |
| } |
| |
| case T_DOUBLE: { |
| VMReg src_1 = in_regs[i].first(); |
| VMReg src_2 = in_regs[i].second(); |
| VMReg dst_1 = out_regs[i + extra_args].first(); |
| VMReg dst_2 = out_regs[i + extra_args].second(); |
| if (src_1->is_stack()) { |
| assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src_1))); |
| __ ldr(LR, Address(FP, reg2offset_in(src_2))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst_1))); |
| __ str(LR, Address(SP, reg2offset_out(dst_2))); |
| } else if (dst_1->is_stack()) { |
| assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be"); |
| __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1))); |
| #if (ALIGN_WIDE_ARGUMENTS == 0) |
| } else if (dst_2->is_stack()) { |
| assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned |
| // double register must go into R3 + one stack slot |
| __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister()); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst_2))); |
| #endif |
| } else { |
| assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be"); |
| __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister()); |
| } |
| break; |
| } |
| #endif // __SOFTFP__ |
| |
| #ifdef __ABI_HARD__ |
| case T_FLOAT: { |
| VMReg src = in_regs[i].first(); |
| VMReg dst = out_regs[i + extra_args].first(); |
| if (src->is_stack()) { |
| if (dst->is_stack()) { |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst))); |
| } else { |
| // The C2 Java calling convention does not populate S14 and S15, therefore |
| // those need to be loaded from the stack here |
| __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src))); |
| fp_regs_in_arguments++; |
| } |
| } else { |
| assert(src->is_FloatRegister(), "must be"); |
| fp_regs_in_arguments++; |
| } |
| break; |
| } |
| case T_DOUBLE: { |
| VMReg src_1 = in_regs[i].first(); |
| VMReg src_2 = in_regs[i].second(); |
| VMReg dst_1 = out_regs[i + extra_args].first(); |
| VMReg dst_2 = out_regs[i + extra_args].second(); |
| if (src_1->is_stack()) { |
| if (dst_1->is_stack()) { |
| assert(dst_2->is_stack(), "must be"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src_1))); |
| __ ldr(LR, Address(FP, reg2offset_in(src_2))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst_1))); |
| __ str(LR, Address(SP, reg2offset_out(dst_2))); |
| } else { |
| // The C2 Java calling convention does not populate S14 and S15, therefore |
| // those need to be loaded from the stack here |
| __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1))); |
| fp_regs_in_arguments += 2; |
| } |
| } else { |
| assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be"); |
| fp_regs_in_arguments += 2; |
| } |
| break; |
| } |
| #endif // __ABI_HARD__ |
| #endif // AARCH64 |
| |
| default: { |
| assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args"); |
| VMReg src = in_regs[i].first(); |
| VMReg dst = out_regs[i + extra_args].first(); |
| if (src->is_stack()) { |
| assert(dst->is_stack(), "must be"); |
| __ ldr(Rtemp, Address(FP, reg2offset_in(src))); |
| __ str(Rtemp, Address(SP, reg2offset_out(dst))); |
| } else if (dst->is_stack()) { |
| __ str(src->as_Register(), Address(SP, reg2offset_out(dst))); |
| } else { |
| assert(src->is_Register() && dst->is_Register(), "must be"); |
| __ mov(dst->as_Register(), src->as_Register()); |
| } |
| } |
| } |
| } |
| |
| // Get Klass mirror |
| int klass_offset = -1; |
| if (is_static) { |
| klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size; |
| __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror())); |
| __ add(c_rarg1, SP, klass_offset); |
| __ str(Rtemp, Address(SP, klass_offset)); |
| map->set_oop(VMRegImpl::stack2reg(oop_handle_offset)); |
| } |
| |
| // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame |
| int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp); |
| assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); |
| oop_maps->add_gc_map(pc_offset, map); |
| |
| #ifndef AARCH64 |
| // Order last_Java_pc store with the thread state transition (to _thread_in_native) |
| __ membar(MacroAssembler::StoreStore, Rtemp); |
| #endif // !AARCH64 |
| |
| // RedefineClasses() tracing support for obsolete method entry |
| if (log_is_enabled(Trace, redefine, class, obsolete)) { |
| #ifdef AARCH64 |
| __ NOT_TESTED(); |
| #endif |
| __ save_caller_save_registers(); |
| __ mov(R0, Rthread); |
| __ mov_metadata(R1, method()); |
| __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1); |
| __ restore_caller_save_registers(); |
| } |
| |
| const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5); |
| const Register sync_obj = AARCH64_ONLY(R21) NOT_AARCH64(R6); |
| const Register disp_hdr = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11); |
| const Register tmp = AARCH64_ONLY(R23) NOT_AARCH64(R8); |
| |
| Label slow_lock, slow_lock_biased, lock_done, fast_lock, leave; |
| if (method->is_synchronized()) { |
| // The first argument is a handle to the sync object (a class or an instance) |
| __ ldr(sync_obj, Address(R1)); |
| // Remember the handle for the unlocking code |
| __ mov(sync_handle, R1); |
| |
| if(UseBiasedLocking) { |
| __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased); |
| } |
| |
| const Register mark = tmp; |
| #ifdef AARCH64 |
| __ sub(disp_hdr, FP, lock_slot_fp_offset); |
| assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); |
| |
| __ ldr(mark, sync_obj); |
| |
| // Test if object is already locked |
| assert(markOopDesc::unlocked_value == 1, "adjust this code"); |
| __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock); |
| |
| // Check for recursive lock |
| // See comments in InterpreterMacroAssembler::lock_object for |
| // explanations on the fast recursive locking check. |
| __ mov(Rtemp, SP); |
| __ sub(Rtemp, mark, Rtemp); |
| intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); |
| Assembler::LogicalImmediate imm(mask, false); |
| __ ands(Rtemp, Rtemp, imm); |
| __ b(slow_lock, ne); |
| |
| // Recursive locking: store 0 into a lock record |
| __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); |
| __ b(lock_done); |
| |
| __ bind(fast_lock); |
| __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); |
| |
| __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock); |
| #else |
| // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. |
| // That would be acceptable, as either the CAS or the slow-case path is taken in that case. |
| |
| __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes())); |
| __ sub(disp_hdr, FP, lock_slot_fp_offset); |
| __ tst(mark, markOopDesc::unlocked_value); |
| __ b(fast_lock, ne); |
| |
| // Check for recursive lock |
| // See comments in InterpreterMacroAssembler::lock_object for |
| // explanations on the fast recursive locking check. |
| // Check independently the low bits and the distance to SP |
| // -1- test low 2 bits |
| __ movs(Rtemp, AsmOperand(mark, lsl, 30)); |
| // -2- test (hdr - SP) if the low two bits are 0 |
| __ sub(Rtemp, mark, SP, eq); |
| __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq); |
| // If still 'eq' then recursive locking OK: set displaced header to 0 |
| __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq); |
| __ b(lock_done, eq); |
| __ b(slow_lock); |
| |
| __ bind(fast_lock); |
| __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); |
| |
| __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock); |
| #endif // AARCH64 |
| |
| __ bind(lock_done); |
| } |
| |
| // Get JNIEnv* |
| __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset())); |
| |
| // Perform thread state transition |
| __ mov(Rtemp, _thread_in_native); |
| #ifdef AARCH64 |
| // The stlr instruction is used to force all preceding writes to be observed prior to the thread state change |
| __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset())); |
| __ stlr_w(Rtemp, Rtemp2); |
| #else |
| __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset())); |
| #endif // AARCH64 |
| |
| // Finally, call the native method |
| __ call(method->native_function()); |
| |
| // Set FPSCR/FPCR to a known state |
| if (AlwaysRestoreFPU) { |
| __ restore_default_fp_mode(); |
| } |
| |
| // Do a safepoint check while thread is in transition state |
| InlinedAddress safepoint_state(SafepointSynchronize::address_of_state()); |
| Label call_safepoint_runtime, return_to_java; |
| __ mov(Rtemp, _thread_in_native_trans); |
| __ ldr_literal(R2, safepoint_state); |
| __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset())); |
| |
| // make sure the store is observed before reading the SafepointSynchronize state and further mem refs |
| __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp); |
| |
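| // R2 points to the SafepointSynchronize state word, R3 receives this thread's suspend |
| // flags; the cmp/cond_cmp pair below takes the runtime path if either a safepoint is in |
| // progress or a suspend request is pending. |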
| __ ldr_s32(R2, Address(R2)); |
| __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset())); |
| __ cmp(R2, SafepointSynchronize::_not_synchronized); |
| __ cond_cmp(R3, 0, eq); |
| __ b(call_safepoint_runtime, ne); |
| __ bind(return_to_java); |
| |
| // Perform thread state transition and reguard stack yellow pages if needed |
| Label reguard, reguard_done; |
| __ mov(Rtemp, _thread_in_Java); |
| __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset())); |
| __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset())); |
| |
| __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled); |
| __ b(reguard, eq); |
| __ bind(reguard_done); |
| |
| Label slow_unlock, unlock_done, retry; |
| if (method->is_synchronized()) { |
| __ ldr(sync_obj, Address(sync_handle)); |
| |
| if (UseBiasedLocking) { |
| __ biased_locking_exit(sync_obj, Rtemp, unlock_done); |
| // disp_hdr may not have been saved on entry with biased locking |
| __ sub(disp_hdr, FP, lock_slot_fp_offset); |
| } |
| |
| // See C1_MacroAssembler::unlock_object() for more comments |
| __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); |
| __ cbz(R2, unlock_done); |
| |
| __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock); |
| |
| __ bind(unlock_done); |
| } |
| |
| // Set last java frame and handle block to zero |
| __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset())); |
| __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM |
| |
| #ifdef AARCH64 |
| __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes())); |
| if (CheckJNICalls) { |
| __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset())); |
| } |
| |
| |
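| // Normalize the native return value: sub-word results may not come back properly |
| // sign- or zero-extended from the native code (an assumption about the native ABI), |
| // so extend them here to what the Java caller expects. |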
| switch (ret_type) { |
| case T_BOOLEAN: |
| __ tst(R0, 0xff); |
| __ cset(R0, ne); |
| break; |
| case T_CHAR : __ zero_extend(R0, R0, 16); break; |
| case T_BYTE : __ sign_extend(R0, R0, 8); break; |
| case T_SHORT : __ sign_extend(R0, R0, 16); break; |
| case T_INT : // fall through |
| case T_LONG : // fall through |
| case T_VOID : // fall through |
| case T_FLOAT : // fall through |
| case T_DOUBLE : /* nothing to do */ break; |
| case T_OBJECT : // fall through |
| case T_ARRAY : break; // See JNIHandles::resolve below |
| default: |
| ShouldNotReachHere(); |
| } |
| #else |
| __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes())); |
| if (CheckJNICalls) { |
| __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset())); |
| } |
| #endif // AARCH64 |
| |
| // Unbox the oop result, i.e. resolve the JNI handle in R0 (JNIHandles::resolve). |
| if (ret_type == T_OBJECT || ret_type == T_ARRAY) { |
| __ resolve_jobject(R0, // value |
| Rtemp, // tmp1 |
| R1_tmp); // tmp2 |
| } |
| |
| // Any exception pending? |
| __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset())); |
| __ mov(SP, FP); |
| |
| #ifdef AARCH64 |
| Label except; |
| __ cbnz(Rtemp, except); |
| __ raw_pop(FP, LR); |
| __ ret(); |
| |
| __ bind(except); |
| // Pop the frame and forward the exception. Rexception_pc contains return address. |
| __ raw_pop(FP, Rexception_pc); |
| #else |
| __ cmp(Rtemp, 0); |
| // Pop the frame and return if no exception pending |
| __ pop(RegisterSet(FP) | RegisterSet(PC), eq); |
| // Pop the frame and forward the exception. Rexception_pc contains return address. |
| __ ldr(FP, Address(SP, wordSize, post_indexed), ne); |
| __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne); |
| #endif // AARCH64 |
| __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp); |
| |
| // Safepoint operation and/or pending suspend request is in progress. |
| // Save the return values and call the runtime function by hand. |
| __ bind(call_safepoint_runtime); |
| push_result_registers(masm, ret_type); |
| __ mov(R0, Rthread); |
| __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); |
| pop_result_registers(masm, ret_type); |
| __ b(return_to_java); |
| |
| __ bind_literal(safepoint_state); |
| |
| // Reguard stack pages. Save native results around a call to C runtime. |
| __ bind(reguard); |
| push_result_registers(masm, ret_type); |
| __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); |
| pop_result_registers(masm, ret_type); |
| __ b(reguard_done); |
| |
| if (method->is_synchronized()) { |
| // Locking slow case |
| if (UseBiasedLocking) { |
| __ bind(slow_lock_biased); |
| __ sub(disp_hdr, FP, lock_slot_fp_offset); |
| } |
| |
| __ bind(slow_lock); |
| |
| push_param_registers(masm, fp_regs_in_arguments); |
| |
| // last_Java_frame is already set, so do call_VM manually; no exception can occur |
| __ mov(R0, sync_obj); |
| __ mov(R1, disp_hdr); |
| __ mov(R2, Rthread); |
| __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C)); |
| |
| pop_param_registers(masm, fp_regs_in_arguments); |
| |
| __ b(lock_done); |
| |
| // Unlocking slow case |
| __ bind(slow_unlock); |
| |
| push_result_registers(masm, ret_type); |
| |
| // Clear pending exception before reentering VM. |
| // Can store the oop in a register since it is a leaf call. |
| assert_different_registers(Rtmp_save1, sync_obj, disp_hdr); |
| __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset())); |
| Register zero = __ zero_register(Rtemp); |
| __ str(zero, Address(Rthread, Thread::pending_exception_offset())); |
| __ mov(R0, sync_obj); |
| __ mov(R1, disp_hdr); |
| __ mov(R2, Rthread); |
| __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); |
| __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset())); |
| |
| pop_result_registers(masm, ret_type); |
| |
| __ b(unlock_done); |
| } |
| |
| __ flush(); |
| return nmethod::new_native_nmethod(method, |
| compile_id, |
| masm->code(), |
| vep_offset, |
| frame_complete, |
| stack_slots / VMRegImpl::slots_per_word, |
| in_ByteSize(is_static ? klass_offset : receiver_offset), |
| in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size), |
| oop_maps); |
| } |
| |
| // This function returns the adjustment size (in number of words) to a c2i adapter |
| // activation for use during deoptimization. |
| int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { |
| int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords; |
| #ifdef AARCH64 |
| extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord); |
| #endif // AARCH64 |
| return extra_locals_size; |
| } |
| |
| |
| uint SharedRuntime::out_preserve_stack_slots() { |
| return 0; |
| } |
| |
| |
| //------------------------------generate_deopt_blob---------------------------- |
| void SharedRuntime::generate_deopt_blob() { |
| ResourceMark rm; |
| #ifdef AARCH64 |
| CodeBuffer buffer("deopt_blob", 1024+256, 1); |
| #else |
| CodeBuffer buffer("deopt_blob", 1024, 1024); |
| #endif |
| int frame_size_in_words; |
| OopMapSet* oop_maps; |
| int reexecute_offset; |
| int exception_in_tls_offset; |
| int exception_offset; |
| |
| MacroAssembler* masm = new MacroAssembler(&buffer); |
| Label cont; |
| const Register Rkind = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32-bit |
| const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6); |
| const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11); |
| assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp); |
| |
| address start = __ pc(); |
| |
| oop_maps = new OopMapSet(); |
| // LR saved by caller (can be live in c2 method) |
| |
| // A deopt is a case where LR may be live in the c2 nmethod. So it's |
| // not possible to call the deopt blob from the nmethod and pass the |
| // address of the deopt handler of the nmethod in LR. What happens |
| // now is that the caller of the deopt blob pushes the current |
| // address so the deopt blob doesn't have to do it. This way LR is |
| // preserved: it contains the live value from the nmethod and is |
| // saved at R14/R30_offset here. |
| OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true); |
| __ mov(Rkind, Deoptimization::Unpack_deopt); |
| __ b(cont); |
| |
| exception_offset = __ pc() - start; |
| |
| // Transfer Rexception_obj & Rexception_pc into TLS and fall through to the |
| // exception_in_tls_offset entry point. |
| __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset())); |
| __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset())); |
| // Force return value to NULL to avoid confusing the escape analysis |
| // logic. Everything is dead here anyway. |
| __ mov(R0, 0); |
| |
| exception_in_tls_offset = __ pc() - start; |
| |
| // Exception data is in JavaThread structure |
| // Patch the return address of the current frame |
| __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset())); |
| (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words); |
| { |
| const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure |
| __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset())); |
| } |
| __ mov(Rkind, Deoptimization::Unpack_exception); |
| __ b(cont); |
| |
| reexecute_offset = __ pc() - start; |
| |
| (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words); |
| __ mov(Rkind, Deoptimization::Unpack_reexecute); |
| |
| // Calculate UnrollBlock and save the result in Rublock |
| __ bind(cont); |
| __ mov(R0, Rthread); |
| __ mov(R1, Rkind); |
| |
| int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86) |
| assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); |
| __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)); |
| if (pc_offset == -1) { |
| pc_offset = __ offset(); |
| } |
| oop_maps->add_gc_map(pc_offset, map); |
| __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call |
| |
| __ mov(Rublock, R0); |
| |
| // Reload Rkind from the UnrollBlock (might have changed) |
| __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); |
| Label noException; |
| __ cmp_32(Rkind, Deoptimization::Unpack_exception); // Was exception pending? |
| __ b(noException, ne); |
| // handle exception case |
| #ifdef ASSERT |
| // assert that exception_pc is zero in tls |
| { Label L; |
| __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset())); |
| __ cbz(Rexception_pc, L); |
| __ stop("exception pc should be null"); |
| __ bind(L); |
| } |
| #endif |
| __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset())); |
| __ verify_oop(Rexception_obj); |
| { |
| const Register Rzero = __ zero_register(Rtemp); |
| __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset())); |
| } |
| |
| __ bind(noException); |
| |
| // This frame is going away. Fetch return value, so we can move it to |
| // a new frame. |
| __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); |
| #ifndef AARCH64 |
| __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); |
| #endif // !AARCH64 |
| #ifndef __SOFTFP__ |
| __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize)); |
| #endif |
| // pop frame |
| __ add(SP, SP, RegisterSaver::reg_save_size * wordSize); |
| |
| // Set initial stack state before pushing interpreter frames |
| __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); |
| __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); |
| __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); |
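| // At this point Rtemp holds the size of the deoptimized frame, R2 points to the array |
| // of frame pcs and R3 to the array of frame sizes from the UnrollBlock. |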
| |
| #ifdef AARCH64 |
| // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller. |
| // They are needed for correct stack walking during stack overflow handling. |
| // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block). |
| __ sub(Rtemp, Rtemp, 2*wordSize); |
| __ add(SP, SP, Rtemp, ex_uxtx); |
| __ raw_pop(FP, LR); |
| |
| #ifdef ASSERT |
| { Label L; |
| __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); |
| __ cmp(FP, Rtemp); |
| __ b(L, eq); |
| __ stop("FP restored from deoptimized frame does not match FP stored in unroll block"); |
| __ bind(L); |
| } |
| { Label L; |
| __ ldr(Rtemp, Address(R2)); |
| __ cmp(LR, Rtemp); |
| __ b(L, eq); |
| __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block"); |
| __ bind(L); |
| } |
| #endif // ASSERT |
| |
| #else |
| __ add(SP, SP, Rtemp); |
| #endif // AARCH64 |
| |
| #ifdef ASSERT |
| // Compilers generate code that bangs the stack by as much as the |
| // interpreter would need, so this stack banging should never |
| // trigger a fault. Verify that it does not on non-product builds. |
| // See if there is enough stack to push the deoptimized frames. |
| if (UseStackBanging) { |
| #ifndef AARCH64 |
| // The compiled method that we are deoptimizing was popped from the stack. |
| // If the stack bang results in a stack overflow, we don't return to the |
| // method that is being deoptimized. The stack overflow exception is |
| // propagated to the caller of the deoptimized method. Need to get the pc |
| // from the caller in LR and restore FP. |
| __ ldr(LR, Address(R2, 0)); |
| __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); |
| #endif // !AARCH64 |
| __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); |
| __ arm_stack_overflow_check(R8, Rtemp); |
| } |
| #endif |
| __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); |
| |
| #ifndef AARCH64 |
| // Pick up the initial fp we should save |
| // XXX Note: was ldr(FP, Address(FP)); |
| |
| // The compiler no longer uses FP as a frame pointer for the |
| // compiled code. It can be used by the register allocator in C2 or to |
| // remember the original SP for JSR292 call sites. |
| |
| // Hence, ldr(FP, Address(FP)) is probably not correct. As on x86, |
| // Deoptimization::fetch_unroll_info computes the right FP value and |
| // stores it in Rublock.initial_info. This mechanism is used for ARM as well. |
| __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); |
| #endif // !AARCH64 |
| |
| __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); |
| __ mov(Rsender, SP); |
| #ifdef AARCH64 |
| __ sub(SP, SP, Rtemp, ex_uxtx); |
| #else |
| __ sub(SP, SP, Rtemp); |
| #endif // AARCH64 |
| |
| // Push interpreter frames in a loop |
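| // Loop registers: R2 iterates over the frame pcs, R3 over the frame sizes, R8 counts the |
| // frames. Each iteration builds an interpreter frame skeleton (FP/LR pushed, sender SP |
| // and last-SP/stack-top slot initialized) that Deoptimization::unpack_frames fills in later. |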
| Label loop; |
| __ bind(loop); |
| __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc |
| __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size |
| |
| __ raw_push(FP, LR); // create new frame |
| __ mov(FP, SP); |
| __ sub(Rtemp, Rtemp, 2*wordSize); |
| |
| #ifdef AARCH64 |
| __ sub(SP, SP, Rtemp, ex_uxtx); |
| #else |
| __ sub(SP, SP, Rtemp); |
| #endif // AARCH64 |
| |
| __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); |
| #ifdef AARCH64 |
| __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); |
| #else |
| __ mov(LR, 0); |
| __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); |
| #endif // AARCH64 |
| |
| __ subs(R8, R8, 1); // decrement counter |
| __ mov(Rsender, SP); |
| __ b(loop, ne); |
| |
| // Re-push self-frame |
| __ ldr(LR, Address(R2)); |
| __ raw_push(FP, LR); |
| __ mov(FP, SP); |
| __ sub(SP, SP, (frame_size_in_words - 2) * wordSize); |
| |
| // Restore frame locals after moving the frame |
| __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); |
| #ifndef AARCH64 |
| __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); |
| #endif // !AARCH64 |
| |
| #ifndef __SOFTFP__ |
| __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize)); |
| #endif // !__SOFTFP__ |
| |
| #ifndef AARCH64 |
| #ifdef ASSERT |
| // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved) |
| { Label L; |
| __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); |
| __ cmp_32(Rkind, Rtemp); |
| __ b(L, eq); |
| __ stop("Rkind was overwritten"); |
| __ bind(L); |
| } |
| #endif |
| #endif |
| |
| // Call unpack_frames with proper arguments |
| __ mov(R0, Rthread); |
| __ mov(R1, Rkind); |
| |
| pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); |
| assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); |
| __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)); |
| if (pc_offset == -1) { |
| pc_offset = __ offset(); |
| } |
| oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0)); |
| __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call |
| |
| // Collect return values, pop self-frame and jump to interpreter |
| __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); |
| #ifndef AARCH64 |
| __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); |
| #endif // !AARCH64 |
| // Interpreter floats are controlled by __SOFTFP__, but the compiler's |
| // float return value registers are controlled by __ABI_HARD__. |
| // This matters for vfp-sflt builds. |
| #ifndef __SOFTFP__ |
| // Interpreter hard float |
| #ifdef __ABI_HARD__ |
| // Compiler float return value in FP registers |
| __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize)); |
| #else |
| // Compiler float return value is in integer registers; |
| // copy it to D0 for the interpreter (D0 <-- R0:R1). |
| __ fmdrr(D0_tos, R0, R1); |
| #endif |
| #endif // !__SOFTFP__ |
| __ mov(SP, FP); |
| |
| #ifdef AARCH64 |
| __ raw_pop(FP, LR); |
| __ ret(); |
| #else |
| __ pop(RegisterSet(FP) | RegisterSet(PC)); |
| #endif // AARCH64 |
| |
| __ flush(); |
| |
| _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, |
| reexecute_offset, frame_size_in_words); |
| _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); |
| } |
| |
| #ifdef COMPILER2 |
| |
| //------------------------------generate_uncommon_trap_blob-------------------- |
| // Ought to generate an ideal graph & compile, but here's some hand-written |
| // assembly instead. |
| void SharedRuntime::generate_uncommon_trap_blob() { |
| // allocate space for the code |
| ResourceMark rm; |
| |
| // setup code generation tools |
| int pad = VerifyThread ? 512 : 0; |
| #ifdef _LP64 |
| CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512); |
| #else |
| // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread) |
| // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread) |
| CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512); |
| #endif |
| // bypassed when code generation is useless |
| MacroAssembler* masm = new MacroAssembler(&buffer); |
| const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6); |
| const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11); |
| assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp); |
| |
| // |
| // This is the entry point for all traps the compiler takes when it thinks |
| // it cannot handle further execution of compiled code. In these cases the |
| // frame is deoptimized and converted into interpreter frames for |
| // execution. |
| // The steps taken by this frame are as follows: |
| // - push a fake "unpack_frame" |
| // - call the C routine Deoptimization::uncommon_trap (this function |
| // packs the current compiled frame into vframe arrays and returns |
| // information about the number and size of interpreter frames which |
| // are equivalent to the frame which is being deoptimized) |
| // - deallocate the "unpack_frame" |
| // - deallocate the deoptimization frame |
| // - in a loop using the information returned in the previous step |
| // push interpreter frames; |
| // - create a dummy "unpack_frame" |
| // - call the C routine: Deoptimization::unpack_frames (this function |
| // lays out values on the interpreter frame which was just created) |
| // - deallocate the dummy unpack_frame |
| // - return to the interpreter entry point |
| // |
| // Refer to the following methods for more information: |
| // - Deoptimization::uncommon_trap |
| // - Deoptimization::unpack_frames |
| |
| // the unloaded class index is in R0 (first parameter to this blob) |
| |
| __ raw_push(FP, LR); |
| __ set_last_Java_frame(SP, FP, false, Rtemp); |
| __ mov(R2, Deoptimization::Unpack_uncommon_trap); |
| __ mov(R1, R0); |
| __ mov(R0, Rthread); |
| __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)); |
| __ mov(Rublock, R0); |
| __ reset_last_Java_frame(Rtemp); |
| __ raw_pop(FP, LR); |
| |
| #ifdef ASSERT |
| { Label L; |
| __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); |
| __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap); |
| __ b(L, eq); |
| __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap"); |
| __ bind(L); |
| } |
| #endif |
| |
| |
| // Set initial stack state before pushing interpreter frames |
| __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); |
| __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); |
| __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); |
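| // As in the deopt blob: Rtemp = size of the deoptimized frame, R2 = array of frame pcs, |
| // R3 = array of frame sizes from the UnrollBlock. |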
| |
| #ifdef AARCH64 |
| // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller. |
| // They are needed for correct stack walking during stack overflow handling. |
| // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block). |
| __ sub(Rtemp, Rtemp, 2*wordSize); |
| __ add(SP, SP, Rtemp, ex_uxtx); |
| __ raw_pop(FP, LR); |
| |
| #ifdef ASSERT |
| { Label L; |
| __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); |
| __ cmp(FP, Rtemp); |
| __ b(L, eq); |
| __ stop("FP restored from deoptimized frame does not match FP stored in unroll block"); |
| __ bind(L); |
| } |
| { Label L; |
| __ ldr(Rtemp, Address(R2)); |
| __ cmp(LR, Rtemp); |
| __ b(L, eq); |
| __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block"); |
| __ bind(L); |
| } |
| #endif // ASSERT |
| |
| #else |
| __ add(SP, SP, Rtemp); |
| #endif // AARCH64 |
| |
| // See if there is enough stack to push the deoptimized frames. |
| #ifdef ASSERT |
| // Compilers generate code that bangs the stack by as much as the |
| // interpreter would need, so this stack banging should never |
| // trigger a fault. Verify that it does not on non-product builds. |
| if (UseStackBanging) { |
| #ifndef AARCH64 |
| // The compiled method that we are deoptimizing was popped from the stack. |
| // If the stack bang results in a stack overflow, we don't return to the |
| // method that is being deoptimized. The stack overflow exception is |
| // propagated to the caller of the deoptimized method. Need to get the pc |
| // from the caller in LR and restore FP. |
| __ ldr(LR, Address(R2, 0)); |
| __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); |
| #endif // !AARCH64 |
| __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); |
| __ arm_stack_overflow_check(R8, Rtemp); |
| } |
| #endif |
| __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); |
| __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); |
| __ mov(Rsender, SP); |
| #ifdef AARCH64 |
| __ sub(SP, SP, Rtemp, ex_uxtx); |
| #else |
| __ sub(SP, SP, Rtemp); |
| #endif |
| #ifndef AARCH64 |
| // __ ldr(FP, Address(FP)); |
| __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); |
| #endif // !AARCH64 |
| |
| // Push interpreter frames in a loop |
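| // Same loop structure as in the deopt blob: build one interpreter frame skeleton per |
| // entry, counting down R8. |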
| Label loop; |
| __ bind(loop); |
| __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc |
| __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size |
| |
| __ raw_push(FP, LR); // create new frame |
| __ mov(FP, SP); |
| __ sub(Rtemp, Rtemp, 2*wordSize); |
| |
| #ifdef AARCH64 |
| __ sub(SP, SP, Rtemp, ex_uxtx); |
| #else |
| __ sub(SP, SP, Rtemp); |
| #endif // AARCH64 |
| |
| __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); |
| #ifdef AARCH64 |
| __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); |
| #else |
| __ mov(LR, 0); |
| __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); |
| #endif // AARCH64 |
| __ subs(R8, R8, 1); // decrement counter |
| __ mov(Rsender, SP); |
| __ b(loop, ne); |
| |
| // Re-push self-frame |
| __ ldr(LR, Address(R2)); |
| __ raw_push(FP, LR); |
| __ mov(FP, SP); |
| |
| // Call unpack_frames with proper arguments |
| __ mov(R0, Rthread); |
| __ mov(R1, Deoptimization::Unpack_uncommon_trap); |
| __ set_last_Java_frame(SP, FP, false, Rtemp); |
| __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)); |
| // oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0)); |
| __ reset_last_Java_frame(Rtemp); |
| |
| __ mov(SP, FP); |
| #ifdef AARCH64 |
| __ raw_pop(FP, LR); |
| __ ret(); |
| #else |
| __ pop(RegisterSet(FP) | RegisterSet(PC)); |
| #endif |
| |
| masm->flush(); |
| _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */); |
| } |
| |
| #endif // COMPILER2 |
| |
| //------------------------------generate_handler_blob-------------------------- |
| // |
| // Generate a special Compile2Runtime blob that saves all registers, |
| // sets up an oopmap, and calls safepoint code to stop the compiled code |
| // at a safepoint. |
| // |
| SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { |
| assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); |
| |
| ResourceMark rm; |
| CodeBuffer buffer("handler_blob", 256, 256); |
| int frame_size_words; |
| OopMapSet* oop_maps; |
| |
| bool cause_return = (poll_type == POLL_AT_RETURN); |
| |
| MacroAssembler* masm = new MacroAssembler(&buffer); |
| address start = __ pc(); |
| oop_maps = new OopMapSet(); |
| |
| if (!cause_return) { |
| #ifdef AARCH64 |
| __ raw_push(LR, LR); |
| #else |
| __ sub(SP, SP, 4); // make room for LR which may still be live |
| // here if we are coming from a c2 method |
| #endif // AARCH64 |
| } |
| |
| OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return); |
| if (!cause_return) { |
| // update saved PC with correct value |
| // need 2 steps because LR can be live in c2 method |
| __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset())); |
| __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize)); |
| } |
| |
| __ mov(R0, Rthread); |
| int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86) |
| assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); |
| __ call(call_ptr); |
| if (pc_offset == -1) { |
| pc_offset = __ offset(); |
| } |
| oop_maps->add_gc_map(pc_offset, map); |
| __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call |
| |
| // Check for pending exception |
| __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset())); |
| __ cmp(Rtemp, 0); |
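| // Note: the conditional return/exception dispatch below relies on these flags, so |
| // restore_live_registers is assumed not to clobber the condition flags. |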
| |
| #ifdef AARCH64 |
| RegisterSaver::restore_live_registers(masm, cause_return); |
| Register ret_addr = cause_return ? LR : Rtemp; |
| if (!cause_return) { |
| __ raw_pop(FP, ret_addr); |
| } |
| |
| Label throw_exception; |
| __ b(throw_exception, ne); |
| __ br(ret_addr); |
| |
| __ bind(throw_exception); |
| __ mov(Rexception_pc, ret_addr); |
| #else // AARCH64 |
| if (!cause_return) { |
| RegisterSaver::restore_live_registers(masm, false); |
| __ pop(PC, eq); |
| __ pop(Rexception_pc); |
| } else { |
| RegisterSaver::restore_live_registers(masm); |
| __ bx(LR, eq); |
| __ mov(Rexception_pc, LR); |
| } |
| #endif // AARCH64 |
| |
| __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp); |
| |
| __ flush(); |
| |
| return SafepointBlob::create(&buffer, oop_maps, frame_size_words); |
| } |
| |
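| //------------------------------generate_resolve_blob-------------------------- |
| // |
| // Generate a blob used to resolve call sites: save all registers, call 'destination' |
| // in the VM, store the resolved Method* and the target entry point into the saved |
| // register area (Rmethod and Rtemp slots), then restore registers and jump to the |
| // target. If an exception is pending, forward it instead. |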
| RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { |
| assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); |
| |
| ResourceMark rm; |
| CodeBuffer buffer(name, 1000, 512); |
| int frame_size_words; |
| OopMapSet *oop_maps; |
| int frame_complete; |
| |
| MacroAssembler* masm = new MacroAssembler(&buffer); |
| Label pending_exception; |
| |
| int start = __ offset(); |
| |
| oop_maps = new OopMapSet(); |
| OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words); |
| |
| frame_complete = __ offset(); |
| |
| __ mov(R0, Rthread); |
| |
| int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); |
| assert(start == 0, "warning: start differs from code_begin"); |
| __ call(destination); |
| if (pc_offset == -1) { |
| pc_offset = __ offset(); |
| } |
| oop_maps->add_gc_map(pc_offset, map); |
| __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call |
| |
| __ ldr(R1, Address(Rthread, Thread::pending_exception_offset())); |
| __ cbnz(R1, pending_exception); |
| |
| // Overwrite saved register values |
| |
| // Place metadata result of VM call into Rmethod |
| __ get_vm_result_2(R1, Rtemp); |
| __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize)); |
| |
| // Place target address (VM call result) into Rtemp |
| __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize)); |
| |
| RegisterSaver::restore_live_registers(masm); |
| __ jump(Rtemp); |
| |
| __ bind(pending_exception); |
| |
| RegisterSaver::restore_live_registers(masm); |
| const Register Rzero = __ zero_register(Rtemp); |
| __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset())); |
| __ mov(Rexception_pc, LR); |
| __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp); |
| |
| __ flush(); |
| |
| return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); |
| } |