| /* |
| * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| * |
| */ |
| |
| #include "precompiled.hpp" |
| #include "asm/assembler.hpp" |
| #include "asm/assembler.inline.hpp" |
| #include "asm/macroAssembler.hpp" |
| #include "ci/ciEnv.hpp" |
| #include "code/nativeInst.hpp" |
| #include "compiler/disassembler.hpp" |
| #include "gc/shared/cardTableModRefBS.hpp" |
| #include "gc/shared/collectedHeap.inline.hpp" |
| #include "interpreter/interpreter.hpp" |
| #include "memory/resourceArea.hpp" |
| #include "oops/klass.inline.hpp" |
| #include "prims/methodHandles.hpp" |
| #include "runtime/biasedLocking.hpp" |
| #include "runtime/interfaceSupport.hpp" |
| #include "runtime/objectMonitor.hpp" |
| #include "runtime/os.hpp" |
| #include "runtime/sharedRuntime.hpp" |
| #include "runtime/stubRoutines.hpp" |
| #include "utilities/macros.hpp" |
| #if INCLUDE_ALL_GCS |
| #include "gc/g1/g1CollectedHeap.inline.hpp" |
| #include "gc/g1/g1SATBCardTableModRefBS.hpp" |
| #include "gc/g1/heapRegion.hpp" |
| #endif |
| |
| // Implementation of AddressLiteral |
| |
| void AddressLiteral::set_rspec(relocInfo::relocType rtype) { |
| switch (rtype) { |
| case relocInfo::oop_type: |
    // Oops are a special case. Normally they would be in their own section,
    // but in cases like icBuffer they are literals in the code stream for
    // which we don't have a section. We use relocInfo::none so that we get a
    // literal address, which is always patchable.
| break; |
| case relocInfo::external_word_type: |
| _rspec = external_word_Relocation::spec(_target); |
| break; |
| case relocInfo::internal_word_type: |
| _rspec = internal_word_Relocation::spec(_target); |
| break; |
| case relocInfo::opt_virtual_call_type: |
| _rspec = opt_virtual_call_Relocation::spec(); |
| break; |
| case relocInfo::static_call_type: |
| _rspec = static_call_Relocation::spec(); |
| break; |
| case relocInfo::runtime_call_type: |
| _rspec = runtime_call_Relocation::spec(); |
| break; |
| case relocInfo::poll_type: |
| case relocInfo::poll_return_type: |
| _rspec = Relocation::spec_simple(rtype); |
| break; |
| case relocInfo::none: |
| break; |
| default: |
| ShouldNotReachHere(); |
| break; |
| } |
| } |
| |
// Initially added to the Assembler interface as a pure virtual:
//   RegisterConstant delayed_value(..)
// for:
//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// This was subsequently modified to its present name and return type.
| RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, |
| Register tmp, |
| int offset) { |
| ShouldNotReachHere(); |
| return RegisterOrConstant(-1); |
| } |
| |
| |
| #ifdef AARCH64 |
| // Note: ARM32 version is OS dependent |
| void MacroAssembler::breakpoint(AsmCondition cond) { |
| if (cond == al) { |
| brk(); |
| } else { |
| Label L; |
| b(L, inverse(cond)); |
| brk(); |
| bind(L); |
| } |
| } |
| #endif // AARCH64 |
| |
| |
| // virtual method calling |
| void MacroAssembler::lookup_virtual_method(Register recv_klass, |
| Register vtable_index, |
| Register method_result) { |
| const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes(); |
| assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); |
| add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord)); |
| ldr(method_result, Address(recv_klass, base_offset)); |
| } |
| |
| |
| // Simplified, combined version, good for typical uses. |
| // Falls through on failure. |
| void MacroAssembler::check_klass_subtype(Register sub_klass, |
| Register super_klass, |
| Register temp_reg, |
| Register temp_reg2, |
| Register temp_reg3, |
| Label& L_success) { |
| Label L_failure; |
| check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL); |
| check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL); |
| bind(L_failure); |
| }; |
| |
| void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, |
| Register super_klass, |
| Register temp_reg, |
| Register temp_reg2, |
| Label* L_success, |
| Label* L_failure, |
| Label* L_slow_path) { |
| |
| assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg); |
| const Register super_check_offset = temp_reg2; |
| |
| Label L_fallthrough; |
| int label_nulls = 0; |
| if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } |
| if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } |
| if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } |
| assert(label_nulls <= 1, "at most one NULL in the batch"); |
| |
| int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
| int sco_offset = in_bytes(Klass::super_check_offset_offset()); |
| Address super_check_offset_addr(super_klass, sco_offset); |
| |
| // If the pointers are equal, we are done (e.g., String[] elements). |
| // This self-check enables sharing of secondary supertype arrays among |
| // non-primary types such as array-of-interface. Otherwise, each such |
| // type would need its own customized SSA. |
| // We move this check to the front of the fast path because many |
| // type checks are in fact trivially successful in this manner, |
| // so we get a nicely predicted branch right at the start of the check. |
| cmp(sub_klass, super_klass); |
| b(*L_success, eq); |
| |
| // Check the supertype display: |
| ldr_u32(super_check_offset, super_check_offset_addr); |
| |
| Address super_check_addr(sub_klass, super_check_offset); |
| ldr(temp_reg, super_check_addr); |
| cmp(super_klass, temp_reg); // load displayed supertype |
| |
| // This check has worked decisively for primary supers. |
| // Secondary supers are sought in the super_cache ('super_cache_addr'). |
| // (Secondary supers are interfaces and very deeply nested subtypes.) |
| // This works in the same check above because of a tricky aliasing |
| // between the super_cache and the primary super display elements. |
| // (The 'super_check_addr' can address either, as the case requires.) |
| // Note that the cache is updated below if it does not help us find |
| // what we need immediately. |
| // So if it was a primary super, we can just fail immediately. |
| // Otherwise, it's the slow path for us (no success at this point). |
| |
| b(*L_success, eq); |
| cmp_32(super_check_offset, sc_offset); |
| if (L_failure == &L_fallthrough) { |
| b(*L_slow_path, eq); |
| } else { |
| b(*L_failure, ne); |
| if (L_slow_path != &L_fallthrough) { |
| b(*L_slow_path); |
| } |
| } |
| |
| bind(L_fallthrough); |
| } |
| |
| |
| void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, |
| Register super_klass, |
| Register temp_reg, |
| Register temp2_reg, |
| Register temp3_reg, |
| Label* L_success, |
| Label* L_failure, |
| bool set_cond_codes) { |
| #ifdef AARCH64 |
| NOT_IMPLEMENTED(); |
| #else |
| // Note: if used by code that expects a register to be 0 on success, |
| // this register must be temp_reg and set_cond_codes must be true |
| |
| Register saved_reg = noreg; |
| |
| // get additional tmp registers |
| if (temp3_reg == noreg) { |
| saved_reg = temp3_reg = LR; |
| push(saved_reg); |
| } |
| |
| assert(temp2_reg != noreg, "need all the temporary registers"); |
| assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg); |
| |
| Register cmp_temp = temp_reg; |
| Register scan_temp = temp3_reg; |
| Register count_temp = temp2_reg; |
| |
| Label L_fallthrough; |
| int label_nulls = 0; |
| if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } |
| if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } |
| assert(label_nulls <= 1, "at most one NULL in the batch"); |
| |
| // a couple of useful fields in sub_klass: |
| int ss_offset = in_bytes(Klass::secondary_supers_offset()); |
| int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
| Address secondary_supers_addr(sub_klass, ss_offset); |
| Address super_cache_addr( sub_klass, sc_offset); |
| |
| #ifndef PRODUCT |
| inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp); |
| #endif |
| |
| // We will consult the secondary-super array. |
| ldr(scan_temp, Address(sub_klass, ss_offset)); |
| |
  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // With !UseCompressedOops, search_key is simply the super_klass itself.
  Register search_key = super_klass;
| |
| // Load the array length. |
| ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes())); |
| add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes()); |
| |
| add(count_temp, count_temp, 1); |
| |
| Label L_loop, L_setnz_and_fail, L_fail; |
| |
| // Top of search loop |
| bind(L_loop); |
| // Notes: |
| // scan_temp starts at the array elements |
| // count_temp is 1+size |
| subs(count_temp, count_temp, 1); |
| if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) { |
| // direct jump to L_failure if failed and no cleanup needed |
    b(*L_failure, eq); // not found and no cleanup needed
| } else { |
| b(L_fail, eq); // not found in the array |
| } |
| |
| // Load next super to check |
| // In the array of super classes elements are pointer sized. |
| int element_size = wordSize; |
| ldr(cmp_temp, Address(scan_temp, element_size, post_indexed)); |
| |
| // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list |
| subs(cmp_temp, cmp_temp, search_key); |
| |
| // A miss means we are NOT a subtype and need to keep looping |
| b(L_loop, ne); |
| |
| // Falling out the bottom means we found a hit; we ARE a subtype |
| |
| // Note: temp_reg/cmp_temp is already 0 and flag Z is set |
| |
| // Success. Cache the super we found and proceed in triumph. |
| str(super_klass, Address(sub_klass, sc_offset)); |
| |
| if (saved_reg != noreg) { |
| // Return success |
| pop(saved_reg); |
| } |
| |
| b(*L_success); |
| |
| bind(L_fail); |
| // Note1: check "b(*L_failure, eq)" above if adding extra instructions here |
| if (set_cond_codes) { |
| movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed |
| } |
| if (saved_reg != noreg) { |
| pop(saved_reg); |
| } |
| if (L_failure != &L_fallthrough) { |
| b(*L_failure); |
| } |
| |
| bind(L_fallthrough); |
| #endif |
| } |
| |
// Returns the address of the receiver parameter, using tmp as the base register. tmp and params_count can be the same.
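// For example, with params_count == N the returned address is
// params_base + (N - 1) * Interpreter::stackElementSize, i.e., the slot of the
// first (receiver) argument.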
| Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) { |
| assert_different_registers(params_base, params_count); |
| add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize)); |
| return Address(tmp, -Interpreter::stackElementSize); |
| } |
| |
| |
| void MacroAssembler::align(int modulus) { |
| while (offset() % modulus != 0) { |
| nop(); |
| } |
| } |
| |
| int MacroAssembler::set_last_Java_frame(Register last_java_sp, |
| Register last_java_fp, |
| bool save_last_java_pc, |
| Register tmp) { |
| int pc_offset; |
| if (last_java_fp != noreg) { |
| // optional |
| str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset())); |
| _fp_saved = true; |
| } else { |
| _fp_saved = false; |
| } |
| if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM |
| #ifdef AARCH64 |
| pc_offset = mov_pc_to(tmp); |
| str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset())); |
| #else |
| str(PC, Address(Rthread, JavaThread::last_Java_pc_offset())); |
| pc_offset = offset() + VM_Version::stored_pc_adjustment(); |
| #endif |
| _pc_saved = true; |
| } else { |
| _pc_saved = false; |
| pc_offset = -1; |
| } |
  // According to the comment in javaFrameAnchor, SP must be saved last, so that
  // other entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.
| |
| if (last_java_sp == noreg) { |
| last_java_sp = SP; // always saved |
| } |
| #ifdef AARCH64 |
| if (last_java_sp == SP) { |
| mov(tmp, SP); |
| str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset())); |
| } else { |
| str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); |
| } |
| #else |
| str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); |
| #endif |
| |
| return pc_offset; // for oopmaps |
| } |
| |
| void MacroAssembler::reset_last_Java_frame(Register tmp) { |
| const Register Rzero = zero_register(tmp); |
| str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset())); |
| if (_fp_saved) { |
| str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset())); |
| } |
| if (_pc_saved) { |
| str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset())); |
| } |
| } |
| |
| |
| // Implementation of call_VM versions |
| |
| void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) { |
| assert(number_of_arguments >= 0, "cannot have negative number of arguments"); |
| assert(number_of_arguments <= 4, "cannot have more than 4 arguments"); |
| |
| #ifndef AARCH64 |
| // Safer to save R9 here since callers may have been written |
| // assuming R9 survives. This is suboptimal but is not worth |
| // optimizing for the few platforms where R9 is scratched. |
| push(RegisterSet(R4) | R9ifScratched); |
| mov(R4, SP); |
| bic(SP, SP, StackAlignmentInBytes - 1); |
| #endif // AARCH64 |
| call(entry_point, relocInfo::runtime_call_type); |
| #ifndef AARCH64 |
| mov(SP, R4); |
| pop(RegisterSet(R4) | R9ifScratched); |
| #endif // AARCH64 |
| } |
| |
| |
| void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { |
| assert(number_of_arguments >= 0, "cannot have negative number of arguments"); |
| assert(number_of_arguments <= 3, "cannot have more than 3 arguments"); |
| |
| const Register tmp = Rtemp; |
| assert_different_registers(oop_result, tmp); |
| |
| set_last_Java_frame(SP, FP, true, tmp); |
| |
| #ifdef ASSERT |
| AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); }); |
| #endif // ASSERT |
| |
| #ifndef AARCH64 |
| #if R9_IS_SCRATCHED |
| // Safer to save R9 here since callers may have been written |
| // assuming R9 survives. This is suboptimal but is not worth |
| // optimizing for the few platforms where R9 is scratched. |
| |
  // Note: cannot save R9 above the saved SP (some calls expect, for instance,
  // the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
| sub(SP, SP, 4); |
| bic(SP, SP, StackAlignmentInBytes - 1); |
| str(R9, Address(SP, 0)); |
| #else |
| bic(SP, SP, StackAlignmentInBytes - 1); |
| #endif // R9_IS_SCRATCHED |
| #endif |
| |
| mov(R0, Rthread); |
| call(entry_point, relocInfo::runtime_call_type); |
| |
| #ifndef AARCH64 |
| #if R9_IS_SCRATCHED |
| ldr(R9, Address(SP, 0)); |
| #endif |
| ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset())); |
| #endif |
| |
| reset_last_Java_frame(tmp); |
| |
| // C++ interp handles this in the interpreter |
| check_and_handle_popframe(); |
| check_and_handle_earlyret(); |
| |
| if (check_exceptions) { |
| // check for pending exceptions |
| ldr(tmp, Address(Rthread, Thread::pending_exception_offset())); |
| #ifdef AARCH64 |
| Label L; |
| cbz(tmp, L); |
| mov_pc_to(Rexception_pc); |
| b(StubRoutines::forward_exception_entry()); |
| bind(L); |
| #else |
| cmp(tmp, 0); |
| mov(Rexception_pc, PC, ne); |
| b(StubRoutines::forward_exception_entry(), ne); |
| #endif // AARCH64 |
| } |
| |
| // get oop result if there is one and reset the value in the thread |
| if (oop_result->is_valid()) { |
| get_vm_result(oop_result, tmp); |
| } |
| } |
| |
| void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { |
| call_VM_helper(oop_result, entry_point, 0, check_exceptions); |
| } |
| |
| |
| void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { |
| assert (arg_1 == R1, "fixed register for arg_1"); |
| call_VM_helper(oop_result, entry_point, 1, check_exceptions); |
| } |
| |
| |
| void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { |
| assert (arg_1 == R1, "fixed register for arg_1"); |
| assert (arg_2 == R2, "fixed register for arg_2"); |
| call_VM_helper(oop_result, entry_point, 2, check_exceptions); |
| } |
| |
| |
| void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { |
| assert (arg_1 == R1, "fixed register for arg_1"); |
| assert (arg_2 == R2, "fixed register for arg_2"); |
| assert (arg_3 == R3, "fixed register for arg_3"); |
| call_VM_helper(oop_result, entry_point, 3, check_exceptions); |
| } |
| |
| |
| void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { |
| // Not used on ARM |
| Unimplemented(); |
| } |
| |
| |
| void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { |
| // Not used on ARM |
| Unimplemented(); |
| } |
| |
| |
| void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { |
| // Not used on ARM |
| Unimplemented(); |
| } |
| |
| |
| void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { |
| // Not used on ARM |
| Unimplemented(); |
| } |
| |
| // Raw call, without saving/restoring registers, exception handling, etc. |
| // Mainly used from various stubs. |
| void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) { |
| const Register tmp = Rtemp; // Rtemp free since scratched by call |
| set_last_Java_frame(SP, FP, true, tmp); |
| #if R9_IS_SCRATCHED |
| if (save_R9_if_scratched) { |
| // Note: Saving also R10 for alignment. |
| push(RegisterSet(R9, R10)); |
| } |
| #endif |
| mov(R0, Rthread); |
| call(entry_point, relocInfo::runtime_call_type); |
| #if R9_IS_SCRATCHED |
| if (save_R9_if_scratched) { |
| pop(RegisterSet(R9, R10)); |
| } |
| #endif |
| reset_last_Java_frame(tmp); |
| } |
| |
| void MacroAssembler::call_VM_leaf(address entry_point) { |
| call_VM_leaf_helper(entry_point, 0); |
| } |
| |
| void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { |
| assert (arg_1 == R0, "fixed register for arg_1"); |
| call_VM_leaf_helper(entry_point, 1); |
| } |
| |
| void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { |
| assert (arg_1 == R0, "fixed register for arg_1"); |
| assert (arg_2 == R1, "fixed register for arg_2"); |
| call_VM_leaf_helper(entry_point, 2); |
| } |
| |
| void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { |
| assert (arg_1 == R0, "fixed register for arg_1"); |
| assert (arg_2 == R1, "fixed register for arg_2"); |
| assert (arg_3 == R2, "fixed register for arg_3"); |
| call_VM_leaf_helper(entry_point, 3); |
| } |
| |
| void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) { |
| assert (arg_1 == R0, "fixed register for arg_1"); |
| assert (arg_2 == R1, "fixed register for arg_2"); |
| assert (arg_3 == R2, "fixed register for arg_3"); |
| assert (arg_4 == R3, "fixed register for arg_4"); |
| call_VM_leaf_helper(entry_point, 4); |
| } |
| |
| void MacroAssembler::get_vm_result(Register oop_result, Register tmp) { |
| assert_different_registers(oop_result, tmp); |
| ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset())); |
| str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset())); |
| verify_oop(oop_result); |
| } |
| |
| void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) { |
| assert_different_registers(metadata_result, tmp); |
| ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset())); |
| str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset())); |
| } |
| |
| void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) { |
| if (arg2.is_register()) { |
| add(dst, arg1, arg2.as_register()); |
| } else { |
| add(dst, arg1, arg2.as_constant()); |
| } |
| } |
| |
| void MacroAssembler::add_slow(Register rd, Register rn, int c) { |
| #ifdef AARCH64 |
| if (c == 0) { |
| if (rd != rn) { |
| mov(rd, rn); |
| } |
| return; |
| } |
| if (c < 0) { |
| sub_slow(rd, rn, -c); |
| return; |
| } |
| if (c > right_n_bits(24)) { |
| guarantee(rd != rn, "no large add_slow with only one register"); |
| mov_slow(rd, c); |
| add(rd, rn, rd); |
| } else { |
| int lo = c & right_n_bits(12); |
| int hi = (c >> 12) & right_n_bits(12); |
| if (lo != 0) { |
| add(rd, rn, lo, lsl0); |
| } |
| if (hi != 0) { |
| add(rd, (lo == 0) ? rn : rd, hi, lsl12); |
| } |
| } |
| #else |
  // This function is used in the compiler for handling large frame offsets.
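  // Worked example: add_slow(rd, rn, 0x10f8) is emitted as
  //   add rd, rn, #0xf8    (low part:  0x10f8 & 0x3fc)
  //   add rd, rd, #0x1000  (high part: 0x10f8 & ~0x3fc, a rotated immediate)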
| if ((c < 0) && (((-c) & ~0x3fc) == 0)) { |
| return sub(rd, rn, (-c)); |
| } |
| int low = c & 0x3fc; |
| if (low != 0) { |
| add(rd, rn, low); |
| rn = rd; |
| } |
| if (c & ~0x3fc) { |
| assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c); |
| add(rd, rn, c & ~0x3fc); |
| } else if (rd != rn) { |
| assert(c == 0, ""); |
| mov(rd, rn); // need to generate at least one move! |
| } |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::sub_slow(Register rd, Register rn, int c) { |
| #ifdef AARCH64 |
| if (c <= 0) { |
| add_slow(rd, rn, -c); |
| return; |
| } |
| if (c > right_n_bits(24)) { |
| guarantee(rd != rn, "no large sub_slow with only one register"); |
| mov_slow(rd, c); |
| sub(rd, rn, rd); |
| } else { |
| int lo = c & right_n_bits(12); |
| int hi = (c >> 12) & right_n_bits(12); |
| if (lo != 0) { |
| sub(rd, rn, lo, lsl0); |
| } |
| if (hi != 0) { |
| sub(rd, (lo == 0) ? rn : rd, hi, lsl12); |
| } |
| } |
| #else |
  // This function is used in the compiler for handling large frame offsets.
| if ((c < 0) && (((-c) & ~0x3fc) == 0)) { |
| return add(rd, rn, (-c)); |
| } |
| int low = c & 0x3fc; |
| if (low != 0) { |
| sub(rd, rn, low); |
| rn = rd; |
| } |
| if (c & ~0x3fc) { |
| assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c); |
| sub(rd, rn, c & ~0x3fc); |
| } else if (rd != rn) { |
| assert(c == 0, ""); |
| mov(rd, rn); // need to generate at least one move! |
| } |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::mov_slow(Register rd, address addr) { |
  // do *not* call the non-relocated mov_related_address
| mov_slow(rd, (intptr_t)addr); |
| } |
| |
| void MacroAssembler::mov_slow(Register rd, const char *str) { |
| mov_slow(rd, (intptr_t)str); |
| } |
| |
| #ifdef AARCH64 |
| |
| // Common code for mov_slow and instr_count_for_mov_slow. |
| // Returns number of instructions of mov_slow pattern, |
| // generating it if non-null MacroAssembler is given. |
| int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) { |
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it when making modifications.
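  // Example: c == 0x0000001234560000 is emitted as
  //   movz rd, #0x3456, lsl #16
  //   movk rd, #0x12,   lsl #32
  // since the two all-zero 16-bit chunks take the default value, two instructions suffice.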
| |
| const intx mask = right_n_bits(16); |
| // 1 movz instruction |
| for (int base_shift = 0; base_shift < 64; base_shift += 16) { |
| if ((c & ~(mask << base_shift)) == 0) { |
| if (masm != NULL) { |
| masm->movz(rd, ((uintx)c) >> base_shift, base_shift); |
| } |
| return 1; |
| } |
| } |
| // 1 movn instruction |
| for (int base_shift = 0; base_shift < 64; base_shift += 16) { |
| if (((~c) & ~(mask << base_shift)) == 0) { |
| if (masm != NULL) { |
| masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift); |
| } |
| return 1; |
| } |
| } |
| // 1 orr instruction |
| { |
| LogicalImmediate imm(c, false); |
| if (imm.is_encoded()) { |
| if (masm != NULL) { |
| masm->orr(rd, ZR, imm); |
| } |
| return 1; |
| } |
| } |
| // 1 movz/movn + up to 3 movk instructions |
| int zeroes = 0; |
| int ones = 0; |
| for (int base_shift = 0; base_shift < 64; base_shift += 16) { |
| int part = (c >> base_shift) & mask; |
| if (part == 0) { |
| ++zeroes; |
| } else if (part == mask) { |
| ++ones; |
| } |
| } |
| int def_bits = 0; |
| if (ones > zeroes) { |
| def_bits = mask; |
| } |
| int inst_count = 0; |
| for (int base_shift = 0; base_shift < 64; base_shift += 16) { |
| int part = (c >> base_shift) & mask; |
| if (part != def_bits) { |
| if (masm != NULL) { |
| if (inst_count > 0) { |
| masm->movk(rd, part, base_shift); |
| } else { |
| if (def_bits == 0) { |
| masm->movz(rd, part, base_shift); |
| } else { |
| masm->movn(rd, ~part & mask, base_shift); |
| } |
| } |
| } |
| inst_count++; |
| } |
| } |
| assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions"); |
| return inst_count; |
| } |
| |
| void MacroAssembler::mov_slow(Register rd, intptr_t c) { |
| #ifdef ASSERT |
| int off = offset(); |
| #endif |
| (void) mov_slow_helper(rd, c, this); |
| assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch"); |
| } |
| |
| // Counts instructions generated by mov_slow(rd, c). |
| int MacroAssembler::instr_count_for_mov_slow(intptr_t c) { |
| return mov_slow_helper(noreg, c, NULL); |
| } |
| |
| int MacroAssembler::instr_count_for_mov_slow(address c) { |
| return mov_slow_helper(noreg, (intptr_t)c, NULL); |
| } |
| |
| #else |
| |
| void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) { |
| if (AsmOperand::is_rotated_imm(c)) { |
| mov(rd, c, cond); |
| } else if (AsmOperand::is_rotated_imm(~c)) { |
| mvn(rd, ~c, cond); |
| } else if (VM_Version::supports_movw()) { |
| movw(rd, c & 0xffff, cond); |
| if ((unsigned int)c >> 16) { |
| movt(rd, (unsigned int)c >> 16, cond); |
| } |
| } else { |
| // Find first non-zero bit |
| int shift = 0; |
| while ((c & (3 << shift)) == 0) { |
| shift += 2; |
| } |
| // Put the least significant part of the constant |
| int mask = 0xff << shift; |
| mov(rd, c & mask, cond); |
| // Add up to 3 other parts of the constant; |
| // each of them can be represented as rotated_imm |
| if (c & (mask << 8)) { |
| orr(rd, rd, c & (mask << 8), cond); |
| } |
| if (c & (mask << 16)) { |
| orr(rd, rd, c & (mask << 16), cond); |
| } |
| if (c & (mask << 24)) { |
| orr(rd, rd, c & (mask << 24), cond); |
| } |
| } |
| } |
| |
| #endif // AARCH64 |
| |
| void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index, |
| #ifdef AARCH64 |
| bool patchable |
| #else |
| AsmCondition cond |
| #endif |
| ) { |
| |
| if (o == NULL) { |
| #ifdef AARCH64 |
| if (patchable) { |
| nop(); |
| } |
| mov(rd, ZR); |
| #else |
| mov(rd, 0, cond); |
| #endif |
| return; |
| } |
| |
| if (oop_index == 0) { |
| oop_index = oop_recorder()->allocate_oop_index(o); |
| } |
| relocate(oop_Relocation::spec(oop_index)); |
| |
| #ifdef AARCH64 |
| if (patchable) { |
| nop(); |
| } |
| ldr(rd, pc()); |
| #else |
| if (VM_Version::supports_movw()) { |
| movw(rd, 0, cond); |
| movt(rd, 0, cond); |
| } else { |
| ldr(rd, Address(PC), cond); |
| // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data). |
| nop(); |
| } |
| #endif |
| } |
| |
| void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) { |
| if (o == NULL) { |
| #ifdef AARCH64 |
| if (patchable) { |
| nop(); |
| } |
| #endif |
| mov(rd, 0); |
| return; |
| } |
| |
| if (metadata_index == 0) { |
| metadata_index = oop_recorder()->allocate_metadata_index(o); |
| } |
| relocate(metadata_Relocation::spec(metadata_index)); |
| |
| #ifdef AARCH64 |
| if (patchable) { |
| nop(); |
| } |
| #ifdef COMPILER2 |
| if (!patchable && VM_Version::prefer_moves_over_load_literal()) { |
| mov_slow(rd, (address)o); |
| return; |
| } |
| #endif |
| ldr(rd, pc()); |
| #else |
| if (VM_Version::supports_movw()) { |
| movw(rd, ((int)o) & 0xffff); |
| movt(rd, (unsigned int)o >> 16); |
| } else { |
| ldr(rd, Address(PC)); |
| // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data). |
| nop(); |
| } |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) { |
| Label skip_constant; |
| union { |
| jfloat f; |
| jint i; |
| } accessor; |
| accessor.f = c; |
| |
| #ifdef AARCH64 |
| // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow |
| Label L; |
| ldr_s(fd, target(L)); |
| b(skip_constant); |
| bind(L); |
| emit_int32(accessor.i); |
| bind(skip_constant); |
| #else |
| flds(fd, Address(PC), cond); |
| b(skip_constant); |
| emit_int32(accessor.i); |
| bind(skip_constant); |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) { |
| Label skip_constant; |
| union { |
| jdouble d; |
| jint i[2]; |
| } accessor; |
| accessor.d = c; |
| |
| #ifdef AARCH64 |
| // TODO-AARCH64 - try to optimize loading of double constants with fmov |
| Label L; |
| ldr_d(fd, target(L)); |
| b(skip_constant); |
| align(wordSize); |
| bind(L); |
| emit_int32(accessor.i[0]); |
| emit_int32(accessor.i[1]); |
| bind(skip_constant); |
| #else |
| fldd(fd, Address(PC), cond); |
| b(skip_constant); |
| emit_int32(accessor.i[0]); |
| emit_int32(accessor.i[1]); |
| bind(skip_constant); |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) { |
| intptr_t addr = (intptr_t) address_of_global; |
| #ifdef AARCH64 |
| assert((addr & 0x3) == 0, "address should be aligned"); |
| |
| // FIXME: TODO |
| if (false && page_reachable_from_cache(address_of_global)) { |
| assert(false,"TODO: relocate"); |
| //relocate(); |
| adrp(reg, address_of_global); |
| ldrsw(reg, Address(reg, addr & 0xfff)); |
| } else { |
| mov_slow(reg, addr & ~0x3fff); |
| ldrsw(reg, Address(reg, addr & 0x3fff)); |
| } |
| #else |
| mov_slow(reg, addr & ~0xfff); |
| ldr(reg, Address(reg, addr & 0xfff)); |
| #endif |
| } |
| |
| void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) { |
| #ifdef AARCH64 |
| intptr_t addr = (intptr_t) address_of_global; |
| assert ((addr & 0x7) == 0, "address should be aligned"); |
| mov_slow(reg, addr & ~0x7fff); |
| ldr(reg, Address(reg, addr & 0x7fff)); |
| #else |
| ldr_global_s32(reg, address_of_global); |
| #endif |
| } |
| |
| void MacroAssembler::ldrb_global(Register reg, address address_of_global) { |
| intptr_t addr = (intptr_t) address_of_global; |
| mov_slow(reg, addr & ~0xfff); |
| ldrb(reg, Address(reg, addr & 0xfff)); |
| } |
| |
| void MacroAssembler::zero_extend(Register rd, Register rn, int bits) { |
| #ifdef AARCH64 |
| switch (bits) { |
| case 8: uxtb(rd, rn); break; |
| case 16: uxth(rd, rn); break; |
| case 32: mov_w(rd, rn); break; |
| default: ShouldNotReachHere(); |
| } |
| #else |
| if (bits <= 8) { |
| andr(rd, rn, (1 << bits) - 1); |
| } else if (bits >= 24) { |
| bic(rd, rn, -1 << bits); |
| } else { |
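    // Clear the upper (32 - bits) bits with a shift pair;
    // e.g., bits == 12: lsl #20 then lsr #20 leaves only the low 12 bits in rd.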
| mov(rd, AsmOperand(rn, lsl, 32 - bits)); |
| mov(rd, AsmOperand(rd, lsr, 32 - bits)); |
| } |
| #endif |
| } |
| |
| void MacroAssembler::sign_extend(Register rd, Register rn, int bits) { |
| #ifdef AARCH64 |
| switch (bits) { |
| case 8: sxtb(rd, rn); break; |
| case 16: sxth(rd, rn); break; |
| case 32: sxtw(rd, rn); break; |
| default: ShouldNotReachHere(); |
| } |
| #else |
| mov(rd, AsmOperand(rn, lsl, 32 - bits)); |
| mov(rd, AsmOperand(rd, asr, 32 - bits)); |
| #endif |
| } |
| |
| #ifndef AARCH64 |
| |
| void MacroAssembler::long_move(Register rd_lo, Register rd_hi, |
| Register rn_lo, Register rn_hi, |
| AsmCondition cond) { |
| if (rd_lo != rn_hi) { |
| if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); } |
| if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); } |
| } else if (rd_hi != rn_lo) { |
| if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); } |
| if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); } |
| } else { |
| eor(rd_lo, rd_hi, rd_lo, cond); |
| eor(rd_hi, rd_lo, rd_hi, cond); |
| eor(rd_lo, rd_hi, rd_lo, cond); |
| } |
| } |
| |
| void MacroAssembler::long_shift(Register rd_lo, Register rd_hi, |
| Register rn_lo, Register rn_hi, |
| AsmShift shift, Register count) { |
| Register tmp; |
| if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) { |
| tmp = rd_lo; |
| } else { |
| tmp = rd_hi; |
| } |
| assert_different_registers(tmp, count, rn_lo, rn_hi); |
| |
| subs(tmp, count, 32); |
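  // tmp = count - 32: pl (count >= 32) means the shift crosses the word boundary
  // and the destination word is taken entirely from the opposite input word,
  // while mi (count < 32) means both input words contribute.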
| if (shift == lsl) { |
| assert_different_registers(rd_hi, rn_lo); |
| assert_different_registers(count, rd_hi); |
| mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl); |
| rsb(tmp, count, 32, mi); |
| if (rd_hi == rn_hi) { |
| mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi); |
| orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi); |
| } else { |
| mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi); |
| orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi); |
| } |
| mov(rd_lo, AsmOperand(rn_lo, shift, count)); |
| } else { |
| assert_different_registers(rd_lo, rn_hi); |
| assert_different_registers(rd_lo, count); |
| mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl); |
| rsb(tmp, count, 32, mi); |
| if (rd_lo == rn_lo) { |
| mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi); |
| orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi); |
| } else { |
| mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi); |
| orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi); |
| } |
| mov(rd_hi, AsmOperand(rn_hi, shift, count)); |
| } |
| } |
| |
| void MacroAssembler::long_shift(Register rd_lo, Register rd_hi, |
| Register rn_lo, Register rn_hi, |
| AsmShift shift, int count) { |
| assert(count != 0 && (count & ~63) == 0, "must be"); |
| |
| if (shift == lsl) { |
| assert_different_registers(rd_hi, rn_lo); |
| if (count >= 32) { |
| mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32)); |
| mov(rd_lo, 0); |
| } else { |
| mov(rd_hi, AsmOperand(rn_hi, lsl, count)); |
| orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count)); |
| mov(rd_lo, AsmOperand(rn_lo, lsl, count)); |
| } |
| } else { |
| assert_different_registers(rd_lo, rn_hi); |
| if (count >= 32) { |
| if (count == 32) { |
| mov(rd_lo, rn_hi); |
| } else { |
| mov(rd_lo, AsmOperand(rn_hi, shift, count - 32)); |
| } |
| if (shift == asr) { |
| mov(rd_hi, AsmOperand(rn_hi, asr, 0)); |
| } else { |
| mov(rd_hi, 0); |
| } |
| } else { |
| mov(rd_lo, AsmOperand(rn_lo, lsr, count)); |
| orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count)); |
| mov(rd_hi, AsmOperand(rn_hi, shift, count)); |
| } |
| } |
| } |
| #endif // !AARCH64 |
| |
| void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { |
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it when making modifications.
| if (!VerifyOops) return; |
| |
| char buffer[64]; |
| #ifdef COMPILER1 |
| if (CommentedAssembly) { |
| snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); |
| block_comment(buffer); |
| } |
| #endif |
| const char* msg_buffer = NULL; |
| { |
| ResourceMark rm; |
| stringStream ss; |
| ss.print("%s at offset %d (%s:%d)", s, offset(), file, line); |
| msg_buffer = code_string(ss.as_string()); |
| } |
| |
| save_all_registers(); |
| |
| if (reg != R2) { |
| mov(R2, reg); // oop to verify |
| } |
| mov(R1, SP); // register save area |
| |
| Label done; |
| InlinedString Lmsg(msg_buffer); |
| ldr_literal(R0, Lmsg); // message |
| |
| // call indirectly to solve generation ordering problem |
| ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address()); |
| call(Rtemp); |
| |
| restore_all_registers(); |
| |
| b(done); |
| #ifdef COMPILER2 |
| int off = offset(); |
| #endif |
| bind_literal(Lmsg); |
| #ifdef COMPILER2 |
| if (offset() - off == 1 * wordSize) { |
| // no padding, so insert nop for worst-case sizing |
| nop(); |
| } |
| #endif |
| bind(done); |
| } |
| |
| void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { |
| if (!VerifyOops) return; |
| |
| const char* msg_buffer = NULL; |
| { |
| ResourceMark rm; |
| stringStream ss; |
| if ((addr.base() == SP) && (addr.index()==noreg)) { |
| ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s); |
| } else { |
| ss.print("verify_oop_addr: %s", s); |
| } |
| ss.print(" (%s:%d)", file, line); |
| msg_buffer = code_string(ss.as_string()); |
| } |
| |
| int push_size = save_all_registers(); |
| |
| if (addr.base() == SP) { |
| // computes an addr that takes into account the push |
| if (addr.index() != noreg) { |
| Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index |
| add(new_base, SP, push_size); |
| addr = addr.rebase(new_base); |
| } else { |
| addr = addr.plus_disp(push_size); |
| } |
| } |
| |
| ldr(R2, addr); // oop to verify |
| mov(R1, SP); // register save area |
| |
| Label done; |
| InlinedString Lmsg(msg_buffer); |
| ldr_literal(R0, Lmsg); // message |
| |
| // call indirectly to solve generation ordering problem |
| ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address()); |
| call(Rtemp); |
| |
| restore_all_registers(); |
| |
| b(done); |
| bind_literal(Lmsg); |
| bind(done); |
| } |
| |
| void MacroAssembler::null_check(Register reg, Register tmp, int offset) { |
| if (needs_explicit_null_check(offset)) { |
| #ifdef AARCH64 |
| ldr(ZR, Address(reg)); |
| #else |
| assert_different_registers(reg, tmp); |
| if (tmp == noreg) { |
| tmp = Rtemp; |
| assert((! Thread::current()->is_Compiler_thread()) || |
| (! (ciEnv::current()->task() == NULL)) || |
| (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)), |
| "Rtemp not available in C2"); // explicit tmp register required |
      // XXX: could we mark the code buffer as not compatible with C2?
| } |
| ldr(tmp, Address(reg)); |
| #endif |
| } |
| } |
| |
// Puts the address of the allocated object into register `obj` and the end of the allocated object into register `obj_end`.
| void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, |
| RegisterOrConstant size_expression, Label& slow_case) { |
| if (!Universe::heap()->supports_inline_contig_alloc()) { |
| b(slow_case); |
| return; |
| } |
| |
| CollectedHeap* ch = Universe::heap(); |
| |
| const Register top_addr = tmp1; |
| const Register heap_end = tmp2; |
| |
| if (size_expression.is_register()) { |
| assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register()); |
| } else { |
| assert_different_registers(obj, obj_end, top_addr, heap_end); |
| } |
| |
  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
| if (load_const) { |
| mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference); |
| } else { |
| ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset())); |
| } |
| // Calculate new heap_top by adding the size of the object |
| Label retry; |
| bind(retry); |
| |
| #ifdef AARCH64 |
| ldxr(obj, top_addr); |
| #else |
| ldr(obj, Address(top_addr)); |
| #endif // AARCH64 |
| |
| ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr())); |
| add_rc(obj_end, obj, size_expression); |
| // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case. |
| cmp(obj_end, obj); |
| b(slow_case, lo); |
| // Update heap_top if allocation succeeded |
| cmp(obj_end, heap_end); |
| b(slow_case, hi); |
| |
| #ifdef AARCH64 |
| stxr(heap_end/*scratched*/, obj_end, top_addr); |
| cbnz_w(heap_end, retry); |
| #else |
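  // atomic_cas_bool installs obj_end as the new heap top if the top still
  // equals obj; ne afterwards means another thread advanced the top
  // concurrently, so retry the whole allocation.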
| atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/); |
| b(retry, ne); |
| #endif // AARCH64 |
| } |
| |
// Puts the address of the allocated object into register `obj` and the end of the allocated object into register `obj_end`.
| void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1, |
| RegisterOrConstant size_expression, Label& slow_case) { |
| const Register tlab_end = tmp1; |
| assert_different_registers(obj, obj_end, tlab_end); |
| |
| ldr(obj, Address(Rthread, JavaThread::tlab_top_offset())); |
| ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset())); |
| add_rc(obj_end, obj, size_expression); |
| cmp(obj_end, tlab_end); |
| b(slow_case, hi); |
| str(obj_end, Address(Rthread, JavaThread::tlab_top_offset())); |
| } |
| |
| void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2, |
| Register tmp3, Register tmp4, |
| Label& try_eden, Label& slow_case) { |
| if (!Universe::heap()->supports_inline_contig_alloc()) { |
| b(slow_case); |
| return; |
| } |
| |
| InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr()); |
| Label discard_tlab, do_refill; |
| ldr(top, Address(Rthread, JavaThread::tlab_top_offset())); |
| ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset())); |
| ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset())); |
| |
| // Calculate amount of free space |
| sub(tmp1, tmp1, top); |
| // Retain tlab and allocate in shared space |
| // if the amount of free space in tlab is too large to discard |
| cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize)); |
| b(discard_tlab, ge); |
| |
| // Increment waste limit to prevent getting stuck on this slow path |
| mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment()); |
| add(tmp2, tmp2, tmp3); |
| str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset())); |
| if (TLABStats) { |
| ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset())); |
| add_32(tmp2, tmp2, 1); |
| str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset())); |
| } |
| b(try_eden); |
| bind_literal(intArrayKlass_addr); |
| |
| bind(discard_tlab); |
| if (TLABStats) { |
| ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset())); |
| ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset())); |
| add_32(tmp2, tmp2, 1); |
| add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize)); |
| str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset())); |
| str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset())); |
| } |
| // If tlab is currently allocated (top or end != null) |
| // then fill [top, end + alignment_reserve) with array object |
| cbz(top, do_refill); |
| |
| // Set up the mark word |
| mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); |
| str(tmp2, Address(top, oopDesc::mark_offset_in_bytes())); |
| // Set klass to intArrayKlass and the length to the remaining space |
| ldr_literal(tmp2, intArrayKlass_addr); |
| add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() - |
| typeArrayOopDesc::header_size(T_INT) * HeapWordSize); |
| Register klass = tmp2; |
| ldr(klass, Address(tmp2)); |
| logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint) |
| str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes())); |
| store_klass(klass, top); // blows klass: |
| klass = noreg; |
| |
| ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset())); |
| sub(tmp1, top, tmp1); // size of tlab's allocated portion |
| incr_allocated_bytes(tmp1, tmp2); |
| |
| bind(do_refill); |
| // Refill the tlab with an eden allocation |
| ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset())); |
| logical_shift_left(tmp4, tmp1, LogHeapWordSize); |
| eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case); |
| str(top, Address(Rthread, JavaThread::tlab_start_offset())); |
| str(top, Address(Rthread, JavaThread::tlab_top_offset())); |
| |
| #ifdef ASSERT |
| // Verify that tmp1 contains tlab_end |
| ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset())); |
| add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize)); |
| cmp(tmp1, tmp2); |
| breakpoint(ne); |
| #endif |
| |
| sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); |
| str(tmp1, Address(Rthread, JavaThread::tlab_end_offset())); |
| |
| if (ZeroTLAB) { |
| // clobbers start and tmp |
| // top must be preserved! |
| add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); |
| ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset())); |
| zero_memory(tmp2, tmp1, tmp3); |
| } |
| } |
| |
// Fills the memory region [start..end) with zeroes. Clobbers `start` and `tmp` registers.
| void MacroAssembler::zero_memory(Register start, Register end, Register tmp) { |
| Label loop; |
| const Register ptr = start; |
| |
| #ifdef AARCH64 |
| // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x |
| const Register size = tmp; |
| Label remaining, done; |
| |
| sub(size, end, start); |
| |
| #ifdef ASSERT |
| { Label L; |
| tst(size, wordSize - 1); |
| b(L, eq); |
| stop("size is not a multiple of wordSize"); |
| bind(L); |
| } |
| #endif // ASSERT |
| |
| subs(size, size, wordSize); |
| b(remaining, le); |
| |
| // Zero by 2 words per iteration. |
| bind(loop); |
| subs(size, size, 2*wordSize); |
| stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); |
| b(loop, gt); |
| |
| bind(remaining); |
| b(done, ne); |
| str(ZR, Address(ptr)); |
| bind(done); |
| #else |
| mov(tmp, 0); |
| bind(loop); |
| cmp(ptr, end); |
| str(tmp, Address(ptr, wordSize, post_indexed), lo); |
| b(loop, lo); |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) { |
| #ifdef AARCH64 |
| ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); |
| add_rc(tmp, tmp, size_in_bytes); |
| str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); |
| #else |
| // Bump total bytes allocated by this thread |
| Label done; |
| |
| ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); |
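  // adds sets the carry flag on unsigned overflow of the low word; with carry
  // clear (cc) the conditional single-word store below suffices, otherwise we
  // fall through to the 64-bit update.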
| adds(tmp, tmp, size_in_bytes); |
| str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc); |
| b(done, cc); |
| |
  // Increment the high word and store single-copy atomically (an unlikely scenario
  // on typical embedded systems, as it means more than 4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of
  // registers. Such a request could be difficult to satisfy by allocating those
  // registers on a higher level, therefore the routine is ready to allocate a pair itself.
| Register low, high; |
  // Select either R0/R1 or R2/R3
| |
| if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) { |
| low = R2; |
| high = R3; |
| } else { |
| low = R0; |
| high = R1; |
| } |
| push(RegisterSet(low, high)); |
| |
| ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); |
| adds(low, low, size_in_bytes); |
| adc(high, high, 0); |
| strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); |
| |
| pop(RegisterSet(low, high)); |
| |
| bind(done); |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { |
| // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM |
| if (UseStackBanging) { |
| const int page_size = os::vm_page_size(); |
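    // Bang (touch) one byte per page from SP - shadow_zone_size down across the
    // new frame, so that a stack overflow is detected by the guard page now
    // rather than at an arbitrary store later.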
| |
| sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size()); |
| strb(R0, Address(tmp)); |
| #ifdef AARCH64 |
| for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) { |
| sub(tmp, tmp, page_size); |
| strb(R0, Address(tmp)); |
| } |
| #else |
| for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { |
| strb(R0, Address(tmp, -0xff0, pre_indexed)); |
| } |
| #endif // AARCH64 |
| } |
| } |
| |
| void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) { |
| if (UseStackBanging) { |
| Label loop; |
| |
| mov(tmp, SP); |
| add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size()); |
| #ifdef AARCH64 |
| sub(tmp, tmp, Rsize); |
| bind(loop); |
| subs(Rsize, Rsize, os::vm_page_size()); |
| strb(ZR, Address(tmp, Rsize)); |
| #else |
| bind(loop); |
| subs(Rsize, Rsize, 0xff0); |
| strb(R0, Address(tmp, -0xff0, pre_indexed)); |
| #endif // AARCH64 |
| b(loop, hi); |
| } |
| } |
| |
| void MacroAssembler::stop(const char* msg) { |
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it when making modifications.
| #ifdef COMPILER1 |
| if (CommentedAssembly) { |
| block_comment("stop"); |
| } |
| #endif |
| |
| InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug)); |
| InlinedString Lmsg(msg); |
| |
| // save all registers for further inspection |
| save_all_registers(); |
| |
| ldr_literal(R0, Lmsg); // message |
| mov(R1, SP); // register save area |
| |
| #ifdef AARCH64 |
| ldr_literal(Rtemp, Ldebug); |
| br(Rtemp); |
| #else |
| ldr_literal(PC, Ldebug); // call MacroAssembler::debug |
| #endif // AARCH64 |
| |
| #if defined(COMPILER2) && defined(AARCH64) |
| int off = offset(); |
| #endif |
| bind_literal(Lmsg); |
| bind_literal(Ldebug); |
| #if defined(COMPILER2) && defined(AARCH64) |
| if (offset() - off == 2 * wordSize) { |
| // no padding, so insert nop for worst-case sizing |
| nop(); |
| } |
| #endif |
| } |
| |
| void MacroAssembler::warn(const char* msg) { |
| #ifdef COMPILER1 |
| if (CommentedAssembly) { |
| block_comment("warn"); |
| } |
| #endif |
| |
| InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning)); |
| InlinedString Lmsg(msg); |
| Label done; |
| |
| int push_size = save_caller_save_registers(); |
| |
| #ifdef AARCH64 |
| // TODO-AARCH64 - get rid of extra debug parameters |
| mov(R1, LR); |
| mov(R2, FP); |
| add(R3, SP, push_size); |
| #endif |
| |
| ldr_literal(R0, Lmsg); // message |
| ldr_literal(LR, Lwarn); // call warning |
| |
| call(LR); |
| |
| restore_caller_save_registers(); |
| |
| b(done); |
| bind_literal(Lmsg); |
| bind_literal(Lwarn); |
| bind(done); |
| } |
| |
| |
| int MacroAssembler::save_all_registers() { |
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it when making modifications.
| #ifdef AARCH64 |
| const Register tmp = Rtemp; |
| raw_push(R30, ZR); |
| for (int i = 28; i >= 0; i -= 2) { |
| raw_push(as_Register(i), as_Register(i+1)); |
| } |
| mov_pc_to(tmp); |
| str(tmp, Address(SP, 31*wordSize)); |
| ldr(tmp, Address(SP, tmp->encoding()*wordSize)); |
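  // mov_pc_to clobbered Rtemp after it was saved by the pair pushes above, so
  // reload its original value from its own save slot; the PC stored above
  // occupies the last (32nd) slot of the save area.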
| return 32*wordSize; |
| #else |
| push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC)); |
| return 15*wordSize; |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::restore_all_registers() { |
| #ifdef AARCH64 |
| for (int i = 0; i <= 28; i += 2) { |
| raw_pop(as_Register(i), as_Register(i+1)); |
| } |
| raw_pop(R30, ZR); |
| #else |
| pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers |
| add(SP, SP, wordSize); // discard saved PC |
| #endif // AARCH64 |
| } |
| |
| int MacroAssembler::save_caller_save_registers() { |
| #ifdef AARCH64 |
| for (int i = 0; i <= 16; i += 2) { |
| raw_push(as_Register(i), as_Register(i+1)); |
| } |
| raw_push(R18, LR); |
| return 20*wordSize; |
| #else |
| #if R9_IS_SCRATCHED |
| // Save also R10 to preserve alignment |
| push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); |
| return 8*wordSize; |
| #else |
| push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); |
| return 6*wordSize; |
| #endif |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::restore_caller_save_registers() { |
| #ifdef AARCH64 |
| raw_pop(R18, LR); |
| for (int i = 16; i >= 0; i -= 2) { |
| raw_pop(as_Register(i), as_Register(i+1)); |
| } |
| #else |
| #if R9_IS_SCRATCHED |
| pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); |
| #else |
| pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); |
| #endif |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::debug(const char* msg, const intx* registers) { |
  // In order to get locks to work, we need to fake an in_VM state
| JavaThread* thread = JavaThread::current(); |
| thread->set_thread_state(_thread_in_vm); |
| |
| if (ShowMessageBoxOnError) { |
| ttyLocker ttyl; |
| if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { |
| BytecodeCounter::print(); |
| } |
| if (os::message_box(msg, "Execution stopped, print registers?")) { |
| #ifdef AARCH64 |
| // saved registers: R0-R30, PC |
| const int nregs = 32; |
| #else |
| // saved registers: R0-R12, LR, PC |
| const int nregs = 15; |
| const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; |
| #endif // AARCH64 |
| |
| for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) { |
| tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]); |
| } |
| |
| #ifdef AARCH64 |
| tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]); |
| #endif // AARCH64 |
| |
      // derive the original SP value from the address of the register save area
| tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(®isters[nregs])); |
| } |
| BREAKPOINT; |
| } else { |
| ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); |
| } |
| assert(false, "DEBUG MESSAGE: %s", msg); |
| fatal("%s", msg); // returning from MacroAssembler::debug is not supported |
| } |
| |
| void MacroAssembler::unimplemented(const char* what) { |
| const char* buf = NULL; |
| { |
| ResourceMark rm; |
| stringStream ss; |
| ss.print("unimplemented: %s", what); |
| buf = code_string(ss.as_string()); |
| } |
| stop(buf); |
| } |
| |
| |
| // Implementation of FixedSizeCodeBlock |
| |
| FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) : |
| _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) { |
| } |
| |
| FixedSizeCodeBlock::~FixedSizeCodeBlock() { |
| if (_enabled) { |
| address curr_pc = _masm->pc(); |
| |
| assert(_start < curr_pc, "invalid current pc"); |
| guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long"); |
| |
    // Pad the block with nops up to the requested fixed size.
    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
| for (int i = 0; i < nops_count; i++) { |
| _masm->nop(); |
| } |
| } |
| } |
| |
| #ifdef AARCH64 |
| |
// Serializes memory.
// The tmp register is not used on AArch64; this parameter is provided solely for better compatibility with 32-bit ARM.
| void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) { |
| if (!os::is_MP()) return; |
| |
| // TODO-AARCH64 investigate dsb vs dmb effects |
| if (order_constraint == StoreStore) { |
| dmb(DMB_st); |
| } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) { |
| dmb(DMB_ld); |
| } else { |
| dmb(DMB_all); |
| } |
| } |
| |
| #else |
| |
// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions).
// preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target in the LoadStore case only, used to create a dependency between the load operation and the conditional branch. Optional.
| void MacroAssembler::membar(Membar_mask_bits order_constraint, |
| Register tmp, |
| bool preserve_flags, |
| Register load_tgt) { |
| if (!os::is_MP()) return; |
| |
| if (order_constraint == StoreStore) { |
| dmb(DMB_st, tmp); |
| } else if ((order_constraint & StoreLoad) || |
| (order_constraint & LoadLoad) || |
| (order_constraint & StoreStore) || |
| (load_tgt == noreg) || |
| preserve_flags) { |
| dmb(DMB_all, tmp); |
| } else { |
// LoadStore: reordering of speculative stores is prohibited
| |
| // By providing an ordered load target register, we avoid an extra memory load reference |
| Label not_taken; |
| bind(not_taken); |
| cmp(load_tgt, load_tgt); |
| b(not_taken, ne); |
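// The never-taken conditional branch above creates a control
// dependency: the ARM memory model does not let stores become
// visible ahead of a branch whose condition depends on a load,
// which yields the required load->store ordering without a dmb.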
| } |
| } |
| |
| #endif // AARCH64 |
| |
| // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case" |
| // on failure, so fall-through can only mean success. |
| // "one_shot" controls whether we loop and retry to mitigate spurious failures. |
// This is only needed for C2, which for some reason does not retry,
// while C1 and the interpreter do.
| // TODO: measure if it makes a difference |
| |
| void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval, |
| Register base, Register tmp, Label &slow_case, |
| bool allow_fallthrough_on_failure, bool one_shot) |
| { |
| |
| bool fallthrough_is_success = false; |
| |
| // ARM Litmus Test example does prefetching here. |
| // TODO: investigate if it helps performance |
| |
| // The last store was to the displaced header, so to prevent |
| // reordering we must issue a StoreStore or Release barrier before |
| // the CAS store. |
| |
| #ifdef AARCH64 |
| |
Label loop;
| |
| assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); |
| |
| // Instead of StoreStore here, we use store-release-exclusive below |
| |
| bind(loop); |
| |
| ldaxr(tmp, base); // acquire |
| cmp(tmp, oldval); |
| b(slow_case, ne); |
| stlxr(tmp, newval, base); // release |
| if (one_shot) { |
| cmp_w(tmp, 0); |
| } else { |
| cbnz_w(tmp, loop); |
| fallthrough_is_success = true; |
| } |
| |
| // MemBarAcquireLock would normally go here, but |
| // we already do ldaxr+stlxr above, which has |
| // Sequential Consistency |
| |
| #else |
| membar(MacroAssembler::StoreStore, noreg); |
| |
| if (one_shot) { |
| ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); |
| cmp(tmp, oldval); |
| strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); |
| cmp(tmp, 0, eq); |
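// strex writes 0 to tmp on success, 1 on failure; the conditional
// cmp(tmp, 0, eq) runs only when the ldrex value matched oldval, so
// 'eq' on fall-through means the store-exclusive succeeded.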
| } else { |
| atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); |
| } |
| |
| // MemBarAcquireLock barrier |
| // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore, |
| // but that doesn't prevent a load or store from floating up between |
| // the load and store in the CAS sequence, so play it safe and |
| // do a full fence. |
| membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg); |
| #endif |
| if (!fallthrough_is_success && !allow_fallthrough_on_failure) { |
| b(slow_case, ne); |
| } |
| } |
| |
| void MacroAssembler::cas_for_lock_release(Register oldval, Register newval, |
| Register base, Register tmp, Label &slow_case, |
| bool allow_fallthrough_on_failure, bool one_shot) |
| { |
| |
| bool fallthrough_is_success = false; |
| |
| assert_different_registers(oldval,newval,base,tmp); |
| |
| #ifdef AARCH64 |
| Label loop; |
| |
| assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); |
| |
| bind(loop); |
| ldxr(tmp, base); |
| cmp(tmp, oldval); |
| b(slow_case, ne); |
| // MemBarReleaseLock barrier |
| stlxr(tmp, newval, base); |
| if (one_shot) { |
| cmp_w(tmp, 0); |
| } else { |
| cbnz_w(tmp, loop); |
| fallthrough_is_success = true; |
| } |
| #else |
| // MemBarReleaseLock barrier |
| // According to JSR-133 Cookbook, this should be StoreStore | LoadStore, |
| // but that doesn't prevent a load or store from floating down between |
| // the load and store in the CAS sequence, so play it safe and |
| // do a full fence. |
| membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp); |
| |
| if (one_shot) { |
| ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); |
| cmp(tmp, oldval); |
| strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); |
| cmp(tmp, 0, eq); |
| } else { |
| atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); |
| } |
| #endif |
| if (!fallthrough_is_success && !allow_fallthrough_on_failure) { |
| b(slow_case, ne); |
| } |
| |
| // ExitEnter |
| // According to JSR-133 Cookbook, this should be StoreLoad, the same |
| // barrier that follows volatile store. |
| // TODO: Should be able to remove on armv8 if volatile loads |
| // use the load-acquire instruction. |
| membar(StoreLoad, noreg); |
| } |
| |
| #ifndef PRODUCT |
| |
| // Preserves flags and all registers. |
// On SMP the updated value might not be visible to external observers without a synchronization barrier
| void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) { |
| if (counter_addr != NULL) { |
| InlinedAddress counter_addr_literal((address)counter_addr); |
| Label done, retry; |
| if (cond != al) { |
| b(done, inverse(cond)); |
| } |
| |
| #ifdef AARCH64 |
| raw_push(R0, R1); |
| raw_push(R2, ZR); |
| |
| ldr_literal(R0, counter_addr_literal); |
| |
| bind(retry); |
| ldxr_w(R1, R0); |
| add_w(R1, R1, 1); |
| stxr_w(R2, R1, R0); |
| cbnz_w(R2, retry); |
| |
| raw_pop(R2, ZR); |
| raw_pop(R0, R1); |
| #else |
| push(RegisterSet(R0, R3) | RegisterSet(Rtemp)); |
| ldr_literal(R0, counter_addr_literal); |
| |
| mrs(CPSR, Rtemp); |
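// CPSR is saved in Rtemp above because the CAS loop below clobbers
// the flags, and this helper promises to preserve them.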
| |
| bind(retry); |
| ldr_s32(R1, Address(R0)); |
| add(R2, R1, 1); |
| atomic_cas_bool(R1, R2, R0, 0, R3); |
| b(retry, ne); |
| |
| msr(CPSR_fsxc, Rtemp); |
| |
| pop(RegisterSet(R0, R3) | RegisterSet(Rtemp)); |
| #endif // AARCH64 |
| |
| b(done); |
| bind_literal(counter_addr_literal); |
| |
| bind(done); |
| } |
| } |
| |
| #endif // !PRODUCT |
| |
| |
| // Building block for CAS cases of biased locking: makes CAS and records statistics. |
| // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set. |
| void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg, |
| Register tmp, Label& slow_case, int* counter_addr) { |
| |
| cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case); |
| #ifdef ASSERT |
| breakpoint(ne); // Fallthrough only on success |
| #endif |
| #ifndef PRODUCT |
| if (counter_addr != NULL) { |
| cond_atomic_inc32(al, counter_addr); |
| } |
| #endif // !PRODUCT |
| } |
| |
| int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg, |
| bool swap_reg_contains_mark, |
| Register tmp2, |
| Label& done, Label& slow_case, |
| BiasedLockingCounters* counters) { |
| // obj_reg must be preserved (at least) if the bias locking fails |
| // tmp_reg is a temporary register |
// swap_reg was used as a temporary but contained a value
// that was used afterwards in some call paths. Callers
// have been fixed so that swap_reg no longer needs to be
// saved.
// Rtemp is no longer scratched
| |
| assert(UseBiasedLocking, "why call this otherwise?"); |
| assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2); |
guarantee(swap_reg != tmp_reg, "invariant");
| assert(tmp_reg != noreg, "must supply tmp_reg"); |
| |
| #ifndef PRODUCT |
| if (PrintBiasedLockingStatistics && (counters == NULL)) { |
| counters = BiasedLocking::counters(); |
| } |
| #endif |
| |
| assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); |
| Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes()); |
| |
| // Biased locking |
| // See whether the lock is currently biased toward our thread and |
| // whether the epoch is still valid |
| // Note that the runtime guarantees sufficient alignment of JavaThread |
| // pointers to allow age to be placed into low bits |
| // First check to see whether biasing is even enabled for this object |
| Label cas_label; |
| |
// The null check applies to the mark loading, if we need to load it.
// If the mark has already been loaded in swap_reg then the null check
// has already been performed and the offset is irrelevant.
| int null_check_offset = offset(); |
| if (!swap_reg_contains_mark) { |
| ldr(swap_reg, mark_addr); |
| } |
| |
// On MP platforms loads could return 'stale' values in some cases.
// That is acceptable since either the CAS or the slow case path is taken in the worst case.
| |
| andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place); |
| cmp(tmp_reg, markOopDesc::biased_lock_pattern); |
| |
| b(cas_label, ne); |
| |
| // The bias pattern is present in the object's header. Need to check |
| // whether the bias owner and the epoch are both still current. |
| load_klass(tmp_reg, obj_reg); |
| ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); |
| orr(tmp_reg, tmp_reg, Rthread); |
| eor(tmp_reg, tmp_reg, swap_reg); |
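// tmp_reg now holds (prototype_header | thread) ^ mark: apart from
// the age bits, it is zero iff the mark is biased to this thread
// and the epoch is current.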
| |
| #ifdef AARCH64 |
| ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place)); |
| #else |
| bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place)); |
| #endif // AARCH64 |
| |
| #ifndef PRODUCT |
| if (counters != NULL) { |
| cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr()); |
| } |
| #endif // !PRODUCT |
| |
| b(done, eq); |
| |
| Label try_revoke_bias; |
| Label try_rebias; |
| |
| // At this point we know that the header has the bias pattern and |
| // that we are not the bias owner in the current epoch. We need to |
| // figure out more details about the state of the header in order to |
| // know what operations can be legally performed on the object's |
| // header. |
| |
| // If the low three bits in the xor result aren't clear, that means |
| // the prototype header is no longer biased and we have to revoke |
| // the bias on this object. |
| tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); |
| b(try_revoke_bias, ne); |
| |
| // Biasing is still enabled for this data type. See whether the |
| // epoch of the current bias is still valid, meaning that the epoch |
| // bits of the mark word are equal to the epoch bits of the |
| // prototype header. (Note that the prototype header's epoch bits |
| // only change at a safepoint.) If not, attempt to rebias the object |
| // toward the current thread. Note that we must be absolutely sure |
| // that the current epoch is invalid in order to do this because |
| // otherwise the manipulations it performs on the mark word are |
| // illegal. |
| tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place); |
| b(try_rebias, ne); |
| |
| // tmp_reg has the age, epoch and pattern bits cleared |
| // The remaining (owner) bits are (Thread ^ current_owner) |
| |
| // The epoch of the current bias is still valid but we know nothing |
| // about the owner; it might be set or it might be clear. Try to |
| // acquire the bias of the object using an atomic operation. If this |
| // fails we will go in to the runtime to revoke the object's bias. |
| // Note that we first construct the presumed unbiased header so we |
| // don't accidentally blow away another thread's valid bias. |
| |
// Note that we know the owner is not ourself. Hence, success can
// only happen when the owner bits are 0
| |
| #ifdef AARCH64 |
// The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
// a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
| mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); |
| andr(swap_reg, swap_reg, tmp2); |
| #else |
| // until the assembler can be made smarter, we need to make some assumptions about the values |
| // so we can optimize this: |
| assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); |
| |
| mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); |
| mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS) |
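// Illustration: lsl 23 followed by lsr 23 on a 32-bit word keeps only the
// low 9 bits (0x1ff == biased_lock | age | epoch, per the assert above)
// and zeroes the thread bits 9..31.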
| #endif // AARCH64 |
| |
| orr(tmp_reg, swap_reg, Rthread); // new mark |
| |
| biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, |
| (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL); |
| |
| // If the biasing toward our thread failed, this means that |
| // another thread succeeded in biasing it toward itself and we |
| // need to revoke that bias. The revocation will occur in the |
| // interpreter runtime in the slow case. |
| |
| b(done); |
| |
| bind(try_rebias); |
| |
| // At this point we know the epoch has expired, meaning that the |
| // current "bias owner", if any, is actually invalid. Under these |
| // circumstances _only_, we are allowed to use the current header's |
| // value as the comparison value when doing the cas to acquire the |
| // bias in the current epoch. In other words, we allow transfer of |
| // the bias from one thread to another directly in this situation. |
| |
| // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) |
| |
| eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) |
| |
| // owner bits 'random'. Set them to Rthread. |
| #ifdef AARCH64 |
| mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); |
| andr(tmp_reg, tmp_reg, tmp2); |
| #else |
| mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); |
| mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); |
| #endif // AARCH64 |
| |
| orr(tmp_reg, tmp_reg, Rthread); // new mark |
| |
| biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, |
| (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL); |
| |
| // If the biasing toward our thread failed, then another thread |
| // succeeded in biasing it toward itself and we need to revoke that |
| // bias. The revocation will occur in the runtime in the slow case. |
| |
| b(done); |
| |
| bind(try_revoke_bias); |
| |
| // The prototype mark in the klass doesn't have the bias bit set any |
| // more, indicating that objects of this data type are not supposed |
| // to be biased any more. We are going to try to reset the mark of |
| // this object to the prototype value and fall through to the |
| // CAS-based locking scheme. Note that if our CAS fails, it means |
| // that another thread raced us for the privilege of revoking the |
| // bias of this particular object, so it's okay to continue in the |
| // normal locking code. |
| |
| // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) |
| |
| eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) |
| |
| // owner bits 'random'. Clear them |
| #ifdef AARCH64 |
| mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); |
| andr(tmp_reg, tmp_reg, tmp2); |
| #else |
| mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); |
| mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); |
| #endif // AARCH64 |
| |
| biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, |
| (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); |
| |
| // Fall through to the normal CAS-based lock, because no matter what |
| // the result of the above CAS, some thread must have succeeded in |
| // removing the bias bit from the object's header. |
| |
| bind(cas_label); |
| |
| return null_check_offset; |
| } |
| |
| |
| void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { |
| assert(UseBiasedLocking, "why call this otherwise?"); |
| |
| // Check for biased locking unlock case, which is a no-op |
| // Note: we do not have to check the thread ID for two reasons. |
| // First, the interpreter checks for IllegalMonitorStateException at |
| // a higher level. Second, if the bias was revoked while we held the |
| // lock, the object could not be rebiased toward another thread, so |
| // the bias bit would be clear. |
| ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); |
| |
| andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); |
| cmp(tmp_reg, markOopDesc::biased_lock_pattern); |
| b(done, eq); |
| } |
| |
| |
| void MacroAssembler::resolve_jobject(Register value, |
| Register tmp1, |
| Register tmp2) { |
| assert_different_registers(value, tmp1, tmp2); |
| Label done, not_weak; |
| cbz(value, done); // Use NULL as-is. |
| STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); |
| tbz(value, 0, not_weak); // Test for jweak tag. |
| // Resolve jweak. |
| ldr(value, Address(value, -JNIHandles::weak_tag_value)); |
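// Note: loading at offset -weak_tag_value both strips the tag bit and
// dereferences the handle in a single instruction.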
| verify_oop(value); |
| #if INCLUDE_ALL_GCS |
| if (UseG1GC) { |
| g1_write_barrier_pre(noreg, // store_addr |
| noreg, // new_val |
| value, // pre_val |
| tmp1, // tmp1 |
| tmp2); // tmp2 |
| } |
| #endif // INCLUDE_ALL_GCS |
| b(done); |
| bind(not_weak); |
| // Resolve (untagged) jobject. |
| ldr(value, Address(value)); |
| verify_oop(value); |
| bind(done); |
| } |
| |
| |
| ////////////////////////////////////////////////////////////////////////////////// |
| |
| #if INCLUDE_ALL_GCS |
| |
| // G1 pre-barrier. |
| // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). |
| // If store_addr != noreg, then previous value is loaded from [store_addr]; |
// in that case the store_addr and new_val registers are preserved;
| // otherwise pre_val register is preserved. |
| void MacroAssembler::g1_write_barrier_pre(Register store_addr, |
| Register new_val, |
| Register pre_val, |
| Register tmp1, |
| Register tmp2) { |
| Label done; |
| Label runtime; |
| |
| if (store_addr != noreg) { |
| assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg); |
| } else { |
| assert (new_val == noreg, "should be"); |
| assert_different_registers(pre_val, tmp1, tmp2, noreg); |
| } |
| |
| Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + |
| SATBMarkQueue::byte_offset_of_active())); |
| Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + |
| SATBMarkQueue::byte_offset_of_index())); |
| Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + |
| SATBMarkQueue::byte_offset_of_buf())); |
| |
| // Is marking active? |
| assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code"); |
| ldrb(tmp1, in_progress); |
| cbz(tmp1, done); |
| |
| // Do we need to load the previous value? |
| if (store_addr != noreg) { |
| load_heap_oop(pre_val, Address(store_addr, 0)); |
| } |
| |
| // Is the previous value null? |
| cbz(pre_val, done); |
| |
| // Can we store original value in the thread's buffer? |
| // Is index == 0? |
| // (The index field is typed as size_t.) |
| |
| ldr(tmp1, index); // tmp1 := *index_adr |
| ldr(tmp2, buffer); |
| |
| subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize |
| b(runtime, lt); // If negative, goto runtime |
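// The index counts down in bytes from the buffer size; going negative
// here means the buffer is full and we must hand off to the runtime.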
| |
| str(tmp1, index); // *index_adr := tmp1 |
| |
| // Record the previous value |
| str(pre_val, Address(tmp2, tmp1)); |
| b(done); |
| |
| bind(runtime); |
| |
| // save the live input values |
| #ifdef AARCH64 |
| if (store_addr != noreg) { |
| raw_push(store_addr, new_val); |
| } else { |
| raw_push(pre_val, ZR); |
| } |
| #else |
| if (store_addr != noreg) { |
| // avoid raw_push to support any ordering of store_addr and new_val |
| push(RegisterSet(store_addr) | RegisterSet(new_val)); |
| } else { |
| push(pre_val); |
| } |
| #endif // AARCH64 |
| |
| if (pre_val != R0) { |
| mov(R0, pre_val); |
| } |
| mov(R1, Rthread); |
| |
| call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1); |
| |
| #ifdef AARCH64 |
| if (store_addr != noreg) { |
| raw_pop(store_addr, new_val); |
| } else { |
| raw_pop(pre_val, ZR); |
| } |
| #else |
| if (store_addr != noreg) { |
| pop(RegisterSet(store_addr) | RegisterSet(new_val)); |
| } else { |
| pop(pre_val); |
| } |
| #endif // AARCH64 |
| |
| bind(done); |
| } |
| |
| // G1 post-barrier. |
| // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). |
| void MacroAssembler::g1_write_barrier_post(Register store_addr, |
| Register new_val, |
| Register tmp1, |
| Register tmp2, |
| Register tmp3) { |
| |
| Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + |
| DirtyCardQueue::byte_offset_of_index())); |
| Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + |
| DirtyCardQueue::byte_offset_of_buf())); |
| |
| BarrierSet* bs = Universe::heap()->barrier_set(); |
| CardTableModRefBS* ct = (CardTableModRefBS*)bs; |
| Label done; |
| Label runtime; |
| |
| // Does store cross heap regions? |
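// Addresses within one region agree in all bits above LogOfHRGrainBytes,
// so xor + shift-right yields zero exactly when no boundary is crossed.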
| |
| eor(tmp1, store_addr, new_val); |
| #ifdef AARCH64 |
| logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes); |
| cbz(tmp1, done); |
| #else |
| movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes)); |
| b(done, eq); |
| #endif |
| |
| // crosses regions, storing NULL? |
| |
| cbz(new_val, done); |
| |
| // storing region crossing non-NULL, is card already dirty? |
| const Register card_addr = tmp1; |
| assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); |
| |
| mov_address(tmp2, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference); |
| add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTableModRefBS::card_shift)); |
| |
| ldrb(tmp2, Address(card_addr)); |
| cmp(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val()); |
| b(done, eq); |
| |
| membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2); |
| |
| assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code"); |
| ldrb(tmp2, Address(card_addr)); |
| cbz(tmp2, done); |
| |
| // storing a region crossing, non-NULL oop, card is clean. |
| // dirty card and log. |
| |
| strb(zero_register(tmp2), Address(card_addr)); |
| |
| ldr(tmp2, queue_index); |
| ldr(tmp3, buffer); |
| |
| subs(tmp2, tmp2, wordSize); |
| b(runtime, lt); // go to runtime if now negative |
| |
| str(tmp2, queue_index); |
| |
| str(card_addr, Address(tmp3, tmp2)); |
| b(done); |
| |
| bind(runtime); |
| |
| if (card_addr != R0) { |
| mov(R0, card_addr); |
| } |
| mov(R1, Rthread); |
| call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1); |
| |
| bind(done); |
| } |
| |
| #endif // INCLUDE_ALL_GCS |
| |
| ////////////////////////////////////////////////////////////////////////////////// |
| |
| #ifdef AARCH64 |
| |
| void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { |
| switch (size_in_bytes) { |
| case 8: ldr(dst, src); break; |
| case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break; |
| case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break; |
| case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break; |
| default: ShouldNotReachHere(); |
| } |
| } |
| |
| void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { |
| switch (size_in_bytes) { |
| case 8: str(src, dst); break; |
| case 4: str_32(src, dst); break; |
| case 2: strh(src, dst); break; |
| case 1: strb(src, dst); break; |
| default: ShouldNotReachHere(); |
| } |
| } |
| |
| #else |
| |
| void MacroAssembler::load_sized_value(Register dst, Address src, |
| size_t size_in_bytes, bool is_signed, AsmCondition cond) { |
| switch (size_in_bytes) { |
| case 4: ldr(dst, src, cond); break; |
| case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break; |
| case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break; |
| default: ShouldNotReachHere(); |
| } |
| } |
| |
| |
| void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) { |
| switch (size_in_bytes) { |
| case 4: str(src, dst, cond); break; |
| case 2: strh(src, dst, cond); break; |
| case 1: strb(src, dst, cond); break; |
| default: ShouldNotReachHere(); |
| } |
| } |
| #endif // AARCH64 |
| |
| // Look up the method for a megamorphic invokeinterface call. |
| // The target method is determined by <Rinterf, Rindex>. |
| // The receiver klass is in Rklass. |
| // On success, the result will be in method_result, and execution falls through. |
| // On failure, execution transfers to the given label. |
| void MacroAssembler::lookup_interface_method(Register Rklass, |
| Register Rinterf, |
| Register Rindex, |
| Register method_result, |
| Register temp_reg1, |
| Register temp_reg2, |
| Label& L_no_such_interface) { |
| |
| assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex); |
| |
| Register Ritable = temp_reg1; |
| |
| // Compute start of first itableOffsetEntry (which is at the end of the vtable) |
| const int base = in_bytes(Klass::vtable_start_offset()); |
| const int scale = exact_log2(vtableEntry::size_in_bytes()); |
| ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable |
| add(Ritable, Rklass, base); |
| add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale)); |
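// Itable layout, following the vtable (sketch):
//   itableOffsetEntry[0..n]: { interface Klass*, offset of its method table },
//                            terminated by a null interface entry
//   itableMethodEntry arrays: one block of Method* slots per implemented interface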
| |
| Label entry, search; |
| |
| b(entry); |
| |
| bind(search); |
| add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize); |
| |
| bind(entry); |
| |
| // Check that the entry is non-null. A null entry means that the receiver |
| // class doesn't implement the interface, and wasn't the same as the |
| // receiver class checked when the interface was resolved. |
| |
| ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes())); |
| cbz(temp_reg2, L_no_such_interface); |
| |
| cmp(Rinterf, temp_reg2); |
| b(search, ne); |
| |
| ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes())); |
| add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass* |
| assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below"); |
| assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below"); |
| |
| ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex)); |
| } |
| |
| #ifdef COMPILER2 |
| // TODO: 8 bytes at a time? pre-fetch? |
| // Compare char[] arrays aligned to 4 bytes. |
| void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, |
| Register limit, Register result, |
| Register chr1, Register chr2, Label& Ldone) { |
| Label Lvector, Lloop; |
| |
| // Note: limit contains number of bytes (2*char_elements) != 0. |
| tst(limit, 0x2); // trailing character ? |
| b(Lvector, eq); |
| |
| // compare the trailing char |
| sub(limit, limit, sizeof(jchar)); |
| ldrh(chr1, Address(ary1, limit)); |
| ldrh(chr2, Address(ary2, limit)); |
| cmp(chr1, chr2); |
| mov(result, 0, ne); // not equal |
| b(Ldone, ne); |
| |
| // only one char ? |
| tst(limit, limit); |
| mov(result, 1, eq); |
| b(Ldone, eq); |
| |
// word by word compare, don't need alignment check
| bind(Lvector); |
| |
| // Shift ary1 and ary2 to the end of the arrays, negate limit |
| add(ary1, limit, ary1); |
| add(ary2, limit, ary2); |
| neg(limit, limit); |
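// ary1/ary2 now point one past the end of the arrays and limit is
// negative, so Address(ary, limit) walks forward as limit rises toward
// zero; the 'adds' below sets flags so 'ne' loops until limit == 0.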
| |
| bind(Lloop); |
| ldr_u32(chr1, Address(ary1, limit)); |
| ldr_u32(chr2, Address(ary2, limit)); |
| cmp_32(chr1, chr2); |
| mov(result, 0, ne); // not equal |
| b(Ldone, ne); |
| adds(limit, limit, 2*sizeof(jchar)); |
| b(Lloop, ne); |
| |
| // Caller should set it: |
| // mov(result_reg, 1); //equal |
| } |
| #endif |
| |
| void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) { |
| mov_slow(tmpreg1, counter_addr); |
ldr_s32(tmpreg2, Address(tmpreg1));
add_32(tmpreg2, tmpreg2, 1);
str_32(tmpreg2, Address(tmpreg1));
| } |
| |
| void MacroAssembler::floating_cmp(Register dst) { |
| #ifdef AARCH64 |
| NOT_TESTED(); |
| cset(dst, gt); // 1 if '>', else 0 |
| csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 |
| #else |
| vmrs(dst, FPSCR); |
| orr(dst, dst, 0x08000000); |
| eor(dst, dst, AsmOperand(dst, lsl, 3)); |
| mov(dst, AsmOperand(dst, asr, 30)); |
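// How this works (FPSCR: N=bit31, Z=bit30, C=bit29, V=bit28):
// orr sets bit 27; the eor with (dst << 3) makes bit31 = N^V and
// bit30 = ~Z; asr 30 then yields -1 (less or unordered), 0 (equal),
// or +1 (greater).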
| #endif |
| } |
| |
| void MacroAssembler::restore_default_fp_mode() { |
| #ifdef AARCH64 |
| msr(SysReg_FPCR, ZR); |
| #else |
| #ifndef __SOFTFP__ |
| // Round to Near mode, IEEE compatible, masked exceptions |
| mov(Rtemp, 0); |
| vmsr(FPSCR, Rtemp); |
| #endif // !__SOFTFP__ |
| #endif // AARCH64 |
| } |
| |
| #ifndef AARCH64 |
| // 24-bit word range == 26-bit byte range |
| bool check26(int offset) { |
// this could be simplified, but it mimics encoding and decoding
// an actual branch instruction
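// e.g. check26(32*M - 4) is true (the largest forward reach), while
// check26(32*M) is false: the signed 26-bit byte range is [-2^25, 2^25 - 4].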
| int off1 = offset << 6 >> 8; |
| int encoded = off1 & ((1<<24)-1); |
| int decoded = encoded << 8 >> 6; |
| return offset == decoded; |
| } |
| #endif // !AARCH64 |
| |
| // Perform some slight adjustments so the default 32MB code cache |
| // is fully reachable. |
| static inline address first_cache_address() { |
| return CodeCache::low_bound() + sizeof(HeapBlock::Header); |
| } |
| static inline address last_cache_address() { |
| return CodeCache::high_bound() - Assembler::InstructionSize; |
| } |
| |
| #ifdef AARCH64 |
| // Can we reach target using ADRP? |
| bool MacroAssembler::page_reachable_from_cache(address target) { |
| intptr_t cl = (intptr_t)first_cache_address() & ~0xfff; |
| intptr_t ch = (intptr_t)last_cache_address() & ~0xfff; |
| intptr_t addr = (intptr_t)target & ~0xfff; |
| |
| intptr_t loffset = addr - cl; |
| intptr_t hoffset = addr - ch; |
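// ADRP takes a signed 21-bit page immediate, i.e. +/- 2^20 4KB pages
// (about +/-4GB) from the instruction's own page.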
| return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0); |
| } |
| #endif |
| |
| // Can we reach target using unconditional branch or call from anywhere |
| // in the code cache (because code can be relocated)? |
| bool MacroAssembler::_reachable_from_cache(address target) { |
| #ifdef __thumb__ |
| if ((1 & (intptr_t)target) != 0) { |
// Return false to avoid 'b' if we would need to switch to THUMB mode.
| return false; |
| } |
| #endif |
| |
| address cl = first_cache_address(); |
| address ch = last_cache_address(); |
| |
| if (ForceUnreachable) { |
| // Only addresses from CodeCache can be treated as reachable. |
| if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) { |
| return false; |
| } |
| } |
| |
| intptr_t loffset = (intptr_t)target - (intptr_t)cl; |
| intptr_t hoffset = (intptr_t)target - (intptr_t)ch; |
| |
| #ifdef AARCH64 |
| return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26); |
| #else |
| return check26(loffset - 8) && check26(hoffset - 8); |
| #endif |
| } |
| |
| bool MacroAssembler::reachable_from_cache(address target) { |
| assert(CodeCache::contains(pc()), "not supported"); |
| return _reachable_from_cache(target); |
| } |
| |
| // Can we reach the entire code cache from anywhere else in the code cache? |
| bool MacroAssembler::_cache_fully_reachable() { |
| address cl = first_cache_address(); |
| address ch = last_cache_address(); |
| return _reachable_from_cache(cl) && _reachable_from_cache(ch); |
| } |
| |
| bool MacroAssembler::cache_fully_reachable() { |
| assert(CodeCache::contains(pc()), "not supported"); |
| return _cache_fully_reachable(); |
| } |
| |
| void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { |
| assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); |
| if (reachable_from_cache(target)) { |
| relocate(rtype); |
| b(target NOT_AARCH64_ARG(cond)); |
| return; |
| } |
| |
| // Note: relocate is not needed for the code below, |
| // encoding targets in absolute format. |
| if (ignore_non_patchable_relocations()) { |
| rtype = relocInfo::none; |
| } |
| |
| #ifdef AARCH64 |
| assert (scratch != noreg, "should be specified"); |
| InlinedAddress address_literal(target, rtype); |
| ldr_literal(scratch, address_literal); |
| br(scratch); |
| int off = offset(); |
| bind_literal(address_literal); |
| #ifdef COMPILER2 |
| if (offset() - off == wordSize) { |
| // no padding, so insert nop for worst-case sizing |
| nop(); |
| } |
| #endif |
| #else |
| if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) { |
| // Note: this version cannot be (atomically) patched |
| mov_slow(scratch, (intptr_t)target, cond); |
| bx(scratch, cond); |
| } else { |
| Label skip; |
| InlinedAddress address_literal(target); |
| if (cond != al) { |
| b(skip, inverse(cond)); |
| } |
| relocate(rtype); |
| ldr_literal(PC, address_literal); |
| bind_literal(address_literal); |
| bind(skip); |
| } |
| #endif // AARCH64 |
| } |
| |
| // Similar to jump except that: |
| // - near calls are valid only if any destination in the cache is near |
| // - no movt/movw (not atomically patchable) |
| void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { |
| assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); |
| if (cache_fully_reachable()) { |
// Note: this assumes that all possible targets (the initial one
// and the addresses patched to) are all in the code cache.
| assert(CodeCache::contains(target), "target might be too far"); |
| relocate(rtype); |
| b(target NOT_AARCH64_ARG(cond)); |
| return; |
| } |
| |
| // Discard the relocation information if not needed for CacheCompiledCode |
| // since the next encodings are all in absolute format. |
| if (ignore_non_patchable_relocations()) { |
| rtype = relocInfo::none; |
| } |
| |
| #ifdef AARCH64 |
| assert (scratch != noreg, "should be specified"); |
| InlinedAddress address_literal(target); |
| relocate(rtype); |
| ldr_literal(scratch, address_literal); |
| br(scratch); |
| int off = offset(); |
| bind_literal(address_literal); |
| #ifdef COMPILER2 |
| if (offset() - off == wordSize) { |
| // no padding, so insert nop for worst-case sizing |
| nop(); |
| } |
| #endif |
| #else |
| { |
| Label skip; |
| InlinedAddress address_literal(target); |
| if (cond != al) { |
| b(skip, inverse(cond)); |
| } |
| relocate(rtype); |
| ldr_literal(PC, address_literal); |
| bind_literal(address_literal); |
| bind(skip); |
| } |
| #endif // AARCH64 |
| } |
| |
| void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) { |
| Register scratch = LR; |
| assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported"); |
| if (reachable_from_cache(target)) { |
| relocate(rspec); |
| bl(target NOT_AARCH64_ARG(cond)); |
| return; |
| } |
| |
| // Note: relocate is not needed for the code below, |
| // encoding targets in absolute format. |
| if (ignore_non_patchable_relocations()) { |
| // This assumes the information was needed only for relocating the code. |
| rspec = RelocationHolder::none; |
| } |
| |
| #ifndef AARCH64 |
| if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) { |
| // Note: this version cannot be (atomically) patched |
| mov_slow(scratch, (intptr_t)target, cond); |
| blx(scratch, cond); |
| return; |
| } |
| #endif |
| |
| { |
| Label ret_addr; |
| #ifndef AARCH64 |
| if (cond != al) { |
| b(ret_addr, inverse(cond)); |
| } |
| #endif |
| |
| |
| #ifdef AARCH64 |
// TODO-AARCH64: make a more optimal implementation
| // [ Keep in sync with MacroAssembler::call_size ] |
| assert(rspec.type() == relocInfo::none, "call reloc not implemented"); |
| mov_slow(scratch, target); |
| blr(scratch); |
| #else |
| InlinedAddress address_literal(target); |
| relocate(rspec); |
| adr(LR, ret_addr); |
| ldr_literal(PC, address_literal); |
| |
| bind_literal(address_literal); |
| bind(ret_addr); |
| #endif |
| } |
| } |
| |
| #if defined(AARCH64) && defined(COMPILER2) |
| int MacroAssembler::call_size(address target, bool far, bool patchable) { |
| // FIXME: mov_slow is variable-length |
| if (!far) return 1; // bl |
| if (patchable) return 2; // ldr; blr |
| return instr_count_for_mov_slow((intptr_t)target) + 1; |
| } |
| #endif |
| |
| int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) { |
| assert(rspec.type() == relocInfo::static_call_type || |
| rspec.type() == relocInfo::none || |
| rspec.type() == relocInfo::opt_virtual_call_type, "not supported"); |
| |
| // Always generate the relocation information, needed for patching |
| relocate(rspec); // used by NativeCall::is_call_before() |
| if (cache_fully_reachable()) { |
| // Note: this assumes that all possible targets (the initial one |
| // and the addresses patched to) are all in the code cache. |
| assert(CodeCache::contains(target), "target might be too far"); |
| bl(target); |
| } else { |
| #if defined(AARCH64) && defined(COMPILER2) |
| if (c2) { |
| // return address needs to match call_size(). |
| // no need to trash Rtemp |
| int off = offset(); |
| Label skip_literal; |
| InlinedAddress address_literal(target); |
| ldr_literal(LR, address_literal); |
| blr(LR); |
| int ret_addr_offset = offset(); |
| assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()"); |
| b(skip_literal); |
| int off2 = offset(); |
| bind_literal(address_literal); |
| if (offset() - off2 == wordSize) { |
| // no padding, so insert nop for worst-case sizing |
| nop(); |
| } |
| bind(skip_literal); |
| return ret_addr_offset; |
| } |
| #endif |
| Label ret_addr; |
| InlinedAddress address_literal(target); |
| #ifdef AARCH64 |
| ldr_literal(Rtemp, address_literal); |
| adr(LR, ret_addr); |
| br(Rtemp); |
| #else |
| adr(LR, ret_addr); |
| ldr_literal(PC, address_literal); |
| #endif |
| bind_literal(address_literal); |
| bind(ret_addr); |
| } |
| return offset(); |
| } |
| |
| // ((OopHandle)result).resolve(); |
| void MacroAssembler::resolve_oop_handle(Register result) { |
| // OopHandle::resolve is an indirection. |
| ldr(result, Address(result, 0)); |
| } |
| |
| void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { |
| const int mirror_offset = in_bytes(Klass::java_mirror_offset()); |
| ldr(tmp, Address(method, Method::const_offset())); |
| ldr(tmp, Address(tmp, ConstMethod::constants_offset())); |
| ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes())); |
| ldr(mirror, Address(tmp, mirror_offset)); |
| } |
| |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| // Compressed pointers |
| |
| #ifdef AARCH64 |
| |
| void MacroAssembler::load_klass(Register dst_klass, Register src_oop) { |
| if (UseCompressedClassPointers) { |
| ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); |
| decode_klass_not_null(dst_klass); |
| } else { |
| ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); |
| } |
| } |
| |
| #else |
| |
| void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) { |
| ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond); |
| } |
| |
| #endif // AARCH64 |
| |
| // Blows src_klass. |
| void MacroAssembler::store_klass(Register src_klass, Register dst_oop) { |
| #ifdef AARCH64 |
| if (UseCompressedClassPointers) { |
| assert(src_klass != dst_oop, "not enough registers"); |
| encode_klass_not_null(src_klass); |
| str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); |
| return; |
| } |
| #endif // AARCH64 |
| str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); |
| } |
| |
| #ifdef AARCH64 |
| |
| void MacroAssembler::store_klass_gap(Register dst) { |
| if (UseCompressedClassPointers) { |
| str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes())); |
| } |
| } |
| |
| #endif // AARCH64 |
| |
| |
| void MacroAssembler::load_heap_oop(Register dst, Address src) { |
| #ifdef AARCH64 |
| if (UseCompressedOops) { |
| ldr_w(dst, src); |
| decode_heap_oop(dst); |
| return; |
| } |
| #endif // AARCH64 |
| ldr(dst, src); |
| } |
| |
| // Blows src and flags. |
| void MacroAssembler::store_heap_oop(Register src, Address dst) { |
| #ifdef AARCH64 |
| if (UseCompressedOops) { |
| assert(!dst.uses(src), "not enough registers"); |
| encode_heap_oop(src); |
| str_w(src, dst); |
| return; |
| } |
| #endif // AARCH64 |
| str(src, dst); |
| } |
| |
| void MacroAssembler::store_heap_oop_null(Register src, Address dst) { |
| #ifdef AARCH64 |
| if (UseCompressedOops) { |
| str_w(src, dst); |
| return; |
| } |
| #endif // AARCH64 |
| str(src, dst); |
| } |
| |
| |
| #ifdef AARCH64 |
| |
| // Algorithm must match oop.inline.hpp encode_heap_oop. |
| void MacroAssembler::encode_heap_oop(Register dst, Register src) { |
// This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
| // Update it at modifications. |
| assert (UseCompressedOops, "must be compressed"); |
| assert (Universe::heap() != NULL, "java heap should be initialized"); |
| #ifdef ASSERT |
| verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); |
| #endif |
| verify_oop(src); |
| if (Universe::narrow_oop_base() == NULL) { |
| if (Universe::narrow_oop_shift() != 0) { |
| assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
| _lsr(dst, src, Universe::narrow_oop_shift()); |
| } else if (dst != src) { |
| mov(dst, src); |
| } |
| } else { |
| tst(src, src); |
| csel(dst, Rheap_base, src, eq); |
| sub(dst, dst, Rheap_base); |
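// A NULL src selects Rheap_base above, so the subtraction yields 0,
// the canonical encoding of the null oop.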
| if (Universe::narrow_oop_shift() != 0) { |
| assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
| _lsr(dst, dst, Universe::narrow_oop_shift()); |
| } |
| } |
| } |
| |
| // Same algorithm as oop.inline.hpp decode_heap_oop. |
| void MacroAssembler::decode_heap_oop(Register dst, Register src) { |
| #ifdef ASSERT |
| verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); |
| #endif |
| assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
| if (Universe::narrow_oop_base() != NULL) { |
| tst(src, src); |
| add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); |
| csel(dst, dst, ZR, ne); |
| } else { |
| _lsl(dst, src, Universe::narrow_oop_shift()); |
| } |
| verify_oop(dst); |
| } |
| |
| #ifdef COMPILER2 |
| // Algorithm must match oop.inline.hpp encode_heap_oop. |
| // Must preserve condition codes, or C2 encodeHeapOop_not_null rule |
| // must be changed. |
| void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { |
| assert (UseCompressedOops, "must be compressed"); |
| assert (Universe::heap() != NULL, "java heap should be initialized"); |
| #ifdef ASSERT |
| verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); |
| #endif |
| verify_oop(src); |
| if (Universe::narrow_oop_base() == NULL) { |
| if (Universe::narrow_oop_shift() != 0) { |
| assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
| _lsr(dst, src, Universe::narrow_oop_shift()); |
| } else if (dst != src) { |
| mov(dst, src); |
| } |
| } else { |
| sub(dst, src, Rheap_base); |
| if (Universe::narrow_oop_shift() != 0) { |
| assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
| _lsr(dst, dst, Universe::narrow_oop_shift()); |
| } |
| } |
| } |
| |
// Same algorithm as oop.inline.hpp decode_heap_oop.
| // Must preserve condition codes, or C2 decodeHeapOop_not_null rule |
| // must be changed. |
| void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { |
| #ifdef ASSERT |
| verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); |
| #endif |
| assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
| if (Universe::narrow_oop_base() != NULL) { |
| add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); |
| } else { |
| _lsl(dst, src, Universe::narrow_oop_shift()); |
| } |
| verify_oop(dst); |
| } |
| |
| void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { |
| assert(UseCompressedClassPointers, "should only be used for compressed header"); |
| assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
| int klass_index = oop_recorder()->find_index(k); |
| RelocationHolder rspec = metadata_Relocation::spec(klass_index); |
| |
| // Relocation with special format (see relocInfo_arm.hpp). |
| relocate(rspec); |
| narrowKlass encoded_k = Klass::encode_klass(k); |
| movz(dst, encoded_k & 0xffff, 0); |
| movk(dst, (encoded_k >> 16) & 0xffff, 16); |
| } |
| |
| void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { |
| assert(UseCompressedOops, "should only be used for compressed header"); |
| assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
| int oop_index = oop_recorder()->find_index(obj); |
| RelocationHolder rspec = oop_Relocation::spec(oop_index); |
| |
| relocate(rspec); |
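// The 0xffff immediates below are placeholders; the recorded oop
// relocation patches this movz/movk pair with the actual narrow oop bits.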
| movz(dst, 0xffff, 0); |
| movk(dst, 0xffff, 16); |
| } |
| |
| #endif // COMPILER2 |
| |
| // Must preserve condition codes, or C2 encodeKlass_not_null rule |
| // must be changed. |
| void MacroAssembler::encode_klass_not_null(Register r) { |
| if (Universe::narrow_klass_base() != NULL) { |
| // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. |
| assert(r != Rheap_base, "Encoding a klass in Rheap_base"); |
| mov_slow(Rheap_base, Universe::narrow_klass_base()); |
| sub(r, r, Rheap_base); |
| } |
| if (Universe::narrow_klass_shift() != 0) { |
| assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
| _lsr(r, r, Universe::narrow_klass_shift()); |
| } |
| if (Universe::narrow_klass_base() != NULL) { |
| reinit_heapbase(); |
| } |
| } |
| |
| // Must preserve condition codes, or C2 encodeKlass_not_null rule |
| // must be changed. |
| void MacroAssembler::encode_klass_not_null(Register dst, Register src) { |
| if (dst == src) { |
| encode_klass_not_null(src); |
| return; |
| } |
| if (Universe::narrow_klass_base() != NULL) { |
| mov_slow(dst, (int64_t)Universe::narrow_klass_base()); |
| sub(dst, src, dst); |
| if (Universe::narrow_klass_shift() != 0) { |
| assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
| _lsr(dst, dst, Universe::narrow_klass_shift()); |
| } |
| } else { |
| if (Universe::narrow_klass_shift() != 0) { |
| assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
| _lsr(dst, src, Universe::narrow_klass_shift()); |
| } else { |
| mov(dst, src); |
| } |
| } |
| } |
| |
| // Function instr_count_for_decode_klass_not_null() counts the instructions |
// generated by decode_klass_not_null(Register r) and reinit_heapbase(),
| // when (Universe::heap() != NULL). Hence, if the instructions they |
| // generate change, then this method needs to be updated. |
| int MacroAssembler::instr_count_for_decode_klass_not_null() { |
| assert(UseCompressedClassPointers, "only for compressed klass ptrs"); |
| assert(Universe::heap() != NULL, "java heap should be initialized"); |
| if (Universe::narrow_klass_base() != NULL) { |
| return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow |
| 1 + // add |
| instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow |
| } else { |
| if (Universe::narrow_klass_shift() != 0) { |
| return 1; |
| } |
| } |
| return 0; |
| } |
| |
| // Must preserve condition codes, or C2 decodeKlass_not_null rule |
| // must be changed. |
| void MacroAssembler::decode_klass_not_null(Register r) { |
| int off = offset(); |
| assert(UseCompressedClassPointers, "should only be used for compressed headers"); |
| assert(Universe::heap() != NULL, "java heap should be initialized"); |
| assert(r != Rheap_base, "Decoding a klass in Rheap_base"); |
| // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. |
| // Also do not verify_oop as this is called by verify_oop. |
| if (Universe::narrow_klass_base() != NULL) { |
| // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. |
| mov_slow(Rheap_base, Universe::narrow_klass_base()); |
| add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift())); |
| reinit_heapbase(); |
| } else { |
| if (Universe::narrow_klass_shift() != 0) { |
| assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
| _lsl(r, r, Universe::narrow_klass_shift()); |
| } |
| } |
| assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null"); |
| } |
| |
| // Must preserve condition codes, or C2 decodeKlass_not_null rule |
| // must be changed. |
| void MacroAssembler::decode_klass_not_null(Register dst, Register src) { |
| if (src == dst) { |
| decode_klass_not_null(src); |
| return; |
| } |
| |
| assert(UseCompressedClassPointers, "should only be used for compressed headers"); |
| assert(Universe::heap() != NULL, "java heap should be initialized"); |
| assert(src != Rheap_base, "Decoding a klass in Rheap_base"); |
| assert(dst != Rheap_base, "Decoding a klass into Rheap_base"); |
| // Also do not verify_oop as this is called by verify_oop. |
| if (Universe::narrow_klass_base() != NULL) { |
| mov_slow(dst, Universe::narrow_klass_base()); |
| add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift())); |
| } else { |
| _lsl(dst, src, Universe::narrow_klass_shift()); |
| } |
| } |
| |
| |
| void MacroAssembler::reinit_heapbase() { |
| if (UseCompressedOops || UseCompressedClassPointers) { |
| if (Universe::heap() != NULL) { |
| mov_slow(Rheap_base, Universe::narrow_ptrs_base()); |
| } else { |
| ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr()); |
| } |
| } |
| } |
| |
| #ifdef ASSERT |
| void MacroAssembler::verify_heapbase(const char* msg) { |
// This code pattern is matched in NativeInstruction::skip_verify_heapbase.
| // Update it at modifications. |
| assert (UseCompressedOops, "should be compressed"); |
| assert (Universe::heap() != NULL, "java heap should be initialized"); |
| if (CheckCompressedOops) { |
| Label ok; |
| str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); |
| raw_push(Rtemp, ZR); |
| mrs(Rtemp, Assembler::SysReg_NZCV); |
| str(Rtemp, Address(SP, 1 * wordSize)); |
| mov_slow(Rtemp, Universe::narrow_ptrs_base()); |
| cmp(Rheap_base, Rtemp); |
| b(ok, eq); |
| stop(msg); |
| bind(ok); |
| ldr(Rtemp, Address(SP, 1 * wordSize)); |
| msr(Assembler::SysReg_NZCV, Rtemp); |
| raw_pop(Rtemp, ZR); |
| str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); |
| } |
| } |
| #endif // ASSERT |
| |
| #endif // AARCH64 |
| |
| #ifdef COMPILER2 |
| void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) |
| { |
| assert(VM_Version::supports_ldrex(), "unsupported, yet?"); |
| |
| Register Rmark = Rscratch2; |
| |
| assert(Roop != Rscratch, ""); |
| assert(Roop != Rmark, ""); |
| assert(Rbox != Rscratch, ""); |
| assert(Rbox != Rmark, ""); |
| |
| Label fast_lock, done; |
| |
| if (UseBiasedLocking && !UseOptoBiasInlining) { |
| Label failed; |
| #ifdef AARCH64 |
| biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed); |
| #else |
| biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed); |
| #endif |
| bind(failed); |
| } |
| |
| ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); |
| tst(Rmark, markOopDesc::unlocked_value); |
| b(fast_lock, ne); |
| |
| // Check for recursive lock |
| // See comments in InterpreterMacroAssembler::lock_object for |
| // explanations on the fast recursive locking check. |
| #ifdef AARCH64 |
| intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); |
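// mask is arithmetically ~(page_size - 1) | 3 (e.g. 3 - 4096 == ...f003):
// the ands below leaves zero iff (Rmark - SP) has the low two lock bits
// clear and fits within one page, i.e. the mark is a stack address in
// our own frame area, which indicates recursive locking.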
| Assembler::LogicalImmediate imm(mask, false); |
| mov(Rscratch, SP); |
| sub(Rscratch, Rmark, Rscratch); |
| ands(Rscratch, Rscratch, imm); |
| b(done, ne); // exit with failure |
| str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero |
| b(done); |
| |
| #else |
| // -1- test low 2 bits |
| movs(Rscratch, AsmOperand(Rmark, lsl, 30)); |
| // -2- test (hdr - SP) if the low two bits are 0 |
| sub(Rscratch, Rmark, SP, eq); |
| movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq); |
| // If still 'eq' then recursive locking OK |
| str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero |
| b(done); |
| #endif |
| |
| bind(fast_lock); |
| str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); |
| |
| bool allow_fallthrough_on_failure = true; |
| bool one_shot = true; |
| cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); |
| |
| bind(done); |
| |
| } |
| |
| void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) |
| { |
| assert(VM_Version::supports_ldrex(), "unsupported, yet?"); |
| |
| Register Rmark = Rscratch2; |
| |
| assert(Roop != Rscratch, ""); |
| assert(Roop != Rmark, ""); |
| assert(Rbox != Rscratch, ""); |
| assert(Rbox != Rmark, ""); |
| |
| Label done; |
| |
| if (UseBiasedLocking && !UseOptoBiasInlining) { |
| biased_locking_exit(Roop, Rscratch, done); |
| } |
| |
| ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); |
| // If hdr is NULL, we've got recursive locking and there's nothing more to do |
| cmp(Rmark, 0); |
| b(done, eq); |
| |
| // Restore the object header |
| bool allow_fallthrough_on_failure = true; |
| bool one_shot = true; |
| cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); |
| |
| bind(done); |
| |
| } |
| #endif // COMPILER2 |
| |