Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1 | /* |
Mikael Vidstedt | d4b4dca | 2016-03-07 15:03:48 -0800 | [diff] [blame] | 2 | * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef CPU_X86_VM_MACROASSEMBLER_X86_HPP |
| 26 | #define CPU_X86_VM_MACROASSEMBLER_X86_HPP |
| 27 | |
| 28 | #include "asm/assembler.hpp" |
Joseph Provino | 698fba9 | 2013-01-23 13:02:39 -0500 | [diff] [blame] | 29 | #include "utilities/macros.hpp" |
Vladimir Kozlov | 97a51c5 | 2014-03-20 17:49:27 -0700 | [diff] [blame] | 30 | #include "runtime/rtmLocking.hpp" |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 31 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 32 | // MacroAssembler extends Assembler by frequently used macros. |
| 33 | // |
| 34 | // Instructions for which a 'better' code sequence exists depending |
| 35 | // on arguments should also go in here. |
| 36 | |
| 37 | class MacroAssembler: public Assembler { |
| 38 | friend class LIR_Assembler; |
| 39 | friend class Runtime1; // as_Address() |
| 40 | |
| 41 | protected: |
| 42 | |
| 43 | Address as_Address(AddressLiteral adr); |
| 44 | Address as_Address(ArrayAddress adr); |
| 45 | |
| 46 | // Support for VM calls |
| 47 | // |
| 48 | // This is the base routine called by the different versions of call_VM_leaf. The interpreter |
| 49 | // may customize this version by overriding it for its purposes (e.g., to save/restore |
| 50 | // additional registers when doing a VM call). |
Vivek R Deshpande | c07f802 | 2015-10-22 21:39:25 -0700 | [diff] [blame] | 51 | |
Coleen Phillimore | ac0d55c | 2015-12-22 11:11:29 -0500 | [diff] [blame] | 52 | virtual void call_VM_leaf_base( |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 53 | address entry_point, // the entry point |
| 54 | int number_of_arguments // the number of arguments to pop after the call |
| 55 | ); |
| 56 | |
| 57 | // This is the base routine called by the different versions of call_VM. The interpreter |
| 58 | // may customize this version by overriding it for its purposes (e.g., to save/restore |
| 59 | // additional registers when doing a VM call). |
| 60 | // |
| 61 | // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base |
| 62 | // returns the register which contains the thread upon return. If a thread register has been |
| 63 | // specified, the return value will correspond to that register. If no last_java_sp is specified |
| 64 | // (noreg) then rsp will be used instead. |
Coleen Phillimore | ac0d55c | 2015-12-22 11:11:29 -0500 | [diff] [blame] | 65 | virtual void call_VM_base( // returns the register containing the thread upon return |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 66 | Register oop_result, // where an oop-result ends up if any; use noreg otherwise |
| 67 | Register java_thread, // the thread if computed before ; use noreg otherwise |
| 68 | Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise |
| 69 | address entry_point, // the entry point |
| 70 | int number_of_arguments, // the number of arguments (w/o thread) to pop after the call |
| 71 | bool check_exceptions // whether to check for pending exceptions after return |
| 72 | ); |
| 73 | |
| 74 | // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. |
| 75 | // The implementation is only non-empty for the InterpreterMacroAssembler, |
| 76 | // as only the interpreter handles PopFrame and ForceEarlyReturn requests. |
| 77 | virtual void check_and_handle_popframe(Register java_thread); |
| 78 | virtual void check_and_handle_earlyret(Register java_thread); |
| 79 | |
| 80 | void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); |
| 81 | |
| 82 | // helpers for FPU flag access |
| 83 | // tmp is a temporary register, if none is available use noreg |
| 84 | void save_rax (Register tmp); |
| 85 | void restore_rax(Register tmp); |
| 86 | |
| 87 | public: |
  // Construct a MacroAssembler that emits into the given code buffer.
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}
| 89 | |
| 90 | // Support for NULL-checks |
| 91 | // |
| 92 | // Generates code that causes a NULL OS exception if the content of reg is NULL. |
| 93 | // If the accessed location is M[reg + offset] and the offset is known, provide the |
| 94 | // offset. No explicit code generation is needed if the offset is within a certain |
| 95 | // range (0 <= offset <= page_size). |
| 96 | |
| 97 | void null_check(Register reg, int offset = -1); |
| 98 | static bool needs_explicit_null_check(intptr_t offset); |
| 99 | |
| 100 | // Required platform-specific helpers for Label::patch_instructions. |
| 101 | // They _shadow_ the declarations in AbstractAssembler, which are undefined. |
| 102 | void pd_patch_instruction(address branch, address target) { |
| 103 | unsigned char op = branch[0]; |
| 104 | assert(op == 0xE8 /* call */ || |
| 105 | op == 0xE9 /* jmp */ || |
| 106 | op == 0xEB /* short jmp */ || |
| 107 | (op & 0xF0) == 0x70 /* short jcc */ || |
Vladimir Kozlov | 97a51c5 | 2014-03-20 17:49:27 -0700 | [diff] [blame] | 108 | op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ || |
| 109 | op == 0xC7 && branch[1] == 0xF8 /* xbegin */, |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 110 | "Invalid opcode at patch point"); |
| 111 | |
| 112 | if (op == 0xEB || (op & 0xF0) == 0x70) { |
| 113 | // short offset operators (jmp and jcc) |
| 114 | char* disp = (char*) &branch[1]; |
| 115 | int imm8 = target - (address) &disp[1]; |
| 116 | guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset"); |
| 117 | *disp = imm8; |
| 118 | } else { |
Vladimir Kozlov | 97a51c5 | 2014-03-20 17:49:27 -0700 | [diff] [blame] | 119 | int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1]; |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 120 | int imm32 = target - (address) &disp[1]; |
| 121 | *disp = imm32; |
| 122 | } |
| 123 | } |
| 124 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 125 | // The following 4 methods return the offset of the appropriate move instruction |
| 126 | |
| 127 | // Support for fast byte/short loading with zero extension (depending on particular CPU) |
| 128 | int load_unsigned_byte(Register dst, Address src); |
| 129 | int load_unsigned_short(Register dst, Address src); |
| 130 | |
| 131 | // Support for fast byte/short loading with sign extension (depending on particular CPU) |
| 132 | int load_signed_byte(Register dst, Address src); |
| 133 | int load_signed_short(Register dst, Address src); |
| 134 | |
| 135 | // Support for sign-extension (hi:lo = extend_sign(lo)) |
| 136 | void extend_sign(Register hi, Register lo); |
| 137 | |
| 138 | // Load and store values by size and signed-ness |
| 139 | void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); |
| 140 | void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); |
| 141 | |
| 142 | // Support for inc/dec with optimal instruction selection depending on value |
| 143 | |
  // Pointer-width increment/decrement: expands to incrementq/decrementq on
  // 64-bit builds and incrementl/decrementl on 32-bit builds, so callers get
  // the natural word size without #ifdef-ing at each use site.
  void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
  void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
| 146 | |
| 147 | void decrementl(Address dst, int value = 1); |
| 148 | void decrementl(Register reg, int value = 1); |
| 149 | |
| 150 | void decrementq(Register reg, int value = 1); |
| 151 | void decrementq(Address dst, int value = 1); |
| 152 | |
| 153 | void incrementl(Address dst, int value = 1); |
| 154 | void incrementl(Register reg, int value = 1); |
| 155 | |
| 156 | void incrementq(Register reg, int value = 1); |
| 157 | void incrementq(Address dst, int value = 1); |
| 158 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 159 | // Support optimal SSE move instructions. |
| 160 | void movflt(XMMRegister dst, XMMRegister src) { |
| 161 | if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } |
| 162 | else { movss (dst, src); return; } |
| 163 | } |
| 164 | void movflt(XMMRegister dst, Address src) { movss(dst, src); } |
| 165 | void movflt(XMMRegister dst, AddressLiteral src); |
| 166 | void movflt(Address dst, XMMRegister src) { movss(dst, src); } |
| 167 | |
| 168 | void movdbl(XMMRegister dst, XMMRegister src) { |
| 169 | if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } |
| 170 | else { movsd (dst, src); return; } |
| 171 | } |
| 172 | |
| 173 | void movdbl(XMMRegister dst, AddressLiteral src); |
| 174 | |
| 175 | void movdbl(XMMRegister dst, Address src) { |
| 176 | if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } |
| 177 | else { movlpd(dst, src); return; } |
| 178 | } |
| 179 | void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } |
| 180 | |
| 181 | void incrementl(AddressLiteral dst); |
| 182 | void incrementl(ArrayAddress dst); |
| 183 | |
Vladimir Kozlov | 97a51c5 | 2014-03-20 17:49:27 -0700 | [diff] [blame] | 184 | void incrementq(AddressLiteral dst); |
| 185 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 186 | // Alignment |
| 187 | void align(int modulus); |
Aleksey Shipilev | f6c7ab6 | 2015-08-11 12:24:26 +0300 | [diff] [blame] | 188 | void align(int modulus, int target); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 189 | |
| 190 | // A 5 byte nop that is safe for patching (see patch_verified_entry) |
| 191 | void fat_nop(); |
| 192 | |
| 193 | // Stack frame creation/removal |
| 194 | void enter(); |
| 195 | void leave(); |
| 196 | |
| 197 | // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) |
| 198 | // The pointer will be loaded into the thread register. |
| 199 | void get_thread(Register thread); |
| 200 | |
| 201 | |
| 202 | // Support for VM calls |
| 203 | // |
| 204 | // It is imperative that all calls into the VM are handled via the call_VM macros. |
| 205 | // They make sure that the stack linkage is setup correctly. call_VM's correspond |
| 206 | // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. |
| 207 | |
| 208 | |
| 209 | void call_VM(Register oop_result, |
| 210 | address entry_point, |
| 211 | bool check_exceptions = true); |
| 212 | void call_VM(Register oop_result, |
| 213 | address entry_point, |
| 214 | Register arg_1, |
| 215 | bool check_exceptions = true); |
| 216 | void call_VM(Register oop_result, |
| 217 | address entry_point, |
| 218 | Register arg_1, Register arg_2, |
| 219 | bool check_exceptions = true); |
| 220 | void call_VM(Register oop_result, |
| 221 | address entry_point, |
| 222 | Register arg_1, Register arg_2, Register arg_3, |
| 223 | bool check_exceptions = true); |
| 224 | |
| 225 | // Overloadings with last_Java_sp |
| 226 | void call_VM(Register oop_result, |
| 227 | Register last_java_sp, |
| 228 | address entry_point, |
| 229 | int number_of_arguments = 0, |
| 230 | bool check_exceptions = true); |
| 231 | void call_VM(Register oop_result, |
| 232 | Register last_java_sp, |
| 233 | address entry_point, |
| 234 | Register arg_1, bool |
| 235 | check_exceptions = true); |
| 236 | void call_VM(Register oop_result, |
| 237 | Register last_java_sp, |
| 238 | address entry_point, |
| 239 | Register arg_1, Register arg_2, |
| 240 | bool check_exceptions = true); |
| 241 | void call_VM(Register oop_result, |
| 242 | Register last_java_sp, |
| 243 | address entry_point, |
| 244 | Register arg_1, Register arg_2, Register arg_3, |
| 245 | bool check_exceptions = true); |
| 246 | |
| 247 | void get_vm_result (Register oop_result, Register thread); |
| 248 | void get_vm_result_2(Register metadata_result, Register thread); |
| 249 | |
| 250 | // These always tightly bind to MacroAssembler::call_VM_base |
| 251 | // bypassing the virtual implementation |
| 252 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); |
| 253 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); |
| 254 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); |
| 255 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); |
| 256 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); |
| 257 | |
| 258 | void call_VM_leaf(address entry_point, |
| 259 | int number_of_arguments = 0); |
| 260 | void call_VM_leaf(address entry_point, |
| 261 | Register arg_1); |
| 262 | void call_VM_leaf(address entry_point, |
| 263 | Register arg_1, Register arg_2); |
| 264 | void call_VM_leaf(address entry_point, |
| 265 | Register arg_1, Register arg_2, Register arg_3); |
| 266 | |
| 267 | // These always tightly bind to MacroAssembler::call_VM_leaf_base |
| 268 | // bypassing the virtual implementation |
| 269 | void super_call_VM_leaf(address entry_point); |
| 270 | void super_call_VM_leaf(address entry_point, Register arg_1); |
| 271 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); |
| 272 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); |
| 273 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); |
| 274 | |
| 275 | // last Java Frame (fills frame anchor) |
| 276 | void set_last_Java_frame(Register thread, |
| 277 | Register last_java_sp, |
| 278 | Register last_java_fp, |
| 279 | address last_java_pc); |
| 280 | |
| 281 | // thread in the default location (r15_thread on 64bit) |
| 282 | void set_last_Java_frame(Register last_java_sp, |
| 283 | Register last_java_fp, |
| 284 | address last_java_pc); |
| 285 | |
| 286 | void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc); |
| 287 | |
| 288 | // thread in the default location (r15_thread on 64bit) |
| 289 | void reset_last_Java_frame(bool clear_fp, bool clear_pc); |
| 290 | |
| 291 | // Stores |
| 292 | void store_check(Register obj); // store check for obj - register is destroyed afterwards |
| 293 | void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) |
| 294 | |
Joseph Provino | 698fba9 | 2013-01-23 13:02:39 -0500 | [diff] [blame] | 295 | #if INCLUDE_ALL_GCS |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 296 | |
| 297 | void g1_write_barrier_pre(Register obj, |
| 298 | Register pre_val, |
| 299 | Register thread, |
| 300 | Register tmp, |
| 301 | bool tosca_live, |
| 302 | bool expand_call); |
| 303 | |
| 304 | void g1_write_barrier_post(Register store_addr, |
| 305 | Register new_val, |
| 306 | Register thread, |
| 307 | Register tmp, |
| 308 | Register tmp2); |
| 309 | |
Joseph Provino | 698fba9 | 2013-01-23 13:02:39 -0500 | [diff] [blame] | 310 | #endif // INCLUDE_ALL_GCS |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 311 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 312 | // C 'boolean' to Java boolean: x == 0 ? 0 : 1 |
| 313 | void c2bool(Register x); |
| 314 | |
| 315 | // C++ bool manipulation |
| 316 | |
| 317 | void movbool(Register dst, Address src); |
| 318 | void movbool(Address dst, bool boolconst); |
| 319 | void movbool(Address dst, Register src); |
| 320 | void testbool(Register dst); |
| 321 | |
| 322 | // oop manipulations |
| 323 | void load_klass(Register dst, Register src); |
| 324 | void store_klass(Register dst, Register src); |
| 325 | |
| 326 | void load_heap_oop(Register dst, Address src); |
| 327 | void load_heap_oop_not_null(Register dst, Address src); |
| 328 | void store_heap_oop(Address dst, Register src); |
| 329 | void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg); |
| 330 | |
| 331 | // Used for storing NULL. All other oop constants should be |
| 332 | // stored using routines that take a jobject. |
| 333 | void store_heap_oop_null(Address dst); |
| 334 | |
| 335 | void load_prototype_header(Register dst, Register src); |
| 336 | |
| 337 | #ifdef _LP64 |
| 338 | void store_klass_gap(Register dst, Register src); |
| 339 | |
| 340 | // This dummy is to prevent a call to store_heap_oop from |
| 341 | // converting a zero (like NULL) into a Register by giving |
| 342 | // the compiler two choices it can't resolve |
| 343 | |
| 344 | void store_heap_oop(Address dst, void* dummy); |
| 345 | |
| 346 | void encode_heap_oop(Register r); |
| 347 | void decode_heap_oop(Register r); |
| 348 | void encode_heap_oop_not_null(Register r); |
| 349 | void decode_heap_oop_not_null(Register r); |
| 350 | void encode_heap_oop_not_null(Register dst, Register src); |
| 351 | void decode_heap_oop_not_null(Register dst, Register src); |
| 352 | |
| 353 | void set_narrow_oop(Register dst, jobject obj); |
| 354 | void set_narrow_oop(Address dst, jobject obj); |
| 355 | void cmp_narrow_oop(Register dst, jobject obj); |
| 356 | void cmp_narrow_oop(Address dst, jobject obj); |
| 357 | |
| 358 | void encode_klass_not_null(Register r); |
| 359 | void decode_klass_not_null(Register r); |
| 360 | void encode_klass_not_null(Register dst, Register src); |
| 361 | void decode_klass_not_null(Register dst, Register src); |
| 362 | void set_narrow_klass(Register dst, Klass* k); |
| 363 | void set_narrow_klass(Address dst, Klass* k); |
| 364 | void cmp_narrow_klass(Register dst, Klass* k); |
| 365 | void cmp_narrow_klass(Address dst, Klass* k); |
| 366 | |
Harold Seigel | 4d91f4e | 2013-08-15 20:04:10 -0400 | [diff] [blame] | 367 | // Returns the byte size of the instructions generated by decode_klass_not_null() |
| 368 | // when compressed klass pointers are being used. |
| 369 | static int instr_size_for_decode_klass_not_null(); |
| 370 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 371 | // if heap base register is used - reinit it with the correct value |
| 372 | void reinit_heapbase(); |
| 373 | |
| 374 | DEBUG_ONLY(void verify_heapbase(const char* msg);) |
| 375 | |
| 376 | #endif // _LP64 |
| 377 | |
| 378 | // Int division/remainder for Java |
| 379 | // (as idivl, but checks for special case as described in JVM spec.) |
| 380 | // returns idivl instruction offset for implicit exception handling |
| 381 | int corrected_idivl(Register reg); |
| 382 | |
| 383 | // Long division/remainder for Java |
| 384 | // (as idivq, but checks for special case as described in JVM spec.) |
| 385 | // returns idivq instruction offset for implicit exception handling |
| 386 | int corrected_idivq(Register reg); |
| 387 | |
| 388 | void int3(); |
| 389 | |
| 390 | // Long operation macros for a 32bit cpu |
| 391 | // Long negation for Java |
| 392 | void lneg(Register hi, Register lo); |
| 393 | |
| 394 | // Long multiplication for Java |
| 395 | // (destroys contents of eax, ebx, ecx and edx) |
| 396 | void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y |
| 397 | |
| 398 | // Long shifts for Java |
| 399 | // (semantics as described in JVM spec.) |
| 400 | void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) |
| 401 | void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) |
| 402 | |
| 403 | // Long compare for Java |
| 404 | // (semantics as described in JVM spec.) |
| 405 | void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) |
| 406 | |
| 407 | |
| 408 | // misc |
| 409 | |
| 410 | // Sign extension |
| 411 | void sign_extend_short(Register reg); |
| 412 | void sign_extend_byte(Register reg); |
| 413 | |
| 414 | // Division by power of 2, rounding towards 0 |
| 415 | void division_with_shift(Register reg, int shift_value); |
| 416 | |
| 417 | // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: |
| 418 | // |
| 419 | // CF (corresponds to C0) if x < y |
| 420 | // PF (corresponds to C2) if unordered |
| 421 | // ZF (corresponds to C3) if x = y |
| 422 | // |
| 423 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). |
| 424 | // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) |
| 425 | void fcmp(Register tmp); |
| 426 | // Variant of the above which allows y to be further down the stack |
| 427 | // and which only pops x and y if specified. If pop_right is |
| 428 | // specified then pop_left must also be specified. |
| 429 | void fcmp(Register tmp, int index, bool pop_left, bool pop_right); |
| 430 | |
| 431 | // Floating-point comparison for Java |
| 432 | // Compares the top-most stack entries on the FPU stack and stores the result in dst. |
| 433 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). |
| 434 | // (semantics as described in JVM spec.) |
| 435 | void fcmp2int(Register dst, bool unordered_is_less); |
| 436 | // Variant of the above which allows y to be further down the stack |
| 437 | // and which only pops x and y if specified. If pop_right is |
| 438 | // specified then pop_left must also be specified. |
| 439 | void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); |
| 440 | |
| 441 | // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) |
| 442 | // tmp is a temporary register, if none is available use noreg |
| 443 | void fremr(Register tmp); |
| 444 | |
| 445 | |
| 446 | // same as fcmp2int, but using SSE2 |
| 447 | void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); |
| 448 | void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); |
| 449 | |
| 450 | // Inlined sin/cos generator for Java; must not use CPU instruction |
| 451 | // directly on Intel as it does not have high enough precision |
| 452 | // outside of the range [-pi/4, pi/4]. Extra argument indicate the |
| 453 | // number of FPU stack slots in use; all but the topmost will |
| 454 | // require saving if a slow case is necessary. Assumes argument is |
| 455 | // on FP TOS; result is on FP TOS. No cpu registers are changed by |
| 456 | // this code. |
| 457 | void trigfunc(char trig, int num_fpu_regs_in_use = 1); |
| 458 | |
| 459 | // branch to L if FPU flag C2 is set/not set |
| 460 | // tmp is a temporary register, if none is available use noreg |
| 461 | void jC2 (Register tmp, Label& L); |
| 462 | void jnC2(Register tmp, Label& L); |
| 463 | |
| 464 | // Pop ST (ffree & fincstp combined) |
| 465 | void fpop(); |
| 466 | |
Zoltan Majo | 1e55e60 | 2015-08-19 08:55:18 +0200 | [diff] [blame] | 467 | // Load float value from 'address'. If UseSSE >= 1, the value is loaded into |
| 468 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. |
| 469 | void load_float(Address src); |
| 470 | |
| 471 | // Store float value to 'address'. If UseSSE >= 1, the value is stored |
| 472 | // from register xmm0. Otherwise, the value is stored from the FPU stack. |
| 473 | void store_float(Address dst); |
| 474 | |
| 475 | // Load double value from 'address'. If UseSSE >= 2, the value is loaded into |
| 476 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. |
| 477 | void load_double(Address src); |
| 478 | |
| 479 | // Store double value to 'address'. If UseSSE >= 2, the value is stored |
| 480 | // from register xmm0. Otherwise, the value is stored from the FPU stack. |
| 481 | void store_double(Address dst); |
| 482 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 483 | // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack |
| 484 | void push_fTOS(); |
| 485 | |
| 486 | // pops double TOS element from CPU stack and pushes on FPU stack |
| 487 | void pop_fTOS(); |
| 488 | |
| 489 | void empty_FPU_stack(); |
| 490 | |
| 491 | void push_IU_state(); |
| 492 | void pop_IU_state(); |
| 493 | |
| 494 | void push_FPU_state(); |
| 495 | void pop_FPU_state(); |
| 496 | |
| 497 | void push_CPU_state(); |
| 498 | void pop_CPU_state(); |
| 499 | |
| 500 | // Round up to a power of two |
| 501 | void round_to(Register reg, int modulus); |
| 502 | |
| 503 | // Callee saved registers handling |
| 504 | void push_callee_saved_registers(); |
| 505 | void pop_callee_saved_registers(); |
| 506 | |
| 507 | // allocation |
| 508 | void eden_allocate( |
| 509 | Register obj, // result: pointer to object after successful allocation |
| 510 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise |
| 511 | int con_size_in_bytes, // object size in bytes if known at compile time |
| 512 | Register t1, // temp register |
| 513 | Label& slow_case // continuation point if fast allocation fails |
| 514 | ); |
| 515 | void tlab_allocate( |
| 516 | Register obj, // result: pointer to object after successful allocation |
| 517 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise |
| 518 | int con_size_in_bytes, // object size in bytes if known at compile time |
| 519 | Register t1, // temp register |
| 520 | Register t2, // temp register |
| 521 | Label& slow_case // continuation point if fast allocation fails |
| 522 | ); |
| 523 | Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address |
Zoltan Majo | dfa6539 | 2016-01-12 09:19:09 +0100 | [diff] [blame] | 524 | void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); |
| 525 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 526 | void incr_allocated_bytes(Register thread, |
| 527 | Register var_size_in_bytes, int con_size_in_bytes, |
| 528 | Register t1 = noreg); |
| 529 | |
| 530 | // interface method calling |
| 531 | void lookup_interface_method(Register recv_klass, |
| 532 | Register intf_klass, |
| 533 | RegisterOrConstant itable_index, |
| 534 | Register method_result, |
| 535 | Register scan_temp, |
| 536 | Label& no_such_interface); |
| 537 | |
| 538 | // virtual method calling |
| 539 | void lookup_virtual_method(Register recv_klass, |
| 540 | RegisterOrConstant vtable_index, |
| 541 | Register method_result); |
| 542 | |
| 543 | // Test sub_klass against super_klass, with fast and slow paths. |
| 544 | |
| 545 | // The fast path produces a tri-state answer: yes / no / maybe-slow. |
| 546 | // One of the three labels can be NULL, meaning take the fall-through. |
| 547 | // If super_check_offset is -1, the value is loaded up from super_klass. |
| 548 | // No registers are killed, except temp_reg. |
| 549 | void check_klass_subtype_fast_path(Register sub_klass, |
| 550 | Register super_klass, |
| 551 | Register temp_reg, |
| 552 | Label* L_success, |
| 553 | Label* L_failure, |
| 554 | Label* L_slow_path, |
| 555 | RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); |
| 556 | |
| 557 | // The rest of the type check; must be wired to a corresponding fast path. |
| 558 | // It does not repeat the fast path logic, so don't use it standalone. |
| 559 | // The temp_reg and temp2_reg can be noreg, if no temps are available. |
| 560 | // Updates the sub's secondary super cache as necessary. |
| 561 | // If set_cond_codes, condition codes will be Z on success, NZ on failure. |
| 562 | void check_klass_subtype_slow_path(Register sub_klass, |
| 563 | Register super_klass, |
| 564 | Register temp_reg, |
| 565 | Register temp2_reg, |
| 566 | Label* L_success, |
| 567 | Label* L_failure, |
| 568 | bool set_cond_codes = false); |
| 569 | |
| 570 | // Simplified, combined version, good for typical uses. |
| 571 | // Falls through on failure. |
| 572 | void check_klass_subtype(Register sub_klass, |
| 573 | Register super_klass, |
| 574 | Register temp_reg, |
| 575 | Label& L_success); |
| 576 | |
| 577 | // method handles (JSR 292) |
| 578 | Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); |
| 579 | |
| 580 | //---- |
| 581 | void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 |
| 582 | |
| 583 | // Debugging |
| 584 | |
| 585 | // only if +VerifyOops |
| 586 | // TODO: Make these macros with file and line like sparc version! |
| 587 | void verify_oop(Register reg, const char* s = "broken oop"); |
| 588 | void verify_oop_addr(Address addr, const char * s = "broken oop addr"); |
| 589 | |
| 590 | // TODO: verify method and klass metadata (compare against vptr?) |
  // Intentional no-ops on x86: method/klass metadata verification is not
  // implemented here (TODO above: compare against vptr?). The signatures
  // exist so the verify_method_ptr/verify_klass_ptr macros below compile.
  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
| 593 | |
| 594 | #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) |
| 595 | #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) |
| 596 | |
| 597 | // only if +VerifyFPU |
| 598 | void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); |
| 599 | |
Vladimir Kozlov | 88f70d8 | 2013-04-03 11:12:57 -0700 | [diff] [blame] | 600 | // Verify or restore cpu control state after JNI call |
| 601 | void restore_cpu_control_state_after_jni(); |
| 602 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 603 | // prints msg, dumps registers and stops execution |
| 604 | void stop(const char* msg); |
| 605 | |
| 606 | // prints msg and continues |
| 607 | void warn(const char* msg); |
| 608 | |
| 609 | // dumps registers and other state |
| 610 | void print_state(); |
| 611 | |
| 612 | static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); |
| 613 | static void debug64(char* msg, int64_t pc, int64_t regs[]); |
| 614 | static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); |
| 615 | static void print_state64(int64_t pc, int64_t regs[]); |
| 616 | |
| 617 | void os_breakpoint(); |
| 618 | |
  // Halt with a canned message; marks code paths that have never been exercised.
  void untested() { stop("untested"); }

  // Halt with "unimplemented: <what>". The heap buffer is never freed, which
  // is moot: stop() halts execution (see above), so control does not return.
  void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, 1024, "unimplemented: %s", what); stop(b); }

  // Halt; marks branches that are logically unreachable.
  void should_not_reach_here() { stop("should not reach here"); }
| 624 | |
| 625 | void print_CPU_state(); |
| 626 | |
| 627 | // Stack overflow checking |
| 628 | void bang_stack_with_offset(int offset) { |
| 629 | // stack grows down, caller passes positive offset |
| 630 | assert(offset > 0, "must bang with negative offset"); |
| 631 | movl(Address(rsp, (-offset)), rax); |
| 632 | } |
| 633 | |
  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages. Also, clobbers tmp
  void bang_stack_size(Register size, Register tmp);

  // Check for reserved stack access in method being exited (for JIT)
  void reserved_stack_check();

  // NOTE(review): presumably materializes a lazily-computed value at
  // 'delayed_value_addr' into a register/constant; see the shared
  // MacroAssembler implementation for the contract — confirm before relying.
  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
                                                Register tmp,
                                                int offset);

  // Support for serializing memory accesses between threads
  void serialize_memory(Register thread, Register tmp);

  // Debug check of thread-local allocation buffer invariants.
  void verify_tlab();
| 649 | |
  // Biased locking support
  // lock_reg and obj_reg must be loaded up with the appropriate values.
  // swap_reg must be rax, and is killed.
  // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
  // be killed; if not supplied, push/pop will be used internally to
  // allocate a temporary (inefficient, avoid if possible).
  // Optional slow case is for implementations (interpreter and C1) which branch to
  // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
  // Returns offset of first potentially-faulting instruction for null
  // check info (currently consumed only by C1). If
  // swap_reg_contains_mark is true then returns -1 as it is assumed
  // the calling code has already passed any potential faults.
  int biased_locking_enter(Register lock_reg, Register obj_reg,
                           Register swap_reg, Register tmp_reg,
                           bool swap_reg_contains_mark,
                           Label& done, Label* slow_case = NULL,
                           BiasedLockingCounters* counters = NULL);
  void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
#ifdef COMPILER2
  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  // See full description in macroAssembler_x86.cpp.
  void fast_lock(Register obj, Register box, Register tmp,
                 Register scr, Register cx1, Register cx2,
                 BiasedLockingCounters* counters,
                 RTMLockingCounters* rtm_counters,
                 RTMLockingCounters* stack_rtm_counters,
                 Metadata* method_data,
                 bool use_rtm, bool profile_rtm);
  void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
#if INCLUDE_RTM_OPT
  // Helpers for the RTM (transactional memory) lock-elision paths; counters
  // and abort-ratio bookkeeping feed the profiling-driven retry heuristics.
  void rtm_counters_update(Register abort_status, Register rtm_counters);
  void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
  void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
                                   RTMLockingCounters* rtm_counters,
                                   Metadata* method_data);
  void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
  void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
  void rtm_stack_locking(Register obj, Register tmp, Register scr,
                         Register retry_on_abort_count,
                         RTMLockingCounters* stack_rtm_counters,
                         Metadata* method_data, bool profile_rtm,
                         Label& DONE_LABEL, Label& IsInflated);
  void rtm_inflated_locking(Register obj, Register box, Register tmp,
                            Register scr, Register retry_on_busy_count,
                            Register retry_on_abort_count,
                            RTMLockingCounters* rtm_counters,
                            Metadata* method_data, bool profile_rtm,
                            Label& DONE_LABEL);
#endif
#endif
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 702 | |
  Condition negate_condition(Condition cond);

  // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit
  // operands. In general the names are modified to avoid hiding the instruction in Assembler
  // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
  // here in MacroAssembler. The major exception to this rule is call

  // Arithmetics
  // The *ptr forms select the 64-bit (q) or 32-bit (l) instruction at compile
  // time via the LP64_ONLY/NOT_LP64 macros, so pointer-sized arithmetic reads
  // the same on both word sizes.

  void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
  void addptr(Address dst, Register src);

  void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
  void addptr(Register dst, int32_t src);
  void addptr(Register dst, Register src);
  void addptr(Register dst, RegisterOrConstant src) {
    if (src.is_constant()) addptr(dst, (int) src.as_constant());
    else addptr(dst, src.as_register());
  }

  void andptr(Register dst, int32_t src);
  void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }

  void cmp8(AddressLiteral src1, int imm);

  // renamed to drag out the casting of address to int32_t/intptr_t
  void cmp32(Register src1, int32_t imm);

  void cmp32(AddressLiteral src1, int32_t imm);
  // compare reg - mem, or reg - &mem
  void cmp32(Register src1, AddressLiteral src2);

  void cmp32(Register src1, Address src2);

#ifndef _LP64
  // 32-bit only: compressed-klass/oop comparison is not needed on LP64.
  void cmpklass(Address dst, Metadata* obj);
  void cmpklass(Register dst, Metadata* obj);
  void cmpoop(Address dst, jobject obj);
  void cmpoop(Register dst, jobject obj);
#endif // _LP64

  // NOTE src2 must be the lval. This is NOT an mem-mem compare
  void cmpptr(Address src1, AddressLiteral src2);

  void cmpptr(Register src1, AddressLiteral src2);

  void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }

  void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }

  // cmp64 to avoid hiding cmpq
  void cmp64(Register src1, AddressLiteral src);

  void cmpxchgptr(Register reg, Address adr);

  void locked_cmpxchgptr(Register reg, AddressLiteral adr);
| 763 | |
| 764 | |
  // More pointer-width arithmetic wrappers (q on LP64, l otherwise).
  void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
  void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }


  void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }

  void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }

  void shlptr(Register dst, int32_t shift);
  void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }

  void shrptr(Register dst, int32_t shift);
  void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }

  void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
  void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }

  void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }

  void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
  void subptr(Register dst, int32_t src);
  // Force generation of a 4 byte immediate value even if it fits into 8bit
  void subptr_imm32(Register dst, int32_t src);
  void subptr(Register dst, Register src);
  void subptr(Register dst, RegisterOrConstant src) {
    if (src.is_constant()) subptr(dst, (int) src.as_constant());
    else subptr(dst, src.as_register());
  }

  void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
  void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }

  void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
  void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }

  void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
| 802 | |
| 803 | |
  // Helper functions for statistics gathering.
  // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
  void cond_inc32(Condition cond, AddressLiteral counter_addr);
  // Unconditional atomic increment.
  void atomic_incl(Address counter_addr);
  void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
#ifdef _LP64
  void atomic_incq(Address counter_addr);
  void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
#endif
  // Pointer-width counter increment: 64-bit counters on LP64, 32-bit otherwise.
  void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; }
  void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 816 | |
  // lea variants accepting AddressLiteral (32/64-bit safe address materialization).
  void lea(Register dst, AddressLiteral adr);
  void lea(Address dst, AddressLiteral adr);
  void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }

  void leal32(Register dst, Address src) { leal(dst, src); }

  // Import other testl() methods from the parent class or else
  // they will be hidden by the following overriding declaration.
  using Assembler::testl;
  void testl(Register dst, AddressLiteral src);

  void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); }

  void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
  void testptr(Register src1, Register src2);

  void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
  void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
| 838 | |
  // Calls

  void call(Label& L, relocInfo::relocType rtype);
  void call(Register entry);

  // NOTE: this call transfers to the effective address of entry NOT
  // the address contained by entry. This is because this is more natural
  // for jumps/calls.
  void call(AddressLiteral entry);

  // Emit the CompiledIC call idiom
  void ic_call(address entry, jint method_index = 0);

  // Jumps

  // NOTE: these jumps transfer to the effective address of dst NOT
  // the address contained by dst. This is because this is more natural
  // for jumps/calls.
  void jump(AddressLiteral dst);
  void jump_cc(Condition cc, AddressLiteral dst);

  // 32bit can do a case table jump in one instruction but we no longer allow the base
  // to be installed in the Address class. This jump will transfer to the address
  // contained in the location described by entry (not the address of entry)
  void jump(ArrayAddress entry);
| 864 | |
  // Floating

  // SSE/x87 wrappers; the AddressLiteral overloads handle 32/64-bit address
  // reachability, the rest simply forward to the Assembler forms.
  void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
  void andpd(XMMRegister dst, AddressLiteral src);
  void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }

  void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
  void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
  void andps(XMMRegister dst, AddressLiteral src);

  void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
  void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
  void comiss(XMMRegister dst, AddressLiteral src);

  void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
  void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
  void comisd(XMMRegister dst, AddressLiteral src);

  void fadd_s(Address src) { Assembler::fadd_s(src); }
  void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }

  void fldcw(Address src) { Assembler::fldcw(src); }
  void fldcw(AddressLiteral src);

  void fld_s(int index) { Assembler::fld_s(index); }
  void fld_s(Address src) { Assembler::fld_s(src); }
  void fld_s(AddressLiteral src);

  void fld_d(Address src) { Assembler::fld_d(src); }
  void fld_d(AddressLiteral src);

  void fld_x(Address src) { Assembler::fld_x(src); }
  void fld_x(AddressLiteral src);

  void fmul_s(Address src) { Assembler::fmul_s(src); }
  void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }

  void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
  void ldmxcsr(AddressLiteral src);
| 904 | |
Vivek Deshpande | 40fe96d | 2016-03-03 22:02:13 -0800 | [diff] [blame] | 905 | void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, |
| 906 | XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, |
| 907 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
| 908 | bool multi_block); |
| 909 | |
| 910 | #ifdef _LP64 |
| 911 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
| 912 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
| 913 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
| 914 | bool multi_block, XMMRegister shuf_mask); |
| 915 | #else |
| 916 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
| 917 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
| 918 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
| 919 | bool multi_block); |
| 920 | #endif |
| 921 | |
Vivek R Deshpande | 5a633b1 | 2015-10-05 20:02:40 -0700 | [diff] [blame] | 922 | void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 923 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 924 | Register rax, Register rcx, Register rdx, Register tmp); |
Vivek R Deshpande | c07f802 | 2015-10-22 21:39:25 -0700 | [diff] [blame] | 925 | |
Vivek Deshpande | 40fe96d | 2016-03-03 22:02:13 -0800 | [diff] [blame] | 926 | #ifdef _LP64 |
Vivek R Deshpande | c07f802 | 2015-10-22 21:39:25 -0700 | [diff] [blame] | 927 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 928 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
Vivek Deshpande | 40fe96d | 2016-03-03 22:02:13 -0800 | [diff] [blame] | 929 | Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2); |
Vivek Deshpande | 6a52e93 | 2016-01-08 21:06:50 -0800 | [diff] [blame] | 930 | |
Vivek R Deshpande | 4536503 | 2015-12-23 21:09:50 -0800 | [diff] [blame] | 931 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, |
| 932 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, |
Vivek Deshpande | 40fe96d | 2016-03-03 22:02:13 -0800 | [diff] [blame] | 933 | Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4); |
Vivek R Deshpande | c07f802 | 2015-10-22 21:39:25 -0700 | [diff] [blame] | 934 | |
Vivek Deshpande | 6a52e93 | 2016-01-08 21:06:50 -0800 | [diff] [blame] | 935 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 936 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
Vivek Deshpande | 40fe96d | 2016-03-03 22:02:13 -0800 | [diff] [blame] | 937 | Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2, |
| 938 | Register tmp3, Register tmp4); |
Vivek Deshpande | 6a52e93 | 2016-01-08 21:06:50 -0800 | [diff] [blame] | 939 | |
| 940 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 941 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
Vivek Deshpande | 40fe96d | 2016-03-03 22:02:13 -0800 | [diff] [blame] | 942 | Register rax, Register rcx, Register rdx, Register tmp1, |
| 943 | Register tmp2, Register tmp3, Register tmp4); |
| 944 | #else |
| 945 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 946 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 947 | Register rax, Register rcx, Register rdx, Register tmp1); |
Vivek Deshpande | 6a52e93 | 2016-01-08 21:06:50 -0800 | [diff] [blame] | 948 | |
Vivek Deshpande | 40fe96d | 2016-03-03 22:02:13 -0800 | [diff] [blame] | 949 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, |
| 950 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, |
| 951 | Register rdx, Register tmp); |
| 952 | |
| 953 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 954 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 955 | Register rax, Register rbx, Register rdx); |
| 956 | |
| 957 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 958 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 959 | Register rax, Register rcx, Register rdx, Register tmp); |
| 960 | |
Vivek Deshpande | 6a52e93 | 2016-01-08 21:06:50 -0800 | [diff] [blame] | 961 | void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, |
| 962 | Register edx, Register ebx, Register esi, Register edi, |
| 963 | Register ebp, Register esp); |
Vivek Deshpande | 40fe96d | 2016-03-03 22:02:13 -0800 | [diff] [blame] | 964 | |
Vivek Deshpande | 6a52e93 | 2016-01-08 21:06:50 -0800 | [diff] [blame] | 965 | void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, |
| 966 | Register esi, Register edi, Register ebp, Register esp); |
| 967 | #endif |
| 968 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 969 | void increase_precision(); |
| 970 | void restore_precision(); |
| 971 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 972 | private: |
| 973 | |
| 974 | // call runtime as a fallback for trig functions and pow/exp. |
| 975 | void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use); |
| 976 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 977 | // these are private because users should be doing movflt/movdbl |
| 978 | |
| 979 | void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } |
| 980 | void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } |
| 981 | void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } |
| 982 | void movss(XMMRegister dst, AddressLiteral src); |
| 983 | |
| 984 | void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } |
| 985 | void movlpd(XMMRegister dst, AddressLiteral src); |
| 986 | |
 public:

  // Scalar SSE arithmetic wrappers; AddressLiteral overloads (declared only)
  // handle far operands, the others forward straight to the Assembler.
  void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
  void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
  void addsd(XMMRegister dst, AddressLiteral src);

  void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
  void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
  void addss(XMMRegister dst, AddressLiteral src);

  void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); }
  void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
  void addpd(XMMRegister dst, AddressLiteral src);

  void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
  void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
  void divsd(XMMRegister dst, AddressLiteral src);

  void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
  void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
  void divss(XMMRegister dst, AddressLiteral src);

  // Move Unaligned Double Quadword
  void movdqu(Address dst, XMMRegister src);
  void movdqu(XMMRegister dst, Address src);
  void movdqu(XMMRegister dst, XMMRegister src);
  void movdqu(XMMRegister dst, AddressLiteral src);
  // AVX Unaligned forms
  void vmovdqu(Address dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, Address src);
  void vmovdqu(XMMRegister dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, AddressLiteral src);

  // Move Aligned Double Quadword
  void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
  void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
  void movdqa(XMMRegister dst, AddressLiteral src);

  void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
  void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
  void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
  void movsd(XMMRegister dst, AddressLiteral src);

  void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
  void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
  void mulpd(XMMRegister dst, AddressLiteral src);

  void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
  void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
  void mulsd(XMMRegister dst, AddressLiteral src);

  void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, AddressLiteral src);

  // Carry-Less Multiplication Quadword
  void pclmulldq(XMMRegister dst, XMMRegister src) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::pclmulqdq(dst, src, 0x00);
  }
  void pclmulhdq(XMMRegister dst, XMMRegister src) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::pclmulqdq(dst, src, 0x11);
  }
| 1051 | |
Michael Berg | 2ae7dcf | 2015-11-19 16:07:22 -0800 | [diff] [blame] | 1052 | void pcmpeqb(XMMRegister dst, XMMRegister src); |
| 1053 | void pcmpeqw(XMMRegister dst, XMMRegister src); |
| 1054 | |
| 1055 | void pcmpestri(XMMRegister dst, Address src, int imm8); |
| 1056 | void pcmpestri(XMMRegister dst, XMMRegister src, int imm8); |
| 1057 | |
| 1058 | void pmovzxbw(XMMRegister dst, XMMRegister src); |
| 1059 | void pmovzxbw(XMMRegister dst, Address src); |
| 1060 | |
| 1061 | void pmovmskb(Register dst, XMMRegister src); |
| 1062 | |
| 1063 | void ptest(XMMRegister dst, XMMRegister src); |
| 1064 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1065 | void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } |
| 1066 | void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } |
| 1067 | void sqrtsd(XMMRegister dst, AddressLiteral src); |
| 1068 | |
| 1069 | void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } |
| 1070 | void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } |
| 1071 | void sqrtss(XMMRegister dst, AddressLiteral src); |
| 1072 | |
| 1073 | void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } |
| 1074 | void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } |
| 1075 | void subsd(XMMRegister dst, AddressLiteral src); |
| 1076 | |
| 1077 | void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } |
| 1078 | void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } |
| 1079 | void subss(XMMRegister dst, AddressLiteral src); |
| 1080 | |
| 1081 | void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } |
| 1082 | void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } |
| 1083 | void ucomiss(XMMRegister dst, AddressLiteral src); |
| 1084 | |
| 1085 | void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } |
| 1086 | void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } |
| 1087 | void ucomisd(XMMRegister dst, AddressLiteral src); |
| 1088 | |
| 1089 | // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values |
Michael C Berg | e86e386 | 2015-11-09 11:26:41 -0800 | [diff] [blame] | 1090 | void xorpd(XMMRegister dst, XMMRegister src); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1091 | void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } |
| 1092 | void xorpd(XMMRegister dst, AddressLiteral src); |
| 1093 | |
| 1094 | // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values |
Michael C Berg | e86e386 | 2015-11-09 11:26:41 -0800 | [diff] [blame] | 1095 | void xorps(XMMRegister dst, XMMRegister src); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1096 | void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } |
| 1097 | void xorps(XMMRegister dst, AddressLiteral src); |
| 1098 | |
| 1099 | // Shuffle Bytes |
| 1100 | void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } |
| 1101 | void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } |
| 1102 | void pshufb(XMMRegister dst, AddressLiteral src); |
| 1103 | // AVX 3-operands instructions |
| 1104 | |
| 1105 | void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } |
  // Scalar/packed SSE-AVX arithmetic helpers. Pattern used throughout this
  // section: Address overloads forward directly to the Assembler;
  // AddressLiteral overloads are declared here and defined out of line
  // (the literal's address may not be directly encodable — TODO confirm
  // against the .cpp definitions).
  void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  // Absolute value of scalar float/double; negate_field is the address of
  // the mask constant applied by the out-of-line definitions.
  void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
  void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);

  // Packed byte/word add.
  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Broadcast the low word of src to all elements of dst.
  void vpbroadcastw(XMMRegister dst, XMMRegister src);

  // Packed byte/word compare-for-equality.
  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Zero-extend packed bytes to words; extract per-byte sign mask to a GPR.
  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
  void vpmovmskb(Register dst, XMMRegister src);

  // Packed word multiply (keeps the low 16 bits of each product).
  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Packed byte/word subtract.
  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Packed word shifts: arithmetic right, logical right, logical left.
  // Shift count is either in an XMM register or an immediate.
  void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  // Logical compare of the two vector operands (sets flags).
  void vptest(XMMRegister dst, XMMRegister src);

  // Interleave low bytes of dst and src.
  void punpcklbw(XMMRegister dst, XMMRegister src);
  void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }

  // Shuffle words in the low quadword according to the 2-bit fields of mode.
  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
  void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
| 1155 | |
Michael C Berg | 4fca8db | 2015-05-08 11:49:20 -0700 | [diff] [blame] | 1156 | void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } |
| 1157 | void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } |
| 1158 | void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1159 | |
Michael C Berg | 4fca8db | 2015-05-08 11:49:20 -0700 | [diff] [blame] | 1160 | void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } |
| 1161 | void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } |
| 1162 | void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1163 | |
| 1164 | void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } |
| 1165 | void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } |
| 1166 | void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1167 | |
| 1168 | void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } |
| 1169 | void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } |
| 1170 | void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1171 | |
| 1172 | void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } |
| 1173 | void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } |
| 1174 | void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1175 | |
| 1176 | void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } |
| 1177 | void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } |
| 1178 | void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1179 | |
| 1180 | void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } |
| 1181 | void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } |
| 1182 | void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1183 | |
| 1184 | void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } |
| 1185 | void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } |
| 1186 | void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1187 | |
Michael Berg | d67924d | 2015-09-11 17:02:44 -0700 | [diff] [blame] | 1188 | void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1189 | void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1190 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1191 | // AVX Vector instructions |
| 1192 | |
Michael C Berg | 4fca8db | 2015-05-08 11:49:20 -0700 | [diff] [blame] | 1193 | void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } |
| 1194 | void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } |
| 1195 | void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1196 | |
Michael C Berg | 4fca8db | 2015-05-08 11:49:20 -0700 | [diff] [blame] | 1197 | void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } |
| 1198 | void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } |
| 1199 | void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1200 | |
Michael C Berg | 4fca8db | 2015-05-08 11:49:20 -0700 | [diff] [blame] | 1201 | void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
| 1202 | if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 |
| 1203 | Assembler::vpxor(dst, nds, src, vector_len); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1204 | else |
Michael C Berg | 4fca8db | 2015-05-08 11:49:20 -0700 | [diff] [blame] | 1205 | Assembler::vxorpd(dst, nds, src, vector_len); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1206 | } |
Michael C Berg | 4fca8db | 2015-05-08 11:49:20 -0700 | [diff] [blame] | 1207 | void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { |
| 1208 | if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 |
| 1209 | Assembler::vpxor(dst, nds, src, vector_len); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1210 | else |
Michael C Berg | 4fca8db | 2015-05-08 11:49:20 -0700 | [diff] [blame] | 1211 | Assembler::vxorpd(dst, nds, src, vector_len); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1212 | } |
| 1213 | |
Vladimir Kozlov | 38c81fb | 2013-01-08 11:30:51 -0800 | [diff] [blame] | 1214 | // Simple version for AVX2 256bit vectors |
| 1215 | void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); } |
| 1216 | void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); } |
| 1217 | |
Mikael Vidstedt | d4b4dca | 2016-03-07 15:03:48 -0800 | [diff] [blame] | 1218 | void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { |
| 1219 | if (UseAVX > 1) { // vinserti128 is available only in AVX2 |
| 1220 | Assembler::vinserti128(dst, nds, src, imm8); |
| 1221 | } else { |
| 1222 | Assembler::vinsertf128(dst, nds, src, imm8); |
| 1223 | } |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1224 | } |
| 1225 | |
Mikael Vidstedt | d4b4dca | 2016-03-07 15:03:48 -0800 | [diff] [blame] | 1226 | void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { |
| 1227 | if (UseAVX > 1) { // vinserti128 is available only in AVX2 |
| 1228 | Assembler::vinserti128(dst, nds, src, imm8); |
| 1229 | } else { |
| 1230 | Assembler::vinsertf128(dst, nds, src, imm8); |
| 1231 | } |
| 1232 | } |
| 1233 | |
| 1234 | void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) { |
| 1235 | if (UseAVX > 1) { // vextracti128 is available only in AVX2 |
| 1236 | Assembler::vextracti128(dst, src, imm8); |
| 1237 | } else { |
| 1238 | Assembler::vextractf128(dst, src, imm8); |
| 1239 | } |
| 1240 | } |
| 1241 | |
| 1242 | void vextracti128(Address dst, XMMRegister src, uint8_t imm8) { |
| 1243 | if (UseAVX > 1) { // vextracti128 is available only in AVX2 |
| 1244 | Assembler::vextracti128(dst, src, imm8); |
| 1245 | } else { |
| 1246 | Assembler::vextractf128(dst, src, imm8); |
| 1247 | } |
| 1248 | } |
| 1249 | |
  // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
  // (imm8 == 1 selects the upper 128-bit lane).
  void vinserti128_high(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 1);
  }
  void vinserti128_high(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 1);
  }
  void vextracti128_high(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 1);
  }
  void vextracti128_high(Address dst, XMMRegister src) {
    vextracti128(dst, src, 1);
  }
  // FP-domain versions: always use the AVX1 f-form encodings.
  void vinsertf128_high(XMMRegister dst, XMMRegister src) {
    vinsertf128(dst, dst, src, 1);
  }
  void vinsertf128_high(XMMRegister dst, Address src) {
    vinsertf128(dst, dst, src, 1);
  }
  void vextractf128_high(XMMRegister dst, XMMRegister src) {
    vextractf128(dst, src, 1);
  }
  void vextractf128_high(Address dst, XMMRegister src) {
    vextractf128(dst, src, 1);
  }
| 1275 | |
  // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
  // (imm8 == 1 selects the upper 256-bit lane).
  void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
    vinserti64x4(dst, dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
    vinsertf64x4(dst, dst, src, 1);
  }
  void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
    vextracti64x4(dst, src, 1);
  }
  void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
    vextractf64x4(dst, src, 1);
  }
  void vextractf64x4_high(Address dst, XMMRegister src) {
    vextractf64x4(dst, src, 1);
  }
  // Memory-source insert variant.
  void vinsertf64x4_high(XMMRegister dst, Address src) {
    vinsertf64x4(dst, dst, src, 1);
  }
| 1295 | |
  // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
  // (imm8 == 0 selects the lower 128-bit lane).
  void vinserti128_low(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 0);
  }
  void vinserti128_low(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 0);
  }
  void vextracti128_low(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }
  void vextracti128_low(Address dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }
  // FP-domain versions: always use the AVX1 f-form encodings.
  void vinsertf128_low(XMMRegister dst, XMMRegister src) {
    vinsertf128(dst, dst, src, 0);
  }
  void vinsertf128_low(XMMRegister dst, Address src) {
    vinsertf128(dst, dst, src, 0);
  }
  void vextractf128_low(XMMRegister dst, XMMRegister src) {
    vextractf128(dst, src, 0);
  }
  void vextractf128_low(Address dst, XMMRegister src) {
    vextractf128(dst, src, 0);
  }
| 1321 | |
  // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
  // (imm8 == 0 selects the lower 256-bit lane).
  void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
    vinserti64x4(dst, dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
    vinsertf64x4(dst, dst, src, 0);
  }
  void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
    vextracti64x4(dst, src, 0);
  }
  void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
    vextractf64x4(dst, src, 0);
  }
  void vextractf64x4_low(Address dst, XMMRegister src) {
    vextractf64x4(dst, src, 0);
  }
  // Memory-source insert variant.
  void vinsertf64x4_low(XMMRegister dst, Address src) {
    vinsertf64x4(dst, dst, src, 0);
  }
| 1341 | |
| 1342 | |
David Chase | 9a35998 | 2013-07-02 20:42:12 -0400 | [diff] [blame] | 1343 | // Carry-Less Multiplication Quadword |
| 1344 | void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
| 1345 | // 0x00 - multiply lower 64 bits [0:63] |
| 1346 | Assembler::vpclmulqdq(dst, nds, src, 0x00); |
| 1347 | } |
| 1348 | void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
| 1349 | // 0x11 - multiply upper 64 bits [64:127] |
| 1350 | Assembler::vpclmulqdq(dst, nds, src, 0x11); |
| 1351 | } |
| 1352 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1353 | // Data |
| 1354 | |
| 1355 | void cmov32( Condition cc, Register dst, Address src); |
| 1356 | void cmov32( Condition cc, Register dst, Register src); |
| 1357 | |
| 1358 | void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } |
| 1359 | |
| 1360 | void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } |
| 1361 | void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } |
| 1362 | |
| 1363 | void movoop(Register dst, jobject obj); |
| 1364 | void movoop(Address dst, jobject obj); |
| 1365 | |
| 1366 | void mov_metadata(Register dst, Metadata* obj); |
| 1367 | void mov_metadata(Address dst, Metadata* obj); |
| 1368 | |
| 1369 | void movptr(ArrayAddress dst, Register src); |
| 1370 | // can this do an lea? |
| 1371 | void movptr(Register dst, ArrayAddress src); |
| 1372 | |
| 1373 | void movptr(Register dst, Address src); |
| 1374 | |
Vladimir Kozlov | 97a51c5 | 2014-03-20 17:49:27 -0700 | [diff] [blame] | 1375 | #ifdef _LP64 |
| 1376 | void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1); |
| 1377 | #else |
| 1378 | void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit |
| 1379 | #endif |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1380 | |
| 1381 | void movptr(Register dst, intptr_t src); |
| 1382 | void movptr(Register dst, Register src); |
| 1383 | void movptr(Address dst, intptr_t src); |
| 1384 | |
| 1385 | void movptr(Address dst, Register src); |
| 1386 | |
| 1387 | void movptr(Register dst, RegisterOrConstant src) { |
| 1388 | if (src.is_constant()) movptr(dst, src.as_constant()); |
| 1389 | else movptr(dst, src.as_register()); |
| 1390 | } |
| 1391 | |
#ifdef _LP64
  // Generally the next two are only used for moving NULL
  // Although there are situations in initializing the mark word where
  // they could be used. They are dangerous.

  // They only exist on LP64 so that int32_t and intptr_t are not the same
  // and we have ambiguous declarations.

  void movptr(Address dst, int32_t imm32);
  void movptr(Register dst, int32_t imm32);
#endif // _LP64

  // to avoid hiding movl
  void mov32(AddressLiteral dst, Register src);
  void mov32(Register dst, AddressLiteral src);

  // to avoid hiding movb
  void movbyte(ArrayAddress dst, int src);

  // Import other mov() methods from the parent class or else
  // they will be hidden by the following overriding declaration.
  using Assembler::movdl;
  using Assembler::movq;
  void movdl(XMMRegister dst, AddressLiteral src);
  void movq(XMMRegister dst, AddressLiteral src);

  // Can push value or effective address
  void pushptr(AddressLiteral src);

  // Pointer-sized push/pop: pushq/popq on LP64, pushl/popl otherwise.
  void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
  void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }

  // Push an oop / Klass metadata constant.
  void pushoop(jobject obj);
  void pushklass(Metadata* obj);

  // sign extend as need a l to ptr sized element
  void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
  void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }

  // C2 compiled method's prolog code.
  void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b);

  // clear memory of size 'cnt' qwords, starting at 'base';
  // if 'is_large' is set, do not try to produce short loop
  void clear_mem(Register base, Register cnt, Register rtmp, bool is_large);
Vladimir Kozlov | cfcd28f | 2013-01-03 15:09:55 -0800 | [diff] [blame] | 1437 | |
Tobias Hartmann | 7af927f | 2015-11-03 09:41:03 +0100 | [diff] [blame] | 1438 | #ifdef COMPILER2 |
| 1439 | void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, |
| 1440 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); |
| 1441 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1442 | // IndexOf strings. |
| 1443 | // Small strings are loaded through stack if they cross page boundary. |
| 1444 | void string_indexof(Register str1, Register str2, |
| 1445 | Register cnt1, Register cnt2, |
| 1446 | int int_cnt2, Register result, |
Tobias Hartmann | 7af927f | 2015-11-03 09:41:03 +0100 | [diff] [blame] | 1447 | XMMRegister vec, Register tmp, |
| 1448 | int ae); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1449 | |
| 1450 | // IndexOf for constant substrings with size >= 8 elements |
| 1451 | // which don't need to be loaded through stack. |
| 1452 | void string_indexofC8(Register str1, Register str2, |
| 1453 | Register cnt1, Register cnt2, |
| 1454 | int int_cnt2, Register result, |
Tobias Hartmann | 7af927f | 2015-11-03 09:41:03 +0100 | [diff] [blame] | 1455 | XMMRegister vec, Register tmp, |
| 1456 | int ae); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1457 | |
| 1458 | // Smallest code: we don't need to load through stack, |
| 1459 | // check string tail. |
| 1460 | |
Tobias Hartmann | 7af927f | 2015-11-03 09:41:03 +0100 | [diff] [blame] | 1461 | // helper function for string_compare |
| 1462 | void load_next_elements(Register elem1, Register elem2, Register str1, Register str2, |
| 1463 | Address::ScaleFactor scale, Address::ScaleFactor scale1, |
| 1464 | Address::ScaleFactor scale2, Register index, int ae); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1465 | // Compare strings. |
| 1466 | void string_compare(Register str1, Register str2, |
| 1467 | Register cnt1, Register cnt2, Register result, |
Tobias Hartmann | 7af927f | 2015-11-03 09:41:03 +0100 | [diff] [blame] | 1468 | XMMRegister vec1, int ae); |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1469 | |
Tobias Hartmann | 7af927f | 2015-11-03 09:41:03 +0100 | [diff] [blame] | 1470 | // Search for Non-ASCII character (Negative byte value) in a byte array, |
| 1471 | // return true if it has any and false otherwise. |
| 1472 | void has_negatives(Register ary1, Register len, |
| 1473 | Register result, Register tmp1, |
| 1474 | XMMRegister vec1, XMMRegister vec2); |
| 1475 | |
| 1476 | // Compare char[] or byte[] arrays. |
| 1477 | void arrays_equals(bool is_array_equ, Register ary1, Register ary2, |
| 1478 | Register limit, Register result, Register chr, |
| 1479 | XMMRegister vec1, XMMRegister vec2, bool is_char); |
| 1480 | |
| 1481 | #endif |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1482 | |
  // Fill primitive arrays
  void generate_fill(BasicType t, bool aligned,
                     Register to, Register value, Register count,
                     Register rtmp, XMMRegister xtmp);

  // Encode an array as ISO-8859-1 bytes; see the out-of-line definition.
  void encode_iso_array(Register src, Register dst, Register len,
                        XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                        XMMRegister tmp4, Register tmp5, Register result);

#ifdef _LP64
  // Helpers for the multiply_to_len/square_to_len/mul_add intrinsics
  // (BigInteger-style multi-word arithmetic — see the .cpp definitions).
  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
                              Register yz_idx, Register idx,
                              Register carry, Register product, int offset);
  void multiply_128_x_128_bmi2_loop(Register y, Register z,
                                    Register carry, Register carry2,
                                    Register idx, Register jdx,
                                    Register yz_idx1, Register yz_idx2,
                                    Register tmp, Register tmp3, Register tmp4);
  void multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
                               Register yz_idx, Register idx, Register jdx,
                               Register carry, Register product,
                               Register carry2);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
  void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
                     Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
  void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
                            Register tmp2);
  void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
                       Register rdxReg, Register raxReg);
  void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
  void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
                   Register tmp3, Register tmp4);
  void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
                     Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);

  void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
                             Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
                             Register raxReg);
  void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
               Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
               Register raxReg);
  // Vectorized mismatch search over two memory regions.
  void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
                           Register result, Register tmp1, Register tmp2,
                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
#endif
| 1534 | |
Tomasz Wojtowicz | 61b77b8 | 2015-09-16 15:54:32 -0700 | [diff] [blame] | 1535 | // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. |
David Chase | 9a35998 | 2013-07-02 20:42:12 -0400 | [diff] [blame] | 1536 | void update_byte_crc32(Register crc, Register val, Register table); |
| 1537 | void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); |
Tomasz Wojtowicz | 61b77b8 | 2015-09-16 15:54:32 -0700 | [diff] [blame] | 1538 | // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic |
| 1539 | // Note on a naming convention: |
| 1540 | // Prefix w = register only used on a Westmere+ architecture |
| 1541 | // Prefix n = register only used on a Nehalem architecture |
| 1542 | #ifdef _LP64 |
| 1543 | void crc32c_ipl_alg4(Register in_out, uint32_t n, |
| 1544 | Register tmp1, Register tmp2, Register tmp3); |
| 1545 | #else |
| 1546 | void crc32c_ipl_alg4(Register in_out, uint32_t n, |
| 1547 | Register tmp1, Register tmp2, Register tmp3, |
| 1548 | XMMRegister xtmp1, XMMRegister xtmp2); |
| 1549 | #endif |
| 1550 | void crc32c_pclmulqdq(XMMRegister w_xtmp1, |
| 1551 | Register in_out, |
| 1552 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, |
| 1553 | XMMRegister w_xtmp2, |
| 1554 | Register tmp1, |
| 1555 | Register n_tmp2, Register n_tmp3); |
| 1556 | void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, |
| 1557 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
| 1558 | Register tmp1, Register tmp2, |
| 1559 | Register n_tmp3); |
| 1560 | void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, |
| 1561 | Register in_out1, Register in_out2, Register in_out3, |
| 1562 | Register tmp1, Register tmp2, Register tmp3, |
| 1563 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
| 1564 | Register tmp4, Register tmp5, |
| 1565 | Register n_tmp6); |
| 1566 | void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, |
| 1567 | Register tmp1, Register tmp2, Register tmp3, |
| 1568 | Register tmp4, Register tmp5, Register tmp6, |
| 1569 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
| 1570 | bool is_pclmulqdq_supported); |
David Chase | 9a35998 | 2013-07-02 20:42:12 -0400 | [diff] [blame] | 1571 | // Fold 128-bit data chunk |
| 1572 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); |
| 1573 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); |
| 1574 | // Fold 8-bit data |
| 1575 | void fold_8bit_crc32(Register crc, Register table, Register tmp); |
| 1576 | void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp); |
| 1577 | |
Tobias Hartmann | 7af927f | 2015-11-03 09:41:03 +0100 | [diff] [blame] | 1578 | // Compress char[] array to byte[]. |
| 1579 | void char_array_compress(Register src, Register dst, Register len, |
| 1580 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, |
| 1581 | XMMRegister tmp4, Register tmp5, Register result); |
| 1582 | |
| 1583 | // Inflate byte[] array to char[]. |
| 1584 | void byte_array_inflate(Register src, Register dst, Register len, |
| 1585 | XMMRegister tmp1, Register tmp2); |
| 1586 | |
Christian Thalinger | 2c2c007 | 2012-11-30 15:23:16 -0800 | [diff] [blame] | 1587 | }; |
| 1588 | |
| 1589 | /** |
| 1590 | * class SkipIfEqual: |
| 1591 | * |
| 1592 | * Instantiating this class will result in assembly code being output that will |
| 1593 | * jump around any code emitted between the creation of the instance and it's |
| 1594 | * automatic destruction at the end of a scope block, depending on the value of |
| 1595 | * the flag passed to the constructor, which will be checked at run-time. |
| 1596 | */ |
| 1597 | class SkipIfEqual { |
| 1598 | private: |
| 1599 | MacroAssembler* _masm; |
| 1600 | Label _label; |
| 1601 | |
| 1602 | public: |
| 1603 | SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); |
| 1604 | ~SkipIfEqual(); |
| 1605 | }; |
| 1606 | |
| 1607 | #endif // CPU_X86_VM_MACROASSEMBLER_X86_HPP |