/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_MACROASSEMBLER_X86_HPP
#define CPU_X86_MACROASSEMBLER_X86_HPP

#include "asm/assembler.hpp"
#include "asm/register.hpp"
#include "code/vmreg.inline.hpp"
#include "compiler/oopMap.hpp"
#include "utilities/macros.hpp"
#include "runtime/signature.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"

class ciInlineKlass;

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {
  friend class LIR_Assembler;
  friend class Runtime1; // as_Address()

 public:
  // Support for VM calls
  //
  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).

  virtual void call_VM_leaf_base(
    address entry_point,        // the entry point
    int number_of_arguments     // the number of arguments to pop after the call
  );

 protected:
  // This is the base routine called by the different versions of call_VM. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  //
  // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg) then rsp will be used instead.
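  //
  // Illustrative sketch only (not compiled here), assuming a hypothetical runtime entry
  // point; it spells out the defaults described above:
  //
  //   __ call_VM_base(rax,     // oop result (if any) ends up in rax
  //                   noreg,   // no thread register given, so the macro computes it
  //                   noreg,   // no last_java_sp given, so rsp is used
  //                   CAST_FROM_FN_PTR(address, SomeRuntime::entry),  // hypothetical entry point
  //                   1,       // one argument (w/o thread) to pop after the call
  //                   true);   // check for pending exceptions on return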
  virtual void call_VM_base(      // returns the register containing the thread upon return
    Register oop_result,          // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,         // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,        // to set up last_Java_frame in stubs; use noreg otherwise
    address entry_point,          // the entry point
    int number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool check_exceptions         // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);

  // helpers for FPU flag access
  // tmp is a temporary register, if none is available use noreg
  void save_rax   (Register tmp);
  void restore_rax(Register tmp);

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}

  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  // The implementation is only non-empty for the InterpreterMacroAssembler,
  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);

  Address as_Address(AddressLiteral adr);
  Address as_Address(ArrayAddress adr, Register rscratch);

  // Support for null-checks
  //
  // Generates code that causes a null OS exception if the content of reg is null.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);

  // markWord tests, kills markWord reg
  void test_markword_is_inline_type(Register markword, Label& is_inline_type);

  // inlineKlass queries, kills temp_reg
  void test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type);
  void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type);

  void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free);
  void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free);
  void test_field_is_flat(Register flags, Register temp_reg, Label& is_flat);
  void test_field_has_null_marker(Register flags, Register temp_reg, Label& has_null_marker);

  // Check oops for special arrays, i.e. flat arrays and/or null-free arrays
  void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label);
  void test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array);
  void test_non_flat_array_oop(Register oop, Register temp_reg, Label& is_non_flat_array);
  void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array);
  void test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array);

  // Check array klass layout helper for flat or null-free arrays...
  void test_flat_array_layout(Register lh, Label& is_flat_array);
  void test_non_flat_array_layout(Register lh, Label& is_non_flat_array);

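  // Illustrative sketch only (not compiled here): branching away from flat arrays before
  // taking a fast path; the label name is made up and the temp register (rbx) is killed:
  //
  //   Label is_flat;
  //   __ test_flat_array_oop(rax, rbx, is_flat);
  //   // ... fast path for non-flat arrays ...
  //   __ bind(is_flat);
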
  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  void pd_patch_instruction(address branch, address target, const char* file, int line) {
    unsigned char op = branch[0];
    assert(op == 0xE8 /* call */ ||
           op == 0xE9 /* jmp */ ||
           op == 0xEB /* short jmp */ ||
           (op & 0xF0) == 0x70 /* short jcc */ ||
           (op == 0x0F && (branch[1] & 0xF0) == 0x80) /* jcc */ ||
           (op == 0xC7 && branch[1] == 0xF8) /* xbegin */ ||
           (op == 0x8D) /* lea */,
           "Invalid opcode at patch point");

    if (op == 0xEB || (op & 0xF0) == 0x70) {
      // short offset operators (jmp and jcc)
      char* disp = (char*) &branch[1];
      int imm8 = checked_cast<int>(target - (address) &disp[1]);
      guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d",
                file == nullptr ? "<null>" : file, line);
      *disp = (char)imm8;
    } else {
      int* disp = (int*) &branch[(op == 0x0F || op == 0xC7 || op == 0x8D) ? 2 : 1];
      int imm32 = checked_cast<int>(target - (address) &disp[1]);
      *disp = imm32;
    }
  }

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  // Support for sign-extension (hi:lo = extend_sign(lo))
  void extend_sign(Register hi, Register lo);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

  // Support for inc/dec with optimal instruction selection depending on value

  void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
  void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
  void increment(Address dst, int value = 1)  { LP64_ONLY(incrementq(dst, value)) NOT_LP64(incrementl(dst, value)) ; }
  void decrement(Address dst, int value = 1)  { LP64_ONLY(decrementq(dst, value)) NOT_LP64(decrementl(dst, value)) ; }

  void decrementl(Address dst, int value = 1);
  void decrementl(Register reg, int value = 1);

  void decrementq(Register reg, int value = 1);
  void decrementq(Address dst, int value = 1);

  void incrementl(Address dst, int value = 1);
  void incrementl(Register reg, int value = 1);

  void incrementq(Register reg, int value = 1);
  void incrementq(Address dst, int value = 1);

  void incrementl(AddressLiteral dst, Register rscratch = noreg);
  void incrementl(ArrayAddress dst, Register rscratch);

  void incrementq(AddressLiteral dst, Register rscratch = noreg);

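  // Illustrative sketch only (not compiled here): increment()/decrement() pick the
  // pointer-width form, so on 64-bit
  //
  //   __ increment(rbx, 8);   // expands to incrementq(rbx, 8)
  //
  // while the same call in a 32-bit build expands to incrementl(rbx, 8).
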
  // Support optimal SSE move instructions.
  void movflt(XMMRegister dst, XMMRegister src) {
    if (dst->encoding() == src->encoding()) return;
    if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
    else                       { movss (dst, src); return; }
  }
  void movflt(XMMRegister dst, Address src) { movss(dst, src); }
  void movflt(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
  void movflt(Address dst, XMMRegister src) { movss(dst, src); }

  // Move with zero extension
  void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); }

  void movdbl(XMMRegister dst, XMMRegister src) {
    if (dst->encoding() == src->encoding()) return;
    if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
    else                       { movsd (dst, src); return; }
  }

  void movdbl(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void movdbl(XMMRegister dst, Address src) {
    if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
    else                         { movlpd(dst, src); return; }
  }
  void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }

  void flt_to_flt16(Register dst, XMMRegister src, XMMRegister tmp) {
    // Use a separate tmp XMM register because the caller may
    // require the src XMM register to be unchanged (as in x86.ad).
    vcvtps2ph(tmp, src, 0x04, Assembler::AVX_128bit);
    movdl(dst, tmp);
    movswl(dst, dst);
  }

  void flt16_to_flt(XMMRegister dst, Register src) {
    movdl(dst, src);
    vcvtph2ps(dst, dst, Assembler::AVX_128bit);
  }

  // Alignment
  void align32();
  void align64();
  void align(uint modulus);
  void align(uint modulus, uint target);

  void post_call_nop();
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  void fat_nop();

  // Stack frame creation/removal
  void enter();
  void leave();

  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

#ifdef _LP64
  // Support for argument shuffling

  // bias in bytes
  void move32_64(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0);
  void long_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0);
  void float_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0);
  void double_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0);
  void move_ptr(VMRegPair src, VMRegPair dst);
  void object_move(OopMap* map,
                   int oop_handle_offset,
                   int framesize_in_slots,
                   VMRegPair src,
                   VMRegPair dst,
                   bool is_receiver,
                   int* receiver_offset);
#endif // _LP64

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is set up correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
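  //
  // Illustrative sketch only (not compiled here), with hypothetical entry points:
  //
  //   __ call_VM(rax, CAST_FROM_FN_PTR(address, SomeRuntime::entry), rdx);        // ENTRY-style, oop result in rax
  //   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SomeRuntime::leaf_entry), rbx);   // LEAF-style, no oop result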

  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result  (Register oop_result, Register thread);
  void get_vm_result_2(Register metadata_result, Register thread);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);

  void call_VM_leaf0(address entry_point);
  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3);

  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3, Register arg_4);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point);
  void super_call_VM_leaf(address entry_point, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register thread,
                           Register last_java_sp,
                           Register last_java_fp,
                           address last_java_pc,
                           Register rscratch);

  // thread in the default location (r15_thread on 64bit)
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           address last_java_pc,
                           Register rscratch);

#ifdef _LP64
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Label &last_java_pc,
                           Register scratch);
#endif

  void reset_last_Java_frame(Register thread, bool clear_fp);

  // thread in the default location (r15_thread on 64bit)
  void reset_last_Java_frame(bool clear_fp);

  // jobjects
  void clear_jobject_tag(Register possibly_non_local);
  void resolve_jobject(Register value, Register thread, Register tmp);
  void resolve_global_jobject(Register value, Register thread, Register tmp);

  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
  void c2bool(Register x);

  // C++ bool manipulation

  void movbool(Register dst, Address src);
  void movbool(Address dst, bool boolconst);
  void movbool(Address dst, Register src);
  void testbool(Register dst);

  void resolve_oop_handle(Register result, Register tmp);
  void resolve_weak_handle(Register result, Register tmp);
  void load_mirror(Register mirror, Register method, Register tmp);
  void load_method_holder_cld(Register rresult, Register rmethod);

  void load_method_holder(Register holder, Register method);

  // oop manipulations

  // Load oopDesc._metadata without decode (useful for direct Klass* compare from oops)
  void load_metadata(Register dst, Register src);
#ifdef _LP64
  void load_narrow_klass_compact(Register dst, Register src);
#endif
  void load_klass(Register dst, Register src, Register tmp);
  void store_klass(Register dst, Register src, Register tmp);

  // Compares the Klass pointer of an object to a given Klass (which might be narrow,
  // depending on UseCompressedClassPointers).
  void cmp_klass(Register klass, Register obj, Register tmp);

  // Compares the Klass pointers of two objects obj1 and obj2. Result is in the condition flags.
  // Uses tmp1 and tmp2 as temporary registers.
  void cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2);

  void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
                      Register tmp1, Register thread_tmp);
  void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
                       Register tmp1, Register tmp2, Register tmp3);

  void flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info);

  // inline type data payload offsets...
  void payload_offset(Register inline_klass, Register offset);
  void payload_addr(Register oop, Register data, Register inline_klass);
  // get data payload ptr of a flat value array at index, kills rcx and index
  void data_for_value_array_index(Register array, Register array_klass,
                                  Register index, Register data);

  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
                     Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register val, Register tmp1 = noreg,
                      Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0);

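  // Illustrative sketch only (not compiled here): a decorated oop field load; the field
  // offset is made up and IS_NOT_NULL is just one possible decorator:
  //
  //   __ load_heap_oop(rax, Address(rdx, 16), noreg, noreg, IS_NOT_NULL);
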
  // Used for storing null. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  void load_prototype_header(Register dst, Register src, Register tmp);

#ifdef _LP64
  void store_klass_gap(Register dst, Register src);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like null) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  void encode_heap_oop(Register r);
  void decode_heap_oop(Register r);
  void encode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register r);
  void encode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop_not_null(Register dst, Register src);

  void set_narrow_oop(Register dst, jobject obj);
  void set_narrow_oop(Address dst, jobject obj);
  void cmp_narrow_oop(Register dst, jobject obj);
  void cmp_narrow_oop(Address dst, jobject obj);

  void encode_klass_not_null(Register r, Register tmp);
  void decode_klass_not_null(Register r, Register tmp);
  void encode_and_move_klass_not_null(Register dst, Register src);
  void decode_and_move_klass_not_null(Register dst, Register src);
  void set_narrow_klass(Register dst, Klass* k);
  void set_narrow_klass(Address dst, Klass* k);
  void cmp_narrow_klass(Register dst, Klass* k);
  void cmp_narrow_klass(Address dst, Klass* k);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  DEBUG_ONLY(void verify_heapbase(const char* msg);)

#endif // _LP64

  // Int division/remainder for Java
  // (as idivl, but checks for special case as described in JVM spec.)
  // returns idivl instruction offset for implicit exception handling
  int corrected_idivl(Register reg);

  // Long division/remainder for Java
  // (as idivq, but checks for special case as described in JVM spec.)
  // returns idivq instruction offset for implicit exception handling
  int corrected_idivq(Register reg);

  void int3();

  // Long operation macros for a 32bit cpu
  // Long negation for Java
  void lneg(Register hi, Register lo);

  // Long multiplication for Java
  // (destroys contents of eax, ebx, ecx and edx)
  void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y

  // Long shifts for Java
  // (semantics as described in JVM spec.)
  void lshl(Register hi, Register lo);                               // hi:lo << (rcx & 0x3f)
  void lshr(Register hi, Register lo, bool sign_extension = false);  // hi:lo >> (rcx & 0x3f)

  // Long compare for Java
  // (semantics as described in JVM spec.)
  void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)


  // misc

  // Sign extension
  void sign_extend_short(Register reg);
  void sign_extend_byte(Register reg);

  // Division by power of 2, rounding towards 0
  void division_with_shift(Register reg, int shift_value);

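  // Illustrative sketch only (not compiled here): division_with_shift() divides the value
  // in a register by a power of two with Java rounding (towards zero), e.g. dividing rax by 8:
  //
  //   __ division_with_shift(rax, 3);   // shift_value 3 selects a divisor of 2^3
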
#ifndef _LP64
  // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
  //
  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
  // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
  void fcmp(Register tmp);
  // Variant of the above which allows y to be further down the stack
  // and which only pops x and y if specified. If pop_right is
  // specified then pop_left must also be specified.
  void fcmp(Register tmp, int index, bool pop_left, bool pop_right);

  // Floating-point comparison for Java
  // Compares the top-most stack entries on the FPU stack and stores the result in dst.
  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
  // (semantics as described in JVM spec.)
  void fcmp2int(Register dst, bool unordered_is_less);
  // Variant of the above which allows y to be further down the stack
  // and which only pops x and y if specified. If pop_right is
  // specified then pop_left must also be specified.
  void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);

  // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
  // tmp is a temporary register, if none is available use noreg
  void fremr(Register tmp);

  // only if +VerifyFPU
  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
#endif // !LP64

  // dst = c = a * b + c
  void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
  void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);

  void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
  void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
  void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
  void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);


  // same as fcmp2int, but using SSE2
  void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
  void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);

  // branch to L if FPU flag C2 is set/not set
  // tmp is a temporary register, if none is available use noreg
  void jC2 (Register tmp, Label& L);
  void jnC2(Register tmp, Label& L);

  // Load float value from 'address'. If UseSSE >= 1, the value is loaded into
  // register xmm0. Otherwise, the value is loaded onto the FPU stack.
  void load_float(Address src);

  // Store float value to 'address'. If UseSSE >= 1, the value is stored
  // from register xmm0. Otherwise, the value is stored from the FPU stack.
  void store_float(Address dst);

  // Load double value from 'address'. If UseSSE >= 2, the value is loaded into
  // register xmm0. Otherwise, the value is loaded onto the FPU stack.
  void load_double(Address src);

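  // Illustrative sketch only (not compiled here): with UseSSE >= 1 a float spill-slot
  // round trip goes through xmm0, otherwise through the FPU stack:
  //
  //   __ load_float (Address(rsp, 0));
  //   __ store_float(Address(rsp, 0));
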
  // Store double value to 'address'. If UseSSE >= 2, the value is stored
  // from register xmm0. Otherwise, the value is stored from the FPU stack.
  void store_double(Address dst);

#ifndef _LP64
  // Pop ST (ffree & fincstp combined)
  void fpop();

  void empty_FPU_stack();
#endif // !_LP64

  void push_IU_state();
  void pop_IU_state();

  void push_FPU_state();
  void pop_FPU_state();

  void push_CPU_state();
  void pop_CPU_state();

  void push_cont_fastpath();
  void pop_cont_fastpath();

  void inc_held_monitor_count();
  void dec_held_monitor_count();

  DEBUG_ONLY(void stop_if_in_cont(Register cont_reg, const char* name);)

  // Round up to a power of two
  void round_to(Register reg, int modulus);

 private:
  // General purpose and XMM registers potentially clobbered by native code; there
  // is no need for FPU or AVX opmask related methods because C1/interpreter
  // - we save/restore FPU state as a whole always
  // - do not care about AVX-512 opmask
  static RegSet call_clobbered_gp_registers();
  static XMMRegSet call_clobbered_xmm_registers();

  void push_set(XMMRegSet set, int offset);
  void pop_set(XMMRegSet set, int offset);

 public:
  void push_set(RegSet set, int offset = -1);
  void pop_set(RegSet set, int offset = -1);

  // Push and pop everything that might be clobbered by a native
  // runtime call.
  // Only save the lower 64 bits of each vector register.
  // Additional registers can be excluded in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude, bool save_fpu = true);
  void pop_call_clobbered_registers_except(RegSet exclude, bool restore_fpu = true);

  void push_call_clobbered_registers(bool save_fpu = true) {
    push_call_clobbered_registers_except(RegSet(), save_fpu);
  }
  void pop_call_clobbered_registers(bool restore_fpu = true) {
    pop_call_clobbered_registers_except(RegSet(), restore_fpu);
  }

  // allocation

  // Object / value buffer allocation...
  // Allocate instance of klass, assumes klass initialized by caller
  // new_obj prefers to be rax
  // Kills t1 and t2, preserves klass, return allocation in new_obj (rsi on LP64)
  void allocate_instance(Register klass, Register new_obj,
                         Register t1, Register t2,
                         bool clear_fields, Label& alloc_failed);

  void tlab_allocate(
    Register thread,              // Current thread
    Register obj,                 // result: pointer to object after successful allocation
    Register var_size_in_bytes,   // object size in bytes if unknown at compile time; invalid otherwise
    int con_size_in_bytes,        // object size in bytes if known at compile time
    Register t1,                  // temp register
    Register t2,                  // temp register
    Label& slow_case              // continuation point if fast allocation fails
  );
  void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);

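  // Illustrative sketch only (not compiled here): a fixed-size TLAB fast path with a
  // made-up slow-path label; rbx and rcx are the temps and rax receives the object:
  //
  //   Label slow;
  //   __ tlab_allocate(r15_thread, rax, noreg, 16 /* con_size_in_bytes */, rbx, rcx, slow);
  //   // ... initialize header and fields ...
  //   __ bind(slow);
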
  // For field "index" within "klass", return inline_klass ...
  void get_inline_type_field_klass(Register klass, Register index, Register inline_klass);

  void inline_layout_info(Register klass, Register index, Register layout_info);

  void population_count(Register dst, Register src, Register scratch1, Register scratch2);

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_temp,
                               Label& no_such_interface,
                               bool return_method = true);

  void lookup_interface_method_stub(Register recv_klass,
                                    Register holder_klass,
                                    Register resolved_klass,
                                    Register method_result,
                                    Register scan_temp,
                                    Register temp_reg2,
                                    Register receiver,
                                    int itable_index,
                                    Label& L_no_such_interface);

  // virtual method calling
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be null, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The temp_reg and temp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     bool set_cond_codes = false);

#ifdef _LP64
  // The 64-bit version, which may do a hashed subclass lookup.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Register temp3_reg,
                                     Register temp4_reg,
                                     Label* L_success,
                                     Label* L_failure);
#endif

  // Three parts of a hashed subclass lookup: a simple linear search,
  // a table lookup, and a fallback that does linear probing in the
  // event of a hash collision.
  void check_klass_subtype_slow_path_linear(Register sub_klass,
                                            Register super_klass,
                                            Register temp_reg,
                                            Register temp2_reg,
                                            Label* L_success,
                                            Label* L_failure,
                                            bool set_cond_codes = false);
  void check_klass_subtype_slow_path_table(Register sub_klass,
                                           Register super_klass,
                                           Register temp_reg,
                                           Register temp2_reg,
                                           Register temp3_reg,
                                           Register result_reg,
                                           Label* L_success,
                                           Label* L_failure);
  void hashed_check_klass_subtype_slow_path(Register sub_klass,
                                            Register super_klass,
                                            Register temp_reg,
                                            Label* L_success,
                                            Label* L_failure);

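  // Illustrative sketch only (not compiled here): fast and slow paths wired together, with
  // made-up labels; passing nullptr for a label means "take the fall-through" as described above:
  //
  //   Label ok, fail;
  //   __ check_klass_subtype_fast_path(rsi, rax, rcx, &ok, &fail, nullptr);  // falls through to the slow path
  //   __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, &ok, nullptr);    // falls through on failure
  //   __ bind(fail);
  //   // ... failure ...
  //   __ bind(ok);
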
  // As above, but with a constant super_klass.
  // The result is in Register result, not the condition codes.
  void lookup_secondary_supers_table_const(Register sub_klass,
                                           Register super_klass,
                                           Register temp1,
                                           Register temp2,
                                           Register temp3,
                                           Register temp4,
                                           Register result,
                                           u1 super_klass_slot);

#ifdef _LP64
  using Assembler::salq;
  void salq(Register dest, Register count);
  using Assembler::rorq;
  void rorq(Register dest, Register count);
  void lookup_secondary_supers_table_var(Register sub_klass,
                                         Register super_klass,
                                         Register temp1,
                                         Register temp2,
                                         Register temp3,
                                         Register temp4,
                                         Register result);

  void lookup_secondary_supers_table_slow_path(Register r_super_klass,
                                               Register r_array_base,
                                               Register r_array_index,
                                               Register r_bitmap,
                                               Register temp1,
                                               Register temp2,
                                               Label* L_success,
                                               Label* L_failure = nullptr);

  void verify_secondary_supers_table(Register r_sub_klass,
                                     Register r_super_klass,
                                     Register expected,
                                     Register temp1,
                                     Register temp2,
                                     Register temp3);
#endif

  void repne_scanq(Register addr, Register value, Register count, Register limit,
                   Label* L_success,
                   Label* L_failure = nullptr);

  // If r is valid, return r.
  // If r is invalid, remove a register r2 from available_regs, add r2
  // to regs_to_push, then return r2.
  Register allocate_if_noreg(const Register r,
                             RegSetIterator<Register> &available_regs,
                             RegSet &regs_to_push);

  // Simplified, combined version, good for typical uses.
  // Falls through on failure.
  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success);

  void clinit_barrier(Register klass,
                      Register thread,
                      Label* L_fast_path = nullptr,
                      Label* L_slow_path = nullptr);

  // method handles (JSR 292)
  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  // Debugging

  // only if +VerifyOops
  void _verify_oop(Register reg, const char* s, const char* file, int line);
  void _verify_oop_addr(Address addr, const char* s, const char* file, int line);

  void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop(reg, s, file, line);
    }
  }
  void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop_addr(reg, s, file, line);
    }
  }

  // TODO: verify method and klass metadata (compare against vptr?)
  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}

#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // Verify or restore cpu control state after JNI call
  void restore_cpu_control_state_after_jni(Register rscratch);

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  // prints msg and continues
  void warn(const char* msg);

  // dumps registers and other state
  void print_state();

  static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
  static void debug64(char* msg, int64_t pc, int64_t regs[]);
  static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
  static void print_state64(int64_t pc, int64_t regs[]);

  void os_breakpoint();

  void untested() { stop("untested"); }

  void unimplemented(const char* what = "");

  void should_not_reach_here() { stop("should not reach here"); }

  void print_CPU_state();

  // Stack overflow checking
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    movl(Address(rsp, (-offset)), rax);
  }

  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages. Also, clobbers tmp
  void bang_stack_size(Register size, Register tmp);

  // Check for reserved stack access in method being exited (for JIT)
  void reserved_stack_check();

  void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod);

  void verify_tlab();

  static Condition negate_condition(Condition cond);

  // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
  // operands. In general the names are modified to avoid hiding the instruction in Assembler
  // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
  // here in MacroAssembler. The major exception to this rule is call.
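  //
  // Illustrative sketch only (not compiled here), with a made-up external address: the
  // AddressLiteral forms either reach the operand directly or go through a scratch register:
  //
  //   __ cmp32(rax, ExternalAddress((address)&SomeGlobalFlag), rscratch1);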

  // Arithmetics


  void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
  void addptr(Address dst, Register src);

  void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
  void addptr(Register dst, int32_t src);
  void addptr(Register dst, Register src);
  void addptr(Register dst, RegisterOrConstant src) {
    if (src.is_constant()) addptr(dst, checked_cast<int>(src.as_constant()));
    else                   addptr(dst, src.as_register());
  }

  void andptr(Register dst, int32_t src);
  void andptr(Register dst, Register src) { LP64_ONLY(andq(dst, src)) NOT_LP64(andl(dst, src)) ; }
  void andptr(Register dst, Address src)  { LP64_ONLY(andq(dst, src)) NOT_LP64(andl(dst, src)) ; }

#ifdef _LP64
  using Assembler::andq;
  void andq(Register dst, AddressLiteral src, Register rscratch = noreg);
#endif

  void cmp8(AddressLiteral src1, int imm, Register rscratch = noreg);

  // renamed to drag out the casting of address to int32_t/intptr_t
  void cmp32(Register src1, int32_t imm);

  void cmp32(AddressLiteral src1, int32_t imm, Register rscratch = noreg);
  // compare reg - mem, or reg - &mem
  void cmp32(Register src1, AddressLiteral src2, Register rscratch = noreg);

  void cmp32(Register src1, Address src2);

#ifndef _LP64
  void cmpklass(Address dst, Metadata* obj);
  void cmpklass(Register dst, Metadata* obj);
  void cmpoop(Address dst, jobject obj);
#endif // !_LP64

  void cmpoop(Register src1, Register src2);
  void cmpoop(Register src1, Address src2);
  void cmpoop(Register dst, jobject obj, Register rscratch);

  // NOTE src2 must be the lval. This is NOT a mem-mem compare
  void cmpptr(Address src1, AddressLiteral src2, Register rscratch);

  void cmpptr(Register src1, AddressLiteral src2, Register rscratch = noreg);

  void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }

  void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }

  // cmp64 to avoid hiding cmpq
  void cmp64(Register src1, AddressLiteral src, Register rscratch = noreg);

  void cmpxchgptr(Register reg, Address adr);

  void locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch = noreg);

  void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
  void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }


  void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }

  void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }

  void shlptr(Register dst, int32_t shift);
  void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }

  void shrptr(Register dst, int32_t shift);
  void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }

  void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
  void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }

  void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }

  void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
  void subptr(Register dst, int32_t src);
  // Force generation of a 4 byte immediate value even if it fits into 8bit
  void subptr_imm32(Register dst, int32_t src);
  void subptr(Register dst, Register src);
  void subptr(Register dst, RegisterOrConstant src) {
    if (src.is_constant()) subptr(dst, (int) src.as_constant());
    else                   subptr(dst, src.as_register());
  }

  void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
  void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }

  void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
  void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }

  void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }



  // Helper functions for statistics gathering.
  // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
  void cond_inc32(Condition cond, AddressLiteral counter_addr, Register rscratch = noreg);
  // Unconditional atomic increment.
  void atomic_incl(Address counter_addr);
  void atomic_incl(AddressLiteral counter_addr, Register rscratch = noreg);
#ifdef _LP64
  void atomic_incq(Address counter_addr);
  void atomic_incq(AddressLiteral counter_addr, Register rscratch = noreg);
#endif
  void atomic_incptr(AddressLiteral counter_addr, Register rscratch = noreg) { LP64_ONLY(atomic_incq(counter_addr, rscratch)) NOT_LP64(atomic_incl(counter_addr, rscratch)) ; }
  void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }

  using Assembler::lea;
  void lea(Register dst, AddressLiteral adr);
  void lea(Address dst, AddressLiteral adr, Register rscratch);

  void leal32(Register dst, Address src) { leal(dst, src); }

  // Import other testl() methods from the parent class or else
  // they will be hidden by the following overriding declaration.
  using Assembler::testl;
  void testl(Address dst, int32_t imm32);
  void testl(Register dst, int32_t imm32);
  void testl(Register dst, AddressLiteral src); // requires reachable address
  using Assembler::testq;
  void testq(Address dst, int32_t imm32);
  void testq(Register dst, int32_t imm32);

  void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); }

  void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
  void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); }
  void testptr(Address src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
  void testptr(Register src1, Register src2);

  void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
  void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }

  // Calls

  void call(Label& L, relocInfo::relocType rtype);
  void call(Register entry);
  void call(Address addr) { Assembler::call(addr); }

  // NOTE: this call transfers to the effective address of entry NOT
  // the address contained by entry. This is because this is more natural
  // for jumps/calls.
  void call(AddressLiteral entry, Register rscratch = rax);

  // Emit the CompiledIC call idiom
  void ic_call(address entry, jint method_index = 0);
  static int ic_check_size();
  int ic_check(int end_alignment);

  void emit_static_call_stub();

  // Jumps

  // NOTE: these jumps transfer to the effective address of dst NOT
  // the address contained by dst. This is because this is more natural
  // for jumps/calls.
  void jump(AddressLiteral dst, Register rscratch = noreg);

  void jump_cc(Condition cc, AddressLiteral dst, Register rscratch = noreg);
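
  // Illustrative sketch only (not compiled here): jumping to a stub through an
  // AddressLiteral; the stub entry used is just an example target:
  //
  //   __ jump_cc(Assembler::equal, RuntimeAddress(StubRoutines::forward_exception_entry()));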

  // 32bit can do a case table jump in one instruction but we no longer allow the base
  // to be installed in the Address class. This jump will transfer to the address
  // contained in the location described by entry (not the address of entry)
  void jump(ArrayAddress entry, Register rscratch);

  // Adding more natural conditional jump instructions
  void ALWAYSINLINE jo(Label& L, bool maybe_short = true) { jcc(Assembler::overflow, L, maybe_short); }
  void ALWAYSINLINE jno(Label& L, bool maybe_short = true) { jcc(Assembler::noOverflow, L, maybe_short); }
  void ALWAYSINLINE js(Label& L, bool maybe_short = true) { jcc(Assembler::negative, L, maybe_short); }
  void ALWAYSINLINE jns(Label& L, bool maybe_short = true) { jcc(Assembler::positive, L, maybe_short); }
  void ALWAYSINLINE je(Label& L, bool maybe_short = true) { jcc(Assembler::equal, L, maybe_short); }
  void ALWAYSINLINE jz(Label& L, bool maybe_short = true) { jcc(Assembler::zero, L, maybe_short); }
  void ALWAYSINLINE jne(Label& L, bool maybe_short = true) { jcc(Assembler::notEqual, L, maybe_short); }
  void ALWAYSINLINE jnz(Label& L, bool maybe_short = true) { jcc(Assembler::notZero, L, maybe_short); }
  void ALWAYSINLINE jb(Label& L, bool maybe_short = true) { jcc(Assembler::below, L, maybe_short); }
  void ALWAYSINLINE jnae(Label& L, bool maybe_short = true) { jcc(Assembler::below, L, maybe_short); }
  void ALWAYSINLINE jc(Label& L, bool maybe_short = true) { jcc(Assembler::carrySet, L, maybe_short); }
  void ALWAYSINLINE jnb(Label& L, bool maybe_short = true) { jcc(Assembler::aboveEqual, L, maybe_short); }
  void ALWAYSINLINE jae(Label& L, bool maybe_short = true) { jcc(Assembler::aboveEqual, L, maybe_short); }
  void ALWAYSINLINE jnc(Label& L, bool maybe_short = true) { jcc(Assembler::carryClear, L, maybe_short); }
  void ALWAYSINLINE jbe(Label& L, bool maybe_short = true) { jcc(Assembler::belowEqual, L, maybe_short); }
  void ALWAYSINLINE jna(Label& L, bool maybe_short = true) { jcc(Assembler::belowEqual, L, maybe_short); }
  void ALWAYSINLINE ja(Label& L, bool maybe_short = true) { jcc(Assembler::above, L, maybe_short); }
  void ALWAYSINLINE jnbe(Label& L, bool maybe_short = true) { jcc(Assembler::above, L, maybe_short); }
  void ALWAYSINLINE jl(Label& L, bool maybe_short = true) { jcc(Assembler::less, L, maybe_short); }
  void ALWAYSINLINE jnge(Label& L, bool maybe_short = true) { jcc(Assembler::less, L, maybe_short); }
  void ALWAYSINLINE jge(Label& L, bool maybe_short = true) { jcc(Assembler::greaterEqual, L, maybe_short); }
  void ALWAYSINLINE jnl(Label& L, bool maybe_short = true) { jcc(Assembler::greaterEqual, L, maybe_short); }
  void ALWAYSINLINE jle(Label& L, bool maybe_short = true) { jcc(Assembler::lessEqual, L, maybe_short); }
  void ALWAYSINLINE jng(Label& L, bool maybe_short = true) { jcc(Assembler::lessEqual, L, maybe_short); }
  void ALWAYSINLINE jg(Label& L, bool maybe_short = true) { jcc(Assembler::greater, L, maybe_short); }
  void ALWAYSINLINE jnle(Label& L, bool maybe_short = true) { jcc(Assembler::greater, L, maybe_short); }
  void ALWAYSINLINE jp(Label& L, bool maybe_short = true) { jcc(Assembler::parity, L, maybe_short); }
  void ALWAYSINLINE jpe(Label& L, bool maybe_short = true) { jcc(Assembler::parity, L, maybe_short); }
  void ALWAYSINLINE jnp(Label& L, bool maybe_short = true) { jcc(Assembler::noParity, L, maybe_short); }
  void ALWAYSINLINE jpo(Label& L, bool maybe_short = true) { jcc(Assembler::noParity, L, maybe_short); }
  // * No condition for this *  void ALWAYSINLINE jcxz(Label& L, bool maybe_short = true) { jcc(Assembler::cxz, L, maybe_short); }
  // * No condition for this *  void ALWAYSINLINE jecxz(Label& L, bool maybe_short = true) { jcc(Assembler::cxz, L, maybe_short); }

  // Short versions of the above
  void ALWAYSINLINE jo_b(Label& L) { jccb(Assembler::overflow, L); }
  void ALWAYSINLINE jno_b(Label& L) { jccb(Assembler::noOverflow, L); }
  void ALWAYSINLINE js_b(Label& L) { jccb(Assembler::negative, L); }
  void ALWAYSINLINE jns_b(Label& L) { jccb(Assembler::positive, L); }
  void ALWAYSINLINE je_b(Label& L) { jccb(Assembler::equal, L); }
  void ALWAYSINLINE jz_b(Label& L) { jccb(Assembler::zero, L); }
  void ALWAYSINLINE jne_b(Label& L) { jccb(Assembler::notEqual, L); }
  void ALWAYSINLINE jnz_b(Label& L) { jccb(Assembler::notZero, L); }
  void ALWAYSINLINE jb_b(Label& L) { jccb(Assembler::below, L); }
  void ALWAYSINLINE jnae_b(Label& L) { jccb(Assembler::below, L); }
  void ALWAYSINLINE jc_b(Label& L) { jccb(Assembler::carrySet, L); }
  void ALWAYSINLINE jnb_b(Label& L) { jccb(Assembler::aboveEqual, L); }
  void ALWAYSINLINE jae_b(Label& L) { jccb(Assembler::aboveEqual, L); }
  void ALWAYSINLINE jnc_b(Label& L) { jccb(Assembler::carryClear, L); }
  void ALWAYSINLINE jbe_b(Label& L) { jccb(Assembler::belowEqual, L); }
  void ALWAYSINLINE jna_b(Label& L) { jccb(Assembler::belowEqual, L); }
  void ALWAYSINLINE ja_b(Label& L) { jccb(Assembler::above, L); }
  void ALWAYSINLINE jnbe_b(Label& L) { jccb(Assembler::above, L); }
  void ALWAYSINLINE jl_b(Label& L) { jccb(Assembler::less, L); }
  void ALWAYSINLINE jnge_b(Label& L) { jccb(Assembler::less, L); }
  void ALWAYSINLINE jge_b(Label& L) { jccb(Assembler::greaterEqual, L); }
  void ALWAYSINLINE jnl_b(Label& L) { jccb(Assembler::greaterEqual, L); }
  void ALWAYSINLINE jle_b(Label& L) { jccb(Assembler::lessEqual, L); }
  void ALWAYSINLINE jng_b(Label& L) { jccb(Assembler::lessEqual, L); }
  void ALWAYSINLINE jg_b(Label& L) { jccb(Assembler::greater, L); }
  void ALWAYSINLINE jnle_b(Label& L) { jccb(Assembler::greater, L); }
  void ALWAYSINLINE jp_b(Label& L) { jccb(Assembler::parity, L); }
  void ALWAYSINLINE jpe_b(Label& L) { jccb(Assembler::parity, L); }
  void ALWAYSINLINE jnp_b(Label& L) { jccb(Assembler::noParity, L); }
  void ALWAYSINLINE jpo_b(Label& L) { jccb(Assembler::noParity, L); }
  // * No condition for this *  void ALWAYSINLINE jcxz_b(Label& L) { jccb(Assembler::cxz, L); }
  // * No condition for this *  void ALWAYSINLINE jecxz_b(Label& L) { jccb(Assembler::cxz, L); }

  // Floating

  void push_f(XMMRegister r);
  void pop_f(XMMRegister r);
  void push_d(XMMRegister r);
  void pop_d(XMMRegister r);

  void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }
  void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
  void andpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
  void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
  void andps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
  void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
  void comiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
  void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
  void comisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

#ifndef _LP64
  void fadd_s(Address src)        { Assembler::fadd_s(src); }
  void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }

  void fldcw(Address src) { Assembler::fldcw(src); }
  void fldcw(AddressLiteral src);

  void fld_s(int index)   { Assembler::fld_s(index); }
  void fld_s(Address src) { Assembler::fld_s(src); }
  void fld_s(AddressLiteral src);

  void fld_d(Address src) { Assembler::fld_d(src); }
  void fld_d(AddressLiteral src);

  void fld_x(Address src) { Assembler::fld_x(src); }
  void fld_x(AddressLiteral src) { Assembler::fld_x(as_Address(src)); }

  void fmul_s(Address src)        { Assembler::fmul_s(src); }
  void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
#endif // !_LP64

  void cmp32_mxcsr_std(Address mxcsr_save, Register tmp, Register rscratch = noreg);
  void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
  void ldmxcsr(AddressLiteral src, Register rscratch = noreg);

#ifdef _LP64
 private:
  void sha256_AVX2_one_round_compute(
    Register reg_old_h,
    Register reg_a,
    Register reg_b,
    Register reg_c,
    Register reg_d,
    Register reg_e,
    Register reg_f,
    Register reg_g,
    Register reg_h,
    int iter);
  void sha256_AVX2_four_rounds_compute_first(int start);
  void sha256_AVX2_four_rounds_compute_last(int start);
  void sha256_AVX2_one_round_and_sched(
    XMMRegister xmm_0,     /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */
    XMMRegister xmm_1,     /* ymm5 */  /* full cycle is 16 iterations */
    XMMRegister xmm_2,     /* ymm6 */
    XMMRegister xmm_3,     /* ymm7 */
    Register reg_a,        /* == eax on 0 iteration, then rotate 8 register right on each next iteration */
    Register reg_b,        /* ebx */   /* full cycle is 8 iterations */
    Register reg_c,        /* edi */
    Register reg_d,        /* esi */
    Register reg_e,        /* r8d */
    Register reg_f,        /* r9d */
    Register reg_g,        /* r10d */
    Register reg_h,        /* r11d */
    int iter);

  void addm(int disp, Register r1, Register r2);

  void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d,
                                     Register e, Register f, Register g, Register h, int iteration);

  void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                                          Register a, Register b, Register c, Register d, Register e, Register f,
                                          Register g, Register h, int iteration);

  void addmq(int disp, Register r1, Register r2);
 public:
  void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
                   XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
                   Register buf, Register state, Register ofs, Register limit, Register rsp,
                   bool multi_block, XMMRegister shuf_mask);
  void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
                   XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
                   Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
                   XMMRegister shuf_mask);
  void sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block);
#endif // _LP64

  void fast_md5(Register buf, Address state, Address ofs, Address limit,
                bool multi_block);

  void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
                 XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
                 Register buf, Register state, Register ofs, Register limit, Register rsp,
                 bool multi_block);

#ifdef _LP64
  void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
                   XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
                   Register buf, Register state, Register ofs, Register limit, Register rsp,
                   bool multi_block, XMMRegister shuf_mask);
#else
  void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
                   XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
                   Register buf, Register state, Register ofs, Register limit, Register rsp,
                   bool multi_block);
#endif

  void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                Register rax, Register rcx, Register rdx, Register tmp);

#ifndef _LP64
 private:
  // Initialized in macroAssembler_x86_constants.cpp
  static address ONES;
  static address L_2IL0FLOATPACKET_0;
  static address PI4_INV;
  static address PI4X3;
  static address PI4X4;

 public:
  void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                Register rax, Register rcx, Register rdx, Register tmp1);

  void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                  XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                  Register rax, Register rcx, Register rdx, Register tmp);

  void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
                XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
                Register rdx, Register tmp);

  void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                Register rax, Register rbx, Register rdx);

  void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                Register rax, Register rcx, Register rdx, Register tmp);

  void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
                        Register edx, Register ebx, Register esi, Register edi,
                        Register ebp, Register esp);

  void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
                         Register esi, Register edi, Register ebp, Register esp);

  void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
                        Register edx, Register ebx, Register esi, Register edi,
                        Register ebp, Register esp);

  void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                Register rax, Register rcx, Register rdx, Register tmp);
#endif // !_LP64

private: 1320 1321 // these are private because users should be doing movflt/movdbl 1322 1323 void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } 1324 void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } 1325 void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } 1326 void movss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1327 1328 void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } 1329 void movlpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1330 1331 public: 1332 1333 void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } 1334 void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } 1335 void addsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1336 1337 void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } 1338 void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } 1339 void addss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1340 1341 void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); } 1342 void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); } 1343 void addpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1344 1345 using Assembler::vbroadcasti128; 1346 void vbroadcasti128(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1347 1348 using Assembler::vbroadcastsd; 1349 void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1350 1351 using Assembler::vbroadcastss; 1352 void vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1353 1354 // Vector float blend 1355 void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg); 1356 void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg); 1357 1358 void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } 1359 void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } 1360 void divsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1361 1362 void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } 1363 void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } 1364 void divss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1365 1366 // Move Unaligned Double Quadword 1367 void movdqu(Address dst, XMMRegister src); 1368 void movdqu(XMMRegister dst, XMMRegister src); 1369 void movdqu(XMMRegister dst, Address src); 1370 void movdqu(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1371 1372 void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); } 1373 void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); } 1374 void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); } 1375 void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); } 1376 void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); } 1377 void kmovwl(KRegister dst, AddressLiteral src, Register rscratch = noreg); 1378 1379 void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); } 1380 void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); } 1381 void 
kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); } 1382 void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); } 1383 void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); } 1384 void kmovql(KRegister dst, AddressLiteral src, Register rscratch = noreg); 1385 1386 // Safe move operation, lowers down to 16bit moves for targets supporting 1387 // AVX512F feature and 64bit moves for targets supporting AVX512BW feature. 1388 void kmov(Address dst, KRegister src); 1389 void kmov(KRegister dst, Address src); 1390 void kmov(KRegister dst, KRegister src); 1391 void kmov(Register dst, KRegister src); 1392 void kmov(KRegister dst, Register src); 1393 1394 using Assembler::movddup; 1395 void movddup(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1396 1397 using Assembler::vmovddup; 1398 void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1399 1400 // AVX Unaligned forms 1401 void vmovdqu(Address dst, XMMRegister src); 1402 void vmovdqu(XMMRegister dst, Address src); 1403 void vmovdqu(XMMRegister dst, XMMRegister src); 1404 void vmovdqu(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1405 void vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1406 void vmovdqu(XMMRegister dst, XMMRegister src, int vector_len); 1407 void vmovdqu(XMMRegister dst, Address src, int vector_len); 1408 void vmovdqu(Address dst, XMMRegister src, int vector_len); 1409 1410 // AVX Aligned forms 1411 using Assembler::vmovdqa; 1412 void vmovdqa(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1413 void vmovdqa(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1414 1415 // AVX512 Unaligned 1416 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len); 1417 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len); 1418 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, XMMRegister src, bool merge, int vector_len); 1419 1420 void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); } 1421 void evmovdqub(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); } 1422 1423 void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { 1424 if (dst->encoding() != src->encoding() || mask != k0) { 1425 Assembler::evmovdqub(dst, mask, src, merge, vector_len); 1426 } 1427 } 1428 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } 1429 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } 1430 void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1431 1432 void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); } 1433 void evmovdquw(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); } 1434 void evmovdquw(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); } 1435 1436 void evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { 1437 if (dst->encoding() != src->encoding() || mask != 
k0) { 1438 Assembler::evmovdquw(dst, mask, src, merge, vector_len); 1439 } 1440 } 1441 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } 1442 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } 1443 void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1444 1445 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { 1446 if (dst->encoding() != src->encoding()) { 1447 Assembler::evmovdqul(dst, src, vector_len); 1448 } 1449 } 1450 void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); } 1451 void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); } 1452 1453 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { 1454 if (dst->encoding() != src->encoding() || mask != k0) { 1455 Assembler::evmovdqul(dst, mask, src, merge, vector_len); 1456 } 1457 } 1458 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } 1459 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } 1460 void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1461 1462 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { 1463 if (dst->encoding() != src->encoding()) { 1464 Assembler::evmovdquq(dst, src, vector_len); 1465 } 1466 } 1467 void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } 1468 void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } 1469 void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1470 void evmovdqaq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1471 1472 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { 1473 if (dst->encoding() != src->encoding() || mask != k0) { 1474 Assembler::evmovdquq(dst, mask, src, merge, vector_len); 1475 } 1476 } 1477 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } 1478 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } 1479 void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1480 void evmovdqaq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1481 1482 // Move Aligned Double Quadword 1483 void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); } 1484 void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } 1485 void movdqa(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1486 1487 void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } 1488 void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } 1489 void movsd(XMMRegister dst, Address src) { 
Assembler::movsd(dst, src); } 1490 void movsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1491 1492 void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); } 1493 void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); } 1494 void mulpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1495 1496 void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } 1497 void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } 1498 void mulsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1499 1500 void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } 1501 void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } 1502 void mulss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1503 1504 // Carry-Less Multiplication Quadword 1505 void pclmulldq(XMMRegister dst, XMMRegister src) { 1506 // 0x00 - multiply lower 64 bits [0:63] 1507 Assembler::pclmulqdq(dst, src, 0x00); 1508 } 1509 void pclmulhdq(XMMRegister dst, XMMRegister src) { 1510 // 0x11 - multiply upper 64 bits [64:127] 1511 Assembler::pclmulqdq(dst, src, 0x11); 1512 } 1513 1514 void pcmpeqb(XMMRegister dst, XMMRegister src); 1515 void pcmpeqw(XMMRegister dst, XMMRegister src); 1516 1517 void pcmpestri(XMMRegister dst, Address src, int imm8); 1518 void pcmpestri(XMMRegister dst, XMMRegister src, int imm8); 1519 1520 void pmovzxbw(XMMRegister dst, XMMRegister src); 1521 void pmovzxbw(XMMRegister dst, Address src); 1522 1523 void pmovmskb(Register dst, XMMRegister src); 1524 1525 void ptest(XMMRegister dst, XMMRegister src); 1526 1527 void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); } 1528 void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); } 1529 void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register rscratch = noreg); 1530 1531 void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } 1532 void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } 1533 void sqrtss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1534 1535 void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } 1536 void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } 1537 void subsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1538 1539 void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } 1540 void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } 1541 void subss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1542 1543 void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } 1544 void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } 1545 void ucomiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1546 1547 void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } 1548 void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } 1549 void ucomisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1550 1551 // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values 1552 void xorpd(XMMRegister dst, XMMRegister src); 1553 void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } 1554 void xorpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1555 1556 // Bitwise Logical 
XOR of Packed Single-Precision Floating-Point Values 1557 void xorps(XMMRegister dst, XMMRegister src); 1558 void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } 1559 void xorps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1560 1561 // Shuffle Bytes 1562 void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } 1563 void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } 1564 void pshufb(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1565 // AVX 3-operands instructions 1566 1567 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } 1568 void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } 1569 void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1570 1571 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } 1572 void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } 1573 void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1574 1575 void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len, Register rscratch = noreg); 1576 void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len, Register rscratch = noreg); 1577 1578 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1579 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1580 void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1581 1582 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1583 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1584 1585 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); } 1586 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); } 1587 void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1588 1589 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } 1590 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } 1591 void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1592 1593 using Assembler::vpbroadcastd; 1594 void vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1595 1596 using Assembler::vpbroadcastq; 1597 void vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1598 1599 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1600 void vpcmpeqb(XMMRegister dst, XMMRegister src1, Address src2, int vector_len); 1601 1602 void vpcmpeqw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1603 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1604 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1605 1606 // Vector compares 1607 void evpcmpd(KRegister kdst, KRegister 
mask, XMMRegister nds, XMMRegister src, int comparison, bool is_signed, int vector_len) { 1608 Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len); 1609 } 1610 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg); 1611 1612 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int comparison, bool is_signed, int vector_len) { 1613 Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len); 1614 } 1615 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg); 1616 1617 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int comparison, bool is_signed, int vector_len) { 1618 Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len); 1619 } 1620 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg); 1621 1622 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int comparison, bool is_signed, int vector_len) { 1623 Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len); 1624 } 1625 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg); 1626 1627 void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len); 1628 1629 // Emit comparison instruction for the specified comparison predicate. 1630 void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister xtmp, ComparisonPredicate cond, Width width, int vector_len); 1631 void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len); 1632 1633 void vpmovzxbw(XMMRegister dst, Address src, int vector_len); 1634 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); } 1635 1636 void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit); 1637 1638 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1639 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1640 1641 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpmulld(dst, nds, src, vector_len); } 1642 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpmulld(dst, nds, src, vector_len); } 1643 void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1644 1645 void vpmuldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpmuldq(dst, nds, src, vector_len); } 1646 1647 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1648 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1649 1650 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1651 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1652 1653 void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); 1654 void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); 1655 1656 void evpsrad(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); 1657 
void evpsrad(XMMRegister dst, XMMRegister nds, int shift, int vector_len); 1658 1659 void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); 1660 void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len); 1661 1662 using Assembler::evpsllw; 1663 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1664 if (!is_varshift) { 1665 Assembler::evpsllw(dst, mask, nds, src, merge, vector_len); 1666 } else { 1667 Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len); 1668 } 1669 } 1670 void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1671 if (!is_varshift) { 1672 Assembler::evpslld(dst, mask, nds, src, merge, vector_len); 1673 } else { 1674 Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len); 1675 } 1676 } 1677 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1678 if (!is_varshift) { 1679 Assembler::evpsllq(dst, mask, nds, src, merge, vector_len); 1680 } else { 1681 Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len); 1682 } 1683 } 1684 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1685 if (!is_varshift) { 1686 Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len); 1687 } else { 1688 Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len); 1689 } 1690 } 1691 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1692 if (!is_varshift) { 1693 Assembler::evpsrld(dst, mask, nds, src, merge, vector_len); 1694 } else { 1695 Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len); 1696 } 1697 } 1698 1699 using Assembler::evpsrlq; 1700 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1701 if (!is_varshift) { 1702 Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len); 1703 } else { 1704 Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len); 1705 } 1706 } 1707 using Assembler::evpsraw; 1708 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1709 if (!is_varshift) { 1710 Assembler::evpsraw(dst, mask, nds, src, merge, vector_len); 1711 } else { 1712 Assembler::evpsravw(dst, mask, nds, src, merge, vector_len); 1713 } 1714 } 1715 using Assembler::evpsrad; 1716 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1717 if (!is_varshift) { 1718 Assembler::evpsrad(dst, mask, nds, src, merge, vector_len); 1719 } else { 1720 Assembler::evpsravd(dst, mask, nds, src, merge, vector_len); 1721 } 1722 } 1723 using Assembler::evpsraq; 1724 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { 1725 if (!is_varshift) { 1726 Assembler::evpsraq(dst, mask, nds, src, merge, vector_len); 1727 } else { 1728 Assembler::evpsravq(dst, mask, nds, src, merge, vector_len); 1729 } 1730 } 1731 1732 void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); 1733 void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); 1734 void 
evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); 1735 void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); 1736 1737 void evpminu(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); 1738 void evpmaxu(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); 1739 void evpminu(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); 1740 void evpmaxu(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); 1741 1742 void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); 1743 void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); 1744 1745 void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); 1746 void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); 1747 1748 void vptest(XMMRegister dst, XMMRegister src); 1749 void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); } 1750 1751 void punpcklbw(XMMRegister dst, XMMRegister src); 1752 void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); } 1753 1754 void pshufd(XMMRegister dst, Address src, int mode); 1755 void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); } 1756 1757 void pshuflw(XMMRegister dst, XMMRegister src, int mode); 1758 void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); } 1759 1760 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } 1761 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } 1762 void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1763 1764 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } 1765 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } 1766 void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1767 1768 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1769 1770 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } 1771 void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } 1772 void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1773 1774 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } 1775 void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } 1776 void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1777 1778 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } 1779 void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } 1780 void vmulsd(XMMRegister dst, XMMRegister nds, 
AddressLiteral src, Register rscratch = noreg); 1781 1782 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } 1783 void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } 1784 void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1785 1786 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } 1787 void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } 1788 void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1789 1790 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } 1791 void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } 1792 void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1793 1794 void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1795 void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg); 1796 1797 // AVX Vector instructions 1798 1799 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } 1800 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } 1801 void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1802 1803 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } 1804 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } 1805 void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1806 1807 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 1808 if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 1809 Assembler::vpxor(dst, nds, src, vector_len); 1810 else 1811 Assembler::vxorpd(dst, nds, src, vector_len); 1812 } 1813 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 1814 if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 1815 Assembler::vpxor(dst, nds, src, vector_len); 1816 else 1817 Assembler::vxorpd(dst, nds, src, vector_len); 1818 } 1819 void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1820 1821 // Simple version for AVX2 256bit vectors 1822 void vpxor(XMMRegister dst, XMMRegister src) { 1823 assert(UseAVX >= 2, "Should be at least AVX2"); 1824 Assembler::vpxor(dst, dst, src, AVX_256bit); 1825 } 1826 void vpxor(XMMRegister dst, Address src) { 1827 assert(UseAVX >= 2, "Should be at least AVX2"); 1828 Assembler::vpxor(dst, dst, src, AVX_256bit); 1829 } 1830 1831 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); } 1832 void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 1833 1834 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { 1835 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1836 Assembler::vinserti32x4(dst, nds, src, imm8); 1837 } else if (UseAVX > 1) { 1838 
// vinserti128 is available only in AVX2 1839 Assembler::vinserti128(dst, nds, src, imm8); 1840 } else { 1841 Assembler::vinsertf128(dst, nds, src, imm8); 1842 } 1843 } 1844 1845 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { 1846 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1847 Assembler::vinserti32x4(dst, nds, src, imm8); 1848 } else if (UseAVX > 1) { 1849 // vinserti128 is available only in AVX2 1850 Assembler::vinserti128(dst, nds, src, imm8); 1851 } else { 1852 Assembler::vinsertf128(dst, nds, src, imm8); 1853 } 1854 } 1855 1856 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) { 1857 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1858 Assembler::vextracti32x4(dst, src, imm8); 1859 } else if (UseAVX > 1) { 1860 // vextracti128 is available only in AVX2 1861 Assembler::vextracti128(dst, src, imm8); 1862 } else { 1863 Assembler::vextractf128(dst, src, imm8); 1864 } 1865 } 1866 1867 void vextracti128(Address dst, XMMRegister src, uint8_t imm8) { 1868 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1869 Assembler::vextracti32x4(dst, src, imm8); 1870 } else if (UseAVX > 1) { 1871 // vextracti128 is available only in AVX2 1872 Assembler::vextracti128(dst, src, imm8); 1873 } else { 1874 Assembler::vextractf128(dst, src, imm8); 1875 } 1876 } 1877 1878 // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers 1879 void vinserti128_high(XMMRegister dst, XMMRegister src) { 1880 vinserti128(dst, dst, src, 1); 1881 } 1882 void vinserti128_high(XMMRegister dst, Address src) { 1883 vinserti128(dst, dst, src, 1); 1884 } 1885 void vextracti128_high(XMMRegister dst, XMMRegister src) { 1886 vextracti128(dst, src, 1); 1887 } 1888 void vextracti128_high(Address dst, XMMRegister src) { 1889 vextracti128(dst, src, 1); 1890 } 1891 1892 void vinsertf128_high(XMMRegister dst, XMMRegister src) { 1893 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1894 Assembler::vinsertf32x4(dst, dst, src, 1); 1895 } else { 1896 Assembler::vinsertf128(dst, dst, src, 1); 1897 } 1898 } 1899 1900 void vinsertf128_high(XMMRegister dst, Address src) { 1901 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1902 Assembler::vinsertf32x4(dst, dst, src, 1); 1903 } else { 1904 Assembler::vinsertf128(dst, dst, src, 1); 1905 } 1906 } 1907 1908 void vextractf128_high(XMMRegister dst, XMMRegister src) { 1909 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1910 Assembler::vextractf32x4(dst, src, 1); 1911 } else { 1912 Assembler::vextractf128(dst, src, 1); 1913 } 1914 } 1915 1916 void vextractf128_high(Address dst, XMMRegister src) { 1917 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1918 Assembler::vextractf32x4(dst, src, 1); 1919 } else { 1920 Assembler::vextractf128(dst, src, 1); 1921 } 1922 } 1923 1924 // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers 1925 void vinserti64x4_high(XMMRegister dst, XMMRegister src) { 1926 Assembler::vinserti64x4(dst, dst, src, 1); 1927 } 1928 void vinsertf64x4_high(XMMRegister dst, XMMRegister src) { 1929 Assembler::vinsertf64x4(dst, dst, src, 1); 1930 } 1931 void vextracti64x4_high(XMMRegister dst, XMMRegister src) { 1932 Assembler::vextracti64x4(dst, src, 1); 1933 } 1934 void vextractf64x4_high(XMMRegister dst, XMMRegister src) { 1935 Assembler::vextractf64x4(dst, src, 1); 1936 } 1937 void vextractf64x4_high(Address dst, XMMRegister src) { 1938 Assembler::vextractf64x4(dst, src, 1); 1939 } 1940 void vinsertf64x4_high(XMMRegister dst, Address src) { 1941 
Assembler::vinsertf64x4(dst, dst, src, 1); 1942 } 1943 1944 // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers 1945 void vinserti128_low(XMMRegister dst, XMMRegister src) { 1946 vinserti128(dst, dst, src, 0); 1947 } 1948 void vinserti128_low(XMMRegister dst, Address src) { 1949 vinserti128(dst, dst, src, 0); 1950 } 1951 void vextracti128_low(XMMRegister dst, XMMRegister src) { 1952 vextracti128(dst, src, 0); 1953 } 1954 void vextracti128_low(Address dst, XMMRegister src) { 1955 vextracti128(dst, src, 0); 1956 } 1957 1958 void vinsertf128_low(XMMRegister dst, XMMRegister src) { 1959 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1960 Assembler::vinsertf32x4(dst, dst, src, 0); 1961 } else { 1962 Assembler::vinsertf128(dst, dst, src, 0); 1963 } 1964 } 1965 1966 void vinsertf128_low(XMMRegister dst, Address src) { 1967 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1968 Assembler::vinsertf32x4(dst, dst, src, 0); 1969 } else { 1970 Assembler::vinsertf128(dst, dst, src, 0); 1971 } 1972 } 1973 1974 void vextractf128_low(XMMRegister dst, XMMRegister src) { 1975 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1976 Assembler::vextractf32x4(dst, src, 0); 1977 } else { 1978 Assembler::vextractf128(dst, src, 0); 1979 } 1980 } 1981 1982 void vextractf128_low(Address dst, XMMRegister src) { 1983 if (UseAVX > 2 && VM_Version::supports_avx512novl()) { 1984 Assembler::vextractf32x4(dst, src, 0); 1985 } else { 1986 Assembler::vextractf128(dst, src, 0); 1987 } 1988 } 1989 1990 // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers 1991 void vinserti64x4_low(XMMRegister dst, XMMRegister src) { 1992 Assembler::vinserti64x4(dst, dst, src, 0); 1993 } 1994 void vinsertf64x4_low(XMMRegister dst, XMMRegister src) { 1995 Assembler::vinsertf64x4(dst, dst, src, 0); 1996 } 1997 void vextracti64x4_low(XMMRegister dst, XMMRegister src) { 1998 Assembler::vextracti64x4(dst, src, 0); 1999 } 2000 void vextractf64x4_low(XMMRegister dst, XMMRegister src) { 2001 Assembler::vextractf64x4(dst, src, 0); 2002 } 2003 void vextractf64x4_low(Address dst, XMMRegister src) { 2004 Assembler::vextractf64x4(dst, src, 0); 2005 } 2006 void vinsertf64x4_low(XMMRegister dst, Address src) { 2007 Assembler::vinsertf64x4(dst, dst, src, 0); 2008 } 2009 2010 // Carry-Less Multiplication Quadword 2011 void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2012 // 0x00 - multiply lower 64 bits [0:63] 2013 Assembler::vpclmulqdq(dst, nds, src, 0x00); 2014 } 2015 void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2016 // 0x11 - multiply upper 64 bits [64:127] 2017 Assembler::vpclmulqdq(dst, nds, src, 0x11); 2018 } 2019 void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2020 // 0x10 - multiply nds[0:63] and src[64:127] 2021 Assembler::vpclmulqdq(dst, nds, src, 0x10); 2022 } 2023 void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2024 //0x01 - multiply nds[64:127] and src[0:63] 2025 Assembler::vpclmulqdq(dst, nds, src, 0x01); 2026 } 2027 2028 void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 2029 // 0x00 - multiply lower 64 bits [0:63] 2030 Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len); 2031 } 2032 void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 2033 // 0x11 - multiply upper 64 bits [64:127] 2034 Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len); 2035 } 2036 2037 // AVX-512 mask operations. 
2038 void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2); 2039 void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2); 2040 void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg); 2041 void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2); 2042 void kortest(uint masklen, KRegister src1, KRegister src2); 2043 void ktest(uint masklen, KRegister src1, KRegister src2); 2044 2045 void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); 2046 void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); 2047 2048 void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); 2049 void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); 2050 2051 void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); 2052 void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); 2053 2054 void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); 2055 void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); 2056 2057 void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc); 2058 void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc); 2059 void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc); 2060 void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc); 2061 2062 using Assembler::evpandq; 2063 void evpandq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 2064 2065 using Assembler::evpaddq; 2066 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 2067 2068 using Assembler::evporq; 2069 void evporq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 2070 2071 using Assembler::vpshufb; 2072 void vpshufb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 2073 2074 using Assembler::vpor; 2075 void vpor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); 2076 2077 using Assembler::vpternlogq; 2078 void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch = noreg); 2079 2080 void cmov32( Condition cc, Register dst, Address src); 2081 void cmov32( Condition cc, Register dst, Register src); 2082 2083 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } 2084 2085 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } 2086 void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } 2087 2088 void movoop(Register dst, jobject obj); 2089 void movoop(Address dst, jobject obj, Register rscratch); 2090 2091 
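  // mov_metadata() is the Metadata* counterpart of movoop() above: it moves a
  // Metadata* constant (e.g. a Klass*) into a register or into memory, and the
  // Address form needs a scratch register to materialize the constant.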
void mov_metadata(Register dst, Metadata* obj); 2092 void mov_metadata(Address dst, Metadata* obj, Register rscratch); 2093 2094 void movptr(Register dst, Register src); 2095 void movptr(Register dst, Address src); 2096 void movptr(Register dst, AddressLiteral src); 2097 void movptr(Register dst, ArrayAddress src); 2098 void movptr(Register dst, intptr_t src); 2099 void movptr(Address dst, Register src); 2100 void movptr(Address dst, int32_t imm); 2101 void movptr(Address dst, intptr_t src, Register rscratch); 2102 void movptr(ArrayAddress dst, Register src, Register rscratch); 2103 2104 void movptr(Register dst, RegisterOrConstant src) { 2105 if (src.is_constant()) movptr(dst, src.as_constant()); 2106 else movptr(dst, src.as_register()); 2107 } 2108 2109 2110 // to avoid hiding movl 2111 void mov32(Register dst, AddressLiteral src); 2112 void mov32(AddressLiteral dst, Register src, Register rscratch = noreg); 2113 2114 // Import other mov() methods from the parent class or else 2115 // they will be hidden by the following overriding declaration. 2116 using Assembler::movdl; 2117 void movdl(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 2118 2119 using Assembler::movq; 2120 void movq(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 2121 2122 // Can push value or effective address 2123 void pushptr(AddressLiteral src, Register rscratch); 2124 2125 void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); } 2126 void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); } 2127 2128 void pushoop(jobject obj, Register rscratch); 2129 void pushklass(Metadata* obj, Register rscratch); 2130 2131 // sign extend as need a l to ptr sized element 2132 void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); } 2133 void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } 2134 2135 2136 public: 2137 // Inline type specific methods 2138 #include "asm/macroAssembler_common.hpp" 2139 2140 int store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter = true); 2141 bool move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]); 2142 bool unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, 2143 VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, 2144 RegState reg_state[]); 2145 bool pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index, 2146 VMRegPair* from, int from_count, int& from_index, VMReg to, 2147 RegState reg_state[], Register val_array); 2148 int extend_stack_for_inline_args(int args_on_stack); 2149 void remove_frame(int initial_framesize, bool needs_stack_repair); 2150 VMReg spill_reg_for(VMReg reg); 2151 2152 // clear memory of size 'cnt' qwords, starting at 'base'; 2153 // if 'is_large' is set, do not try to produce short loop 2154 void clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only, KRegister mask=knoreg); 2155 2156 // clear memory initialization sequence for constant size; 2157 void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg); 2158 2159 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers 2160 void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg); 2161 2162 // Fill primitive arrays 2163 void generate_fill(BasicType t, bool aligned, 2164 Register to, Register 
value, Register count, 2165 Register rtmp, XMMRegister xtmp); 2166 2167 void encode_iso_array(Register src, Register dst, Register len, 2168 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, 2169 XMMRegister tmp4, Register tmp5, Register result, bool ascii); 2170 2171 #ifdef _LP64 2172 void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); 2173 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 2174 Register y, Register y_idx, Register z, 2175 Register carry, Register product, 2176 Register idx, Register kdx); 2177 void multiply_add_128_x_128(Register x_xstart, Register y, Register z, 2178 Register yz_idx, Register idx, 2179 Register carry, Register product, int offset); 2180 void multiply_128_x_128_bmi2_loop(Register y, Register z, 2181 Register carry, Register carry2, 2182 Register idx, Register jdx, 2183 Register yz_idx1, Register yz_idx2, 2184 Register tmp, Register tmp3, Register tmp4); 2185 void multiply_128_x_128_loop(Register x_xstart, Register y, Register z, 2186 Register yz_idx, Register idx, Register jdx, 2187 Register carry, Register product, 2188 Register carry2); 2189 void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register tmp0, 2190 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); 2191 void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3, 2192 Register tmp4, Register tmp5, Register rdxReg, Register raxReg); 2193 void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, 2194 Register tmp2); 2195 void multiply_add_64(Register sum, Register op1, Register op2, Register carry, 2196 Register rdxReg, Register raxReg); 2197 void add_one_64(Register z, Register zlen, Register carry, Register tmp1); 2198 void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, 2199 Register tmp3, Register tmp4); 2200 void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, 2201 Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg); 2202 2203 void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1, 2204 Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, 2205 Register raxReg); 2206 void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1, 2207 Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, 2208 Register raxReg); 2209 void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, 2210 Register result, Register tmp1, Register tmp2, 2211 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3); 2212 #endif 2213 2214 // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. 
2215 void update_byte_crc32(Register crc, Register val, Register table); 2216 void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); 2217 2218 2219 #ifdef _LP64 2220 void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2); 2221 void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos, 2222 Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop, 2223 Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup); 2224 #endif // _LP64 2225 2226 // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic 2227 // Note on a naming convention: 2228 // Prefix w = register only used on a Westmere+ architecture 2229 // Prefix n = register only used on a Nehalem architecture 2230 #ifdef _LP64 2231 void crc32c_ipl_alg4(Register in_out, uint32_t n, 2232 Register tmp1, Register tmp2, Register tmp3); 2233 #else 2234 void crc32c_ipl_alg4(Register in_out, uint32_t n, 2235 Register tmp1, Register tmp2, Register tmp3, 2236 XMMRegister xtmp1, XMMRegister xtmp2); 2237 #endif 2238 void crc32c_pclmulqdq(XMMRegister w_xtmp1, 2239 Register in_out, 2240 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, 2241 XMMRegister w_xtmp2, 2242 Register tmp1, 2243 Register n_tmp2, Register n_tmp3); 2244 void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, 2245 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, 2246 Register tmp1, Register tmp2, 2247 Register n_tmp3); 2248 void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, 2249 Register in_out1, Register in_out2, Register in_out3, 2250 Register tmp1, Register tmp2, Register tmp3, 2251 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, 2252 Register tmp4, Register tmp5, 2253 Register n_tmp6); 2254 void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, 2255 Register tmp1, Register tmp2, Register tmp3, 2256 Register tmp4, Register tmp5, Register tmp6, 2257 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, 2258 bool is_pclmulqdq_supported); 2259 // Fold 128-bit data chunk 2260 void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); 2261 void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); 2262 #ifdef _LP64 2263 // Fold 512-bit data chunk 2264 void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset); 2265 #endif // _LP64 2266 // Fold 8-bit data 2267 void fold_8bit_crc32(Register crc, Register table, Register tmp); 2268 void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp); 2269 2270 // Compress char[] array to byte[]. 2271 void char_array_compress(Register src, Register dst, Register len, 2272 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, 2273 XMMRegister tmp4, Register tmp5, Register result, 2274 KRegister mask1 = knoreg, KRegister mask2 = knoreg); 2275 2276 // Inflate byte[] array to char[]. 
2277 void byte_array_inflate(Register src, Register dst, Register len, 2278 XMMRegister tmp1, Register tmp2, KRegister mask = knoreg); 2279 2280 void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask, 2281 Register length, Register temp, int vec_enc); 2282 2283 void fill64_masked(uint shift, Register dst, int disp, 2284 XMMRegister xmm, KRegister mask, Register length, 2285 Register temp, bool use64byteVector = false); 2286 2287 void fill32_masked(uint shift, Register dst, int disp, 2288 XMMRegister xmm, KRegister mask, Register length, 2289 Register temp); 2290 2291 void fill32(Address dst, XMMRegister xmm); 2292 2293 void fill32(Register dst, int disp, XMMRegister xmm); 2294 2295 void fill64(Address dst, XMMRegister xmm, bool use64byteVector = false); 2296 2297 void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false); 2298 2299 #ifdef _LP64 2300 void convert_f2i(Register dst, XMMRegister src); 2301 void convert_d2i(Register dst, XMMRegister src); 2302 void convert_f2l(Register dst, XMMRegister src); 2303 void convert_d2l(Register dst, XMMRegister src); 2304 void round_double(Register dst, XMMRegister src, Register rtmp, Register rcx); 2305 void round_float(Register dst, XMMRegister src, Register rtmp, Register rcx); 2306 2307 void cache_wb(Address line); 2308 void cache_wbsync(bool is_pre); 2309 2310 #ifdef COMPILER2_OR_JVMCI 2311 void generate_fill_avx3(BasicType type, Register to, Register value, 2312 Register count, Register rtmp, XMMRegister xtmp); 2313 #endif // COMPILER2_OR_JVMCI 2314 #endif // _LP64 2315 2316 void vallones(XMMRegister dst, int vector_len); 2317 2318 void check_stack_alignment(Register sp, const char* msg, unsigned bias = 0, Register tmp = noreg); 2319 2320 void lightweight_lock(Register basic_lock, Register obj, Register reg_rax, Register thread, Register tmp, Label& slow); 2321 void lightweight_unlock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow); 2322 2323 #ifdef _LP64 2324 void save_legacy_gprs(); 2325 void restore_legacy_gprs(); 2326 void setcc(Assembler::Condition comparison, Register dst); 2327 #endif 2328 }; 2329 2330 #endif // CPU_X86_MACROASSEMBLER_X86_HPP