1 /* 2 * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef CPU_X86_MACROASSEMBLER_X86_HPP 26 #define CPU_X86_MACROASSEMBLER_X86_HPP 27 28 #include "asm/assembler.hpp" 29 #include "asm/register.hpp" 30 #include "code/vmreg.inline.hpp" 31 #include "compiler/oopMap.hpp" 32 #include "utilities/macros.hpp" 33 #include "runtime/rtmLocking.hpp" 34 #include "runtime/signature.hpp" 35 #include "runtime/vm_version.hpp" 36 37 class ciInlineKlass; 38 39 // MacroAssembler extends Assembler by frequently used macros. 40 // 41 // Instructions for which a 'better' code sequence exists depending 42 // on arguments should also go in here. 43 44 class MacroAssembler: public Assembler { 45 friend class LIR_Assembler; 46 friend class Runtime1; // as_Address() 47 48 public: 49 // Support for VM calls 50 // 51 // This is the base routine called by the different versions of call_VM_leaf. The interpreter 52 // may customize this version by overriding it for its purposes (e.g., to save/restore 53 // additional registers when doing a VM call). 54 55 virtual void call_VM_leaf_base( 56 address entry_point, // the entry point 57 int number_of_arguments // the number of arguments to pop after the call 58 ); 59 60 protected: 61 // This is the base routine called by the different versions of call_VM. The interpreter 62 // may customize this version by overriding it for its purposes (e.g., to save/restore 63 // additional registers when doing a VM call). 64 // 65 // If no java_thread register is specified (noreg) than rdi will be used instead. call_VM_base 66 // returns the register which contains the thread upon return. If a thread register has been 67 // specified, the return value will correspond to that register. If no last_java_sp is specified 68 // (noreg) than rsp will be used instead. 
69 virtual void call_VM_base( // returns the register containing the thread upon return 70 Register oop_result, // where an oop-result ends up if any; use noreg otherwise 71 Register java_thread, // the thread if computed before ; use noreg otherwise 72 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise 73 address entry_point, // the entry point 74 int number_of_arguments, // the number of arguments (w/o thread) to pop after the call 75 bool check_exceptions // whether to check for pending exceptions after return 76 ); 77 78 void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); 79 80 // helpers for FPU flag access 81 // tmp is a temporary register, if none is available use noreg 82 void save_rax (Register tmp); 83 void restore_rax(Register tmp); 84 85 public: 86 MacroAssembler(CodeBuffer* code) : Assembler(code) {} 87 88 // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. 89 // The implementation is only non-empty for the InterpreterMacroAssembler, 90 // as only the interpreter handles PopFrame and ForceEarlyReturn requests. 91 virtual void check_and_handle_popframe(Register java_thread); 92 virtual void check_and_handle_earlyret(Register java_thread); 93 94 Address as_Address(AddressLiteral adr); 95 Address as_Address(ArrayAddress adr, Register rscratch); 96 97 // Support for NULL-checks 98 // 99 // Generates code that causes a NULL OS exception if the content of reg is NULL. 100 // If the accessed location is M[reg + offset] and the offset is known, provide the 101 // offset. No explicit code generation is needed if the offset is within a certain 102 // range (0 <= offset <= page_size). 103 104 void null_check(Register reg, int offset = -1); 105 static bool needs_explicit_null_check(intptr_t offset); 106 static bool uses_implicit_null_check(void* address); 107 108 // markWord tests, kills markWord reg 109 void test_markword_is_inline_type(Register markword, Label& is_inline_type); 110 111 // inlineKlass queries, kills temp_reg 112 void test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type); 113 void test_klass_is_empty_inline_type(Register klass, Register temp_reg, Label& is_empty_inline_type); 114 void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type); 115 116 // Get the default value oop for the given InlineKlass 117 void get_default_value_oop(Register inline_klass, Register temp_reg, Register obj); 118 // The empty value oop, for the given InlineKlass ("empty" as in no instance fields) 119 // get_default_value_oop with extra assertion for empty inline klass 120 void get_empty_inline_type_oop(Register inline_klass, Register temp_reg, Register obj); 121 122 void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free); 123 void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free); 124 void test_field_is_inlined(Register flags, Register temp_reg, Label& is_inlined); 125 126 // Check oops for special arrays, i.e. 
flattened and/or null-free 127 void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label); 128 void test_flattened_array_oop(Register oop, Register temp_reg, Label&is_flattened_array); 129 void test_non_flattened_array_oop(Register oop, Register temp_reg, Label&is_non_flattened_array); 130 void test_null_free_array_oop(Register oop, Register temp_reg, Label&is_null_free_array); 131 void test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array); 132 133 // Check array klass layout helper for flatten or null-free arrays... 134 void test_flattened_array_layout(Register lh, Label& is_flattened_array); 135 void test_non_flattened_array_layout(Register lh, Label& is_non_flattened_array); 136 void test_null_free_array_layout(Register lh, Label& is_null_free_array); 137 void test_non_null_free_array_layout(Register lh, Label& is_non_null_free_array); 138 139 // Required platform-specific helpers for Label::patch_instructions. 140 // They _shadow_ the declarations in AbstractAssembler, which are undefined. 141 void pd_patch_instruction(address branch, address target, const char* file, int line) { 142 unsigned char op = branch[0]; 143 assert(op == 0xE8 /* call */ || 144 op == 0xE9 /* jmp */ || 145 op == 0xEB /* short jmp */ || 146 (op & 0xF0) == 0x70 /* short jcc */ || 147 op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ || 148 op == 0xC7 && branch[1] == 0xF8 /* xbegin */, 149 "Invalid opcode at patch point"); 150 151 if (op == 0xEB || (op & 0xF0) == 0x70) { 152 // short offset operators (jmp and jcc) 153 char* disp = (char*) &branch[1]; 154 int imm8 = target - (address) &disp[1]; 155 guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d", 156 file == NULL ? "<NULL>" : file, line); 157 *disp = imm8; 158 } else { 159 int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 
2: 1]; 160 int imm32 = target - (address) &disp[1]; 161 *disp = imm32; 162 } 163 } 164 165 // The following 4 methods return the offset of the appropriate move instruction 166 167 // Support for fast byte/short loading with zero extension (depending on particular CPU) 168 int load_unsigned_byte(Register dst, Address src); 169 int load_unsigned_short(Register dst, Address src); 170 171 // Support for fast byte/short loading with sign extension (depending on particular CPU) 172 int load_signed_byte(Register dst, Address src); 173 int load_signed_short(Register dst, Address src); 174 175 // Support for sign-extension (hi:lo = extend_sign(lo)) 176 void extend_sign(Register hi, Register lo); 177 178 // Load and store values by size and signed-ness 179 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); 180 void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); 181 182 // Support for inc/dec with optimal instruction selection depending on value 183 184 void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } 185 void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } 186 187 void decrementl(Address dst, int value = 1); 188 void decrementl(Register reg, int value = 1); 189 190 void decrementq(Register reg, int value = 1); 191 void decrementq(Address dst, int value = 1); 192 193 void incrementl(Address dst, int value = 1); 194 void incrementl(Register reg, int value = 1); 195 196 void incrementq(Register reg, int value = 1); 197 void incrementq(Address dst, int value = 1); 198 199 // Support optimal SSE move instructions. 200 void movflt(XMMRegister dst, XMMRegister src) { 201 if (dst-> encoding() == src->encoding()) return; 202 if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } 203 else { movss (dst, src); return; } 204 } 205 void movflt(XMMRegister dst, Address src) { movss(dst, src); } 206 void movflt(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 207 void movflt(Address dst, XMMRegister src) { movss(dst, src); } 208 209 // Move with zero extension 210 void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); } 211 212 void movdbl(XMMRegister dst, XMMRegister src) { 213 if (dst-> encoding() == src->encoding()) return; 214 if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } 215 else { movsd (dst, src); return; } 216 } 217 218 void movdbl(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 219 220 void movdbl(XMMRegister dst, Address src) { 221 if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } 222 else { movlpd(dst, src); return; } 223 } 224 void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } 225 226 void incrementl(AddressLiteral dst, Register rscratch = noreg); 227 void incrementl(ArrayAddress dst, Register rscratch); 228 229 void incrementq(AddressLiteral dst, Register rscratch = noreg); 230 231 // Alignment 232 void align32(); 233 void align64(); 234 void align(int modulus); 235 void align(int modulus, int target); 236 237 void post_call_nop(); 238 // A 5 byte nop that is safe for patching (see patch_verified_entry) 239 void fat_nop(); 240 241 // Stack frame creation/removal 242 void enter(); 243 void leave(); 244 245 // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) 246 // The pointer will be loaded into the thread register. 
247 void get_thread(Register thread); 248 249 #ifdef _LP64 250 // Support for argument shuffling 251 252 // bias in bytes 253 void move32_64(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0); 254 void long_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0); 255 void float_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0); 256 void double_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0); 257 void move_ptr(VMRegPair src, VMRegPair dst); 258 void object_move(OopMap* map, 259 int oop_handle_offset, 260 int framesize_in_slots, 261 VMRegPair src, 262 VMRegPair dst, 263 bool is_receiver, 264 int* receiver_offset); 265 #endif // _LP64 266 267 // Support for VM calls 268 // 269 // It is imperative that all calls into the VM are handled via the call_VM macros. 270 // They make sure that the stack linkage is setup correctly. call_VM's correspond 271 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 272 273 274 void call_VM(Register oop_result, 275 address entry_point, 276 bool check_exceptions = true); 277 void call_VM(Register oop_result, 278 address entry_point, 279 Register arg_1, 280 bool check_exceptions = true); 281 void call_VM(Register oop_result, 282 address entry_point, 283 Register arg_1, Register arg_2, 284 bool check_exceptions = true); 285 void call_VM(Register oop_result, 286 address entry_point, 287 Register arg_1, Register arg_2, Register arg_3, 288 bool check_exceptions = true); 289 290 // Overloadings with last_Java_sp 291 void call_VM(Register oop_result, 292 Register last_java_sp, 293 address entry_point, 294 int number_of_arguments = 0, 295 bool check_exceptions = true); 296 void call_VM(Register oop_result, 297 Register last_java_sp, 298 address entry_point, 299 Register arg_1, bool 300 check_exceptions = true); 301 void call_VM(Register oop_result, 302 Register last_java_sp, 303 address entry_point, 304 Register arg_1, Register arg_2, 305 bool check_exceptions = true); 306 void call_VM(Register oop_result, 307 Register last_java_sp, 308 address entry_point, 309 Register arg_1, Register arg_2, Register arg_3, 310 bool check_exceptions = true); 311 312 void get_vm_result (Register oop_result, Register thread); 313 void get_vm_result_2(Register metadata_result, Register thread); 314 315 // These always tightly bind to MacroAssembler::call_VM_base 316 // bypassing the virtual implementation 317 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); 318 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); 319 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); 320 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); 321 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); 322 323 void call_VM_leaf0(address entry_point); 324 void call_VM_leaf(address entry_point, 325 int number_of_arguments = 0); 326 void call_VM_leaf(address entry_point, 327 Register arg_1); 328 void 
call_VM_leaf(address entry_point, 329 Register arg_1, Register arg_2); 330 void call_VM_leaf(address entry_point, 331 Register arg_1, Register arg_2, Register arg_3); 332 333 void call_VM_leaf(address entry_point, 334 Register arg_1, Register arg_2, Register arg_3, Register arg_4); 335 336 // These always tightly bind to MacroAssembler::call_VM_leaf_base 337 // bypassing the virtual implementation 338 void super_call_VM_leaf(address entry_point); 339 void super_call_VM_leaf(address entry_point, Register arg_1); 340 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); 341 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); 342 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); 343 344 // last Java Frame (fills frame anchor) 345 void set_last_Java_frame(Register thread, 346 Register last_java_sp, 347 Register last_java_fp, 348 address last_java_pc, 349 Register rscratch); 350 351 // thread in the default location (r15_thread on 64bit) 352 void set_last_Java_frame(Register last_java_sp, 353 Register last_java_fp, 354 address last_java_pc, 355 Register rscratch); 356 357 void reset_last_Java_frame(Register thread, bool clear_fp); 358 359 // thread in the default location (r15_thread on 64bit) 360 void reset_last_Java_frame(bool clear_fp); 361 362 // jobjects 363 void clear_jweak_tag(Register possibly_jweak); 364 void resolve_jobject(Register value, Register thread, Register tmp); 365 366 // C 'boolean' to Java boolean: x == 0 ? 0 : 1 367 void c2bool(Register x); 368 369 // C++ bool manipulation 370 371 void movbool(Register dst, Address src); 372 void movbool(Address dst, bool boolconst); 373 void movbool(Address dst, Register src); 374 void testbool(Register dst); 375 376 void resolve_oop_handle(Register result, Register tmp); 377 void resolve_weak_handle(Register result, Register tmp); 378 void load_mirror(Register mirror, Register method, Register tmp); 379 void load_method_holder_cld(Register rresult, Register rmethod); 380 381 void load_method_holder(Register holder, Register method); 382 383 // oop manipulations 384 void load_metadata(Register dst, Register src); 385 void load_klass(Register dst, Register src, Register tmp); 386 void store_klass(Register dst, Register src, Register tmp); 387 388 void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, 389 Register tmp1, Register thread_tmp); 390 void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val, 391 Register tmp1, Register tmp2, Register tmp3); 392 393 void access_value_copy(DecoratorSet decorators, Register src, Register dst, Register inline_klass); 394 395 // inline type data payload offsets... 
396 void first_field_offset(Register inline_klass, Register offset); 397 void data_for_oop(Register oop, Register data, Register inline_klass); 398 // get data payload ptr a flat value array at index, kills rcx and index 399 void data_for_value_array_index(Register array, Register array_klass, 400 Register index, Register data); 401 402 403 void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, 404 Register thread_tmp = noreg, DecoratorSet decorators = 0); 405 void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, 406 Register thread_tmp = noreg, DecoratorSet decorators = 0); 407 void store_heap_oop(Address dst, Register val, Register tmp1 = noreg, 408 Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); 409 410 // Used for storing NULL. All other oop constants should be 411 // stored using routines that take a jobject. 412 void store_heap_oop_null(Address dst); 413 414 void load_prototype_header(Register dst, Register src, Register tmp); 415 416 #ifdef _LP64 417 void store_klass_gap(Register dst, Register src); 418 419 // This dummy is to prevent a call to store_heap_oop from 420 // converting a zero (like NULL) into a Register by giving 421 // the compiler two choices it can't resolve 422 423 void store_heap_oop(Address dst, void* dummy); 424 425 void encode_heap_oop(Register r); 426 void decode_heap_oop(Register r); 427 void encode_heap_oop_not_null(Register r); 428 void decode_heap_oop_not_null(Register r); 429 void encode_heap_oop_not_null(Register dst, Register src); 430 void decode_heap_oop_not_null(Register dst, Register src); 431 432 void set_narrow_oop(Register dst, jobject obj); 433 void set_narrow_oop(Address dst, jobject obj); 434 void cmp_narrow_oop(Register dst, jobject obj); 435 void cmp_narrow_oop(Address dst, jobject obj); 436 437 void encode_klass_not_null(Register r, Register tmp); 438 void decode_klass_not_null(Register r, Register tmp); 439 void encode_and_move_klass_not_null(Register dst, Register src); 440 void decode_and_move_klass_not_null(Register dst, Register src); 441 void set_narrow_klass(Register dst, Klass* k); 442 void set_narrow_klass(Address dst, Klass* k); 443 void cmp_narrow_klass(Register dst, Klass* k); 444 void cmp_narrow_klass(Address dst, Klass* k); 445 446 // if heap base register is used - reinit it with the correct value 447 void reinit_heapbase(); 448 449 DEBUG_ONLY(void verify_heapbase(const char* msg);) 450 451 #endif // _LP64 452 453 // Int division/remainder for Java 454 // (as idivl, but checks for special case as described in JVM spec.) 455 // returns idivl instruction offset for implicit exception handling 456 int corrected_idivl(Register reg); 457 458 // Long division/remainder for Java 459 // (as idivq, but checks for special case as described in JVM spec.) 460 // returns idivq instruction offset for implicit exception handling 461 int corrected_idivq(Register reg); 462 463 void int3(); 464 465 // Long operation macros for a 32bit cpu 466 // Long negation for Java 467 void lneg(Register hi, Register lo); 468 469 // Long multiplication for Java 470 // (destroys contents of eax, ebx, ecx and edx) 471 void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y 472 473 // Long shifts for Java 474 // (semantics as described in JVM spec.) 475 void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) 476 void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) 477 478 // Long compare for Java 479 // (semantics as described in JVM spec.) 
480 void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) 481 482 483 // misc 484 485 // Sign extension 486 void sign_extend_short(Register reg); 487 void sign_extend_byte(Register reg); 488 489 // Division by power of 2, rounding towards 0 490 void division_with_shift(Register reg, int shift_value); 491 492 #ifndef _LP64 493 // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: 494 // 495 // CF (corresponds to C0) if x < y 496 // PF (corresponds to C2) if unordered 497 // ZF (corresponds to C3) if x = y 498 // 499 // The arguments are in reversed order on the stack (i.e., top of stack is first argument). 500 // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) 501 void fcmp(Register tmp); 502 // Variant of the above which allows y to be further down the stack 503 // and which only pops x and y if specified. If pop_right is 504 // specified then pop_left must also be specified. 505 void fcmp(Register tmp, int index, bool pop_left, bool pop_right); 506 507 // Floating-point comparison for Java 508 // Compares the top-most stack entries on the FPU stack and stores the result in dst. 509 // The arguments are in reversed order on the stack (i.e., top of stack is first argument). 510 // (semantics as described in JVM spec.) 511 void fcmp2int(Register dst, bool unordered_is_less); 512 // Variant of the above which allows y to be further down the stack 513 // and which only pops x and y if specified. If pop_right is 514 // specified then pop_left must also be specified. 515 void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); 516 517 // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) 518 // tmp is a temporary register, if none is available use noreg 519 void fremr(Register tmp); 520 521 // only if +VerifyFPU 522 void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); 523 #endif // !LP64 524 525 // dst = c = a * b + c 526 void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); 527 void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); 528 529 void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); 530 void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); 531 void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); 532 void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); 533 534 535 // same as fcmp2int, but using SSE2 536 void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); 537 void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); 538 539 // branch to L if FPU flag C2 is set/not set 540 // tmp is a temporary register, if none is available use noreg 541 void jC2 (Register tmp, Label& L); 542 void jnC2(Register tmp, Label& L); 543 544 // Load float value from 'address'. If UseSSE >= 1, the value is loaded into 545 // register xmm0. Otherwise, the value is loaded onto the FPU stack. 546 void load_float(Address src); 547 548 // Store float value to 'address'. If UseSSE >= 1, the value is stored 549 // from register xmm0. Otherwise, the value is stored from the FPU stack. 550 void store_float(Address dst); 551 552 // Load double value from 'address'. If UseSSE >= 2, the value is loaded into 553 // register xmm0. Otherwise, the value is loaded onto the FPU stack. 
554 void load_double(Address src); 555 556 // Store double value to 'address'. If UseSSE >= 2, the value is stored 557 // from register xmm0. Otherwise, the value is stored from the FPU stack. 558 void store_double(Address dst); 559 560 #ifndef _LP64 561 // Pop ST (ffree & fincstp combined) 562 void fpop(); 563 564 void empty_FPU_stack(); 565 #endif // !_LP64 566 567 void push_IU_state(); 568 void pop_IU_state(); 569 570 void push_FPU_state(); 571 void pop_FPU_state(); 572 573 void push_CPU_state(); 574 void pop_CPU_state(); 575 576 void push_cont_fastpath(); 577 void pop_cont_fastpath(); 578 579 void inc_held_monitor_count(); 580 void dec_held_monitor_count(); 581 582 DEBUG_ONLY(void stop_if_in_cont(Register cont_reg, const char* name);) 583 584 // Round up to a power of two 585 void round_to(Register reg, int modulus); 586 587 private: 588 // General purpose and XMM registers potentially clobbered by native code; there 589 // is no need for FPU or AVX opmask related methods because C1/interpreter 590 // - we save/restore FPU state as a whole always 591 // - do not care about AVX-512 opmask 592 static RegSet call_clobbered_gp_registers(); 593 static XMMRegSet call_clobbered_xmm_registers(); 594 595 void push_set(XMMRegSet set, int offset); 596 void pop_set(XMMRegSet set, int offset); 597 598 public: 599 void push_set(RegSet set, int offset = -1); 600 void pop_set(RegSet set, int offset = -1); 601 602 // Push and pop everything that might be clobbered by a native 603 // runtime call. 604 // Only save the lower 64 bits of each vector register. 605 // Additional registers can be excluded in a passed RegSet. 606 void push_call_clobbered_registers_except(RegSet exclude, bool save_fpu = true); 607 void pop_call_clobbered_registers_except(RegSet exclude, bool restore_fpu = true); 608 609 void push_call_clobbered_registers(bool save_fpu = true) { 610 push_call_clobbered_registers_except(RegSet(), save_fpu); 611 } 612 void pop_call_clobbered_registers(bool restore_fpu = true) { 613 pop_call_clobbered_registers_except(RegSet(), restore_fpu); 614 } 615 616 // allocation 617 618 // Object / value buffer allocation... 619 // Allocate instance of klass, assumes klass initialized by caller 620 // new_obj prefers to be rax 621 // Kills t1 and t2, perserves klass, return allocation in new_obj (rsi on LP64) 622 void allocate_instance(Register klass, Register new_obj, 623 Register t1, Register t2, 624 bool clear_fields, Label& alloc_failed); 625 626 void tlab_allocate( 627 Register thread, // Current thread 628 Register obj, // result: pointer to object after successful allocation 629 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise 630 int con_size_in_bytes, // object size in bytes if known at compile time 631 Register t1, // temp register 632 Register t2, // temp register 633 Label& slow_case // continuation point if fast allocation fails 634 ); 635 void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); 636 637 // For field "index" within "klass", return inline_klass ... 
638 void get_inline_type_field_klass(Register klass, Register index, Register inline_klass); 639 640 // interface method calling 641 void lookup_interface_method(Register recv_klass, 642 Register intf_klass, 643 RegisterOrConstant itable_index, 644 Register method_result, 645 Register scan_temp, 646 Label& no_such_interface, 647 bool return_method = true); 648 649 // virtual method calling 650 void lookup_virtual_method(Register recv_klass, 651 RegisterOrConstant vtable_index, 652 Register method_result); 653 654 // Test sub_klass against super_klass, with fast and slow paths. 655 656 // The fast path produces a tri-state answer: yes / no / maybe-slow. 657 // One of the three labels can be NULL, meaning take the fall-through. 658 // If super_check_offset is -1, the value is loaded up from super_klass. 659 // No registers are killed, except temp_reg. 660 void check_klass_subtype_fast_path(Register sub_klass, 661 Register super_klass, 662 Register temp_reg, 663 Label* L_success, 664 Label* L_failure, 665 Label* L_slow_path, 666 RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); 667 668 // The rest of the type check; must be wired to a corresponding fast path. 669 // It does not repeat the fast path logic, so don't use it standalone. 670 // The temp_reg and temp2_reg can be noreg, if no temps are available. 671 // Updates the sub's secondary super cache as necessary. 672 // If set_cond_codes, condition codes will be Z on success, NZ on failure. 673 void check_klass_subtype_slow_path(Register sub_klass, 674 Register super_klass, 675 Register temp_reg, 676 Register temp2_reg, 677 Label* L_success, 678 Label* L_failure, 679 bool set_cond_codes = false); 680 681 // Simplified, combined version, good for typical uses. 682 // Falls through on failure. 683 void check_klass_subtype(Register sub_klass, 684 Register super_klass, 685 Register temp_reg, 686 Label& L_success); 687 688 void clinit_barrier(Register klass, 689 Register thread, 690 Label* L_fast_path = NULL, 691 Label* L_slow_path = NULL); 692 693 // method handles (JSR 292) 694 Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); 695 696 // Debugging 697 698 // only if +VerifyOops 699 void _verify_oop(Register reg, const char* s, const char* file, int line); 700 void _verify_oop_addr(Address addr, const char* s, const char* file, int line); 701 702 void _verify_oop_checked(Register reg, const char* s, const char* file, int line) { 703 if (VerifyOops) { 704 _verify_oop(reg, s, file, line); 705 } 706 } 707 void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) { 708 if (VerifyOops) { 709 _verify_oop_addr(reg, s, file, line); 710 } 711 } 712 713 // TODO: verify method and klass metadata (compare against vptr?) 
714 void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} 715 void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} 716 717 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__) 718 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__) 719 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__) 720 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) 721 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) 722 723 // Verify or restore cpu control state after JNI call 724 void restore_cpu_control_state_after_jni(Register rscratch); 725 726 // prints msg, dumps registers and stops execution 727 void stop(const char* msg); 728 729 // prints msg and continues 730 void warn(const char* msg); 731 732 // dumps registers and other state 733 void print_state(); 734 735 static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); 736 static void debug64(char* msg, int64_t pc, int64_t regs[]); 737 static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); 738 static void print_state64(int64_t pc, int64_t regs[]); 739 740 void os_breakpoint(); 741 742 void untested() { stop("untested"); } 743 744 void unimplemented(const char* what = ""); 745 746 void should_not_reach_here() { stop("should not reach here"); } 747 748 void print_CPU_state(); 749 750 // Stack overflow checking 751 void bang_stack_with_offset(int offset) { 752 // stack grows down, caller passes positive offset 753 assert(offset > 0, "must bang with negative offset"); 754 movl(Address(rsp, (-offset)), rax); 755 } 756 757 // Writes to stack successive pages until offset reached to check for 758 // stack overflow + shadow pages. Also, clobbers tmp 759 void bang_stack_size(Register size, Register tmp); 760 761 // Check for reserved stack access in method being exited (for JIT) 762 void reserved_stack_check(); 763 764 void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod); 765 766 void verify_tlab(); 767 768 static Condition negate_condition(Condition cond); 769 770 // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit 771 // operands. In general the names are modified to avoid hiding the instruction in Assembler 772 // so that we don't need to implement all the varieties in the Assembler with trivial wrappers 773 // here in MacroAssembler. 
The major exception to this rule is call 774 775 // Arithmetics 776 777 778 void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; } 779 void addptr(Address dst, Register src); 780 781 void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } 782 void addptr(Register dst, int32_t src); 783 void addptr(Register dst, Register src); 784 void addptr(Register dst, RegisterOrConstant src) { 785 if (src.is_constant()) addptr(dst, src.as_constant()); 786 else addptr(dst, src.as_register()); 787 } 788 789 void andptr(Register dst, int32_t src); 790 void andptr(Register dst, Register src) { LP64_ONLY(andq(dst, src)) NOT_LP64(andl(dst, src)) ; } 791 void andptr(Register dst, Address src) { LP64_ONLY(andq(dst, src)) NOT_LP64(andl(dst, src)) ; } 792 793 #ifdef _LP64 794 using Assembler::andq; 795 void andq(Register dst, AddressLiteral src, Register rscratch = noreg); 796 #endif 797 798 void cmp8(AddressLiteral src1, int imm, Register rscratch = noreg); 799 800 // renamed to drag out the casting of address to int32_t/intptr_t 801 void cmp32(Register src1, int32_t imm); 802 803 void cmp32(AddressLiteral src1, int32_t imm, Register rscratch = noreg); 804 // compare reg - mem, or reg - &mem 805 void cmp32(Register src1, AddressLiteral src2, Register rscratch = noreg); 806 807 void cmp32(Register src1, Address src2); 808 809 #ifndef _LP64 810 void cmpklass(Address dst, Metadata* obj); 811 void cmpklass(Register dst, Metadata* obj); 812 void cmpoop(Address dst, jobject obj); 813 #endif // _LP64 814 815 void cmpoop(Register src1, Register src2); 816 void cmpoop(Register src1, Address src2); 817 void cmpoop(Register dst, jobject obj, Register rscratch); 818 819 // NOTE src2 must be the lval. This is NOT an mem-mem compare 820 void cmpptr(Address src1, AddressLiteral src2, Register rscratch); 821 822 void cmpptr(Register src1, AddressLiteral src2, Register rscratch = noreg); 823 824 void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 825 void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 826 // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 827 828 void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 829 void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 830 831 // cmp64 to avoild hiding cmpq 832 void cmp64(Register src1, AddressLiteral src, Register rscratch = noreg); 833 834 void cmpxchgptr(Register reg, Address adr); 835 836 void locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch = noreg); 837 838 void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); } 839 void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); } 840 841 842 void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); } 843 844 void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); } 845 846 void shlptr(Register dst, int32_t shift); 847 void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); } 848 849 void shrptr(Register dst, int32_t shift); 850 void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); } 851 852 void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); } 853 void sarptr(Register dst, int32_t src) { 
LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); } 854 855 void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } 856 857 void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } 858 void subptr(Register dst, int32_t src); 859 // Force generation of a 4 byte immediate value even if it fits into 8bit 860 void subptr_imm32(Register dst, int32_t src); 861 void subptr(Register dst, Register src); 862 void subptr(Register dst, RegisterOrConstant src) { 863 if (src.is_constant()) subptr(dst, (int) src.as_constant()); 864 else subptr(dst, src.as_register()); 865 } 866 867 void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } 868 void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } 869 870 void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } 871 void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } 872 873 void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; } 874 875 876 877 // Helper functions for statistics gathering. 878 // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. 879 void cond_inc32(Condition cond, AddressLiteral counter_addr, Register rscratch = noreg); 880 // Unconditional atomic increment. 881 void atomic_incl(Address counter_addr); 882 void atomic_incl(AddressLiteral counter_addr, Register rscratch = noreg); 883 #ifdef _LP64 884 void atomic_incq(Address counter_addr); 885 void atomic_incq(AddressLiteral counter_addr, Register rscratch = noreg); 886 #endif 887 void atomic_incptr(AddressLiteral counter_addr, Register rscratch = noreg) { LP64_ONLY(atomic_incq(counter_addr, rscratch)) NOT_LP64(atomic_incl(counter_addr, rscratch)) ; } 888 void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; } 889 890 void lea(Register dst, Address adr) { Assembler::lea(dst, adr); } 891 void lea(Register dst, AddressLiteral adr); 892 void lea(Address dst, AddressLiteral adr, Register rscratch); 893 894 void leal32(Register dst, Address src) { leal(dst, src); } 895 896 // Import other testl() methods from the parent class or else 897 // they will be hidden by the following overriding declaration. 
898 using Assembler::testl; 899 void testl(Address dst, int32_t imm32); 900 void testl(Register dst, int32_t imm32); 901 void testl(Register dst, AddressLiteral src); // requires reachable address 902 using Assembler::testq; 903 void testq(Address dst, int32_t imm32); 904 void testq(Register dst, int32_t imm32); 905 906 void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } 907 void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } 908 void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } 909 void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); } 910 911 void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); } 912 void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); } 913 void testptr(Register src1, Register src2); 914 915 void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } 916 void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } 917 918 // Calls 919 920 void call(Label& L, relocInfo::relocType rtype); 921 void call(Register entry); 922 void call(Address addr) { Assembler::call(addr); } 923 924 // NOTE: this call transfers to the effective address of entry NOT 925 // the address contained by entry. This is because this is more natural 926 // for jumps/calls. 927 void call(AddressLiteral entry, Register rscratch = rax); 928 929 // Emit the CompiledIC call idiom 930 void ic_call(address entry, jint method_index = 0); 931 932 void emit_static_call_stub(); 933 934 // Jumps 935 936 // NOTE: these jumps transfer to the effective address of dst NOT 937 // the address contained by dst. This is because this is more natural 938 // for jumps/calls. 939 void jump(AddressLiteral dst, Register rscratch = noreg); 940 941 void jump_cc(Condition cc, AddressLiteral dst, Register rscratch = noreg); 942 943 // 32bit can do a case table jump in one instruction but we no longer allow the base 944 // to be installed in the Address class. 
This jump will transfer to the address 945 // contained in the location described by entry (not the address of entry) 946 void jump(ArrayAddress entry, Register rscratch); 947 948 // Floating 949 950 void push_f(XMMRegister r); 951 void pop_f(XMMRegister r); 952 void push_d(XMMRegister r); 953 void pop_d(XMMRegister r); 954 955 void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); } 956 void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } 957 void andpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 958 959 void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } 960 void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } 961 void andps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 962 963 void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } 964 void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } 965 void comiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 966 967 void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } 968 void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } 969 void comisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 970 971 #ifndef _LP64 972 void fadd_s(Address src) { Assembler::fadd_s(src); } 973 void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); } 974 975 void fldcw(Address src) { Assembler::fldcw(src); } 976 void fldcw(AddressLiteral src); 977 978 void fld_s(int index) { Assembler::fld_s(index); } 979 void fld_s(Address src) { Assembler::fld_s(src); } 980 void fld_s(AddressLiteral src); 981 982 void fld_d(Address src) { Assembler::fld_d(src); } 983 void fld_d(AddressLiteral src); 984 985 void fld_x(Address src) { Assembler::fld_x(src); } 986 void fld_x(AddressLiteral src) { Assembler::fld_x(as_Address(src)); } 987 988 void fmul_s(Address src) { Assembler::fmul_s(src); } 989 void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); } 990 #endif // !_LP64 991 992 void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } 993 void ldmxcsr(AddressLiteral src, Register rscratch = noreg); 994 995 #ifdef _LP64 996 private: 997 void sha256_AVX2_one_round_compute( 998 Register reg_old_h, 999 Register reg_a, 1000 Register reg_b, 1001 Register reg_c, 1002 Register reg_d, 1003 Register reg_e, 1004 Register reg_f, 1005 Register reg_g, 1006 Register reg_h, 1007 int iter); 1008 void sha256_AVX2_four_rounds_compute_first(int start); 1009 void sha256_AVX2_four_rounds_compute_last(int start); 1010 void sha256_AVX2_one_round_and_sched( 1011 XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */ 1012 XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */ 1013 XMMRegister xmm_2, /* ymm6 */ 1014 XMMRegister xmm_3, /* ymm7 */ 1015 Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */ 1016 Register reg_b, /* ebx */ /* full cycle is 8 iterations */ 1017 Register reg_c, /* edi */ 1018 Register reg_d, /* esi */ 1019 Register reg_e, /* r8d */ 1020 Register reg_f, /* r9d */ 1021 Register reg_g, /* r10d */ 1022 Register reg_h, /* r11d */ 1023 int iter); 1024 1025 void addm(int disp, Register r1, Register r2); 1026 1027 void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d, 1028 Register e, Register f, Register g, Register h, int iteration); 1029 1030 void 
sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, 1031 Register a, Register b, Register c, Register d, Register e, Register f, 1032 Register g, Register h, int iteration); 1033 1034 void addmq(int disp, Register r1, Register r2); 1035 public: 1036 void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, 1037 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, 1038 Register buf, Register state, Register ofs, Register limit, Register rsp, 1039 bool multi_block, XMMRegister shuf_mask); 1040 void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, 1041 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, 1042 Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, 1043 XMMRegister shuf_mask); 1044 #endif // _LP64 1045 1046 void fast_md5(Register buf, Address state, Address ofs, Address limit, 1047 bool multi_block); 1048 1049 void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, 1050 XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, 1051 Register buf, Register state, Register ofs, Register limit, Register rsp, 1052 bool multi_block); 1053 1054 #ifdef _LP64 1055 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, 1056 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, 1057 Register buf, Register state, Register ofs, Register limit, Register rsp, 1058 bool multi_block, XMMRegister shuf_mask); 1059 #else 1060 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, 1061 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, 1062 Register buf, Register state, Register ofs, Register limit, Register rsp, 1063 bool multi_block); 1064 #endif 1065 1066 void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, 1067 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, 1068 Register rax, Register rcx, Register rdx, Register tmp); 1069 1070 #ifndef _LP64 1071 private: 1072 // Initialized in macroAssembler_x86_constants.cpp 1073 static address ONES; 1074 static address L_2IL0FLOATPACKET_0; 1075 static address PI4_INV; 1076 static address PI4X3; 1077 static address PI4X4; 1078 1079 public: 1080 void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, 1081 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, 1082 Register rax, Register rcx, Register rdx, Register tmp1); 1083 1084 void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, 1085 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, 1086 Register rax, Register rcx, Register rdx, Register tmp); 1087 1088 void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, 1089 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, 1090 Register rdx, Register tmp); 1091 1092 void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, 1093 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, 1094 Register rax, Register rbx, Register rdx); 1095 1096 void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, 1097 XMMRegister xmm4, XMMRegister xmm5, 
XMMRegister xmm6, XMMRegister xmm7, 1098 Register rax, Register rcx, Register rdx, Register tmp); 1099 1100 void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, 1101 Register edx, Register ebx, Register esi, Register edi, 1102 Register ebp, Register esp); 1103 1104 void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, 1105 Register esi, Register edi, Register ebp, Register esp); 1106 1107 void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, 1108 Register edx, Register ebx, Register esi, Register edi, 1109 Register ebp, Register esp); 1110 1111 void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, 1112 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, 1113 Register rax, Register rcx, Register rdx, Register tmp); 1114 #endif // !_LP64 1115 1116 private: 1117 1118 // these are private because users should be doing movflt/movdbl 1119 1120 void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } 1121 void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } 1122 void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } 1123 void movss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1124 1125 void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } 1126 void movlpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1127 1128 public: 1129 1130 void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } 1131 void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } 1132 void addsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1133 1134 void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } 1135 void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } 1136 void addss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1137 1138 void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); } 1139 void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); } 1140 void addpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1141 1142 using Assembler::vbroadcastsd; 1143 void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1144 1145 using Assembler::vbroadcastss; 1146 void vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1147 1148 void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } 1149 void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } 1150 void divsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1151 1152 void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } 1153 void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } 1154 void divss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1155 1156 // Move Unaligned Double Quadword 1157 void movdqu(Address dst, XMMRegister src); 1158 void movdqu(XMMRegister dst, XMMRegister src); 1159 void movdqu(XMMRegister dst, Address src); 1160 void movdqu(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1161 1162 void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); } 1163 void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); } 1164 void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); } 1165 void kmovwl(KRegister 
dst, Register src) { Assembler::kmovwl(dst, src); } 1166 void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); } 1167 void kmovwl(KRegister dst, AddressLiteral src, Register rscratch = noreg); 1168 1169 void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); } 1170 void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); } 1171 void kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); } 1172 void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); } 1173 void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); } 1174 void kmovql(KRegister dst, AddressLiteral src, Register rscratch = noreg); 1175 1176 // Safe move operation, lowers down to 16bit moves for targets supporting 1177 // AVX512F feature and 64bit moves for targets supporting AVX512BW feature. 1178 void kmov(Address dst, KRegister src); 1179 void kmov(KRegister dst, Address src); 1180 void kmov(KRegister dst, KRegister src); 1181 void kmov(Register dst, KRegister src); 1182 void kmov(KRegister dst, Register src); 1183 1184 using Assembler::movddup; 1185 void movddup(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1186 1187 using Assembler::vmovddup; 1188 void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1189 1190 // AVX Unaligned forms 1191 void vmovdqu(Address dst, XMMRegister src); 1192 void vmovdqu(XMMRegister dst, Address src); 1193 void vmovdqu(XMMRegister dst, XMMRegister src); 1194 void vmovdqu(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1195 void vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1196 1197 // AVX512 Unaligned 1198 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len); 1199 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len); 1200 1201 void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); } 1202 void evmovdqub(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); } 1203 1204 void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { 1205 if (dst->encoding() != src->encoding() || mask != k0) { 1206 Assembler::evmovdqub(dst, mask, src, merge, vector_len); 1207 } 1208 } 1209 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } 1210 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } 1211 void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1212 1213 void evmovdquw(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); } 1214 void evmovdquw(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); } 1215 1216 void evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { 1217 if (dst->encoding() != src->encoding() || mask != k0) { 1218 Assembler::evmovdquw(dst, mask, src, merge, vector_len); 1219 } 1220 } 1221 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } 1222 void evmovdquw(Address dst, KRegister mask, 
XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } 1223 void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1224 1225 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { 1226 if (dst->encoding() != src->encoding()) { 1227 Assembler::evmovdqul(dst, src, vector_len); 1228 } 1229 } 1230 void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); } 1231 void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); } 1232 1233 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { 1234 if (dst->encoding() != src->encoding() || mask != k0) { 1235 Assembler::evmovdqul(dst, mask, src, merge, vector_len); 1236 } 1237 } 1238 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } 1239 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } 1240 void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1241 1242 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { 1243 if (dst->encoding() != src->encoding()) { 1244 Assembler::evmovdquq(dst, src, vector_len); 1245 } 1246 } 1247 void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } 1248 void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } 1249 void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); 1250 1251 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { 1252 if (dst->encoding() != src->encoding() || mask != k0) { 1253 Assembler::evmovdquq(dst, mask, src, merge, vector_len); 1254 } 1255 } 1256 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } 1257 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } 1258 void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); 1259 1260 // Move Aligned Double Quadword 1261 void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); } 1262 void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } 1263 void movdqa(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1264 1265 void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } 1266 void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } 1267 void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } 1268 void movsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1269 1270 void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); } 1271 void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); } 1272 void mulpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); 1273 1274 void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } 1275 void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } 
  void mulsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  // Carry-Less Multiplication Quadword
  void pclmulldq(XMMRegister dst, XMMRegister src) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::pclmulqdq(dst, src, 0x00);
  }
  void pclmulhdq(XMMRegister dst, XMMRegister src) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::pclmulqdq(dst, src, 0x11);
  }

  void pcmpeqb(XMMRegister dst, XMMRegister src);
  void pcmpeqw(XMMRegister dst, XMMRegister src);

  void pcmpestri(XMMRegister dst, Address src, int imm8);
  void pcmpestri(XMMRegister dst, XMMRegister src, int imm8);

  void pmovzxbw(XMMRegister dst, XMMRegister src);
  void pmovzxbw(XMMRegister dst, Address src);

  void pmovmskb(Register dst, XMMRegister src);

  void ptest(XMMRegister dst, XMMRegister src);

  void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
  void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
  void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register rscratch = noreg);

  void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
  void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
  void sqrtss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
  void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
  void subsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
  void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
  void subss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
  void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
  void ucomiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
  void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
  void ucomisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
  void xorpd(XMMRegister dst, XMMRegister src);
  void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
  void xorpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
  void xorps(XMMRegister dst, XMMRegister src);
  void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
  void xorps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  // Shuffle Bytes
  void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
  void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
  void pshufb(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
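
  // Note on the AddressLiteral overloads declared throughout this class: the extra
  // rscratch parameter is a temporary used to materialize the literal's address when
  // it is not reachable with RIP-relative addressing. A rough sketch of the pattern
  // the out-of-line definitions follow (illustrative only; see macroAssembler_x86.cpp
  // for the actual implementations):
  //
  //   void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src, Register rscratch) {
  //     assert(rscratch != noreg || always_reachable(src), "missing");
  //     if (reachable(src)) {
  //       Assembler::movdqa(dst, as_Address(src));
  //     } else {
  //       lea(rscratch, src);                       // materialize the far address
  //       Assembler::movdqa(dst, Address(rscratch, 0));
  //     }
  //   }
  //
  // Passing the default noreg is only safe when the literal is always reachable.
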
  // AVX 3-operands instructions

  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len, Register rscratch = noreg);
  void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len, Register rscratch = noreg);

  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
  void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
  void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
  void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  using Assembler::vpbroadcastd;
  void vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);

  using Assembler::vpbroadcastq;
  void vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  // Vector compares
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int comparison, bool is_signed, int vector_len) {
    Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len);
  }
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg);

  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int comparison, bool is_signed, int vector_len) {
    Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len);
  }
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg);

  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int comparison, bool is_signed, int vector_len) {
    Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len);
  }
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg);

  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int comparison, bool is_signed, int vector_len) {
    Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len);
  }
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg);

  void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);

  // Emit comparison instruction for the specified comparison predicate.
  void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister xtmp, ComparisonPredicate cond, Width width, int vector_len);
  void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);

  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
  void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }

  void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);

  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpmulld(dst, nds, src, vector_len); }
  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpmulld(dst, nds, src, vector_len); }
  void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

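  // The masked shift wrappers below choose between the uniform-shift and the
  // variable-shift ("v"-suffixed, e.g. vpsllvd) EVEX encodings: when is_varshift
  // is true, src supplies a per-element shift count rather than a single count.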
  void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpslld(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsllq(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrld(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsravq(dst, mask, nds, src, merge, vector_len);
    }
  }

  void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
  void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vptest(XMMRegister dst, XMMRegister src);
  void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }

  void punpcklbw(XMMRegister dst, XMMRegister src);
  void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }

  void pshufd(XMMRegister dst, Address src, int mode);
  void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }

  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
  void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }

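  // For the pshufd/pshuflw shuffles above, 'mode' is the two-bit-per-lane selector
  // immediate; for example, mode 0x1B (binary 00'01'10'11) reverses the order of the
  // four selected elements. (Illustrative note, not tied to a particular call site.)
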
  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
  void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
  void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);

  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
  void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
  void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
  void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
  void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
  void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
  void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);

  // AVX Vector instructions

  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
  void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
      Assembler::vpxor(dst, nds, src, vector_len);
    else
      Assembler::vxorpd(dst, nds, src, vector_len);
  }
  void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
      Assembler::vpxor(dst, nds, src, vector_len);
    else
      Assembler::vxorpd(dst, nds, src, vector_len);
  }
  void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  // Simple version for AVX2 256bit vectors
  void vpxor(XMMRegister dst, XMMRegister src) {
    assert(UseAVX >= 2, "Should be at least AVX2");
    Assembler::vpxor(dst, dst, src, AVX_256bit);
  }
  void vpxor(XMMRegister dst, Address src) {
    assert(UseAVX >= 2, "Should be at least AVX2");
    Assembler::vpxor(dst, dst, src, AVX_256bit);
  }

  void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
  void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinserti32x4(dst, nds, src, imm8);
    } else if (UseAVX > 1) {
      // vinserti128 is available only in AVX2
      Assembler::vinserti128(dst, nds, src, imm8);
    } else {
      Assembler::vinsertf128(dst, nds, src, imm8);
    }
  }

  void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinserti32x4(dst, nds, src, imm8);
    } else if (UseAVX > 1) {
      // vinserti128 is available only in AVX2
      Assembler::vinserti128(dst, nds, src, imm8);
    } else {
      Assembler::vinsertf128(dst, nds, src, imm8);
    }
  }

  void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextracti32x4(dst, src, imm8);
    } else if (UseAVX > 1) {
      // vextracti128 is available only in AVX2
      Assembler::vextracti128(dst, src, imm8);
    } else {
      Assembler::vextractf128(dst, src, imm8);
    }
  }

  void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextracti32x4(dst, src, imm8);
    } else if (UseAVX > 1) {
      // vextracti128 is available only in AVX2
      Assembler::vextracti128(dst, src, imm8);
    } else {
      Assembler::vextractf128(dst, src, imm8);
    }
  }

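  // The vinserti128/vextracti128 wrappers above pick whichever form the current
  // target can encode: the 32x4 EVEX forms on AVX-512 targets without VL, the AVX2
  // integer forms when available, and the AVX1 float-domain forms otherwise. For
  // example, vextracti128(dst, src, 1) copies bits 255:128 of src into dst[127:0].
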
  // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
  void vinserti128_high(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 1);
  }
  void vinserti128_high(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 1);
  }
  void vextracti128_high(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 1);
  }
  void vextracti128_high(Address dst, XMMRegister src) {
    vextracti128(dst, src, 1);
  }

  void vinsertf128_high(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 1);
    } else {
      Assembler::vinsertf128(dst, dst, src, 1);
    }
  }

  void vinsertf128_high(XMMRegister dst, Address src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 1);
    } else {
      Assembler::vinsertf128(dst, dst, src, 1);
    }
  }

  void vextractf128_high(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 1);
    } else {
      Assembler::vextractf128(dst, src, 1);
    }
  }

  void vextractf128_high(Address dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 1);
    } else {
      Assembler::vextractf128(dst, src, 1);
    }
  }

  // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
  void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vinserti64x4(dst, dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vinsertf64x4(dst, dst, src, 1);
  }
  void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vextracti64x4(dst, src, 1);
  }
  void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 1);
  }
  void vextractf64x4_high(Address dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, Address src) {
    Assembler::vinsertf64x4(dst, dst, src, 1);
  }

  // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
  void vinserti128_low(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 0);
  }
  void vinserti128_low(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 0);
  }
  void vextracti128_low(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }
  void vextracti128_low(Address dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }

  void vinsertf128_low(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 0);
    } else {
      Assembler::vinsertf128(dst, dst, src, 0);
    }
  }

  void vinsertf128_low(XMMRegister dst, Address src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 0);
    } else {
      Assembler::vinsertf128(dst, dst, src, 0);
    }
  }

  void vextractf128_low(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 0);
    } else {
      Assembler::vextractf128(dst, src, 0);
    }
  }

  void vextractf128_low(Address dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 0);
    } else {
      Assembler::vextractf128(dst, src, 0);
    }
  }

  // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
  void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vinserti64x4(dst, dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vinsertf64x4(dst, dst, src, 0);
  }
  void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vextracti64x4(dst, src, 0);
  }
  void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 0);
  }
  void vextractf64x4_low(Address dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, Address src) {
    Assembler::vinsertf64x4(dst, dst, src, 0);
  }

  // Carry-Less Multiplication Quadword
  void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::vpclmulqdq(dst, nds, src, 0x00);
  }
  void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::vpclmulqdq(dst, nds, src, 0x11);
  }
  void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x10 - multiply nds[0:63] and src[64:127]
    Assembler::vpclmulqdq(dst, nds, src, 0x10);
  }
  void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x01 - multiply nds[64:127] and src[0:63]
    Assembler::vpclmulqdq(dst, nds, src, 0x01);
  }

  void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
  }
  void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
  }

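  // For all of the carry-less multiplication helpers above, imm8 bit 0 selects which
  // quadword of the first source is used and bit 4 selects the quadword of the second
  // source, which is where the 0x00/0x01/0x10/0x11 selectors come from.
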
  // AVX-512 mask operations.
  void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
  void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
  void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
  void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
  void kortest(uint masklen, KRegister src1, KRegister src2);
  void ktest(uint masklen, KRegister src1, KRegister src2);

  void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
  void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
  void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
  void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);

  using Assembler::evpandq;
  void evpandq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  using Assembler::evporq;
  void evporq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);

  using Assembler::vpternlogq;
  void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch = noreg);

  void cmov32( Condition cc, Register dst, Address src);
  void cmov32( Condition cc, Register dst, Register src);

  void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }

  void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
  void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }

  void movoop(Register dst, jobject obj);
  void movoop(Address dst, jobject obj, Register rscratch);

  void mov_metadata(Register dst, Metadata* obj);
  void mov_metadata(Address dst, Metadata* obj, Register rscratch);

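  // Pointer-sized moves: the movptr variants below emit 64-bit moves on LP64 and
  // 32-bit moves otherwise, like the cmovptr/pushptr/popptr helpers nearby.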
  void movptr(Register dst, Register src);
  void movptr(Register dst, Address src);
  void movptr(Register dst, AddressLiteral src);
  void movptr(Register dst, ArrayAddress src);
  void movptr(Register dst, intptr_t src);
  void movptr(Address dst, Register src);
  void movptr(Address dst, int32_t imm);
  void movptr(Address dst, intptr_t src, Register rscratch);
  void movptr(ArrayAddress dst, Register src, Register rscratch);

  void movptr(Register dst, RegisterOrConstant src) {
    if (src.is_constant()) movptr(dst, src.as_constant());
    else                   movptr(dst, src.as_register());
  }


  // to avoid hiding movl
  void mov32(Register dst, AddressLiteral src);
  void mov32(AddressLiteral dst, Register src, Register rscratch = noreg);

  // Import other mov() methods from the parent class or else
  // they will be hidden by the following overriding declaration.
  using Assembler::movdl;
  void movdl(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  using Assembler::movq;
  void movq(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

  // Can push value or effective address
  void pushptr(AddressLiteral src, Register rscratch);

  void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
  void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }

  void pushoop(jobject obj, Register rscratch);
  void pushklass(Metadata* obj, Register rscratch);

  // sign-extend a 32-bit value to a pointer-sized element as needed
  void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
  void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }


 public:
  // Inline type specific methods
#include "asm/macroAssembler_common.hpp"

  int store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter = true);
  bool move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]);
  bool unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
                            VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
                            RegState reg_state[]);
  bool pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
                          VMRegPair* from, int from_count, int& from_index, VMReg to,
                          RegState reg_state[], Register val_array);
  int extend_stack_for_inline_args(int args_on_stack);
  void remove_frame(int initial_framesize, bool needs_stack_repair);
  VMReg spill_reg_for(VMReg reg);

  // clear memory of size 'cnt' qwords, starting at 'base';
  // if 'is_large' is set, do not try to produce a short loop
  void clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only, KRegister mask=knoreg);

  // clear memory initialization sequence for constant size;
  void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);

  // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
  void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);

  // Fill primitive arrays
  void generate_fill(BasicType t, bool aligned,
                     Register to, Register value, Register count,
                     Register rtmp, XMMRegister xtmp);

  void encode_iso_array(Register src, Register dst, Register len,
                        XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                        XMMRegister tmp4, Register tmp5, Register result, bool ascii);

#ifdef _LP64
  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y,
                             Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
                              Register yz_idx, Register idx,
                              Register carry, Register product, int offset);
  void multiply_128_x_128_bmi2_loop(Register y, Register z,
                                    Register carry, Register carry2,
                                    Register idx, Register jdx,
                                    Register yz_idx1, Register yz_idx2,
                                    Register tmp, Register tmp3, Register tmp4);
  void multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
                               Register yz_idx, Register idx, Register jdx,
                               Register carry, Register product,
                               Register carry2);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
  void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
                     Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
  void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
                            Register tmp2);
  void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
                       Register rdxReg, Register raxReg);
  void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
  void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
                   Register tmp3, Register tmp4);
  void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
                     Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);

  void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
                             Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
                             Register raxReg);
  void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
               Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
               Register raxReg);
  void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
                           Register result, Register tmp1, Register tmp2,
                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
#endif

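  // The LP64-only helpers above provide the building blocks used by the large-integer
  // intrinsic stubs (BigInteger multiplyToLen/squareToLen/mulAdd) and by vectorizedMismatch.
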
  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void update_byte_crc32(Register crc, Register val, Register table);
  void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);


#ifdef _LP64
  void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2);
  void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos,
                                Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
                                Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup);
#endif // _LP64

  // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
  // Note on a naming convention:
  // Prefix w = register only used on a Westmere+ architecture
  // Prefix n = register only used on a Nehalem architecture
#ifdef _LP64
  void crc32c_ipl_alg4(Register in_out, uint32_t n,
                       Register tmp1, Register tmp2, Register tmp3);
#else
  void crc32c_ipl_alg4(Register in_out, uint32_t n,
                       Register tmp1, Register tmp2, Register tmp3,
                       XMMRegister xtmp1, XMMRegister xtmp2);
#endif
  void crc32c_pclmulqdq(XMMRegister w_xtmp1,
                        Register in_out,
                        uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
                        XMMRegister w_xtmp2,
                        Register tmp1,
                        Register n_tmp2, Register n_tmp3);
  void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
                       XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                       Register tmp1, Register tmp2,
                       Register n_tmp3);
  void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
                         Register in_out1, Register in_out2, Register in_out3,
                         Register tmp1, Register tmp2, Register tmp3,
                         XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                         Register tmp4, Register tmp5,
                         Register n_tmp6);
  void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
                            Register tmp1, Register tmp2, Register tmp3,
                            Register tmp4, Register tmp5, Register tmp6,
                            XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                            bool is_pclmulqdq_supported);
  // Fold 128-bit data chunk
  void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
  void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
#ifdef _LP64
  // Fold 512-bit data chunk
  void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset);
#endif // _LP64
  // Fold 8-bit data
  void fold_8bit_crc32(Register crc, Register table, Register tmp);
  void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);

  // Compress char[] array to byte[].
  void char_array_compress(Register src, Register dst, Register len,
                           XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                           XMMRegister tmp4, Register tmp5, Register result,
                           KRegister mask1 = knoreg, KRegister mask2 = knoreg);

  // Inflate byte[] array to char[].
  void byte_array_inflate(Register src, Register dst, Register len,
                          XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);

  void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
                   Register length, Register temp, int vec_enc);

  void fill64_masked(uint shift, Register dst, int disp,
                     XMMRegister xmm, KRegister mask, Register length,
                     Register temp, bool use64byteVector = false);

  void fill32_masked(uint shift, Register dst, int disp,
                     XMMRegister xmm, KRegister mask, Register length,
                     Register temp);

  void fill32(Address dst, XMMRegister xmm);

  void fill32(Register dst, int disp, XMMRegister xmm);

  void fill64(Address dst, XMMRegister xmm, bool use64byteVector = false);

  void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false);

#ifdef _LP64
  void convert_f2i(Register dst, XMMRegister src);
  void convert_d2i(Register dst, XMMRegister src);
  void convert_f2l(Register dst, XMMRegister src);
  void convert_d2l(Register dst, XMMRegister src);
  void round_double(Register dst, XMMRegister src, Register rtmp, Register rcx);
  void round_float(Register dst, XMMRegister src, Register rtmp, Register rcx);

  void cache_wb(Address line);
  void cache_wbsync(bool is_pre);

#ifdef COMPILER2_OR_JVMCI
  void generate_fill_avx3(BasicType type, Register to, Register value,
                          Register count, Register rtmp, XMMRegister xtmp);
#endif // COMPILER2_OR_JVMCI
#endif // _LP64

  void vallones(XMMRegister dst, int vector_len);

  void check_stack_alignment(Register sp, const char* msg, unsigned bias = 0, Register tmp = noreg);

};

/**
 * class SkipIfEqual:
 *
 * Instantiating this class emits assembly code that jumps around any code
 * generated between the creation of the instance and its automatic destruction
 * at the end of the enclosing scope, depending on the run-time value of the
 * flag passed to the constructor.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value, Register rscratch);
  ~SkipIfEqual();
};

#endif // CPU_X86_MACROASSEMBLER_X86_HPP