1 /*
   2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef CPU_X86_MACROASSEMBLER_X86_HPP
  26 #define CPU_X86_MACROASSEMBLER_X86_HPP
  27 
  28 #include "asm/assembler.hpp"
  29 #include "asm/register.hpp"
  30 #include "code/vmreg.inline.hpp"
  31 #include "compiler/oopMap.hpp"
  32 #include "utilities/macros.hpp"
  33 #include "runtime/vm_version.hpp"
  34 #include "utilities/checkedCast.hpp"
  35 
  36 // MacroAssembler extends Assembler by frequently used macros.
  37 //
  38 // Instructions for which a 'better' code sequence exists depending
  39 // on arguments should also go in here.
  40 
  41 class MacroAssembler: public Assembler {
  42   friend class LIR_Assembler;
  43   friend class Runtime1;      // as_Address()
  44 
  45  public:
  46   // Support for VM calls
  47   //
  48   // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  49   // may customize this version by overriding it for its purposes (e.g., to save/restore
  50   // additional registers when doing a VM call).
  51 
  52   virtual void call_VM_leaf_base(
  53     address entry_point,               // the entry point
  54     int     number_of_arguments        // the number of arguments to pop after the call
  55   );
  56 
  57  protected:
  58   // This is the base routine called by the different versions of call_VM. The interpreter
  59   // may customize this version by overriding it for its purposes (e.g., to save/restore
  60   // additional registers when doing a VM call).
  61   //
  62   // If no java_thread register is specified (noreg) than rdi will be used instead. call_VM_base
  63   // returns the register which contains the thread upon return. If a thread register has been
  64   // specified, the return value will correspond to that register. If no last_java_sp is specified
  65   // (noreg) than rsp will be used instead.
  66   virtual void call_VM_base(           // returns the register containing the thread upon return
  67     Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
  68     Register java_thread,              // the thread if computed before     ; use noreg otherwise
  69     Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
  70     address  entry_point,              // the entry point
  71     int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
  72     bool     check_exceptions          // whether to check for pending exceptions after return
  73   );
  74 
  75   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
  76 
  77   // helpers for FPU flag access
  78   // tmp is a temporary register, if none is available use noreg
  79   void save_rax   (Register tmp);
  80   void restore_rax(Register tmp);
  81 
  82  public:
  83   MacroAssembler(CodeBuffer* code) : Assembler(code) {}
  84 
  85  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  86  // The implementation is only non-empty for the InterpreterMacroAssembler,
  87  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  88  virtual void check_and_handle_popframe(Register java_thread);
  89  virtual void check_and_handle_earlyret(Register java_thread);
  90 
  91   Address as_Address(AddressLiteral adr);
  92   Address as_Address(ArrayAddress adr, Register rscratch);
  93 
  94   // Support for null-checks
  95   //
  96   // Generates code that causes a null OS exception if the content of reg is null.
  97   // If the accessed location is M[reg + offset] and the offset is known, provide the
  98   // offset. No explicit code generation is needed if the offset is within a certain
  99   // range (0 <= offset <= page_size).
 100 
 101   void null_check(Register reg, int offset = -1);
 102   static bool needs_explicit_null_check(intptr_t offset);
 103   static bool uses_implicit_null_check(void* address);
 104 
 105   // Required platform-specific helpers for Label::patch_instructions.
 106   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
 107   void pd_patch_instruction(address branch, address target, const char* file, int line) {
 108     unsigned char op = branch[0];
 109     assert(op == 0xE8 /* call */ ||
 110         op == 0xE9 /* jmp */ ||
 111         op == 0xEB /* short jmp */ ||
 112         (op & 0xF0) == 0x70 /* short jcc */ ||
 113         (op == 0x0F && (branch[1] & 0xF0) == 0x80) /* jcc */ ||
 114         (op == 0xC7 && branch[1] == 0xF8) /* xbegin */ ||
 115         (op == 0x8D) /* lea */,
 116         "Invalid opcode at patch point");
 117 
 118     if (op == 0xEB || (op & 0xF0) == 0x70) {
 119       // short offset operators (jmp and jcc)
 120       char* disp = (char*) &branch[1];
 121       int imm8 = checked_cast<int>(target - (address) &disp[1]);
 122       guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d",
 123                 file == nullptr ? "<null>" : file, line);
 124       *disp = (char)imm8;
 125     } else {
 126       int* disp = (int*) &branch[(op == 0x0F || op == 0xC7 || op == 0x8D) ? 2 : 1];
 127       int imm32 = checked_cast<int>(target - (address) &disp[1]);
 128       *disp = imm32;
 129     }
 130   }
 131 
 132   // The following 4 methods return the offset of the appropriate move instruction
 133 
 134   // Support for fast byte/short loading with zero extension (depending on particular CPU)
 135   int load_unsigned_byte(Register dst, Address src);
 136   int load_unsigned_short(Register dst, Address src);
 137 
 138   // Support for fast byte/short loading with sign extension (depending on particular CPU)
 139   int load_signed_byte(Register dst, Address src);
 140   int load_signed_short(Register dst, Address src);
 141 
 142   // Support for sign-extension (hi:lo = extend_sign(lo))
 143   void extend_sign(Register hi, Register lo);
 144 
 145   // Load and store values by size and signed-ness
 146   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
 147   void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
 148 
 149   // Support for inc/dec with optimal instruction selection depending on value
 150 
 151   void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
 152   void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
 153   void increment(Address dst, int value = 1)  { LP64_ONLY(incrementq(dst, value)) NOT_LP64(incrementl(dst, value)) ; }
 154   void decrement(Address dst, int value = 1)  { LP64_ONLY(decrementq(dst, value)) NOT_LP64(decrementl(dst, value)) ; }
 155 
 156   void decrementl(Address dst, int value = 1);
 157   void decrementl(Register reg, int value = 1);
 158 
 159   void decrementq(Register reg, int value = 1);
 160   void decrementq(Address dst, int value = 1);
 161 
 162   void incrementl(Address dst, int value = 1);
 163   void incrementl(Register reg, int value = 1);
 164 
 165   void incrementq(Register reg, int value = 1);
 166   void incrementq(Address dst, int value = 1);
 167 
 168   void incrementl(AddressLiteral dst, Register rscratch = noreg);
 169   void incrementl(ArrayAddress   dst, Register rscratch);
 170 
 171   void incrementq(AddressLiteral dst, Register rscratch = noreg);
 172 
 173   // Support optimal SSE move instructions.
 174   void movflt(XMMRegister dst, XMMRegister src) {
 175     if (dst-> encoding() == src->encoding()) return;
 176     if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
 177     else                       { movss (dst, src); return; }
 178   }
 179   void movflt(XMMRegister dst, Address src) { movss(dst, src); }
 180   void movflt(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
 181   void movflt(Address dst, XMMRegister src) { movss(dst, src); }
 182 
 183   // Move with zero extension
 184   void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); }
 185 
 186   void movdbl(XMMRegister dst, XMMRegister src) {
 187     if (dst-> encoding() == src->encoding()) return;
 188     if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
 189     else                       { movsd (dst, src); return; }
 190   }
 191 
 192   void movdbl(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
 193 
 194   void movdbl(XMMRegister dst, Address src) {
 195     if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
 196     else                         { movlpd(dst, src); return; }
 197   }
 198   void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
 199 
 200   void flt_to_flt16(Register dst, XMMRegister src, XMMRegister tmp) {
 201     // Use separate tmp XMM register because caller may
 202     // requires src XMM register to be unchanged (as in x86.ad).
 203     vcvtps2ph(tmp, src, 0x04, Assembler::AVX_128bit);
 204     movdl(dst, tmp);
 205     movswl(dst, dst);
 206   }
 207 
 208   void flt16_to_flt(XMMRegister dst, Register src) {
 209     movdl(dst, src);
 210     vcvtph2ps(dst, dst, Assembler::AVX_128bit);
 211   }
 212 
 213   // Alignment
 214   void align32();
 215   void align64();
 216   void align(uint modulus);
 217   void align(uint modulus, uint target);
 218 
 219   void post_call_nop();
 220   // A 5 byte nop that is safe for patching (see patch_verified_entry)
 221   void fat_nop();
 222 
 223   // Stack frame creation/removal
 224   void enter();
 225   void leave();
 226 
 227   // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
 228   // The pointer will be loaded into the thread register.
 229   void get_thread(Register thread);
 230 
 231 #ifdef _LP64
 232   // Support for argument shuffling
 233 
 234   // bias in bytes
 235   void move32_64(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0);
 236   void long_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0);
 237   void float_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0);
 238   void double_move(VMRegPair src, VMRegPair dst, Register tmp = rax, int in_stk_bias = 0, int out_stk_bias = 0);
 239   void move_ptr(VMRegPair src, VMRegPair dst);
 240   void object_move(OopMap* map,
 241                    int oop_handle_offset,
 242                    int framesize_in_slots,
 243                    VMRegPair src,
 244                    VMRegPair dst,
 245                    bool is_receiver,
 246                    int* receiver_offset);
 247 #endif // _LP64
 248 
 249   // Support for VM calls
 250   //
 251   // It is imperative that all calls into the VM are handled via the call_VM macros.
 252   // They make sure that the stack linkage is setup correctly. call_VM's correspond
 253   // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
 254 
 255 
 256   void call_VM(Register oop_result,
 257                address entry_point,
 258                bool check_exceptions = true);
 259   void call_VM(Register oop_result,
 260                address entry_point,
 261                Register arg_1,
 262                bool check_exceptions = true);
 263   void call_VM(Register oop_result,
 264                address entry_point,
 265                Register arg_1, Register arg_2,
 266                bool check_exceptions = true);
 267   void call_VM(Register oop_result,
 268                address entry_point,
 269                Register arg_1, Register arg_2, Register arg_3,
 270                bool check_exceptions = true);
 271 
 272   // Overloadings with last_Java_sp
 273   void call_VM(Register oop_result,
 274                Register last_java_sp,
 275                address entry_point,
 276                int number_of_arguments = 0,
 277                bool check_exceptions = true);
 278   void call_VM(Register oop_result,
 279                Register last_java_sp,
 280                address entry_point,
 281                Register arg_1, bool
 282                check_exceptions = true);
 283   void call_VM(Register oop_result,
 284                Register last_java_sp,
 285                address entry_point,
 286                Register arg_1, Register arg_2,
 287                bool check_exceptions = true);
 288   void call_VM(Register oop_result,
 289                Register last_java_sp,
 290                address entry_point,
 291                Register arg_1, Register arg_2, Register arg_3,
 292                bool check_exceptions = true);
 293 
 294   void get_vm_result  (Register oop_result, Register thread);
 295   void get_vm_result_2(Register metadata_result, Register thread);
 296 
 297   // These always tightly bind to MacroAssembler::call_VM_base
 298   // bypassing the virtual implementation
 299   void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
 300   void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
 301   void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
 302   void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
 303   void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
 304 
 305   void call_VM_leaf0(address entry_point);
 306   void call_VM_leaf(address entry_point,
 307                     int number_of_arguments = 0);
 308   void call_VM_leaf(address entry_point,
 309                     Register arg_1);
 310   void call_VM_leaf(address entry_point,
 311                     Register arg_1, Register arg_2);
 312   void call_VM_leaf(address entry_point,
 313                     Register arg_1, Register arg_2, Register arg_3);
 314 
 315   void call_VM_leaf(address entry_point,
 316                     Register arg_1, Register arg_2, Register arg_3, Register arg_4);
 317 
 318   // These always tightly bind to MacroAssembler::call_VM_leaf_base
 319   // bypassing the virtual implementation
 320   void super_call_VM_leaf(address entry_point);
 321   void super_call_VM_leaf(address entry_point, Register arg_1);
 322   void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
 323   void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
 324   void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
 325 
 326   // last Java Frame (fills frame anchor)
 327   void set_last_Java_frame(Register thread,
 328                            Register last_java_sp,
 329                            Register last_java_fp,
 330                            address  last_java_pc,
 331                            Register rscratch);
 332 
 333   // thread in the default location (r15_thread on 64bit)
 334   void set_last_Java_frame(Register last_java_sp,
 335                            Register last_java_fp,
 336                            address  last_java_pc,
 337                            Register rscratch);
 338 
 339 #ifdef _LP64
 340   void set_last_Java_frame(Register last_java_sp,
 341                            Register last_java_fp,
 342                            Label &last_java_pc,
 343                            Register scratch);
 344 #endif
 345 
 346   void reset_last_Java_frame(Register thread, bool clear_fp);
 347 
 348   // thread in the default location (r15_thread on 64bit)
 349   void reset_last_Java_frame(bool clear_fp);
 350 
 351   // jobjects
 352   void clear_jobject_tag(Register possibly_non_local);
 353   void resolve_jobject(Register value, Register thread, Register tmp);
 354   void resolve_global_jobject(Register value, Register thread, Register tmp);
 355 
 356   // C 'boolean' to Java boolean: x == 0 ? 0 : 1
 357   void c2bool(Register x);
 358 
 359   // C++ bool manipulation
 360 
 361   void movbool(Register dst, Address src);
 362   void movbool(Address dst, bool boolconst);
 363   void movbool(Address dst, Register src);
 364   void testbool(Register dst);
 365 
 366   void resolve_oop_handle(Register result, Register tmp);
 367   void resolve_weak_handle(Register result, Register tmp);
 368   void load_mirror(Register mirror, Register method, Register tmp);
 369   void load_method_holder_cld(Register rresult, Register rmethod);
 370 
 371   void load_method_holder(Register holder, Register method);
 372 
 373   // oop manipulations
 374   void load_klass(Register dst, Register src, Register tmp);
 375   void store_klass(Register dst, Register src, Register tmp);
 376 
 377   void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
 378                       Register tmp1, Register thread_tmp);
 379   void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
 380                        Register tmp1, Register tmp2, Register tmp3);
 381 
 382   void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
 383                      Register thread_tmp = noreg, DecoratorSet decorators = 0);
 384   void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
 385                               Register thread_tmp = noreg, DecoratorSet decorators = 0);
 386   void store_heap_oop(Address dst, Register val, Register tmp1 = noreg,
 387                       Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0);
 388 
 389   // Used for storing null. All other oop constants should be
 390   // stored using routines that take a jobject.
 391   void store_heap_oop_null(Address dst);
 392 
 393 #ifdef _LP64
 394   void store_klass_gap(Register dst, Register src);
 395 
 396   // This dummy is to prevent a call to store_heap_oop from
 397   // converting a zero (like null) into a Register by giving
 398   // the compiler two choices it can't resolve
 399 
 400   void store_heap_oop(Address dst, void* dummy);
 401 
 402   void encode_heap_oop(Register r);
 403   void decode_heap_oop(Register r);
 404   void encode_heap_oop_not_null(Register r);
 405   void decode_heap_oop_not_null(Register r);
 406   void encode_heap_oop_not_null(Register dst, Register src);
 407   void decode_heap_oop_not_null(Register dst, Register src);
 408 
 409   void set_narrow_oop(Register dst, jobject obj);
 410   void set_narrow_oop(Address dst, jobject obj);
 411   void cmp_narrow_oop(Register dst, jobject obj);
 412   void cmp_narrow_oop(Address dst, jobject obj);
 413 
 414   void encode_klass_not_null(Register r, Register tmp);
 415   void decode_klass_not_null(Register r, Register tmp);
 416   void encode_and_move_klass_not_null(Register dst, Register src);
 417   void decode_and_move_klass_not_null(Register dst, Register src);
 418   void set_narrow_klass(Register dst, Klass* k);
 419   void set_narrow_klass(Address dst, Klass* k);
 420   void cmp_narrow_klass(Register dst, Klass* k);
 421   void cmp_narrow_klass(Address dst, Klass* k);
 422 
 423   // if heap base register is used - reinit it with the correct value
 424   void reinit_heapbase();
 425 
 426   DEBUG_ONLY(void verify_heapbase(const char* msg);)
 427 
 428 #endif // _LP64
 429 
 430   // Int division/remainder for Java
 431   // (as idivl, but checks for special case as described in JVM spec.)
 432   // returns idivl instruction offset for implicit exception handling
 433   int corrected_idivl(Register reg);
 434 
 435   // Long division/remainder for Java
 436   // (as idivq, but checks for special case as described in JVM spec.)
 437   // returns idivq instruction offset for implicit exception handling
 438   int corrected_idivq(Register reg);
 439 
 440   void int3();
 441 
 442   // Long operation macros for a 32bit cpu
 443   // Long negation for Java
 444   void lneg(Register hi, Register lo);
 445 
 446   // Long multiplication for Java
 447   // (destroys contents of eax, ebx, ecx and edx)
 448   void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
 449 
 450   // Long shifts for Java
 451   // (semantics as described in JVM spec.)
 452   void lshl(Register hi, Register lo);                               // hi:lo << (rcx & 0x3f)
 453   void lshr(Register hi, Register lo, bool sign_extension = false);  // hi:lo >> (rcx & 0x3f)
 454 
 455   // Long compare for Java
 456   // (semantics as described in JVM spec.)
 457   void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
 458 
 459 
 460   // misc
 461 
 462   // Sign extension
 463   void sign_extend_short(Register reg);
 464   void sign_extend_byte(Register reg);
 465 
 466   // Division by power of 2, rounding towards 0
 467   void division_with_shift(Register reg, int shift_value);
 468 
 469 #ifndef _LP64
 470   // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
 471   //
 472   // CF (corresponds to C0) if x < y
 473   // PF (corresponds to C2) if unordered
 474   // ZF (corresponds to C3) if x = y
 475   //
 476   // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
 477   // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
 478   void fcmp(Register tmp);
 479   // Variant of the above which allows y to be further down the stack
 480   // and which only pops x and y if specified. If pop_right is
 481   // specified then pop_left must also be specified.
 482   void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
 483 
 484   // Floating-point comparison for Java
 485   // Compares the top-most stack entries on the FPU stack and stores the result in dst.
 486   // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
 487   // (semantics as described in JVM spec.)
 488   void fcmp2int(Register dst, bool unordered_is_less);
 489   // Variant of the above which allows y to be further down the stack
 490   // and which only pops x and y if specified. If pop_right is
 491   // specified then pop_left must also be specified.
 492   void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
 493 
 494   // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
 495   // tmp is a temporary register, if none is available use noreg
 496   void fremr(Register tmp);
 497 
 498   // only if +VerifyFPU
 499   void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
 500 #endif // !LP64
 501 
 502   // dst = c = a * b + c
 503   void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
 504   void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
 505 
 506   void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
 507   void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
 508   void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
 509   void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
 510 
 511 
 512   // same as fcmp2int, but using SSE2
 513   void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
 514   void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
 515 
 516   // branch to L if FPU flag C2 is set/not set
 517   // tmp is a temporary register, if none is available use noreg
 518   void jC2 (Register tmp, Label& L);
 519   void jnC2(Register tmp, Label& L);
 520 
 521   // Load float value from 'address'. If UseSSE >= 1, the value is loaded into
 522   // register xmm0. Otherwise, the value is loaded onto the FPU stack.
 523   void load_float(Address src);
 524 
 525   // Store float value to 'address'. If UseSSE >= 1, the value is stored
 526   // from register xmm0. Otherwise, the value is stored from the FPU stack.
 527   void store_float(Address dst);
 528 
 529   // Load double value from 'address'. If UseSSE >= 2, the value is loaded into
 530   // register xmm0. Otherwise, the value is loaded onto the FPU stack.
 531   void load_double(Address src);
 532 
 533   // Store double value to 'address'. If UseSSE >= 2, the value is stored
 534   // from register xmm0. Otherwise, the value is stored from the FPU stack.
 535   void store_double(Address dst);
 536 
 537 #ifndef _LP64
 538   // Pop ST (ffree & fincstp combined)
 539   void fpop();
 540 
 541   void empty_FPU_stack();
 542 #endif // !_LP64
 543 
 544   void push_IU_state();
 545   void pop_IU_state();
 546 
 547   void push_FPU_state();
 548   void pop_FPU_state();
 549 
 550   void push_CPU_state();
 551   void pop_CPU_state();
 552 
 553   void push_cont_fastpath();
 554   void pop_cont_fastpath();
 555 
 556   void inc_held_monitor_count();
 557   void dec_held_monitor_count();
 558 
 559   DEBUG_ONLY(void stop_if_in_cont(Register cont_reg, const char* name);)
 560 
 561   // Round up to a power of two
 562   void round_to(Register reg, int modulus);
 563 
 564 private:
 565   // General purpose and XMM registers potentially clobbered by native code; there
 566   // is no need for FPU or AVX opmask related methods because C1/interpreter
 567   // - we save/restore FPU state as a whole always
 568   // - do not care about AVX-512 opmask
 569   static RegSet call_clobbered_gp_registers();
 570   static XMMRegSet call_clobbered_xmm_registers();
 571 
 572   void push_set(XMMRegSet set, int offset);
 573   void pop_set(XMMRegSet set, int offset);
 574 
 575 public:
 576   void push_set(RegSet set, int offset = -1);
 577   void pop_set(RegSet set, int offset = -1);
 578 
 579   // Push and pop everything that might be clobbered by a native
 580   // runtime call.
 581   // Only save the lower 64 bits of each vector register.
 582   // Additional registers can be excluded in a passed RegSet.
 583   void push_call_clobbered_registers_except(RegSet exclude, bool save_fpu = true);
 584   void pop_call_clobbered_registers_except(RegSet exclude, bool restore_fpu = true);
 585 
 586   void push_call_clobbered_registers(bool save_fpu = true) {
 587     push_call_clobbered_registers_except(RegSet(), save_fpu);
 588   }
 589   void pop_call_clobbered_registers(bool restore_fpu = true) {
 590     pop_call_clobbered_registers_except(RegSet(), restore_fpu);
 591   }
 592 
 593   // allocation
 594   void tlab_allocate(
 595     Register thread,                   // Current thread
 596     Register obj,                      // result: pointer to object after successful allocation
 597     Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
 598     int      con_size_in_bytes,        // object size in bytes if   known at compile time
 599     Register t1,                       // temp register
 600     Register t2,                       // temp register
 601     Label&   slow_case                 // continuation point if fast allocation fails
 602   );
 603   void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
 604 
 605   void population_count(Register dst, Register src, Register scratch1, Register scratch2);
 606 
 607   // interface method calling
 608   void lookup_interface_method(Register recv_klass,
 609                                Register intf_klass,
 610                                RegisterOrConstant itable_index,
 611                                Register method_result,
 612                                Register scan_temp,
 613                                Label& no_such_interface,
 614                                bool return_method = true);
 615 
 616   void lookup_interface_method_stub(Register recv_klass,
 617                                     Register holder_klass,
 618                                     Register resolved_klass,
 619                                     Register method_result,
 620                                     Register scan_temp,
 621                                     Register temp_reg2,
 622                                     Register receiver,
 623                                     int itable_index,
 624                                     Label& L_no_such_interface);
 625 
 626   // virtual method calling
 627   void lookup_virtual_method(Register recv_klass,
 628                              RegisterOrConstant vtable_index,
 629                              Register method_result);
 630 
 631   // Test sub_klass against super_klass, with fast and slow paths.
 632 
 633   // The fast path produces a tri-state answer: yes / no / maybe-slow.
 634   // One of the three labels can be null, meaning take the fall-through.
 635   // If super_check_offset is -1, the value is loaded up from super_klass.
 636   // No registers are killed, except temp_reg.
 637   void check_klass_subtype_fast_path(Register sub_klass,
 638                                      Register super_klass,
 639                                      Register temp_reg,
 640                                      Label* L_success,
 641                                      Label* L_failure,
 642                                      Label* L_slow_path,
 643                 RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
 644 
 645   // The rest of the type check; must be wired to a corresponding fast path.
 646   // It does not repeat the fast path logic, so don't use it standalone.
 647   // The temp_reg and temp2_reg can be noreg, if no temps are available.
 648   // Updates the sub's secondary super cache as necessary.
 649   // If set_cond_codes, condition codes will be Z on success, NZ on failure.
 650   void check_klass_subtype_slow_path(Register sub_klass,
 651                                      Register super_klass,
 652                                      Register temp_reg,
 653                                      Register temp2_reg,
 654                                      Label* L_success,
 655                                      Label* L_failure,
 656                                      bool set_cond_codes = false);
 657   void hashed_check_klass_subtype_slow_path(Register sub_klass,
 658                                      Register super_klass,
 659                                      Register temp_reg,
 660                                      Register temp2_reg,
 661                                      Label* L_success,
 662                                      Label* L_failure,
 663                                      bool set_cond_codes = false);
 664 
 665   // As above, but with a constant super_klass.
 666   // The result is in Register result, not the condition codes.
 667   void lookup_secondary_supers_table(Register sub_klass,
 668                                      Register super_klass,
 669                                      Register temp1,
 670                                      Register temp2,
 671                                      Register temp3,
 672                                      Register temp4,
 673                                      Register result,
 674                                      u1 super_klass_slot);
 675 
 676   void lookup_secondary_supers_table_slow_path(Register r_super_klass,
 677                                                Register r_array_base,
 678                                                Register r_array_index,
 679                                                Register r_bitmap,
 680                                                Register temp1,
 681                                                Register temp2,
 682                                                Label* L_success,
 683                                                Label* L_failure = nullptr);
 684 
 685   void verify_secondary_supers_table(Register r_sub_klass,
 686                                      Register r_super_klass,
 687                                      Register expected,
 688                                      Register temp1,
 689                                      Register temp2,
 690                                      Register temp3);
 691 
 692   void repne_scanq(Register addr, Register value, Register count, Register limit,
 693                    Label* L_success,
 694                    Label* L_failure = nullptr);
 695 
 696     // Simplified, combined version, good for typical uses.
 697   // Falls through on failure.
 698   void check_klass_subtype(Register sub_klass,
 699                            Register super_klass,
 700                            Register temp_reg,
 701                            Label& L_success);
 702 
 703   void clinit_barrier(Register klass,
 704                       Register thread,
 705                       Label* L_fast_path = nullptr,
 706                       Label* L_slow_path = nullptr);
 707 
 708   // method handles (JSR 292)
 709   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 710 
 711   // Debugging
 712 
 713   // only if +VerifyOops
 714   void _verify_oop(Register reg, const char* s, const char* file, int line);
 715   void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
 716 
 717   void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
 718     if (VerifyOops) {
 719       _verify_oop(reg, s, file, line);
 720     }
 721   }
 722   void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
 723     if (VerifyOops) {
 724       _verify_oop_addr(reg, s, file, line);
 725     }
 726   }
 727 
 728   // TODO: verify method and klass metadata (compare against vptr?)
 729   void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
 730   void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
 731 
 732 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
 733 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
 734 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
 735 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 736 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
 737 
 738   // Verify or restore cpu control state after JNI call
 739   void restore_cpu_control_state_after_jni(Register rscratch);
 740 
 741   // prints msg, dumps registers and stops execution
 742   void stop(const char* msg);
 743 
 744   // prints msg and continues
 745   void warn(const char* msg);
 746 
 747   // dumps registers and other state
 748   void print_state();
 749 
 750   static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
 751   static void debug64(char* msg, int64_t pc, int64_t regs[]);
 752   static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
 753   static void print_state64(int64_t pc, int64_t regs[]);
 754 
 755   void os_breakpoint();
 756 
 757   void untested()                                { stop("untested"); }
 758 
 759   void unimplemented(const char* what = "");
 760 
 761   void should_not_reach_here()                   { stop("should not reach here"); }
 762 
 763   void print_CPU_state();
 764 
 765   // Stack overflow checking
 766   void bang_stack_with_offset(int offset) {
 767     // stack grows down, caller passes positive offset
 768     assert(offset > 0, "must bang with negative offset");
 769     movl(Address(rsp, (-offset)), rax);
 770   }
 771 
 772   // Writes to stack successive pages until offset reached to check for
 773   // stack overflow + shadow pages.  Also, clobbers tmp
 774   void bang_stack_size(Register size, Register tmp);
 775 
 776   // Check for reserved stack access in method being exited (for JIT)
 777   void reserved_stack_check();
 778 
 779   void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod);
 780 
 781   void verify_tlab();
 782 
 783   static Condition negate_condition(Condition cond);
 784 
 785   // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit
 786   // operands. In general the names are modified to avoid hiding the instruction in Assembler
 787   // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
 788   // here in MacroAssembler. The major exception to this rule is call
 789 
 790   // Arithmetics
 791 
 792 
 793   void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
 794   void addptr(Address dst, Register src);
 795 
 796   void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
 797   void addptr(Register dst, int32_t src);
 798   void addptr(Register dst, Register src);
 799   void addptr(Register dst, RegisterOrConstant src) {
 800     if (src.is_constant()) addptr(dst, checked_cast<int>(src.as_constant()));
 801     else                   addptr(dst, src.as_register());
 802   }
 803 
 804   void andptr(Register dst, int32_t src);
 805   void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
 806 
 807 #ifdef _LP64
 808   using Assembler::andq;
 809   void andq(Register dst, AddressLiteral src, Register rscratch = noreg);
 810 #endif
 811 
 812   void cmp8(AddressLiteral src1, int imm, Register rscratch = noreg);
 813 
 814   // renamed to drag out the casting of address to int32_t/intptr_t
 815   void cmp32(Register src1, int32_t imm);
 816 
 817   void cmp32(AddressLiteral src1, int32_t imm, Register rscratch = noreg);
 818   // compare reg - mem, or reg - &mem
 819   void cmp32(Register src1, AddressLiteral src2, Register rscratch = noreg);
 820 
 821   void cmp32(Register src1, Address src2);
 822 
 823 #ifndef _LP64
 824   void cmpklass(Address dst, Metadata* obj);
 825   void cmpklass(Register dst, Metadata* obj);
 826   void cmpoop(Address dst, jobject obj);
 827 #endif // _LP64
 828 
 829   void cmpoop(Register src1, Register src2);
 830   void cmpoop(Register src1, Address src2);
 831   void cmpoop(Register dst, jobject obj, Register rscratch);
 832 
 833   // NOTE src2 must be the lval. This is NOT an mem-mem compare
 834   void cmpptr(Address src1, AddressLiteral src2, Register rscratch);
 835 
 836   void cmpptr(Register src1, AddressLiteral src2, Register rscratch = noreg);
 837 
 838   void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
 839   void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
 840   // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
 841 
 842   void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
 843   void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
 844 
 845   // cmp64 to avoild hiding cmpq
 846   void cmp64(Register src1, AddressLiteral src, Register rscratch = noreg);
 847 
 848   void cmpxchgptr(Register reg, Address adr);
 849 
 850   void locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch = noreg);
 851 
 852   void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
 853   void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }
 854 
 855 
 856   void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
 857 
 858   void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
 859 
 860   void shlptr(Register dst, int32_t shift);
 861   void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
 862 
 863   void shrptr(Register dst, int32_t shift);
 864   void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
 865 
 866   void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
 867   void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
 868 
 869   void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
 870 
 871   void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
 872   void subptr(Register dst, int32_t src);
 873   // Force generation of a 4 byte immediate value even if it fits into 8bit
 874   void subptr_imm32(Register dst, int32_t src);
 875   void subptr(Register dst, Register src);
 876   void subptr(Register dst, RegisterOrConstant src) {
 877     if (src.is_constant()) subptr(dst, (int) src.as_constant());
 878     else                   subptr(dst,       src.as_register());
 879   }
 880 
 881   void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
 882   void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
 883 
 884   void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
 885   void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
 886 
 887   void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
 888 
 889 
 890 
 891   // Helper functions for statistics gathering.
 892   // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
 893   void cond_inc32(Condition cond, AddressLiteral counter_addr, Register rscratch = noreg);
 894   // Unconditional atomic increment.
 895   void atomic_incl(Address counter_addr);
 896   void atomic_incl(AddressLiteral counter_addr, Register rscratch = noreg);
 897 #ifdef _LP64
 898   void atomic_incq(Address counter_addr);
 899   void atomic_incq(AddressLiteral counter_addr, Register rscratch = noreg);
 900 #endif
 901   void atomic_incptr(AddressLiteral counter_addr, Register rscratch = noreg) { LP64_ONLY(atomic_incq(counter_addr, rscratch)) NOT_LP64(atomic_incl(counter_addr, rscratch)) ; }
 902   void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }
 903 
 904   using Assembler::lea;
 905   void lea(Register dst, AddressLiteral adr);
 906   void lea(Address  dst, AddressLiteral adr, Register rscratch);
 907 
 908   void leal32(Register dst, Address src) { leal(dst, src); }
 909 
 910   // Import other testl() methods from the parent class or else
 911   // they will be hidden by the following overriding declaration.
 912   using Assembler::testl;
 913   void testl(Address dst, int32_t imm32);
 914   void testl(Register dst, int32_t imm32);
 915   void testl(Register dst, AddressLiteral src); // requires reachable address
 916   using Assembler::testq;
 917   void testq(Address dst, int32_t imm32);
 918   void testq(Register dst, int32_t imm32);
 919 
 920   void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
 921   void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
 922   void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
 923   void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); }
 924 
 925   void testptr(Register src, int32_t imm32) {  LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
 926   void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); }
 927   void testptr(Address src, int32_t imm32) {  LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
 928   void testptr(Register src1, Register src2);
 929 
 930   void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
 931   void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
 932 
 933   // Calls
 934 
 935   void call(Label& L, relocInfo::relocType rtype);
 936   void call(Register entry);
 937   void call(Address addr) { Assembler::call(addr); }
 938 
 939   // NOTE: this call transfers to the effective address of entry NOT
 940   // the address contained by entry. This is because this is more natural
 941   // for jumps/calls.
 942   void call(AddressLiteral entry, Register rscratch = rax);
 943 
 944   // Emit the CompiledIC call idiom
 945   void ic_call(address entry, jint method_index = 0);
 946   static int ic_check_size();
 947   int ic_check(int end_alignment);
 948 
 949   void emit_static_call_stub();
 950 
 951   // Jumps
 952 
 953   // NOTE: these jumps transfer to the effective address of dst NOT
 954   // the address contained by dst. This is because this is more natural
 955   // for jumps/calls.
 956   void jump(AddressLiteral dst, Register rscratch = noreg);
 957 
 958   void jump_cc(Condition cc, AddressLiteral dst, Register rscratch = noreg);
 959 
 960   // 32bit can do a case table jump in one instruction but we no longer allow the base
 961   // to be installed in the Address class. This jump will transfer to the address
 962   // contained in the location described by entry (not the address of entry)
 963   void jump(ArrayAddress entry, Register rscratch);
 964 
 965   // Adding more natural conditional jump instructions
 966   void ALWAYSINLINE jo(Label& L, bool maybe_short = true) { jcc(Assembler::overflow, L, maybe_short); }
 967   void ALWAYSINLINE jno(Label& L, bool maybe_short = true) { jcc(Assembler::noOverflow, L, maybe_short); }
 968   void ALWAYSINLINE js(Label& L, bool maybe_short = true) { jcc(Assembler::negative, L, maybe_short); }
 969   void ALWAYSINLINE jns(Label& L, bool maybe_short = true) { jcc(Assembler::positive, L, maybe_short); }
 970   void ALWAYSINLINE je(Label& L, bool maybe_short = true) { jcc(Assembler::equal, L, maybe_short); }
 971   void ALWAYSINLINE jz(Label& L, bool maybe_short = true) { jcc(Assembler::zero, L, maybe_short); }
 972   void ALWAYSINLINE jne(Label& L, bool maybe_short = true) { jcc(Assembler::notEqual, L, maybe_short); }
 973   void ALWAYSINLINE jnz(Label& L, bool maybe_short = true) { jcc(Assembler::notZero, L, maybe_short); }
 974   void ALWAYSINLINE jb(Label& L, bool maybe_short = true) { jcc(Assembler::below, L, maybe_short); }
 975   void ALWAYSINLINE jnae(Label& L, bool maybe_short = true) { jcc(Assembler::below, L, maybe_short); }
 976   void ALWAYSINLINE jc(Label& L, bool maybe_short = true) { jcc(Assembler::carrySet, L, maybe_short); }
 977   void ALWAYSINLINE jnb(Label& L, bool maybe_short = true) { jcc(Assembler::aboveEqual, L, maybe_short); }
 978   void ALWAYSINLINE jae(Label& L, bool maybe_short = true) { jcc(Assembler::aboveEqual, L, maybe_short); }
 979   void ALWAYSINLINE jnc(Label& L, bool maybe_short = true) { jcc(Assembler::carryClear, L, maybe_short); }
 980   void ALWAYSINLINE jbe(Label& L, bool maybe_short = true) { jcc(Assembler::belowEqual, L, maybe_short); }
 981   void ALWAYSINLINE jna(Label& L, bool maybe_short = true) { jcc(Assembler::belowEqual, L, maybe_short); }
 982   void ALWAYSINLINE ja(Label& L, bool maybe_short = true) { jcc(Assembler::above, L, maybe_short); }
 983   void ALWAYSINLINE jnbe(Label& L, bool maybe_short = true) { jcc(Assembler::above, L, maybe_short); }
 984   void ALWAYSINLINE jl(Label& L, bool maybe_short = true) { jcc(Assembler::less, L, maybe_short); }
 985   void ALWAYSINLINE jnge(Label& L, bool maybe_short = true) { jcc(Assembler::less, L, maybe_short); }
 986   void ALWAYSINLINE jge(Label& L, bool maybe_short = true) { jcc(Assembler::greaterEqual, L, maybe_short); }
 987   void ALWAYSINLINE jnl(Label& L, bool maybe_short = true) { jcc(Assembler::greaterEqual, L, maybe_short); }
 988   void ALWAYSINLINE jle(Label& L, bool maybe_short = true) { jcc(Assembler::lessEqual, L, maybe_short); }
 989   void ALWAYSINLINE jng(Label& L, bool maybe_short = true) { jcc(Assembler::lessEqual, L, maybe_short); }
 990   void ALWAYSINLINE jg(Label& L, bool maybe_short = true) { jcc(Assembler::greater, L, maybe_short); }
 991   void ALWAYSINLINE jnle(Label& L, bool maybe_short = true) { jcc(Assembler::greater, L, maybe_short); }
 992   void ALWAYSINLINE jp(Label& L, bool maybe_short = true) { jcc(Assembler::parity, L, maybe_short); }
 993   void ALWAYSINLINE jpe(Label& L, bool maybe_short = true) { jcc(Assembler::parity, L, maybe_short); }
 994   void ALWAYSINLINE jnp(Label& L, bool maybe_short = true) { jcc(Assembler::noParity, L, maybe_short); }
 995   void ALWAYSINLINE jpo(Label& L, bool maybe_short = true) { jcc(Assembler::noParity, L, maybe_short); }
 996   // * No condition for this *  void ALWAYSINLINE jcxz(Label& L, bool maybe_short = true) { jcc(Assembler::cxz, L, maybe_short); }
 997   // * No condition for this *  void ALWAYSINLINE jecxz(Label& L, bool maybe_short = true) { jcc(Assembler::cxz, L, maybe_short); }
 998 
 999   // Short versions of the above
1000   void ALWAYSINLINE jo_b(Label& L) { jccb(Assembler::overflow, L); }
1001   void ALWAYSINLINE jno_b(Label& L) { jccb(Assembler::noOverflow, L); }
1002   void ALWAYSINLINE js_b(Label& L) { jccb(Assembler::negative, L); }
1003   void ALWAYSINLINE jns_b(Label& L) { jccb(Assembler::positive, L); }
1004   void ALWAYSINLINE je_b(Label& L) { jccb(Assembler::equal, L); }
1005   void ALWAYSINLINE jz_b(Label& L) { jccb(Assembler::zero, L); }
1006   void ALWAYSINLINE jne_b(Label& L) { jccb(Assembler::notEqual, L); }
1007   void ALWAYSINLINE jnz_b(Label& L) { jccb(Assembler::notZero, L); }
1008   void ALWAYSINLINE jb_b(Label& L) { jccb(Assembler::below, L); }
1009   void ALWAYSINLINE jnae_b(Label& L) { jccb(Assembler::below, L); }
1010   void ALWAYSINLINE jc_b(Label& L) { jccb(Assembler::carrySet, L); }
1011   void ALWAYSINLINE jnb_b(Label& L) { jccb(Assembler::aboveEqual, L); }
1012   void ALWAYSINLINE jae_b(Label& L) { jccb(Assembler::aboveEqual, L); }
1013   void ALWAYSINLINE jnc_b(Label& L) { jccb(Assembler::carryClear, L); }
1014   void ALWAYSINLINE jbe_b(Label& L) { jccb(Assembler::belowEqual, L); }
1015   void ALWAYSINLINE jna_b(Label& L) { jccb(Assembler::belowEqual, L); }
1016   void ALWAYSINLINE ja_b(Label& L) { jccb(Assembler::above, L); }
1017   void ALWAYSINLINE jnbe_b(Label& L) { jccb(Assembler::above, L); }
1018   void ALWAYSINLINE jl_b(Label& L) { jccb(Assembler::less, L); }
1019   void ALWAYSINLINE jnge_b(Label& L) { jccb(Assembler::less, L); }
1020   void ALWAYSINLINE jge_b(Label& L) { jccb(Assembler::greaterEqual, L); }
1021   void ALWAYSINLINE jnl_b(Label& L) { jccb(Assembler::greaterEqual, L); }
1022   void ALWAYSINLINE jle_b(Label& L) { jccb(Assembler::lessEqual, L); }
1023   void ALWAYSINLINE jng_b(Label& L) { jccb(Assembler::lessEqual, L); }
1024   void ALWAYSINLINE jg_b(Label& L) { jccb(Assembler::greater, L); }
1025   void ALWAYSINLINE jnle_b(Label& L) { jccb(Assembler::greater, L); }
1026   void ALWAYSINLINE jp_b(Label& L) { jccb(Assembler::parity, L); }
1027   void ALWAYSINLINE jpe_b(Label& L) { jccb(Assembler::parity, L); }
1028   void ALWAYSINLINE jnp_b(Label& L) { jccb(Assembler::noParity, L); }
1029   void ALWAYSINLINE jpo_b(Label& L) { jccb(Assembler::noParity, L); }
1030   // * No condition for this *  void ALWAYSINLINE jcxz_b(Label& L) { jccb(Assembler::cxz, L); }
1031   // * No condition for this *  void ALWAYSINLINE jecxz_b(Label& L) { jccb(Assembler::cxz, L); }
1032 
1033   // Floating
1034 
1035   void push_f(XMMRegister r);
1036   void pop_f(XMMRegister r);
1037   void push_d(XMMRegister r);
1038   void pop_d(XMMRegister r);
1039 
1040   void andpd(XMMRegister dst, XMMRegister    src) { Assembler::andpd(dst, src); }
1041   void andpd(XMMRegister dst, Address        src) { Assembler::andpd(dst, src); }
1042   void andpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1043 
1044   void andps(XMMRegister dst, XMMRegister    src) { Assembler::andps(dst, src); }
1045   void andps(XMMRegister dst, Address        src) { Assembler::andps(dst, src); }
1046   void andps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1047 
1048   void comiss(XMMRegister dst, XMMRegister    src) { Assembler::comiss(dst, src); }
1049   void comiss(XMMRegister dst, Address        src) { Assembler::comiss(dst, src); }
1050   void comiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1051 
1052   void comisd(XMMRegister dst, XMMRegister    src) { Assembler::comisd(dst, src); }
1053   void comisd(XMMRegister dst, Address        src) { Assembler::comisd(dst, src); }
1054   void comisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1055 
1056 #ifndef _LP64
1057   void fadd_s(Address        src) { Assembler::fadd_s(src); }
1058   void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
1059 
1060   void fldcw(Address        src) { Assembler::fldcw(src); }
1061   void fldcw(AddressLiteral src);
1062 
1063   void fld_s(int index)          { Assembler::fld_s(index); }
1064   void fld_s(Address        src) { Assembler::fld_s(src); }
1065   void fld_s(AddressLiteral src);
1066 
1067   void fld_d(Address        src) { Assembler::fld_d(src); }
1068   void fld_d(AddressLiteral src);
1069 
1070   void fld_x(Address        src) { Assembler::fld_x(src); }
1071   void fld_x(AddressLiteral src) { Assembler::fld_x(as_Address(src)); }
1072 
1073   void fmul_s(Address        src) { Assembler::fmul_s(src); }
1074   void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
1075 #endif // !_LP64
1076 
1077   void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
1078   void ldmxcsr(AddressLiteral src, Register rscratch = noreg);
1079 
1080 #ifdef _LP64
1081  private:
1082   void sha256_AVX2_one_round_compute(
1083     Register  reg_old_h,
1084     Register  reg_a,
1085     Register  reg_b,
1086     Register  reg_c,
1087     Register  reg_d,
1088     Register  reg_e,
1089     Register  reg_f,
1090     Register  reg_g,
1091     Register  reg_h,
1092     int iter);
1093   void sha256_AVX2_four_rounds_compute_first(int start);
1094   void sha256_AVX2_four_rounds_compute_last(int start);
1095   void sha256_AVX2_one_round_and_sched(
1096         XMMRegister xmm_0,     /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */
1097         XMMRegister xmm_1,     /* ymm5 */  /* full cycle is 16 iterations */
1098         XMMRegister xmm_2,     /* ymm6 */
1099         XMMRegister xmm_3,     /* ymm7 */
1100         Register    reg_a,      /* == eax on 0 iteration, then rotate 8 register right on each next iteration */
1101         Register    reg_b,      /* ebx */    /* full cycle is 8 iterations */
1102         Register    reg_c,      /* edi */
1103         Register    reg_d,      /* esi */
1104         Register    reg_e,      /* r8d */
1105         Register    reg_f,      /* r9d */
1106         Register    reg_g,      /* r10d */
1107         Register    reg_h,      /* r11d */
1108         int iter);
1109 
1110   void addm(int disp, Register r1, Register r2);
1111 
1112   void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d,
1113                                      Register e, Register f, Register g, Register h, int iteration);
1114 
1115   void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1116                                           Register a, Register b, Register c, Register d, Register e, Register f,
1117                                           Register g, Register h, int iteration);
1118 
1119   void addmq(int disp, Register r1, Register r2);
1120  public:
1121   void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
1122                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
1123                    Register buf, Register state, Register ofs, Register limit, Register rsp,
1124                    bool multi_block, XMMRegister shuf_mask);
1125   void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
1126                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
1127                    Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
1128                    XMMRegister shuf_mask);
1129   void sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block);
1130 #endif // _LP64
1131 
1132   void fast_md5(Register buf, Address state, Address ofs, Address limit,
1133                 bool multi_block);
1134 
1135   void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
1136                  XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
1137                  Register buf, Register state, Register ofs, Register limit, Register rsp,
1138                  bool multi_block);
1139 
1140 #ifdef _LP64
1141   void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
1142                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
1143                    Register buf, Register state, Register ofs, Register limit, Register rsp,
1144                    bool multi_block, XMMRegister shuf_mask);
1145 #else
1146   void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
1147                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
1148                    Register buf, Register state, Register ofs, Register limit, Register rsp,
1149                    bool multi_block);
1150 #endif
1151 
1152   void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1153                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1154                 Register rax, Register rcx, Register rdx, Register tmp);
1155 
1156 #ifndef _LP64
1157  private:
1158   // Initialized in macroAssembler_x86_constants.cpp
1159   static address ONES;
1160   static address L_2IL0FLOATPACKET_0;
1161   static address PI4_INV;
1162   static address PI4X3;
1163   static address PI4X4;
1164 
1165  public:
1166   void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1167                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1168                 Register rax, Register rcx, Register rdx, Register tmp1);
1169 
1170   void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1171                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1172                 Register rax, Register rcx, Register rdx, Register tmp);
1173 
1174   void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1175                 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1176                 Register rdx, Register tmp);
1177 
1178   void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1179                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1180                 Register rax, Register rbx, Register rdx);
1181 
1182   void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1183                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1184                 Register rax, Register rcx, Register rdx, Register tmp);
1185 
1186   void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1187                         Register edx, Register ebx, Register esi, Register edi,
1188                         Register ebp, Register esp);
1189 
1190   void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
1191                          Register esi, Register edi, Register ebp, Register esp);
1192 
1193   void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1194                         Register edx, Register ebx, Register esi, Register edi,
1195                         Register ebp, Register esp);
1196 
1197   void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1198                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1199                 Register rax, Register rcx, Register rdx, Register tmp);
1200 #endif // !_LP64
1201 
1202 private:
1203 
1204   // these are private because users should be doing movflt/movdbl
1205 
1206   void movss(Address     dst, XMMRegister    src) { Assembler::movss(dst, src); }
1207   void movss(XMMRegister dst, XMMRegister    src) { Assembler::movss(dst, src); }
1208   void movss(XMMRegister dst, Address        src) { Assembler::movss(dst, src); }
1209   void movss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1210 
1211   void movlpd(XMMRegister dst, Address        src) {Assembler::movlpd(dst, src); }
1212   void movlpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1213 
1214 public:
1215 
1216   void addsd(XMMRegister dst, XMMRegister    src) { Assembler::addsd(dst, src); }
1217   void addsd(XMMRegister dst, Address        src) { Assembler::addsd(dst, src); }
1218   void addsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1219 
1220   void addss(XMMRegister dst, XMMRegister    src) { Assembler::addss(dst, src); }
1221   void addss(XMMRegister dst, Address        src) { Assembler::addss(dst, src); }
1222   void addss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1223 
1224   void addpd(XMMRegister dst, XMMRegister    src) { Assembler::addpd(dst, src); }
1225   void addpd(XMMRegister dst, Address        src) { Assembler::addpd(dst, src); }
1226   void addpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1227 
1228   using Assembler::vbroadcasti128;
1229   void vbroadcasti128(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1230 
1231   using Assembler::vbroadcastsd;
1232   void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1233 
1234   using Assembler::vbroadcastss;
1235   void vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1236 
1237   // Vector float blend
1238   void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg);
1239   void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg);
1240 
1241   void divsd(XMMRegister dst, XMMRegister    src) { Assembler::divsd(dst, src); }
1242   void divsd(XMMRegister dst, Address        src) { Assembler::divsd(dst, src); }
1243   void divsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1244 
1245   void divss(XMMRegister dst, XMMRegister    src) { Assembler::divss(dst, src); }
1246   void divss(XMMRegister dst, Address        src) { Assembler::divss(dst, src); }
1247   void divss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1248 
1249   // Move Unaligned Double Quadword
1250   void movdqu(Address     dst, XMMRegister    src);
1251   void movdqu(XMMRegister dst, XMMRegister    src);
1252   void movdqu(XMMRegister dst, Address        src);
1253   void movdqu(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1254 
1255   void kmovwl(Register  dst, KRegister      src) { Assembler::kmovwl(dst, src); }
1256   void kmovwl(Address   dst, KRegister      src) { Assembler::kmovwl(dst, src); }
1257   void kmovwl(KRegister dst, KRegister      src) { Assembler::kmovwl(dst, src); }
1258   void kmovwl(KRegister dst, Register       src) { Assembler::kmovwl(dst, src); }
1259   void kmovwl(KRegister dst, Address        src) { Assembler::kmovwl(dst, src); }
1260   void kmovwl(KRegister dst, AddressLiteral src, Register rscratch = noreg);
1261 
1262   void kmovql(KRegister dst, KRegister      src) { Assembler::kmovql(dst, src); }
1263   void kmovql(KRegister dst, Register       src) { Assembler::kmovql(dst, src); }
1264   void kmovql(Register  dst, KRegister      src) { Assembler::kmovql(dst, src); }
1265   void kmovql(KRegister dst, Address        src) { Assembler::kmovql(dst, src); }
1266   void kmovql(Address   dst, KRegister      src) { Assembler::kmovql(dst, src); }
1267   void kmovql(KRegister dst, AddressLiteral src, Register rscratch = noreg);
1268 
1269   // Safe move operation, lowers down to 16bit moves for targets supporting
1270   // AVX512F feature and 64bit moves for targets supporting AVX512BW feature.
1271   void kmov(Address  dst, KRegister src);
1272   void kmov(KRegister dst, Address src);
1273   void kmov(KRegister dst, KRegister src);
1274   void kmov(Register dst, KRegister src);
1275   void kmov(KRegister dst, Register src);
1276 
1277   using Assembler::movddup;
1278   void movddup(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1279 
1280   using Assembler::vmovddup;
1281   void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1282 
1283   // AVX Unaligned forms
1284   void vmovdqu(Address     dst, XMMRegister    src);
1285   void vmovdqu(XMMRegister dst, Address        src);
1286   void vmovdqu(XMMRegister dst, XMMRegister    src);
1287   void vmovdqu(XMMRegister dst, AddressLiteral src,                 Register rscratch = noreg);
1288   void vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1289 
1290   // AVX512 Unaligned
1291   void evmovdqu(BasicType type, KRegister kmask, Address     dst, XMMRegister src, bool merge, int vector_len);
1292   void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address     src, bool merge, int vector_len);
1293   void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, XMMRegister src, bool merge, int vector_len);
1294 
1295   void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); }
1296   void evmovdqub(XMMRegister dst, Address     src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); }
1297 
1298   void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1299     if (dst->encoding() != src->encoding() || mask != k0)  {
1300       Assembler::evmovdqub(dst, mask, src, merge, vector_len);
1301     }
1302   }
1303   void evmovdqub(Address     dst, KRegister mask, XMMRegister    src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1304   void evmovdqub(XMMRegister dst, KRegister mask, Address        src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1305   void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
1306 
1307   void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); }
1308   void evmovdquw(Address     dst, XMMRegister src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); }
1309   void evmovdquw(XMMRegister dst, Address     src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); }
1310 
1311   void evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1312     if (dst->encoding() != src->encoding() || mask != k0) {
1313       Assembler::evmovdquw(dst, mask, src, merge, vector_len);
1314     }
1315   }
1316   void evmovdquw(XMMRegister dst, KRegister mask, Address        src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1317   void evmovdquw(Address     dst, KRegister mask, XMMRegister    src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1318   void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
1319 
1320   void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
1321      if (dst->encoding() != src->encoding()) {
1322        Assembler::evmovdqul(dst, src, vector_len);
1323      }
1324   }
1325   void evmovdqul(Address     dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1326   void evmovdqul(XMMRegister dst, Address     src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1327 
1328   void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1329     if (dst->encoding() != src->encoding() || mask != k0)  {
1330       Assembler::evmovdqul(dst, mask, src, merge, vector_len);
1331     }
1332   }
1333   void evmovdqul(Address     dst, KRegister mask, XMMRegister    src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1334   void evmovdqul(XMMRegister dst, KRegister mask, Address        src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1335   void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
1336 
1337   void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
1338     if (dst->encoding() != src->encoding()) {
1339       Assembler::evmovdquq(dst, src, vector_len);
1340     }
1341   }
1342   void evmovdquq(XMMRegister dst, Address        src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1343   void evmovdquq(Address     dst, XMMRegister    src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1344   void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1345 
1346   void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1347     if (dst->encoding() != src->encoding() || mask != k0) {
1348       Assembler::evmovdquq(dst, mask, src, merge, vector_len);
1349     }
1350   }
1351   void evmovdquq(Address     dst, KRegister mask, XMMRegister    src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1352   void evmovdquq(XMMRegister dst, KRegister mask, Address        src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1353   void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
1354 
1355   // Move Aligned Double Quadword
1356   void movdqa(XMMRegister dst, XMMRegister    src) { Assembler::movdqa(dst, src); }
1357   void movdqa(XMMRegister dst, Address        src) { Assembler::movdqa(dst, src); }
1358   void movdqa(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1359 
1360   void movsd(Address     dst, XMMRegister    src) { Assembler::movsd(dst, src); }
1361   void movsd(XMMRegister dst, XMMRegister    src) { Assembler::movsd(dst, src); }
1362   void movsd(XMMRegister dst, Address        src) { Assembler::movsd(dst, src); }
1363   void movsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1364 
1365   void mulpd(XMMRegister dst, XMMRegister    src) { Assembler::mulpd(dst, src); }
1366   void mulpd(XMMRegister dst, Address        src) { Assembler::mulpd(dst, src); }
1367   void mulpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1368 
1369   void mulsd(XMMRegister dst, XMMRegister    src) { Assembler::mulsd(dst, src); }
1370   void mulsd(XMMRegister dst, Address        src) { Assembler::mulsd(dst, src); }
1371   void mulsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1372 
1373   void mulss(XMMRegister dst, XMMRegister    src) { Assembler::mulss(dst, src); }
1374   void mulss(XMMRegister dst, Address        src) { Assembler::mulss(dst, src); }
1375   void mulss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1376 
1377   // Carry-Less Multiplication Quadword
1378   void pclmulldq(XMMRegister dst, XMMRegister src) {
1379     // 0x00 - multiply lower 64 bits [0:63]
1380     Assembler::pclmulqdq(dst, src, 0x00);
1381   }
1382   void pclmulhdq(XMMRegister dst, XMMRegister src) {
1383     // 0x11 - multiply upper 64 bits [64:127]
1384     Assembler::pclmulqdq(dst, src, 0x11);
1385   }
1386 
1387   void pcmpeqb(XMMRegister dst, XMMRegister src);
1388   void pcmpeqw(XMMRegister dst, XMMRegister src);
1389 
1390   void pcmpestri(XMMRegister dst, Address src, int imm8);
1391   void pcmpestri(XMMRegister dst, XMMRegister src, int imm8);
1392 
1393   void pmovzxbw(XMMRegister dst, XMMRegister src);
1394   void pmovzxbw(XMMRegister dst, Address src);
1395 
1396   void pmovmskb(Register dst, XMMRegister src);
1397 
1398   void ptest(XMMRegister dst, XMMRegister src);
1399 
1400   void roundsd(XMMRegister dst, XMMRegister    src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1401   void roundsd(XMMRegister dst, Address        src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1402   void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register rscratch = noreg);
1403 
1404   void sqrtss(XMMRegister dst, XMMRegister     src) { Assembler::sqrtss(dst, src); }
1405   void sqrtss(XMMRegister dst, Address         src) { Assembler::sqrtss(dst, src); }
1406   void sqrtss(XMMRegister dst, AddressLiteral  src, Register rscratch = noreg);
1407 
1408   void subsd(XMMRegister dst, XMMRegister    src) { Assembler::subsd(dst, src); }
1409   void subsd(XMMRegister dst, Address        src) { Assembler::subsd(dst, src); }
1410   void subsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1411 
1412   void subss(XMMRegister dst, XMMRegister    src) { Assembler::subss(dst, src); }
1413   void subss(XMMRegister dst, Address        src) { Assembler::subss(dst, src); }
1414   void subss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1415 
1416   void ucomiss(XMMRegister dst, XMMRegister    src) { Assembler::ucomiss(dst, src); }
1417   void ucomiss(XMMRegister dst, Address        src) { Assembler::ucomiss(dst, src); }
1418   void ucomiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1419 
1420   void ucomisd(XMMRegister dst, XMMRegister    src) { Assembler::ucomisd(dst, src); }
1421   void ucomisd(XMMRegister dst, Address        src) { Assembler::ucomisd(dst, src); }
1422   void ucomisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1423 
1424   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
1425   void xorpd(XMMRegister dst, XMMRegister    src);
1426   void xorpd(XMMRegister dst, Address        src) { Assembler::xorpd(dst, src); }
1427   void xorpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1428 
1429   // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
1430   void xorps(XMMRegister dst, XMMRegister    src);
1431   void xorps(XMMRegister dst, Address        src) { Assembler::xorps(dst, src); }
1432   void xorps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1433 
1434   // Shuffle Bytes
1435   void pshufb(XMMRegister dst, XMMRegister    src) { Assembler::pshufb(dst, src); }
1436   void pshufb(XMMRegister dst, Address        src) { Assembler::pshufb(dst, src); }
1437   void pshufb(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1438   // AVX 3-operands instructions
1439 
1440   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister    src) { Assembler::vaddsd(dst, nds, src); }
1441   void vaddsd(XMMRegister dst, XMMRegister nds, Address        src) { Assembler::vaddsd(dst, nds, src); }
1442   void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1443 
1444   void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister    src) { Assembler::vaddss(dst, nds, src); }
1445   void vaddss(XMMRegister dst, XMMRegister nds, Address        src) { Assembler::vaddss(dst, nds, src); }
1446   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1447 
1448   void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len, Register rscratch = noreg);
1449   void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len, Register rscratch = noreg);
1450 
1451   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len);
1452   void vpaddb(XMMRegister dst, XMMRegister nds, Address        src, int vector_len);
1453   void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1454 
1455   void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1456   void vpaddw(XMMRegister dst, XMMRegister nds, Address     src, int vector_len);
1457 
1458   void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1459   void vpaddd(XMMRegister dst, XMMRegister nds, Address        src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1460   void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1461 
1462   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1463   void vpand(XMMRegister dst, XMMRegister nds, Address        src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1464   void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1465 
1466   using Assembler::vpbroadcastd;
1467   void vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1468 
1469   using Assembler::vpbroadcastq;
1470   void vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1471 
1472   void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1473   void vpcmpeqb(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
1474 
1475   void vpcmpeqw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1476   void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1477   void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1478 
1479   // Vector compares
1480   void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister    src, int comparison, bool is_signed, int vector_len) {
1481     Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len);
1482   }
1483   void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg);
1484 
1485   void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister    src, int comparison, bool is_signed, int vector_len) {
1486     Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len);
1487   }
1488   void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg);
1489 
1490   void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister    src, int comparison, bool is_signed, int vector_len) {
1491     Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len);
1492   }
1493   void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg);
1494 
1495   void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister    src, int comparison, bool is_signed, int vector_len) {
1496     Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len);
1497   }
1498   void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int comparison, bool is_signed, int vector_len, Register rscratch = noreg);
1499 
1500   void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
1501 
1502   // Emit comparison instruction for the specified comparison predicate.
1503   void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister xtmp, ComparisonPredicate cond, Width width, int vector_len);
1504   void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
1505 
1506   void vpmovzxbw(XMMRegister dst, Address     src, int vector_len);
1507   void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
1508 
1509   void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);
1510 
1511   void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1512   void vpmullw(XMMRegister dst, XMMRegister nds, Address     src, int vector_len);
1513 
1514   void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vpmulld(dst, nds, src, vector_len); }
1515   void vpmulld(XMMRegister dst, XMMRegister nds, Address        src, int vector_len) { Assembler::vpmulld(dst, nds, src, vector_len); }
1516   void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1517 
1518   void vpmuldq(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vpmuldq(dst, nds, src, vector_len); }
1519 
1520   void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1521   void vpsubb(XMMRegister dst, XMMRegister nds, Address     src, int vector_len);
1522 
1523   void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1524   void vpsubw(XMMRegister dst, XMMRegister nds, Address     src, int vector_len);
1525 
1526   void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1527   void vpsraw(XMMRegister dst, XMMRegister nds, int         shift, int vector_len);
1528 
1529   void evpsrad(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1530   void evpsrad(XMMRegister dst, XMMRegister nds, int         shift, int vector_len);
1531 
1532   void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1533   void evpsraq(XMMRegister dst, XMMRegister nds, int         shift, int vector_len);
1534 
1535   using Assembler::evpsllw;
1536   void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1537     if (!is_varshift) {
1538       Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
1539     } else {
1540       Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len);
1541     }
1542   }
1543   void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1544     if (!is_varshift) {
1545       Assembler::evpslld(dst, mask, nds, src, merge, vector_len);
1546     } else {
1547       Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len);
1548     }
1549   }
1550   void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1551     if (!is_varshift) {
1552       Assembler::evpsllq(dst, mask, nds, src, merge, vector_len);
1553     } else {
1554       Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len);
1555     }
1556   }
1557   void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1558     if (!is_varshift) {
1559       Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len);
1560     } else {
1561       Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len);
1562     }
1563   }
1564   void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1565     if (!is_varshift) {
1566       Assembler::evpsrld(dst, mask, nds, src, merge, vector_len);
1567     } else {
1568       Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
1569     }
1570   }
1571 
1572   using Assembler::evpsrlq;
1573   void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1574     if (!is_varshift) {
1575       Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
1576     } else {
1577       Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
1578     }
1579   }
1580   using Assembler::evpsraw;
1581   void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1582     if (!is_varshift) {
1583       Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
1584     } else {
1585       Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
1586     }
1587   }
1588   using Assembler::evpsrad;
1589   void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1590     if (!is_varshift) {
1591       Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);
1592     } else {
1593       Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
1594     }
1595   }
1596   using Assembler::evpsraq;
1597   void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1598     if (!is_varshift) {
1599       Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
1600     } else {
1601       Assembler::evpsravq(dst, mask, nds, src, merge, vector_len);
1602     }
1603   }
1604 
1605   void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1606   void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1607   void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1608   void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1609 
1610   void evpminu(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1611   void evpmaxu(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1612   void evpminu(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1613   void evpmaxu(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1614 
1615   void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1616   void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1617 
1618   void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1619   void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1620 
1621   void vptest(XMMRegister dst, XMMRegister src);
1622   void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }
1623 
1624   void punpcklbw(XMMRegister dst, XMMRegister src);
1625   void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
1626 
1627   void pshufd(XMMRegister dst, Address src, int mode);
1628   void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }
1629 
1630   void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1631   void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
1632 
1633   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1634   void vandpd(XMMRegister dst, XMMRegister nds, Address        src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1635   void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1636 
1637   void vandps(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1638   void vandps(XMMRegister dst, XMMRegister nds, Address        src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1639   void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1640 
1641   void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
1642 
1643   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister    src) { Assembler::vdivsd(dst, nds, src); }
1644   void vdivsd(XMMRegister dst, XMMRegister nds, Address        src) { Assembler::vdivsd(dst, nds, src); }
1645   void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1646 
1647   void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister    src) { Assembler::vdivss(dst, nds, src); }
1648   void vdivss(XMMRegister dst, XMMRegister nds, Address        src) { Assembler::vdivss(dst, nds, src); }
1649   void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1650 
1651   void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister    src) { Assembler::vmulsd(dst, nds, src); }
1652   void vmulsd(XMMRegister dst, XMMRegister nds, Address        src) { Assembler::vmulsd(dst, nds, src); }
1653   void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1654 
1655   void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister    src) { Assembler::vmulss(dst, nds, src); }
1656   void vmulss(XMMRegister dst, XMMRegister nds, Address        src) { Assembler::vmulss(dst, nds, src); }
1657   void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1658 
1659   void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister    src) { Assembler::vsubsd(dst, nds, src); }
1660   void vsubsd(XMMRegister dst, XMMRegister nds, Address        src) { Assembler::vsubsd(dst, nds, src); }
1661   void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1662 
1663   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister    src) { Assembler::vsubss(dst, nds, src); }
1664   void vsubss(XMMRegister dst, XMMRegister nds, Address        src) { Assembler::vsubss(dst, nds, src); }
1665   void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1666 
1667   void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1668   void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src, Register rscratch = noreg);
1669 
1670   // AVX Vector instructions
1671 
1672   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1673   void vxorpd(XMMRegister dst, XMMRegister nds, Address        src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1674   void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1675 
1676   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1677   void vxorps(XMMRegister dst, XMMRegister nds, Address        src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1678   void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1679 
1680   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1681     if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
1682       Assembler::vpxor(dst, nds, src, vector_len);
1683     else
1684       Assembler::vxorpd(dst, nds, src, vector_len);
1685   }
1686   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1687     if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
1688       Assembler::vpxor(dst, nds, src, vector_len);
1689     else
1690       Assembler::vxorpd(dst, nds, src, vector_len);
1691   }
1692   void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1693 
1694   // Simple version for AVX2 256bit vectors
1695   void vpxor(XMMRegister dst, XMMRegister src) {
1696     assert(UseAVX >= 2, "Should be at least AVX2");
1697     Assembler::vpxor(dst, dst, src, AVX_256bit);
1698   }
1699   void vpxor(XMMRegister dst, Address src) {
1700     assert(UseAVX >= 2, "Should be at least AVX2");
1701     Assembler::vpxor(dst, dst, src, AVX_256bit);
1702   }
1703 
1704   void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister    src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
1705   void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1706 
1707   void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
1708     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1709       Assembler::vinserti32x4(dst, nds, src, imm8);
1710     } else if (UseAVX > 1) {
1711       // vinserti128 is available only in AVX2
1712       Assembler::vinserti128(dst, nds, src, imm8);
1713     } else {
1714       Assembler::vinsertf128(dst, nds, src, imm8);
1715     }
1716   }
1717 
1718   void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
1719     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1720       Assembler::vinserti32x4(dst, nds, src, imm8);
1721     } else if (UseAVX > 1) {
1722       // vinserti128 is available only in AVX2
1723       Assembler::vinserti128(dst, nds, src, imm8);
1724     } else {
1725       Assembler::vinsertf128(dst, nds, src, imm8);
1726     }
1727   }
1728 
1729   void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1730     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1731       Assembler::vextracti32x4(dst, src, imm8);
1732     } else if (UseAVX > 1) {
1733       // vextracti128 is available only in AVX2
1734       Assembler::vextracti128(dst, src, imm8);
1735     } else {
1736       Assembler::vextractf128(dst, src, imm8);
1737     }
1738   }
1739 
1740   void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
1741     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1742       Assembler::vextracti32x4(dst, src, imm8);
1743     } else if (UseAVX > 1) {
1744       // vextracti128 is available only in AVX2
1745       Assembler::vextracti128(dst, src, imm8);
1746     } else {
1747       Assembler::vextractf128(dst, src, imm8);
1748     }
1749   }
1750 
1751   // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
1752   void vinserti128_high(XMMRegister dst, XMMRegister src) {
1753     vinserti128(dst, dst, src, 1);
1754   }
1755   void vinserti128_high(XMMRegister dst, Address src) {
1756     vinserti128(dst, dst, src, 1);
1757   }
1758   void vextracti128_high(XMMRegister dst, XMMRegister src) {
1759     vextracti128(dst, src, 1);
1760   }
1761   void vextracti128_high(Address dst, XMMRegister src) {
1762     vextracti128(dst, src, 1);
1763   }
1764 
1765   void vinsertf128_high(XMMRegister dst, XMMRegister src) {
1766     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1767       Assembler::vinsertf32x4(dst, dst, src, 1);
1768     } else {
1769       Assembler::vinsertf128(dst, dst, src, 1);
1770     }
1771   }
1772 
1773   void vinsertf128_high(XMMRegister dst, Address src) {
1774     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1775       Assembler::vinsertf32x4(dst, dst, src, 1);
1776     } else {
1777       Assembler::vinsertf128(dst, dst, src, 1);
1778     }
1779   }
1780 
1781   void vextractf128_high(XMMRegister dst, XMMRegister src) {
1782     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1783       Assembler::vextractf32x4(dst, src, 1);
1784     } else {
1785       Assembler::vextractf128(dst, src, 1);
1786     }
1787   }
1788 
1789   void vextractf128_high(Address dst, XMMRegister src) {
1790     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1791       Assembler::vextractf32x4(dst, src, 1);
1792     } else {
1793       Assembler::vextractf128(dst, src, 1);
1794     }
1795   }
1796 
1797   // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
1798   void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
1799     Assembler::vinserti64x4(dst, dst, src, 1);
1800   }
1801   void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
1802     Assembler::vinsertf64x4(dst, dst, src, 1);
1803   }
1804   void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
1805     Assembler::vextracti64x4(dst, src, 1);
1806   }
1807   void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
1808     Assembler::vextractf64x4(dst, src, 1);
1809   }
1810   void vextractf64x4_high(Address dst, XMMRegister src) {
1811     Assembler::vextractf64x4(dst, src, 1);
1812   }
1813   void vinsertf64x4_high(XMMRegister dst, Address src) {
1814     Assembler::vinsertf64x4(dst, dst, src, 1);
1815   }
1816 
1817   // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
1818   void vinserti128_low(XMMRegister dst, XMMRegister src) {
1819     vinserti128(dst, dst, src, 0);
1820   }
1821   void vinserti128_low(XMMRegister dst, Address src) {
1822     vinserti128(dst, dst, src, 0);
1823   }
1824   void vextracti128_low(XMMRegister dst, XMMRegister src) {
1825     vextracti128(dst, src, 0);
1826   }
1827   void vextracti128_low(Address dst, XMMRegister src) {
1828     vextracti128(dst, src, 0);
1829   }
1830 
1831   void vinsertf128_low(XMMRegister dst, XMMRegister src) {
1832     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1833       Assembler::vinsertf32x4(dst, dst, src, 0);
1834     } else {
1835       Assembler::vinsertf128(dst, dst, src, 0);
1836     }
1837   }
1838 
1839   void vinsertf128_low(XMMRegister dst, Address src) {
1840     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1841       Assembler::vinsertf32x4(dst, dst, src, 0);
1842     } else {
1843       Assembler::vinsertf128(dst, dst, src, 0);
1844     }
1845   }
1846 
1847   void vextractf128_low(XMMRegister dst, XMMRegister src) {
1848     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1849       Assembler::vextractf32x4(dst, src, 0);
1850     } else {
1851       Assembler::vextractf128(dst, src, 0);
1852     }
1853   }
1854 
1855   void vextractf128_low(Address dst, XMMRegister src) {
1856     if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1857       Assembler::vextractf32x4(dst, src, 0);
1858     } else {
1859       Assembler::vextractf128(dst, src, 0);
1860     }
1861   }
1862 
1863   // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
1864   void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
1865     Assembler::vinserti64x4(dst, dst, src, 0);
1866   }
1867   void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
1868     Assembler::vinsertf64x4(dst, dst, src, 0);
1869   }
1870   void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
1871     Assembler::vextracti64x4(dst, src, 0);
1872   }
1873   void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
1874     Assembler::vextractf64x4(dst, src, 0);
1875   }
1876   void vextractf64x4_low(Address dst, XMMRegister src) {
1877     Assembler::vextractf64x4(dst, src, 0);
1878   }
1879   void vinsertf64x4_low(XMMRegister dst, Address src) {
1880     Assembler::vinsertf64x4(dst, dst, src, 0);
1881   }
1882 
1883   // Carry-Less Multiplication Quadword
1884   void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1885     // 0x00 - multiply lower 64 bits [0:63]
1886     Assembler::vpclmulqdq(dst, nds, src, 0x00);
1887   }
1888   void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1889     // 0x11 - multiply upper 64 bits [64:127]
1890     Assembler::vpclmulqdq(dst, nds, src, 0x11);
1891   }
1892   void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1893     // 0x10 - multiply nds[0:63] and src[64:127]
1894     Assembler::vpclmulqdq(dst, nds, src, 0x10);
1895   }
1896   void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1897     //0x01 - multiply nds[64:127] and src[0:63]
1898     Assembler::vpclmulqdq(dst, nds, src, 0x01);
1899   }
1900 
1901   void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1902     // 0x00 - multiply lower 64 bits [0:63]
1903     Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
1904   }
1905   void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1906     // 0x11 - multiply upper 64 bits [64:127]
1907     Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
1908   }
1909 
1910   // AVX-512 mask operations.
1911   void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
1912   void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
1913   void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
1914   void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
1915   void kortest(uint masklen, KRegister src1, KRegister src2);
1916   void ktest(uint masklen, KRegister src1, KRegister src2);
1917 
1918   void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1919   void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1920 
1921   void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1922   void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1923 
1924   void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1925   void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1926 
1927   void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1928   void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1929 
1930   void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
1931   void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
1932   void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
1933   void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
1934 
1935   using Assembler::evpandq;
1936   void evpandq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1937 
1938   using Assembler::evpaddq;
1939   void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
1940 
1941   using Assembler::evporq;
1942   void evporq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1943 
1944   using Assembler::vpshufb;
1945   void vpshufb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1946 
1947   using Assembler::vpor;
1948   void vpor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
1949 
1950   using Assembler::vpternlogq;
1951   void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch = noreg);
1952 
1953   void cmov32( Condition cc, Register dst, Address  src);
1954   void cmov32( Condition cc, Register dst, Register src);
1955 
1956   void cmov(   Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
1957 
1958   void cmovptr(Condition cc, Register dst, Address  src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1959   void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1960 
1961   void movoop(Register dst, jobject obj);
1962   void movoop(Address  dst, jobject obj, Register rscratch);
1963 
1964   void mov_metadata(Register dst, Metadata* obj);
1965   void mov_metadata(Address  dst, Metadata* obj, Register rscratch);
1966 
1967   void movptr(Register     dst, Register       src);
1968   void movptr(Register     dst, Address        src);
1969   void movptr(Register     dst, AddressLiteral src);
1970   void movptr(Register     dst, ArrayAddress   src);
1971   void movptr(Register     dst, intptr_t       src);
1972   void movptr(Address      dst, Register       src);
1973   void movptr(Address      dst, int32_t        imm);
1974   void movptr(Address      dst, intptr_t       src, Register rscratch);
1975   void movptr(ArrayAddress dst, Register       src, Register rscratch);
1976 
1977   void movptr(Register dst, RegisterOrConstant src) {
1978     if (src.is_constant()) movptr(dst, src.as_constant());
1979     else                   movptr(dst, src.as_register());
1980   }
1981 
1982 
1983   // to avoid hiding movl
1984   void mov32(Register       dst, AddressLiteral src);
1985   void mov32(AddressLiteral dst, Register        src, Register rscratch = noreg);
1986 
1987   // Import other mov() methods from the parent class or else
1988   // they will be hidden by the following overriding declaration.
1989   using Assembler::movdl;
1990   void movdl(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1991 
1992   using Assembler::movq;
1993   void movq(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
1994 
1995   // Can push value or effective address
1996   void pushptr(AddressLiteral src, Register rscratch);
1997 
1998   void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
1999   void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
2000 
2001   void pushoop(jobject obj, Register rscratch);
2002   void pushklass(Metadata* obj, Register rscratch);
2003 
2004   // sign extend as need a l to ptr sized element
2005   void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
2006   void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
2007 
2008 
2009  public:
2010   // clear memory of size 'cnt' qwords, starting at 'base';
2011   // if 'is_large' is set, do not try to produce short loop
2012   void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg);
2013 
2014   // clear memory initialization sequence for constant size;
2015   void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
2016 
2017   // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
2018   void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
2019 
2020   // Fill primitive arrays
2021   void generate_fill(BasicType t, bool aligned,
2022                      Register to, Register value, Register count,
2023                      Register rtmp, XMMRegister xtmp);
2024 
2025   void encode_iso_array(Register src, Register dst, Register len,
2026                         XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
2027                         XMMRegister tmp4, Register tmp5, Register result, bool ascii);
2028 
2029 #ifdef _LP64
2030   void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);
2031   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
2032                              Register y, Register y_idx, Register z,
2033                              Register carry, Register product,
2034                              Register idx, Register kdx);
2035   void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
2036                               Register yz_idx, Register idx,
2037                               Register carry, Register product, int offset);
2038   void multiply_128_x_128_bmi2_loop(Register y, Register z,
2039                                     Register carry, Register carry2,
2040                                     Register idx, Register jdx,
2041                                     Register yz_idx1, Register yz_idx2,
2042                                     Register tmp, Register tmp3, Register tmp4);
2043   void multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
2044                                Register yz_idx, Register idx, Register jdx,
2045                                Register carry, Register product,
2046                                Register carry2);
2047   void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register tmp0,
2048                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
2049   void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
2050                      Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
2051   void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
2052                             Register tmp2);
2053   void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
2054                        Register rdxReg, Register raxReg);
2055   void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
2056   void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
2057                        Register tmp3, Register tmp4);
2058   void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
2059                      Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
2060 
2061   void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
2062                Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
2063                Register raxReg);
2064   void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
2065                Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
2066                Register raxReg);
2067   void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
2068                            Register result, Register tmp1, Register tmp2,
2069                            XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
2070 #endif
2071 
2072   // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
2073   void update_byte_crc32(Register crc, Register val, Register table);
2074   void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
2075 
2076 
2077 #ifdef _LP64
2078   void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2);
2079   void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos,
2080                                 Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
2081                                 Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup);
2082 #endif // _LP64
2083 
2084   // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
2085   // Note on a naming convention:
2086   // Prefix w = register only used on a Westmere+ architecture
2087   // Prefix n = register only used on a Nehalem architecture
2088 #ifdef _LP64
2089   void crc32c_ipl_alg4(Register in_out, uint32_t n,
2090                        Register tmp1, Register tmp2, Register tmp3);
2091 #else
2092   void crc32c_ipl_alg4(Register in_out, uint32_t n,
2093                        Register tmp1, Register tmp2, Register tmp3,
2094                        XMMRegister xtmp1, XMMRegister xtmp2);
2095 #endif
2096   void crc32c_pclmulqdq(XMMRegister w_xtmp1,
2097                         Register in_out,
2098                         uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
2099                         XMMRegister w_xtmp2,
2100                         Register tmp1,
2101                         Register n_tmp2, Register n_tmp3);
2102   void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
2103                        XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
2104                        Register tmp1, Register tmp2,
2105                        Register n_tmp3);
2106   void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
2107                          Register in_out1, Register in_out2, Register in_out3,
2108                          Register tmp1, Register tmp2, Register tmp3,
2109                          XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
2110                          Register tmp4, Register tmp5,
2111                          Register n_tmp6);
2112   void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
2113                             Register tmp1, Register tmp2, Register tmp3,
2114                             Register tmp4, Register tmp5, Register tmp6,
2115                             XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
2116                             bool is_pclmulqdq_supported);
2117   // Fold 128-bit data chunk
2118   void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
2119   void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
2120 #ifdef _LP64
2121   // Fold 512-bit data chunk
2122   void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset);
2123 #endif // _LP64
2124   // Fold 8-bit data
2125   void fold_8bit_crc32(Register crc, Register table, Register tmp);
2126   void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);
2127 
2128   // Compress char[] array to byte[].
2129   void char_array_compress(Register src, Register dst, Register len,
2130                            XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
2131                            XMMRegister tmp4, Register tmp5, Register result,
2132                            KRegister mask1 = knoreg, KRegister mask2 = knoreg);
2133 
2134   // Inflate byte[] array to char[].
2135   void byte_array_inflate(Register src, Register dst, Register len,
2136                           XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
2137 
2138   void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
2139                    Register length, Register temp, int vec_enc);
2140 
2141   void fill64_masked(uint shift, Register dst, int disp,
2142                          XMMRegister xmm, KRegister mask, Register length,
2143                          Register temp, bool use64byteVector = false);
2144 
2145   void fill32_masked(uint shift, Register dst, int disp,
2146                          XMMRegister xmm, KRegister mask, Register length,
2147                          Register temp);
2148 
2149   void fill32(Address dst, XMMRegister xmm);
2150 
2151   void fill32(Register dst, int disp, XMMRegister xmm);
2152 
2153   void fill64(Address dst, XMMRegister xmm, bool use64byteVector = false);
2154 
2155   void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false);
2156 
2157 #ifdef _LP64
2158   void convert_f2i(Register dst, XMMRegister src);
2159   void convert_d2i(Register dst, XMMRegister src);
2160   void convert_f2l(Register dst, XMMRegister src);
2161   void convert_d2l(Register dst, XMMRegister src);
2162   void round_double(Register dst, XMMRegister src, Register rtmp, Register rcx);
2163   void round_float(Register dst, XMMRegister src, Register rtmp, Register rcx);
2164 
2165   void cache_wb(Address line);
2166   void cache_wbsync(bool is_pre);
2167 
2168 #ifdef COMPILER2_OR_JVMCI
2169   void generate_fill_avx3(BasicType type, Register to, Register value,
2170                           Register count, Register rtmp, XMMRegister xtmp);
2171 #endif // COMPILER2_OR_JVMCI
2172 #endif // _LP64
2173 
2174   void vallones(XMMRegister dst, int vector_len);
2175 
2176   void check_stack_alignment(Register sp, const char* msg, unsigned bias = 0, Register tmp = noreg);
2177 
2178   void lightweight_lock(Register basic_lock, Register obj, Register reg_rax, Register thread, Register tmp, Label& slow);
2179   void lightweight_unlock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow);
2180 
2181 #ifdef _LP64
2182   void save_legacy_gprs();
2183   void restore_legacy_gprs();
2184   void setcc(Assembler::Condition comparison, Register dst);
2185 #endif
2186 };
2187 
2188 #endif // CPU_X86_MACROASSEMBLER_X86_HPP