src/hotspot/cpu/x86/macroAssembler_x86.cpp

  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jvm.h"
  27 #include "asm/assembler.hpp"
  28 #include "asm/assembler.inline.hpp"
  29 #include "compiler/compiler_globals.hpp"
  30 #include "compiler/disassembler.hpp"

  31 #include "gc/shared/barrierSet.hpp"
  32 #include "gc/shared/barrierSetAssembler.hpp"
  33 #include "gc/shared/collectedHeap.inline.hpp"
  34 #include "gc/shared/tlab_globals.hpp"
  35 #include "interpreter/bytecodeHistogram.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "memory/resourceArea.hpp"
  38 #include "memory/universe.hpp"
  39 #include "oops/accessDecorators.hpp"
  40 #include "oops/compressedOops.inline.hpp"
  41 #include "oops/klass.inline.hpp"
  42 #include "prims/methodHandles.hpp"
  43 #include "runtime/flags/flagSetting.hpp"
  44 #include "runtime/interfaceSupport.inline.hpp"
  45 #include "runtime/jniHandles.hpp"
  46 #include "runtime/objectMonitor.hpp"
  47 #include "runtime/os.hpp"
  48 #include "runtime/safepoint.hpp"
  49 #include "runtime/safepointMechanism.hpp"
  50 #include "runtime/sharedRuntime.hpp"

  51 #include "runtime/stubRoutines.hpp"
  52 #include "runtime/thread.hpp"
  53 #include "utilities/macros.hpp"

  54 #include "crc32c.h"
  55 
  56 #ifdef PRODUCT
  57 #define BLOCK_COMMENT(str) /* nothing */
  58 #define STOP(error) stop(error)
  59 #else
  60 #define BLOCK_COMMENT(str) block_comment(str)
  61 #define STOP(error) block_comment(error); stop(error)
  62 #endif
  63 
  64 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  65 
  66 #ifdef ASSERT
  67 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
  68 #endif
  69 
  70 static Assembler::Condition reverse[] = {
  71     Assembler::noOverflow     /* overflow      = 0x0 */ ,
  72     Assembler::overflow       /* noOverflow    = 0x1 */ ,
  73     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
  74     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

1633 }
1634 
1635 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1636 
1637   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1638   pass_arg1(this, arg_1);
1639   pass_arg0(this, arg_0);
1640   call_VM_leaf(entry_point, 2);
1641 }
1642 
1643 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1644   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
1645   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1646   pass_arg2(this, arg_2);
1647   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1648   pass_arg1(this, arg_1);
1649   pass_arg0(this, arg_0);
1650   call_VM_leaf(entry_point, 3);
1651 }
1652 
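// A minimal usage sketch (illustrative registers; SysV LP64, where c_rarg1 is
// rsi): pass_arg2/1/0 move values into c_rarg2/1/0 in reverse order, so the
// asserts above catch a later move clobbering an earlier argument.
//
//   __ call_VM_leaf(entry_point, rbx, rdx);   // ok: rbx != c_rarg1
//   __ call_VM_leaf(entry_point, rsi, rdx);   // asserts: arg_0 == c_rarg1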
1653 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
1654   pass_arg0(this, arg_0);
1655   MacroAssembler::call_VM_leaf_base(entry_point, 1);
1656 }
1657 
1658 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1659 
1660   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1661   pass_arg1(this, arg_1);
1662   pass_arg0(this, arg_0);
1663   MacroAssembler::call_VM_leaf_base(entry_point, 2);
1664 }
1665 
1666 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1667   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
1668   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1669   pass_arg2(this, arg_2);
1670   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1671   pass_arg1(this, arg_1);
1672   pass_arg0(this, arg_0);

2709     lea(rscratch1, src);
2710     Assembler::mulss(dst, Address(rscratch1, 0));
2711   }
2712 }
2713 
2714 void MacroAssembler::null_check(Register reg, int offset) {
2715   if (needs_explicit_null_check(offset)) {
2716     // provoke OS NULL exception if reg = NULL by
2717     // accessing M[reg] w/o changing any (non-CC) registers
2718     // NOTE: cmpl is plenty here to provoke a segv
2719     cmpptr(rax, Address(reg, 0));
2720     // Note: should probably use testl(rax, Address(reg, 0));
2721     //       may be shorter code (however, this version of
2722     //       testl needs to be implemented first)
2723   } else {
2724     // nothing to do, (later) access of M[reg + offset]
2725     // will provoke OS NULL exception if reg = NULL
2726   }
2727 }
2728 
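// Sketch of the decision above, assuming the usual page-protection heuristic
// behind needs_explicit_null_check() (only offsets outside the protected
// page need an explicit probe):
//
//   if (offset < 0 || offset >= os::vm_page_size())
//     cmpptr(rax, Address(reg, 0));  // probe M[reg]: faults if reg == NULL
//   // else: the later access of M[reg + offset] lands in the protected
//   // page and faults by itself, so nothing is emitted here.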
2729 void MacroAssembler::os_breakpoint() {
2730   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
2731   // (e.g., MSVC can't call ps() otherwise)
2732   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
2733 }
2734 
2735 void MacroAssembler::unimplemented(const char* what) {
2736   const char* buf = NULL;
2737   {
2738     ResourceMark rm;
2739     stringStream ss;
2740     ss.print("unimplemented: %s", what);
2741     buf = code_string(ss.as_string());
2742   }
2743   stop(buf);
2744 }
2745 
2746 #ifdef _LP64
2747 #define XSTATE_BV 0x200
2748 #endif

3535 }
3536 
3537 // C++ bool manipulation
3538 void MacroAssembler::testbool(Register dst) {
3539   if(sizeof(bool) == 1)
3540     testb(dst, 0xff);
3541   else if(sizeof(bool) == 2) {
3542     // testw implementation needed for two byte bools
3543     ShouldNotReachHere();
3544   } else if(sizeof(bool) == 4)
3545     testl(dst, dst);
3546   else
3547     // unsupported
3548     ShouldNotReachHere();
3549 }
3550 
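// Usage sketch (illustrative field offset and label): testbool() only sets
// the flags from a C++ bool already loaded into a register, so a caller
// typically pairs it with a conditional jump:
//
//   movzbl(rax, Address(rbx, flag_offset));  // load the one-byte C++ bool
//   testbool(rax);
//   jcc(Assembler::notZero, L_flag_set);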
3551 void MacroAssembler::testptr(Register dst, Register src) {
3552   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
3553 }
3554 
3555 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
3556 void MacroAssembler::tlab_allocate(Register thread, Register obj,
3557                                    Register var_size_in_bytes,
3558                                    int con_size_in_bytes,
3559                                    Register t1,
3560                                    Register t2,
3561                                    Label& slow_case) {
3562   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3563   bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
3564 }
3565 
3566 // Defines obj, preserves var_size_in_bytes
3567 void MacroAssembler::eden_allocate(Register thread, Register obj,
3568                                    Register var_size_in_bytes,
3569                                    int con_size_in_bytes,
3570                                    Register t1,
3571                                    Label& slow_case) {
3572   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3573   bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
3574 }

3612     // clear topmost word (no jump would be needed if conditional assignment worked here)
3613     movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
3614     // index could be 0 now, must check again
3615     jcc(Assembler::zero, done);
3616     bind(even);
3617   }
3618 #endif // !_LP64
3619   // initialize remaining object fields: index is a multiple of 2 now
3620   {
3621     Label loop;
3622     bind(loop);
3623     movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
3624     NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
3625     decrement(index);
3626     jcc(Assembler::notZero, loop);
3627   }
3628 
3629   bind(done);
3630 }
3631 
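// Equivalent scalar sketch of the loop above (temp holds zero; index counts
// 8-byte strides, with 32-bit clearing two pointer-sized words per stride):
//
//   do {
//     ((intptr_t*)(address + offset_in_bytes))[index - 1] = 0;  // LP64 qword
//     // NOT_LP64: also clears word [index - 2] within the same stride
//   } while (--index != 0);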
3632 // Look up the method for a megamorphic invokeinterface call.
3633 // The target method is determined by <intf_klass, itable_index>.
3634 // The receiver klass is in recv_klass.
3635 // On success, the result will be in method_result, and execution falls through.
3636 // On failure, execution transfers to the given label.
3637 void MacroAssembler::lookup_interface_method(Register recv_klass,
3638                                              Register intf_klass,
3639                                              RegisterOrConstant itable_index,
3640                                              Register method_result,
3641                                              Register scan_temp,
3642                                              Label& L_no_such_interface,
3643                                              bool return_method) {
3644   assert_different_registers(recv_klass, intf_klass, scan_temp);
3645   assert_different_registers(method_result, intf_klass, scan_temp);
3646   assert(recv_klass != method_result || !return_method,
3647          "recv_klass can be destroyed when method isn't needed");
3648 
3649   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3650          "caller must use same register for non-constant itable index as for method");
3651 
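// The itable scan that follows (elided from this hunk) proceeds roughly as:
//
//   for (entry = itable_start; ; entry += itable_offset_entry_size) {
//     if (entry->interface() == intf_klass) break;            // found it
//     if (entry->interface() == NULL) goto L_no_such_interface;
//   }
//   if (return_method)  // the method table lives at entry->offset()
//     method_result = recv_klass[entry->offset() + itable_index * entry_size];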

3960   } else {
3961     Label L;
3962     jccb(negate_condition(cc), L);
3963     movl(dst, src);
3964     bind(L);
3965   }
3966 }
3967 
3968 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
3969   if (VM_Version::supports_cmov()) {
3970     cmovl(cc, dst, src);
3971   } else {
3972     Label L;
3973     jccb(negate_condition(cc), L);
3974     movl(dst, src);
3975     bind(L);
3976   }
3977 }
3978 
3979 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
3980   if (!VerifyOops) return;
3981 
3982   // Pass register number to verify_oop_subroutine
3983   const char* b = NULL;
3984   {
3985     ResourceMark rm;
3986     stringStream ss;
3987     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
3988     b = code_string(ss.as_string());
3989   }
3990   BLOCK_COMMENT("verify_oop {");
3991 #ifdef _LP64
3992   push(rscratch1);                    // save r10, trashed by movptr()
3993 #endif
3994   push(rax);                          // save rax,
3995   push(reg);                          // pass register argument
3996   ExternalAddress buffer((address) b);
3997   // avoid using pushptr, as it modifies scratch registers
3998   // and our contract is not to modify anything
3999   movptr(rax, buffer.addr());
4000   push(rax);

4019   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
4020   int stackElementSize = Interpreter::stackElementSize;
4021   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
4022 #ifdef ASSERT
4023   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
4024   assert(offset1 - offset == stackElementSize, "correct arithmetic");
4025 #endif
4026   Register             scale_reg    = noreg;
4027   Address::ScaleFactor scale_factor = Address::no_scale;
4028   if (arg_slot.is_constant()) {
4029     offset += arg_slot.as_constant() * stackElementSize;
4030   } else {
4031     scale_reg    = arg_slot.as_register();
4032     scale_factor = Address::times(stackElementSize);
4033   }
4034   offset += wordSize;           // return PC is on stack
4035   return Address(rsp, scale_reg, scale_factor, offset);
4036 }
4037 
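// Worked example (sketch): for a constant arg_slot of 2 with
// extra_slot_offset == 0, the returned operand is
//   Address(rsp, 2 * Interpreter::stackElementSize + wordSize)
// i.e. it indexes past the return PC that the call pushed on the stack.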
4038 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
4039   if (!VerifyOops) return;

4040 
4041   // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
4042   // Pass register number to verify_oop_subroutine
4043   const char* b = NULL;
4044   {
4045     ResourceMark rm;
4046     stringStream ss;
4047     ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
4048     b = code_string(ss.as_string());
4049   }
4050 #ifdef _LP64
4051   push(rscratch1);                    // save r10, trashed by movptr()
4052 #endif
4053   push(rax);                          // save rax,
4054   // addr may contain rsp so we will have to adjust it based on the push
4055   // we just did (and on 64 bit we do two pushes)
4056   // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
4057   // stores rax into addr which is backwards of what was intended.
4058   if (addr.uses(rsp)) {
4059     lea(rax, addr);

4521 
4522 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
4523   // get mirror
4524   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
4525   load_method_holder(mirror, method);
4526   movptr(mirror, Address(mirror, mirror_offset));
4527   resolve_oop_handle(mirror, tmp);
4528 }
4529 
4530 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4531   load_method_holder(rresult, rmethod);
4532   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4533 }
4534 
4535 void MacroAssembler::load_method_holder(Register holder, Register method) {
4536   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
4537   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
4538   movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
4539 }
4540 
4541 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
4542   assert_different_registers(src, tmp);
4543   assert_different_registers(dst, tmp);
4544 #ifdef _LP64
4545   if (UseCompressedClassPointers) {
4546     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4547     decode_klass_not_null(dst, tmp);
4548   } else
4549 #endif
4550     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4551 }
4552 
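// Sketch of the compressed path above: the narrow class pointer is a 32-bit
// value that decode_klass_not_null() widens; roughly (base and shift depend
// on the compressed-class-space layout chosen at startup):
//
//   uint32_t nk = *(uint32_t*)(src + oopDesc::klass_offset_in_bytes());
//   Klass*   k  = (Klass*)(encoding_base + ((uintptr_t)nk << encoding_shift));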
4553 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
4554   assert_different_registers(src, tmp);
4555   assert_different_registers(dst, tmp);
4556 #ifdef _LP64
4557   if (UseCompressedClassPointers) {
4558     encode_klass_not_null(src, tmp);
4559     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
4560   } else
4561 #endif
4562     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
4563 }
4564 
4565 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
4566                                     Register tmp1, Register thread_tmp) {
4567   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4568   decorators = AccessInternal::decorator_fixup(decorators);
4569   bool as_raw = (decorators & AS_RAW) != 0;
4570   if (as_raw) {
4571     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
4572   } else {
4573     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
4574   }
4575 }
4576 
4577 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
4578                                      Register tmp1, Register tmp2) {
4579   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4580   decorators = AccessInternal::decorator_fixup(decorators);
4581   bool as_raw = (decorators & AS_RAW) != 0;
4582   if (as_raw) {
4583     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2);
4584   } else {
4585     bs->store_at(this, decorators, type, dst, src, tmp1, tmp2);
4586   }
4587 }
4588 
4589 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
4590                                    Register thread_tmp, DecoratorSet decorators) {
4591   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
4592 }
4593 
4594 // Doesn't do verification, generates fixed-size code
4595 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
4596                                             Register thread_tmp, DecoratorSet decorators) {
4597   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
4598 }
4599 
4600 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
4601                                     Register tmp2, DecoratorSet decorators) {
4602   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
4603 }
4604 
4605 // Used for storing NULLs.
4606 void MacroAssembler::store_heap_oop_null(Address dst) {
4607   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
4608 }
4609 
4610 #ifdef _LP64
4611 void MacroAssembler::store_klass_gap(Register dst, Register src) {
4612   if (UseCompressedClassPointers) {
4613     // Store to klass gap in destination
4614     movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
4615   }
4616 }
4617 
4618 #ifdef ASSERT
4619 void MacroAssembler::verify_heapbase(const char* msg) {
4620   assert (UseCompressedOops, "should be compressed");
4621   assert (Universe::heap() != NULL, "java heap should be initialized");
4622   if (CheckCompressedOops) {
4623     Label ok;
4624     push(rscratch1); // cmpptr trashes rscratch1
4625     cmpptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()));
4626     jcc(Assembler::equal, ok);
4627     STOP(msg);

4901   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4902   Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4903 }
4904 
4905 void MacroAssembler::reinit_heapbase() {
4906   if (UseCompressedOops) {
4907     if (Universe::heap() != NULL) {
4908       if (CompressedOops::base() == NULL) {
4909         MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
4910       } else {
4911         mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
4912       }
4913     } else {
4914       movptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()));
4915     }
4916   }
4917 }
4918 
4919 #endif // _LP64
4920 

4921 // C2 compiled method's prolog code.
4922 void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
4923 
4924   // WARNING: Initial instruction MUST be 5 bytes or longer so that
4925   // NativeJump::patch_verified_entry will be able to patch out the entry
4926   // code safely. The push to verify stack depth is ok at 5 bytes,
4927   // the frame allocation can be either 3 or 6 bytes. So if we don't do
4928   // stack bang then we must use the 6 byte frame allocation even if
4929   // we have no frame. :-(
4930   assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
4931 
4932   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
4933   // Remove word for return addr
4934   framesize -= wordSize;
4935   stack_bang_size -= wordSize;
4936 
4937   // Calls to C2R adapters often do not accept exceptional returns.
4938   // We require that their callers must bang for them.  But be careful, because
4939   // some VM calls (such as call site linkage) can use several kilobytes of
4940   // stack.  But the stack safety zone should account for that.
4941   // See bugs 4446381, 4468289, 4497237.
4942   if (stack_bang_size > 0) {

4955     // Create frame
4956     if (framesize) {
4957       subptr(rsp, framesize);
4958     }
4959   } else {
4960     // Create frame (force generation of a 4 byte immediate value)
4961     subptr_imm32(rsp, framesize);
4962 
4963     // Save RBP register now.
4964     framesize -= wordSize;
4965     movptr(Address(rsp, framesize), rbp);
4966     // Save caller's stack pointer into RBP if the frame pointer is preserved.
4967     if (PreserveFramePointer) {
4968       movptr(rbp, rsp);
4969       if (framesize > 0) {
4970         addptr(rbp, framesize);
4971       }
4972     }
4973   }
4974 
4975   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
4976     framesize -= wordSize;
4977     movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4978   }
4979 
4980 #ifndef _LP64
4981   // If method sets FPU control word do it now
4982   if (fp_mode_24b) {
4983     fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
4984   }
4985   if (UseSSE >= 2 && VerifyFPU) {
4986     verify_FPU(0, "FPU stack must be clean on entry");
4987   }
4988 #endif
4989 
4990 #ifdef ASSERT
4991   if (VerifyStackAtCalls) {
4992     Label L;
4993     push(rax);
4994     mov(rax, rsp);
4995     andptr(rax, StackAlignmentInBytes-1);
4996     cmpptr(rax, StackAlignmentInBytes-wordSize);
4997     pop(rax);
4998     jcc(Assembler::equal, L);
4999     STOP("Stack is not properly aligned!");
5000     bind(L);
5001   }
5002 #endif
5003 
5004   if (!is_stub) {
5005     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
5006     bs->nmethod_entry_barrier(this);
5007   }
5008 }
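// Emitted shape (sketch): with a stack bang, the bang itself is the first,
// >= 5 byte instruction; without one, subptr_imm32() forces a 6-byte frame
// allocation so NativeJump::patch_verified_entry() can still overwrite the
// entry safely:
//
//   sub    rsp, imm32               ; allocate frame (forced imm32 encoding)
//   mov    [rsp + framesize-8], rbp
//   mov    rbp, rsp ; add rbp, framesize   ; only if PreserveFramePointer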

5009 
5010 #if COMPILER2_OR_JVMCI
5011 
5012 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
5013 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5014   // cnt - number of qwords (8-byte words).
5015   // base - start address, qword aligned.
5016   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
5017   bool use64byteVector = MaxVectorSize == 64 && AVX3Threshold == 0;
5018   if (use64byteVector) {
5019     vpxor(xtmp, xtmp, xtmp, AVX_512bit);
5020   } else if (MaxVectorSize >= 32) {
5021     vpxor(xtmp, xtmp, xtmp, AVX_256bit);
5022   } else {
5023     pxor(xtmp, xtmp);
5024   }
5025   jmp(L_zero_64_bytes);
5026 
5027   BIND(L_loop);
5028   if (MaxVectorSize >= 32) {
5029     fill64_avx(base, 0, xtmp, use64byteVector);
5030   } else {
5031     movdqu(Address(base,  0), xtmp);
5032     movdqu(Address(base, 16), xtmp);
5033     movdqu(Address(base, 32), xtmp);
5034     movdqu(Address(base, 48), xtmp);
5035   }
5036   addptr(base, 64);
5037 
5038   BIND(L_zero_64_bytes);
5039   subptr(cnt, 8);
5040   jccb(Assembler::greaterEqual, L_loop);
5041 
5042   // Copy trailing 64 bytes
5043   if (use64byteVector) {
5044     addptr(cnt, 8);
5045     jccb(Assembler::equal, L_end);
5046     fill64_masked_avx(3, base, 0, xtmp, mask, cnt, rtmp, true);
5047     jmp(L_end);
5048   } else {
5049     addptr(cnt, 4);
5050     jccb(Assembler::less, L_tail);
5051     if (MaxVectorSize >= 32) {
5052       vmovdqu(Address(base, 0), xtmp);
5053     } else {
5054       movdqu(Address(base,  0), xtmp);
5055       movdqu(Address(base, 16), xtmp);
5056     }
5057   }
5058   addptr(base, 32);
5059   subptr(cnt, 4);
5060 
5061   BIND(L_tail);
5062   addptr(cnt, 4);
5063   jccb(Assembler::lessEqual, L_end);
5064   if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
5065     fill32_masked_avx(3, base, 0, xtmp, mask, cnt, rtmp);
5066   } else {
5067     decrement(cnt);
5068 
5069     BIND(L_sloop);
5070     movq(Address(base, 0), xtmp);
5071     addptr(base, 8);
5072     decrement(cnt);
5073     jccb(Assembler::greaterEqual, L_sloop);
5074   }
5075   BIND(L_end);
5076 }
5077 
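// Equivalent scalar sketch of the control flow above (cnt counts qwords):
//
//   while (cnt >= 8) { store 64 zero bytes at base; base += 64; cnt -= 8; }
//   if (cnt >= 4)    { store 32 zero bytes at base; base += 32; cnt -= 4; }
//   while (cnt >  0) { *(uint64_t*)base = 0; base += 8; --cnt; }
//
// On the AVX-512 path the two tails collapse into single masked stores.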
5078 // Clearing constant sized memory using YMM/ZMM registers.
5079 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5080   assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
5081   bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
5082 
5083   int vector64_count = (cnt & (~0x7)) >> 3;
5084   cnt = cnt & 0x7;
5085 
5086   // 64 byte initialization loop.
5087   vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
5088   for (int i = 0; i < vector64_count; i++) {
5089     fill64_avx(base, i * 64, xtmp, use64byteVector);
5090   }
5091 
5092   // Clear remaining 64 byte tail.
5093   int disp = vector64_count * 64;
5094   if (cnt) {
5095     switch (cnt) {
5096       case 1:
5097         movq(Address(base, disp), xtmp);

5129         break;
5130       case 7:
5131         if (use64byteVector) {
5132           movl(rtmp, 0x7F);
5133           kmovwl(mask, rtmp);
5134           evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
5135         } else {
5136           evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5137           movl(rtmp, 0x7);
5138           kmovwl(mask, rtmp);
5139           evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit);
5140         }
5141         break;
5142       default:
5143         fatal("Unexpected length : %d\n",cnt);
5144         break;
5145     }
5146   }
5147 }
5148 
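// Mask sketch for the 7-qword tail above: 0x7F in a k-register enables seven
// of the eight qword lanes of one masked 64-byte store, while the AVX2
// fallback stores a full 32 bytes and then masks the last three qwords
// with 0x7.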
5149 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
5150                                bool is_large, KRegister mask) {
5151   // cnt      - number of qwords (8-byte words).
5152   // base     - start address, qword aligned.
5153   // is_large - if optimizers know cnt is larger than InitArrayShortSize
5154   assert(base==rdi, "base register must be edi for rep stos");
5155   assert(tmp==rax,   "tmp register must be eax for rep stos");
5156   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
5157   assert(InitArrayShortSize % BytesPerLong == 0,
5158     "InitArrayShortSize should be a multiple of BytesPerLong");
5159 
5160   Label DONE;
5161   if (!is_large || !UseXMMForObjInit) {
5162     xorptr(tmp, tmp);
5163   }
5164 
5165   if (!is_large) {
5166     Label LOOP, LONG;
5167     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
5168     jccb(Assembler::greater, LONG);
5169 
5170     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
5171 
5172     decrement(cnt);
5173     jccb(Assembler::negative, DONE); // Zero length
5174 
5175     // Use individual pointer-sized stores for small counts:
5176     BIND(LOOP);
5177     movptr(Address(base, cnt, Address::times_ptr), tmp);
5178     decrement(cnt);
5179     jccb(Assembler::greaterEqual, LOOP);
5180     jmpb(DONE);
5181 
5182     BIND(LONG);
5183   }
5184 
5185   // Use longer rep-prefixed ops for non-small counts:
5186   if (UseFastStosb) {
5187     shlptr(cnt, 3); // convert to number of bytes
5188     rep_stosb();
5189   } else if (UseXMMForObjInit) {
5190     xmm_clear_mem(base, cnt, tmp, xtmp, mask);
5191   } else {
5192     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
5193     rep_stos();
5194   }
5195 
5196   BIND(DONE);
5197 }
5198 
5199 #endif //COMPILER2_OR_JVMCI
5200 
5201 
5202 void MacroAssembler::generate_fill(BasicType t, bool aligned,
5203                                    Register to, Register value, Register count,
5204                                    Register rtmp, XMMRegister xtmp) {
5205   ShortBranchVerifier sbv(this);
5206   assert_different_registers(to, value, count, rtmp);
5207   Label L_exit;
5208   Label L_fill_2_bytes, L_fill_4_bytes;
5209 
5210   int shift = -1;

  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jvm.h"
  27 #include "asm/assembler.hpp"
  28 #include "asm/assembler.inline.hpp"
  29 #include "compiler/compiler_globals.hpp"
  30 #include "compiler/disassembler.hpp"
  31 #include "ci/ciInlineKlass.hpp"
  32 #include "gc/shared/barrierSet.hpp"
  33 #include "gc/shared/barrierSetAssembler.hpp"
  34 #include "gc/shared/collectedHeap.inline.hpp"
  35 #include "gc/shared/tlab_globals.hpp"
  36 #include "interpreter/bytecodeHistogram.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "memory/universe.hpp"
  40 #include "oops/accessDecorators.hpp"
  41 #include "oops/compressedOops.inline.hpp"
  42 #include "oops/klass.inline.hpp"
  43 #include "prims/methodHandles.hpp"
  44 #include "runtime/flags/flagSetting.hpp"
  45 #include "runtime/interfaceSupport.inline.hpp"
  46 #include "runtime/jniHandles.hpp"
  47 #include "runtime/objectMonitor.hpp"
  48 #include "runtime/os.hpp"
  49 #include "runtime/safepoint.hpp"
  50 #include "runtime/safepointMechanism.hpp"
  51 #include "runtime/sharedRuntime.hpp"
  52 #include "runtime/signature_cc.hpp"
  53 #include "runtime/stubRoutines.hpp"
  54 #include "runtime/thread.hpp"
  55 #include "utilities/macros.hpp"
  56 #include "vmreg_x86.inline.hpp"
  57 #include "crc32c.h"
  58 #ifdef COMPILER2
  59 #include "opto/output.hpp"
  60 #endif
  61 
  62 #ifdef PRODUCT
  63 #define BLOCK_COMMENT(str) /* nothing */
  64 #define STOP(error) stop(error)
  65 #else
  66 #define BLOCK_COMMENT(str) block_comment(str)
  67 #define STOP(error) block_comment(error); stop(error)
  68 #endif
  69 
  70 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  71 
  72 #ifdef ASSERT
  73 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
  74 #endif
  75 
  76 static Assembler::Condition reverse[] = {
  77     Assembler::noOverflow     /* overflow      = 0x0 */ ,
  78     Assembler::overflow       /* noOverflow    = 0x1 */ ,
  79     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
  80     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

1639 }
1640 
1641 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1642 
1643   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1644   pass_arg1(this, arg_1);
1645   pass_arg0(this, arg_0);
1646   call_VM_leaf(entry_point, 2);
1647 }
1648 
1649 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1650   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
1651   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1652   pass_arg2(this, arg_2);
1653   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1654   pass_arg1(this, arg_1);
1655   pass_arg0(this, arg_0);
1656   call_VM_leaf(entry_point, 3);
1657 }
1658 
1659 void MacroAssembler::super_call_VM_leaf(address entry_point) {
1660   MacroAssembler::call_VM_leaf_base(entry_point, 1);
1661 }
1662 
1663 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
1664   pass_arg0(this, arg_0);
1665   MacroAssembler::call_VM_leaf_base(entry_point, 1);
1666 }
1667 
1668 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1669 
1670   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1671   pass_arg1(this, arg_1);
1672   pass_arg0(this, arg_0);
1673   MacroAssembler::call_VM_leaf_base(entry_point, 2);
1674 }
1675 
1676 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1677   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
1678   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1679   pass_arg2(this, arg_2);
1680   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1681   pass_arg1(this, arg_1);
1682   pass_arg0(this, arg_0);

2719     lea(rscratch1, src);
2720     Assembler::mulss(dst, Address(rscratch1, 0));
2721   }
2722 }
2723 
2724 void MacroAssembler::null_check(Register reg, int offset) {
2725   if (needs_explicit_null_check(offset)) {
2726     // provoke OS NULL exception if reg = NULL by
2727     // accessing M[reg] w/o changing any (non-CC) registers
2728     // NOTE: cmpl is plenty here to provoke a segv
2729     cmpptr(rax, Address(reg, 0));
2730     // Note: should probably use testl(rax, Address(reg, 0));
2731     //       may be shorter code (however, this version of
2732     //       testl needs to be implemented first)
2733   } else {
2734     // nothing to do, (later) access of M[reg + offset]
2735     // will provoke OS NULL exception if reg = NULL
2736   }
2737 }
2738 
2739 void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
2740   andptr(markword, markWord::inline_type_mask_in_place);
2741   cmpptr(markword, markWord::inline_type_pattern);
2742   jcc(Assembler::equal, is_inline_type);
2743 }
2744 
2745 void MacroAssembler::test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type) {
2746   movl(temp_reg, Address(klass, Klass::access_flags_offset()));
2747   testl(temp_reg, JVM_ACC_INLINE);
2748   jcc(Assembler::notZero, is_inline_type);
2749 }
2750 
2751 void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type) {
2752   testptr(object, object);
2753   jcc(Assembler::equal, not_inline_type);
2754   const int is_inline_type_mask = markWord::inline_type_pattern;
2755   movptr(tmp, Address(object, oopDesc::mark_offset_in_bytes()));
2756   andptr(tmp, is_inline_type_mask);
2757   cmpptr(tmp, is_inline_type_mask);
2758   jcc(Assembler::notEqual, not_inline_type);
2759 }
2760 
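// Sketch of the mark-word tests above: an inline type's mark word carries a
// dedicated bit pattern, roughly
//
//   if ((mark & markWord::inline_type_mask_in_place)
//        == markWord::inline_type_pattern)
//     /* object is an inline type */;
//
// test_oop_is_not_inline_type() additionally treats NULL as "not inline".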
2761 void MacroAssembler::test_klass_is_empty_inline_type(Register klass, Register temp_reg, Label& is_empty_inline_type) {
2762 #ifdef ASSERT
2763   {
2764     Label done_check;
2765     test_klass_is_inline_type(klass, temp_reg, done_check);
2766     stop("test_klass_is_empty_inline_type with non inline type klass");
2767     bind(done_check);
2768   }
2769 #endif
2770   movl(temp_reg, Address(klass, InstanceKlass::misc_flags_offset()));
2771   testl(temp_reg, InstanceKlass::misc_flags_is_empty_inline_type());
2772   jcc(Assembler::notZero, is_empty_inline_type);
2773 }
2774 
2775 void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
2776   movl(temp_reg, flags);
2777   shrl(temp_reg, ConstantPoolCacheEntry::is_null_free_inline_type_shift);
2778   andl(temp_reg, 0x1);
2779   testl(temp_reg, temp_reg);
2780   jcc(Assembler::notZero, is_null_free_inline_type);
2781 }
2782 
2783 void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
2784   movl(temp_reg, flags);
2785   shrl(temp_reg, ConstantPoolCacheEntry::is_null_free_inline_type_shift);
2786   andl(temp_reg, 0x1);
2787   testl(temp_reg, temp_reg);
2788   jcc(Assembler::zero, not_null_free_inline_type);
2789 }
2790 
2791 void MacroAssembler::test_field_is_inlined(Register flags, Register temp_reg, Label& is_inlined) {
2792   movl(temp_reg, flags);
2793   shrl(temp_reg, ConstantPoolCacheEntry::is_inlined_shift);
2794   andl(temp_reg, 0x1);
2795   testl(temp_reg, temp_reg);
2796   jcc(Assembler::notZero, is_inlined);
2797 }
2798 
2799 void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
2800   Label test_mark_word;
2801   // load mark word
2802   movptr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes()));
2803   // check whether the mark word is unlocked (a locked mark word may be displaced)
2804   testl(temp_reg, markWord::unlocked_value);
2805   jccb(Assembler::notZero, test_mark_word);
2806   // slow path: reload the bits from the klass prototype header
2807   push(rscratch1);
2808   load_prototype_header(temp_reg, oop, rscratch1);
2809   pop(rscratch1);
2810 
2811   bind(test_mark_word);
2812   testl(temp_reg, test_bit);
2813   jcc((jmp_set) ? Assembler::notZero : Assembler::zero, jmp_label);
2814 }
2815 
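// Sketch of the two paths above: an unlocked mark word already holds the
// prototype bits; a locked one may be displaced, so the bits are reloaded
// from the klass prototype header instead:
//
//   bits = obj->mark();
//   if ((bits & markWord::unlocked_value) == 0)
//     bits = obj->klass()->prototype_header();
//   // then test bits & test_bit and jump according to jmp_set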
2816 void MacroAssembler::test_flattened_array_oop(Register oop, Register temp_reg,
2817                                               Label&is_flattened_array) {
2818 #ifdef _LP64
2819   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flattened_array);
2820 #else
2821   load_klass(temp_reg, oop, noreg);
2822   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
2823   test_flattened_array_layout(temp_reg, is_flattened_array);
2824 #endif
2825 }
2826 
2827 void MacroAssembler::test_non_flattened_array_oop(Register oop, Register temp_reg,
2828                                                   Label&is_non_flattened_array) {
2829 #ifdef _LP64
2830   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flattened_array);
2831 #else
2832   load_klass(temp_reg, oop, noreg);
2833   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
2834   test_non_flattened_array_layout(temp_reg, is_non_flattened_array);
2835 #endif
2836 }
2837 
2838 void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label&is_null_free_array) {
2839 #ifdef _LP64
2840   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array);
2841 #else
2842   load_klass(temp_reg, oop, noreg);
2843   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
2844   test_null_free_array_layout(temp_reg, is_null_free_array);
2845 #endif
2846 }
2847 
2848 void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array) {
2849 #ifdef _LP64
2850   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array);
2851 #else
2852   load_klass(temp_reg, oop, noreg);
2853   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
2854   test_non_null_free_array_layout(temp_reg, is_non_null_free_array);
2855 #endif
2856 }
2857 
2858 void MacroAssembler::test_flattened_array_layout(Register lh, Label& is_flattened_array) {
2859   testl(lh, Klass::_lh_array_tag_vt_value_bit_inplace);
2860   jcc(Assembler::notZero, is_flattened_array);
2861 }
2862 
2863 void MacroAssembler::test_non_flattened_array_layout(Register lh, Label& is_non_flattened_array) {
2864   testl(lh, Klass::_lh_array_tag_vt_value_bit_inplace);
2865   jcc(Assembler::zero, is_non_flattened_array);
2866 }
2867 
2868 void MacroAssembler::test_null_free_array_layout(Register lh, Label& is_null_free_array) {
2869   testl(lh, Klass::_lh_null_free_bit_inplace);
2870   jcc(Assembler::notZero, is_null_free_array);
2871 }
2872 
2873 void MacroAssembler::test_non_null_free_array_layout(Register lh, Label& is_non_null_free_array) {
2874   testl(lh, Klass::_lh_null_free_bit_inplace);
2875   jcc(Assembler::zero, is_non_null_free_array);
2876 }
2877 
2878 
2879 void MacroAssembler::os_breakpoint() {
2880   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
2881   // (e.g., MSVC can't call ps() otherwise)
2882   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
2883 }
2884 
2885 void MacroAssembler::unimplemented(const char* what) {
2886   const char* buf = NULL;
2887   {
2888     ResourceMark rm;
2889     stringStream ss;
2890     ss.print("unimplemented: %s", what);
2891     buf = code_string(ss.as_string());
2892   }
2893   stop(buf);
2894 }
2895 
2896 #ifdef _LP64
2897 #define XSTATE_BV 0x200
2898 #endif

3685 }
3686 
3687 // C++ bool manipulation
3688 void MacroAssembler::testbool(Register dst) {
3689   if(sizeof(bool) == 1)
3690     testb(dst, 0xff);
3691   else if(sizeof(bool) == 2) {
3692     // testw implementation needed for two byte bools
3693     ShouldNotReachHere();
3694   } else if(sizeof(bool) == 4)
3695     testl(dst, dst);
3696   else
3697     // unsupported
3698     ShouldNotReachHere();
3699 }
3700 
3701 void MacroAssembler::testptr(Register dst, Register src) {
3702   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
3703 }
3704 
3705 // Object / value buffer allocation...
3706 //
3707 // Kills klass and rsi on LP64
3708 void MacroAssembler::allocate_instance(Register klass, Register new_obj,
3709                                        Register t1, Register t2,
3710                                        bool clear_fields, Label& alloc_failed)
3711 {
3712   Label done, initialize_header, initialize_object, slow_case, slow_case_no_pop;
3713   Register layout_size = t1;
3714   assert(new_obj == rax, "needs to be rax, according to barrier asm eden_allocate");
3715   assert_different_registers(klass, new_obj, t1, t2);
3716 
3717   // get instance_size in InstanceKlass (scaled to a count of bytes)
3718   movl(layout_size, Address(klass, Klass::layout_helper_offset()));
3719   // test to see if it has a finalizer or is malformed in some way
3720   testl(layout_size, Klass::_lh_instance_slow_path_bit);
3721   jcc(Assembler::notZero, slow_case_no_pop);
3722 
3723   // Allocate the instance:
3724   //  If TLAB is enabled:
3725   //    Try to allocate in the TLAB.
3726   //    If fails, go to the slow path.
3727   //  Else If inline contiguous allocations are enabled:
3728   //    Try to allocate in eden.
3729   //    If fails due to heap end, go to slow path.
3730   //
3731   //  If TLAB is enabled OR inline contiguous is enabled:
3732   //    Initialize the allocation.
3733   //    Exit.
3734   //
3735   //  Go to slow path.
3736   const bool allow_shared_alloc =
3737     Universe::heap()->supports_inline_contig_alloc();
3738 
3739   push(klass);
3740   const Register thread = LP64_ONLY(r15_thread) NOT_LP64(klass);
3741 #ifndef _LP64
3742   if (UseTLAB || allow_shared_alloc) {
3743     get_thread(thread);
3744   }
3745 #endif // _LP64
3746 
3747   if (UseTLAB) {
3748     tlab_allocate(thread, new_obj, layout_size, 0, klass, t2, slow_case);
3749     if (ZeroTLAB || (!clear_fields)) {
3750       // the fields have been already cleared
3751       jmp(initialize_header);
3752     } else {
3753       // initialize both the header and fields
3754       jmp(initialize_object);
3755     }
3756   } else {
3757     // Allocation in the shared Eden, if allowed.
3758     //
3759     eden_allocate(thread, new_obj, layout_size, 0, t2, slow_case);
3760   }
3761 
3762   // If UseTLAB or allow_shared_alloc is true, the object was allocated above
3763   // and still needs to be initialized. Otherwise, skip to the slow path.
3764   if (UseTLAB || allow_shared_alloc) {
3765     if (clear_fields) {
3766       // The object is initialized before the header.  If the object size is
3767       // zero, go directly to the header initialization.
3768       bind(initialize_object);
3769       decrement(layout_size, sizeof(oopDesc));
3770       jcc(Assembler::zero, initialize_header);
3771 
3772       // Initialize topmost object field, divide size by 8, check if odd and
3773       // test if zero.
3774       Register zero = klass;
3775       xorl(zero, zero);    // use zero reg to clear memory (shorter code)
3776       shrl(layout_size, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
3777 
3778   #ifdef ASSERT
3779       // make sure instance_size was multiple of 8
3780       Label L;
3781       // Ignore partial flag stall after shrl() since it is debug VM
3782       jcc(Assembler::carryClear, L);
3783       stop("object size is not multiple of 2 - adjust this code");
3784       bind(L);
3785       // must be > 0, no extra check needed here
3786   #endif
3787 
3788       // initialize remaining object fields: instance_size was a multiple of 8
3789       {
3790         Label loop;
3791         bind(loop);
3792         movptr(Address(new_obj, layout_size, Address::times_8, sizeof(oopDesc) - 1*oopSize), zero);
3793         NOT_LP64(movptr(Address(new_obj, layout_size, Address::times_8, sizeof(oopDesc) - 2*oopSize), zero));
3794         decrement(layout_size);
3795         jcc(Assembler::notZero, loop);
3796       }
3797     } // clear_fields
3798 
3799     // initialize object header only.
3800     bind(initialize_header);
3801     pop(klass);
3802     Register mark_word = t2;
3803     movptr(mark_word, Address(klass, Klass::prototype_header_offset()));
3804     movptr(Address(new_obj, oopDesc::mark_offset_in_bytes ()), mark_word);
3805 #ifdef _LP64
3806     xorl(rsi, rsi);                 // use zero reg to clear memory (shorter code)
3807     store_klass_gap(new_obj, rsi);  // zero klass gap for compressed oops
3808 #endif
3809     movptr(t2, klass);         // preserve klass
3810     Register tmp_store_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
3811     store_klass(new_obj, t2, tmp_store_klass);  // src klass reg is potentially compressed
3812 
3813     jmp(done);
3814   }
3815 
3816   bind(slow_case);
3817   pop(klass);
3818   bind(slow_case_no_pop);
3819   jmp(alloc_failed);
3820 
3821   bind(done);
3822 }
3823 
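// Flow sketch of allocate_instance() above:
//
//   size = klass->layout_helper();                  // slow-path bit => runtime
//   obj  = UseTLAB ? tlab_allocate(size) : eden_allocate(size);
//   if (clear_fields && !ZeroTLAB) zero the body;
//   obj->set_mark(klass->prototype_header());
//   obj->set_klass(klass);                          // plus klass gap on LP64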
3824 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
3825 void MacroAssembler::tlab_allocate(Register thread, Register obj,
3826                                    Register var_size_in_bytes,
3827                                    int con_size_in_bytes,
3828                                    Register t1,
3829                                    Register t2,
3830                                    Label& slow_case) {
3831   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3832   bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
3833 }
3834 
3835 // Defines obj, preserves var_size_in_bytes
3836 void MacroAssembler::eden_allocate(Register thread, Register obj,
3837                                    Register var_size_in_bytes,
3838                                    int con_size_in_bytes,
3839                                    Register t1,
3840                                    Label& slow_case) {
3841   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3842   bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
3843 }

3881     // clear topmost word (no jump would be needed if conditional assignment worked here)
3882     movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
3883     // index could be 0 now, must check again
3884     jcc(Assembler::zero, done);
3885     bind(even);
3886   }
3887 #endif // !_LP64
3888   // initialize remaining object fields: index is a multiple of 2 now
3889   {
3890     Label loop;
3891     bind(loop);
3892     movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
3893     NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
3894     decrement(index);
3895     jcc(Assembler::notZero, loop);
3896   }
3897 
3898   bind(done);
3899 }
3900 
3901 void MacroAssembler::get_inline_type_field_klass(Register klass, Register index, Register inline_klass) {
3902   movptr(inline_klass, Address(klass, InstanceKlass::inline_type_field_klasses_offset()));
3903 #ifdef ASSERT
3904   {
3905     Label done;
3906     cmpptr(inline_klass, 0);
3907     jcc(Assembler::notEqual, done);
3908     stop("get_inline_type_field_klass contains no inline klass");
3909     bind(done);
3910   }
3911 #endif
3912   movptr(inline_klass, Address(inline_klass, index, Address::times_ptr));
3913 }
3914 
3915 void MacroAssembler::get_default_value_oop(Register inline_klass, Register temp_reg, Register obj) {
3916 #ifdef ASSERT
3917   {
3918     Label done_check;
3919     test_klass_is_inline_type(inline_klass, temp_reg, done_check);
3920     stop("get_default_value_oop from non inline type klass");
3921     bind(done_check);
3922   }
3923 #endif
3924   Register offset = temp_reg;
3925   // Getting the offset of the pre-allocated default value
3926   movptr(offset, Address(inline_klass, in_bytes(InstanceKlass::adr_inlineklass_fixed_block_offset())));
3927   movl(offset, Address(offset, in_bytes(InlineKlass::default_value_offset_offset())));
3928 
3929   // Getting the mirror
3930   movptr(obj, Address(inline_klass, in_bytes(Klass::java_mirror_offset())));
3931   resolve_oop_handle(obj, inline_klass);
3932 
3933   // Getting the pre-allocated default value from the mirror
3934   Address field(obj, offset, Address::times_1);
3935   load_heap_oop(obj, field);
3936 }
3937 
3938 void MacroAssembler::get_empty_inline_type_oop(Register inline_klass, Register temp_reg, Register obj) {
3939 #ifdef ASSERT
3940   {
3941     Label done_check;
3942     test_klass_is_empty_inline_type(inline_klass, temp_reg, done_check);
3943     stop("get_empty_value from non-empty inline klass");
3944     bind(done_check);
3945   }
3946 #endif
3947   get_default_value_oop(inline_klass, temp_reg, obj);
3948 }
3949 
3950 
3951 // Look up the method for a megamorphic invokeinterface call.
3952 // The target method is determined by <intf_klass, itable_index>.
3953 // The receiver klass is in recv_klass.
3954 // On success, the result will be in method_result, and execution falls through.
3955 // On failure, execution transfers to the given label.
3956 void MacroAssembler::lookup_interface_method(Register recv_klass,
3957                                              Register intf_klass,
3958                                              RegisterOrConstant itable_index,
3959                                              Register method_result,
3960                                              Register scan_temp,
3961                                              Label& L_no_such_interface,
3962                                              bool return_method) {
3963   assert_different_registers(recv_klass, intf_klass, scan_temp);
3964   assert_different_registers(method_result, intf_klass, scan_temp);
3965   assert(recv_klass != method_result || !return_method,
3966          "recv_klass can be destroyed when method isn't needed");
3967 
3968   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3969          "caller must use same register for non-constant itable index as for method");
3970 

4279   } else {
4280     Label L;
4281     jccb(negate_condition(cc), L);
4282     movl(dst, src);
4283     bind(L);
4284   }
4285 }
4286 
4287 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
4288   if (VM_Version::supports_cmov()) {
4289     cmovl(cc, dst, src);
4290   } else {
4291     Label L;
4292     jccb(negate_condition(cc), L);
4293     movl(dst, src);
4294     bind(L);
4295   }
4296 }
4297 
4298 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
4299   if (!VerifyOops || VerifyAdapterSharing) {
4300     // The address of the code string (below) confuses VerifyAdapterSharing
4301     // because it may differ between otherwise equivalent adapters.
4302     return;
4303   }
4304 
4305   // Pass register number to verify_oop_subroutine
4306   const char* b = NULL;
4307   {
4308     ResourceMark rm;
4309     stringStream ss;
4310     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
4311     b = code_string(ss.as_string());
4312   }
4313   BLOCK_COMMENT("verify_oop {");
4314 #ifdef _LP64
4315   push(rscratch1);                    // save r10, trashed by movptr()
4316 #endif
4317   push(rax);                          // save rax,
4318   push(reg);                          // pass register argument
4319   ExternalAddress buffer((address) b);
4320   // avoid using pushptr, as it modifies scratch registers
4321   // and our contract is not to modify anything
4322   movptr(rax, buffer.addr());
4323   push(rax);

4342   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
4343   int stackElementSize = Interpreter::stackElementSize;
4344   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
4345 #ifdef ASSERT
4346   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
4347   assert(offset1 - offset == stackElementSize, "correct arithmetic");
4348 #endif
4349   Register             scale_reg    = noreg;
4350   Address::ScaleFactor scale_factor = Address::no_scale;
4351   if (arg_slot.is_constant()) {
4352     offset += arg_slot.as_constant() * stackElementSize;
4353   } else {
4354     scale_reg    = arg_slot.as_register();
4355     scale_factor = Address::times(stackElementSize);
4356   }
4357   offset += wordSize;           // return PC is on stack
4358   return Address(rsp, scale_reg, scale_factor, offset);
4359 }
4360 
4361 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
4362   if (!VerifyOops || VerifyAdapterSharing) {
4363     // The address of the code string (below) confuses VerifyAdapterSharing
4364     // because it may differ between otherwise equivalent adapters.
4365     return;
4366   }
4367 
4368   // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
4369   // Pass register number to verify_oop_subroutine
4370   const char* b = NULL;
4371   {
4372     ResourceMark rm;
4373     stringStream ss;
4374     ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
4375     b = code_string(ss.as_string());
4376   }
4377 #ifdef _LP64
4378   push(rscratch1);                    // save r10, trashed by movptr()
4379 #endif
4380   push(rax);                          // save rax,
4381   // addr may contain rsp so we will have to adjust it based on the push
4382   // we just did (and on 64 bit we do two pushes)
4383   // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
4384   // stores rax into addr which is backwards of what was intended.
4385   if (addr.uses(rsp)) {
4386     lea(rax, addr);

4848 
4849 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
4850   // get mirror
4851   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
4852   load_method_holder(mirror, method);
4853   movptr(mirror, Address(mirror, mirror_offset));
4854   resolve_oop_handle(mirror, tmp);
4855 }
4856 
4857 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4858   load_method_holder(rresult, rmethod);
4859   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4860 }
4861 
4862 void MacroAssembler::load_method_holder(Register holder, Register method) {
4863   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
4864   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
4865   movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
4866 }
4867 
4868 void MacroAssembler::load_metadata(Register dst, Register src) {
4869   if (UseCompressedClassPointers) {
4870     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4871   } else {
4872     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4873   }
4874 }
4875 
4876 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
4877   assert_different_registers(src, tmp);
4878   assert_different_registers(dst, tmp);
4879 #ifdef _LP64
4880   if (UseCompressedClassPointers) {
4881     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4882     decode_klass_not_null(dst, tmp);
4883   } else
4884 #endif
4885   movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4886 }
4887 
4888 void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) {
4889   load_klass(dst, src, tmp);
4890   movptr(dst, Address(dst, Klass::prototype_header_offset()));
4891 }
4892 
4893 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
4894   assert_different_registers(src, tmp);
4895   assert_different_registers(dst, tmp);
4896 #ifdef _LP64
4897   if (UseCompressedClassPointers) {
4898     encode_klass_not_null(src, tmp);
4899     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
4900   } else
4901 #endif
4902     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
4903 }
4904 
4905 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
4906                                     Register tmp1, Register thread_tmp) {
4907   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4908   decorators = AccessInternal::decorator_fixup(decorators);
4909   bool as_raw = (decorators & AS_RAW) != 0;
4910   if (as_raw) {
4911     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
4912   } else {
4913     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
4914   }
4915 }
4916 
4917 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
4918                                      Register tmp1, Register tmp2, Register tmp3) {
4919   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4920   decorators = AccessInternal::decorator_fixup(decorators);
4921   bool as_raw = (decorators & AS_RAW) != 0;
4922   if (as_raw) {
4923     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
4924   } else {
4925     bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
4926   }
4927 }
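     // Hedged sketch (operands are illustrative only): AS_RAW in the decorators
     // bypasses any GC-specific barrier and forces the plain BarrierSetAssembler path:
     //   __ access_store_at(T_OBJECT, IN_HEAP | AS_RAW, Address(rdx, 0), rax,
     //                      rbx, rcx, r8);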
4928 
4929 void MacroAssembler::access_value_copy(DecoratorSet decorators, Register src, Register dst,
4930                                        Register inline_klass) {
4931   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4932   bs->value_copy(this, decorators, src, dst, inline_klass);
4933 }
4934 
4935 void MacroAssembler::first_field_offset(Register inline_klass, Register offset) {
4936   movptr(offset, Address(inline_klass, InstanceKlass::adr_inlineklass_fixed_block_offset()));
4937   movl(offset, Address(offset, InlineKlass::first_field_offset_offset()));
4938 }
4939 
4940 void MacroAssembler::data_for_oop(Register oop, Register data, Register inline_klass) {
4941   // ((address) (void*) o) + vk->first_field_offset();
4942   Register offset = (data == oop) ? rscratch1 : data;
4943   first_field_offset(inline_klass, offset);
4944   if (data == oop) {
4945     addptr(data, offset);
4946   } else {
4947     lea(data, Address(oop, offset));
4948   }
4949 }
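     // Worked example (hedged): when data aliases oop, the offset is materialized
     // in rscratch1 first, because computing it in data would clobber the oop base:
     //   __ data_for_oop(rax, rax, rbx); // rax += rbx->first_field_offset()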
4950 
4951 void MacroAssembler::data_for_value_array_index(Register array, Register array_klass,
4952                                                 Register index, Register data) {
4953   assert(index != rcx, "index needs to shift by rcx");
4954   assert_different_registers(array, array_klass, index);
4955   assert_different_registers(rcx, array, index);
4956 
4957   // array->base() + (index << Klass::layout_helper_log2_element_size(lh));
4958   movl(rcx, Address(array_klass, Klass::layout_helper_offset()));
4959 
4960   // Klass::layout_helper_log2_element_size(lh)
4961   // (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask;
4962   shrl(rcx, Klass::_lh_log2_element_size_shift);
4963   andl(rcx, Klass::_lh_log2_element_size_mask);
4964   shlptr(index); // index << rcx
4965 
4966   lea(data, Address(array, index, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_INLINE_TYPE)));
4967 }
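     // Worked example (hedged): for an array whose layout helper encodes
     // log2_element_size == 3 (8-byte elements) and index == 5, rcx becomes 3,
     // index becomes 40, and data = array + 40 + base_offset_in_bytes(T_INLINE_TYPE).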
4968 
4969 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
4970                                    Register thread_tmp, DecoratorSet decorators) {
4971   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
4972 }
4973 
4974 // Doesn't do verification; generates fixed-size code.
4975 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
4976                                             Register thread_tmp, DecoratorSet decorators) {
4977   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
4978 }
4979 
4980 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
4981                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
4982   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3);
4983 }
4984 
4985 // Used for storing NULLs.
4986 void MacroAssembler::store_heap_oop_null(Address dst) {
4987   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
4988 }
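     // Hedged usage sketch (addresses and registers are illustrative):
     //   __ load_heap_oop(rax, Address(rbx, 16));  // read through GC load barriers
     //   __ store_heap_oop_null(Address(rbx, 16)); // write NULL through store barriers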
4989 
4990 #ifdef _LP64
4991 void MacroAssembler::store_klass_gap(Register dst, Register src) {
4992   if (UseCompressedClassPointers) {
4993     // Store to klass gap in destination
4994     movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
4995   }
4996 }
4997 
4998 #ifdef ASSERT
4999 void MacroAssembler::verify_heapbase(const char* msg) {
5000   assert (UseCompressedOops, "should be compressed");
5001   assert (Universe::heap() != NULL, "java heap should be initialized");
5002   if (CheckCompressedOops) {
5003     Label ok;
5004     push(rscratch1); // cmpptr trashes rscratch1
5005     cmpptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()));
5006     jcc(Assembler::equal, ok);
5007     STOP(msg);

5281   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
5282   Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
5283 }
5284 
5285 void MacroAssembler::reinit_heapbase() {
5286   if (UseCompressedOops) {
5287     if (Universe::heap() != NULL) {
5288       if (CompressedOops::base() == NULL) {
5289         MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
5290       } else {
5291         mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
5292       }
5293     } else {
5294       movptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()));
5295     }
5296   }
5297 }
5298 
5299 #endif // _LP64
5300 
5301 #ifdef COMPILER2
5302 // C2 compiled method's prolog code.
5303 void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
5304   int framesize = C->output()->frame_size_in_bytes();
5305   int bangsize = C->output()->bang_size_in_bytes();
5306   bool fp_mode_24b = false;
5307   int stack_bang_size = C->output()->need_stack_bang(bangsize) ? bangsize : 0;
5308 
5309   // WARNING: the initial instruction MUST be 5 bytes or longer so that
5310   // NativeJump::patch_verified_entry will be able to patch out the entry
5311   // code safely. The push to verify stack depth is ok at 5 bytes, but the
5312   // frame allocation can be either 3 or 6 bytes. So if we don't do a
5313   // stack bang then we must use the 6-byte frame allocation even if
5314   // we have no frame. :-(
5315   assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
5316 
5317   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
5318   // Remove word for return addr
5319   framesize -= wordSize;
5320   stack_bang_size -= wordSize;
5321 
5322   // Calls to C2R adapters often do not accept exceptional returns.
5323   // We require their callers to bang for them instead. Be careful, because
5324   // some VM calls (such as call site linkage) can use several kilobytes of
5325   // stack; the stack safety zone should account for that.
5326   // See bugs 4446381, 4468289, 4497237.
5327   if (stack_bang_size > 0) {

5340     // Create frame
5341     if (framesize) {
5342       subptr(rsp, framesize);
5343     }
5344   } else {
5345     // Create frame (force generation of a 4 byte immediate value)
5346     subptr_imm32(rsp, framesize);
5347 
5348     // Save RBP register now.
5349     framesize -= wordSize;
5350     movptr(Address(rsp, framesize), rbp);
5351     // Save caller's stack pointer into RBP if the frame pointer is preserved.
5352     if (PreserveFramePointer) {
5353       movptr(rbp, rsp);
5354       if (framesize > 0) {
5355         addptr(rbp, framesize);
5356       }
5357     }
5358   }
5359 
5360   if (C->needs_stack_repair()) {
5361     // Save stack increment just below the saved rbp (also account for fixed framesize and rbp)
5362     assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned");
5363     movptr(Address(rsp, framesize - wordSize), sp_inc + framesize + wordSize);
5364   }
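       // Worked example for the store above (hedged): if framesize == 0x20 when the
       // store executes and sp_inc == 0x10, the slot at [rsp + 0x18] receives
       // 0x10 + 0x20 + 8 = 0x38, i.e. the full extension remove_frame() must undo.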
5365 
5366   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
5367     framesize -= wordSize;
5368     movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
5369   }
5370 
5371 #ifndef _LP64
5372   // If method sets FPU control word do it now
5373   if (fp_mode_24b) {
5374     fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
5375   }
5376   if (UseSSE >= 2 && VerifyFPU) {
5377     verify_FPU(0, "FPU stack must be clean on entry");
5378   }
5379 #endif
5380 
5381 #ifdef ASSERT
5382   if (VerifyStackAtCalls) {
5383     Label L;
5384     push(rax);
5385     mov(rax, rsp);
5386     andptr(rax, StackAlignmentInBytes-1);
5387     cmpptr(rax, StackAlignmentInBytes-wordSize);
5388     pop(rax);
5389     jcc(Assembler::equal, L);
5390     STOP("Stack is not properly aligned!");
5391     bind(L);
5392   }
5393 #endif
5394 }
5395 #endif // COMPILER2
5396 
5397 #if COMPILER2_OR_JVMCI
5398 
5399 // Clear (fill with 'val') memory of 'cnt' qwords, starting at 'base', using XMM/YMM/ZMM registers.
5400 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, KRegister mask) {
5401   // cnt - number of qwords (8-byte words).
5402   // base - start address, qword aligned.
5403   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
5404   bool use64byteVector = MaxVectorSize == 64 && AVX3Threshold == 0;
5405   if (use64byteVector) {
5406     evpbroadcastq(xtmp, val, AVX_512bit);
5407   } else if (MaxVectorSize >= 32) {
5408     movdq(xtmp, val);
5409     punpcklqdq(xtmp, xtmp);
5410     vinserti128_high(xtmp, xtmp);
5411   } else {
5412     movdq(xtmp, val);
5413     punpcklqdq(xtmp, xtmp);
5414   }
5415   jmp(L_zero_64_bytes);
5416 
5417   BIND(L_loop);
5418   if (MaxVectorSize >= 32) {
5419     fill64_avx(base, 0, xtmp, use64byteVector);
5420   } else {
5421     movdqu(Address(base,  0), xtmp);
5422     movdqu(Address(base, 16), xtmp);
5423     movdqu(Address(base, 32), xtmp);
5424     movdqu(Address(base, 48), xtmp);
5425   }
5426   addptr(base, 64);
5427 
5428   BIND(L_zero_64_bytes);
5429   subptr(cnt, 8);
5430   jccb(Assembler::greaterEqual, L_loop);
5431 
5432   // Fill the trailing qwords (fewer than 8 remain)
5433   if (use64byteVector) {
5434     addptr(cnt, 8);
5435     jccb(Assembler::equal, L_end);
5436     fill64_masked_avx(3, base, 0, xtmp, mask, cnt, val, true);
5437     jmp(L_end);
5438   } else {
5439     addptr(cnt, 4);
5440     jccb(Assembler::less, L_tail);
5441     if (MaxVectorSize >= 32) {
5442       vmovdqu(Address(base, 0), xtmp);
5443     } else {
5444       movdqu(Address(base,  0), xtmp);
5445       movdqu(Address(base, 16), xtmp);
5446     }
5447   }
5448   addptr(base, 32);
5449   subptr(cnt, 4);
5450 
5451   BIND(L_tail);
5452   addptr(cnt, 4);
5453   jccb(Assembler::lessEqual, L_end);
5454   if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
5455     fill32_masked_avx(3, base, 0, xtmp, mask, cnt, val);
5456   } else {
5457     decrement(cnt);
5458 
5459     BIND(L_sloop);
5460     movq(Address(base, 0), xtmp);
5461     addptr(base, 8);
5462     decrement(cnt);
5463     jccb(Assembler::greaterEqual, L_sloop);
5464   }
5465   BIND(L_end);
5466 }
5467 
5468 int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
5469   // An inline type might be returned. If fields are in registers we
5470   // need to allocate an inline type instance and initialize it with
5471   // the values of the fields.
5472   Label skip;
5473   // We only need a new buffered inline type if one is not already returned
5474   testptr(rax, 1);
5475   jcc(Assembler::zero, skip);
5476   int call_offset = -1;
5477 
5478 #ifdef _LP64
5479   // The following code is similar to allocate_instance but has some slight differences:
5480   // e.g. the object size is never zero and is sometimes a constant; storing the klass
5481   // pointer after allocation is unnecessary if vk != NULL. allocate_instance is not aware of these.
5482   Label slow_case;
5483   // 1. Try to allocate a new buffered inline instance either from TLAB or eden space
5484   mov(rscratch1, rax); // save rax for slow_case since *_allocate may corrupt it when allocation fails
5485   if (vk != NULL) {
5486     // Called from C1, where the return type is statically known.
5487     movptr(rbx, (intptr_t)vk->get_InlineKlass());
5488     jint obj_size = vk->layout_helper();
5489     assert(obj_size != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
5490     if (UseTLAB) {
5491       tlab_allocate(r15_thread, rax, noreg, obj_size, r13, r14, slow_case);
5492     } else {
5493       eden_allocate(r15_thread, rax, noreg, obj_size, r13, slow_case);
5494     }
5495   } else {
5496     // Call from interpreter. RAX contains ((the InlineKlass* of the return type) | 0x01)
5497     mov(rbx, rax);
5498     andptr(rbx, -2);
5499     movl(r14, Address(rbx, Klass::layout_helper_offset()));
5500     if (UseTLAB) {
5501       tlab_allocate(r15_thread, rax, r14, 0, r13, r14, slow_case);
5502     } else {
5503       eden_allocate(r15_thread, rax, r14, 0, r13, slow_case);
5504     }
5505   }
5506   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
5507     // 2. Initialize buffered inline instance header
5508     Register buffer_obj = rax;
5509     movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::inline_type_prototype().value());
5510     xorl(r13, r13);
5511     store_klass_gap(buffer_obj, r13);
5512     if (vk == NULL) {
5513       // store_klass corrupts rbx(klass), so save it in r13 for later use (interpreter case only).
5514       mov(r13, rbx);
5515     }
5516     Register tmp_store_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
5517     store_klass(buffer_obj, rbx, tmp_store_klass);
5518     // 3. Initialize its fields with an inline class specific handler
5519     if (vk != NULL) {
5520       call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
5521     } else {
5522       movptr(rbx, Address(r13, InstanceKlass::adr_inlineklass_fixed_block_offset()));
5523       movptr(rbx, Address(rbx, InlineKlass::pack_handler_offset()));
5524       call(rbx);
5525     }
5526     jmp(skip);
5527   }
5528   bind(slow_case);
5529   // We failed to allocate a new inline type, fall back to a runtime
5530   // call. Some oop field may be live in some registers but we can't
5531   // tell. That runtime call will take care of preserving them
5532   // across a GC if there's one.
5533   mov(rax, rscratch1);
5534 #endif
5535 
5536   if (from_interpreter) {
5537     super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
5538   } else {
5539     call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
5540     call_offset = offset();
5541   }
5542 
5543   bind(skip);
5544   return call_offset;
5545 }
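     // Hedged note on the convention assumed above: on return from a method that
     // scalarizes an inline type, rax holds either a buffered oop or the tagged
     // value (InlineKlass* | 1); the testptr(rax, 1) distinguishes the two cases,
     // and andptr(rbx, -2) strips the tag to recover the klass pointer.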
5546 
5547 // Move a value between registers/stack slots and update the reg_state
5548 bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
5549   assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
5550   if (reg_state[to->value()] == reg_written) {
5551     return true; // Already written
5552   }
5553   if (from != to && bt != T_VOID) {
5554     if (reg_state[to->value()] == reg_readonly) {
5555       return false; // Not yet writable
5556     }
5557     if (from->is_reg()) {
5558       if (to->is_reg()) {
5559         if (from->is_XMMRegister()) {
5560           if (bt == T_DOUBLE) {
5561             movdbl(to->as_XMMRegister(), from->as_XMMRegister());
5562           } else {
5563             assert(bt == T_FLOAT, "must be float");
5564             movflt(to->as_XMMRegister(), from->as_XMMRegister());
5565           }
5566         } else {
5567           movq(to->as_Register(), from->as_Register());
5568         }
5569       } else {
5570         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
5571         Address to_addr = Address(rsp, st_off);
5572         if (from->is_XMMRegister()) {
5573           if (bt == T_DOUBLE) {
5574             movdbl(to_addr, from->as_XMMRegister());
5575           } else {
5576             assert(bt == T_FLOAT, "must be float");
5577             movflt(to_addr, from->as_XMMRegister());
5578           }
5579         } else {
5580           movq(to_addr, from->as_Register());
5581         }
5582       }
5583     } else {
5584       Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize);
5585       if (to->is_reg()) {
5586         if (to->is_XMMRegister()) {
5587           if (bt == T_DOUBLE) {
5588             movdbl(to->as_XMMRegister(), from_addr);
5589           } else {
5590             assert(bt == T_FLOAT, "must be float");
5591             movflt(to->as_XMMRegister(), from_addr);
5592           }
5593         } else {
5594           movq(to->as_Register(), from_addr);
5595         }
5596       } else {
5597         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
5598         movq(r13, from_addr);
5599         movq(Address(rsp, st_off), r13);
5600       }
5601     }
5602   }
5603   // Update register states
5604   reg_state[from->value()] = reg_writable;
5605   reg_state[to->value()] = reg_written;
5606   return true;
5607 }
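     // Hedged summary of the bookkeeping above:
     //   reg_state[from]: -> reg_writable (its value has been consumed)
     //   reg_state[to]:   -> reg_written  (its value is now final)
     // A reg_readonly destination makes the move fail and be retried later, once
     // the destination's own original value has been read.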
5608 
5609 // Calculate the extra stack space required for packing or unpacking inline
5610 // args and adjust the stack pointer
5611 int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
5612   // Two additional slots to account for the return address
5613   int sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size;
5614   sp_inc = align_up(sp_inc, StackAlignmentInBytes);
5615   // Save the return address, adjust the stack (make sure it is properly
5616   // 16-byte aligned) and copy the return address to the new top of the stack.
5617   // The stack will be repaired on return (see MacroAssembler::remove_frame).
5618   assert(sp_inc > 0, "sanity");
5619   pop(r13);
5620   subptr(rsp, sp_inc);
5621   push(r13);
5622   return sp_inc;
5623 }
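     // Worked example (hedged): args_on_stack == 5 gives
     // sp_inc = align_up((5 + 2) * 4, 16) = 32, so the return address moves down
     // by 32 bytes and rsp stays 16-byte aligned.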
5624 
5625 // Read all fields from an inline type buffer and store the field values in registers/stack slots.
5626 bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
5627                                           VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
5628                                           RegState reg_state[]) {
5629   assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
5630   assert(from->is_valid(), "source must be valid");
5631   Register fromReg;
5632   if (from->is_reg()) {
5633     fromReg = from->as_Register();
5634   } else {
5635     int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
5636     movq(r10, Address(rsp, st_off));
5637     fromReg = r10;
5638   }
5639 
5640   ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, -1);
5641   bool done = true;
5642   bool mark_done = true;
5643   VMReg toReg;
5644   BasicType bt;
5645   while (stream.next(toReg, bt)) {
5646     assert(toReg->is_valid(), "destination must be valid");
5647     int off = sig->at(stream.sig_index())._offset;
5648     assert(off > 0, "offset in object should be positive");
5649     Address fromAddr = Address(fromReg, off);
5650 
5651     int idx = (int)toReg->value();
5652     if (reg_state[idx] == reg_readonly) {
5653       if (idx != from->value()) {
5654         mark_done = false;
5655       }
5656       done = false;
5657       continue;
5658     } else if (reg_state[idx] == reg_written) {
5659       continue;
5660     } else {
5661       assert(reg_state[idx] == reg_writable, "must be writable");
5662       reg_state[idx] = reg_written;
5663     }
5664 
5665     if (!toReg->is_XMMRegister()) {
5666       Register dst = toReg->is_stack() ? r13 : toReg->as_Register();
5667       if (is_reference_type(bt)) {
5668         load_heap_oop(dst, fromAddr);
5669       } else {
5670         bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
5671         load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
5672       }
5673       if (toReg->is_stack()) {
5674         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
5675         movq(Address(rsp, st_off), dst);
5676       }
5677     } else if (bt == T_DOUBLE) {
5678       movdbl(toReg->as_XMMRegister(), fromAddr);
5679     } else {
5680       assert(bt == T_FLOAT, "must be float");
5681       movflt(toReg->as_XMMRegister(), fromAddr);
5682     }
5683   }
5684   sig_index = stream.sig_index();
5685   to_index = stream.regs_index();
5686 
5687   if (mark_done && reg_state[from->value()] != reg_written) {
5688     // This is okay because no one else will write to that slot
5689     reg_state[from->value()] = reg_writable;
5690   }
5691   from_index--;
5692   return done;
5693 }
5694 
5695 bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
5696                                         VMRegPair* from, int from_count, int& from_index, VMReg to,
5697                                         RegState reg_state[], Register val_array) {
5698   assert(sig->at(sig_index)._bt == T_INLINE_TYPE, "should be at end delimiter");
5699   assert(to->is_valid(), "destination must be valid");
5700 
5701   if (reg_state[to->value()] == reg_written) {
5702     skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
5703     return true; // Already written
5704   }
5705 
5706   Register val_obj_tmp = r11;
5707   Register from_reg_tmp = r14; // Be careful with r14 because it's used for spilling
5708   Register tmp1 = r10;
5709   Register tmp2 = r13;
5710   Register tmp3 = rbx;
5711   Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();
5712 
5713   assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);
5714 
5715   if (reg_state[to->value()] == reg_readonly) {
5716     if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
5717       skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
5718       return false; // Not yet writable
5719     }
5720     val_obj = val_obj_tmp;
5721   }
5722 
5723   int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_INLINE_TYPE);
5724   load_heap_oop(val_obj, Address(val_array, index));
5725 
5726   ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
5727   VMReg fromReg;
5728   BasicType bt;
5729   while (stream.next(fromReg, bt)) {
5730     assert(fromReg->is_valid(), "source must be valid");
5731     int off = sig->at(stream.sig_index())._offset;
5732     assert(off > 0, "offset in object should be positive");
5733     size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
5734 
5735     Address dst(val_obj, off);
5736     if (!fromReg->is_XMMRegister()) {
5737       Register src;
5738       if (fromReg->is_stack()) {
5739         src = from_reg_tmp;
5740         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
5741         load_sized_value(src, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
5742       } else {
5743         src = fromReg->as_Register();
5744       }
5745       assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
5746       if (is_reference_type(bt)) {
5747         store_heap_oop(dst, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
5748       } else {
5749         store_sized_value(dst, src, size_in_bytes);
5750       }
5751     } else if (bt == T_DOUBLE) {
5752       movdbl(dst, fromReg->as_XMMRegister());
5753     } else {
5754       assert(bt == T_FLOAT, "must be float");
5755       movflt(dst, fromReg->as_XMMRegister());
5756     }
5757     reg_state[fromReg->value()] = reg_writable;
5758   }
5759   sig_index = stream.sig_index();
5760   from_index = stream.regs_index();
5761 
5762   assert(reg_state[to->value()] == reg_writable, "must have already been read");
5763   bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);
5764   assert(success, "to register must be writable");
5765   return true;
5766 }
5767 
5768 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
5769   return reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
5770 }
5771 
5772 void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
5773   assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
5774   if (needs_stack_repair) {
5775     movq(rbp, Address(rsp, initial_framesize));
5776     // The stack increment resides just below the saved rbp
5777     addq(rsp, Address(rsp, initial_framesize - wordSize));
5778   } else {
5779     if (initial_framesize > 0) {
5780       addq(rsp, initial_framesize);
5781     }
5782     pop(rbp);
5783   }
5784 }
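     // Hedged note: the stack-repair path above reads the extension size that
     // verified_entry() stored just below the saved rbp, so a single addq undoes
     // both the fixed frame and any growth from extend_stack_for_inline_args().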
5785 
5786 // Clear constant-sized memory using YMM/ZMM registers.
5787 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5788   assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
5789   bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
5790 
5791   int vector64_count = (cnt & (~0x7)) >> 3;
5792   cnt = cnt & 0x7;
5793 
5794   // 64 byte initialization loop.
5795   vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
5796   for (int i = 0; i < vector64_count; i++) {
5797     fill64_avx(base, i * 64, xtmp, use64byteVector);
5798   }
5799 
5800   // Clear remaining 64 byte tail.
5801   int disp = vector64_count * 64;
5802   if (cnt) {
5803     switch (cnt) {
5804       case 1:
5805         movq(Address(base, disp), xtmp);

5837         break;
5838       case 7:
5839         if (use64byteVector) {
5840           movl(rtmp, 0x7F);
5841           kmovwl(mask, rtmp);
5842           evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
5843         } else {
5844           evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5845           movl(rtmp, 0x7);
5846           kmovwl(mask, rtmp);
5847           evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit);
5848         }
5849         break;
5850       default:
5851         fatal("Unexpected length: %d\n", cnt);
5852         break;
5853     }
5854   }
5855 }
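     // Worked example (hedged): cnt == 11 qwords gives vector64_count == 1 and a
     // 3-qword tail, i.e. one 64-byte vector fill at disp 0 followed by the
     // "case 3" tail sequence at disp 64.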
5856 
5857 void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp,
5858                                bool is_large, bool word_copy_only, KRegister mask) {
5859   // cnt      - number of qwords (8-byte words).
5860   // base     - start address, qword aligned.
5861   // is_large - whether the optimizer knows cnt is larger than InitArrayShortSize
5862   assert(base==rdi, "base register must be edi for rep stos");
5863   assert(val==rax,   "val register must be eax for rep stos");
5864   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
5865   assert(InitArrayShortSize % BytesPerLong == 0,
5866     "InitArrayShortSize should be a multiple of BytesPerLong");
5867 
5868   Label DONE;
5869 
5870   if (!is_large) {
5871     Label LOOP, LONG;
5872     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
5873     jccb(Assembler::greater, LONG);
5874 
5875     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
5876 
5877     decrement(cnt);
5878     jccb(Assembler::negative, DONE); // Zero length
5879 
5880     // Use individual pointer-sized stores for small counts:
5881     BIND(LOOP);
5882     movptr(Address(base, cnt, Address::times_ptr), val);
5883     decrement(cnt);
5884     jccb(Assembler::greaterEqual, LOOP);
5885     jmpb(DONE);
5886 
5887     BIND(LONG);
5888   }
5889 
5890   // Use longer rep-prefixed ops for non-small counts:
5891   if (UseFastStosb && !word_copy_only) {
5892     shlptr(cnt, 3); // convert to number of bytes
5893     rep_stosb();
5894   } else if (UseXMMForObjInit) {
5895     xmm_clear_mem(base, cnt, val, xtmp, mask);
5896   } else {
5897     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
5898     rep_stos();
5899   }
5900 
5901   BIND(DONE);
5902 }
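     // Hedged usage sketch (the k-register choice is an assumption; base/cnt/val
     // are pinned by the asserts above):
     //   __ clear_mem(rdi, rcx, rax, xmm0, /*is_large*/ false,
     //                /*word_copy_only*/ false, k2);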
5903 
5904 #endif //COMPILER2_OR_JVMCI
5905 
5906 
5907 void MacroAssembler::generate_fill(BasicType t, bool aligned,
5908                                    Register to, Register value, Register count,
5909                                    Register rtmp, XMMRegister xtmp) {
5910   ShortBranchVerifier sbv(this);
5911   assert_different_registers(to, value, count, rtmp);
5912   Label L_exit;
5913   Label L_fill_2_bytes, L_fill_4_bytes;
5914 
5915   int shift = -1;