src/hotspot/cpu/x86/macroAssembler_x86.cpp

   11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12  * version 2 for more details (a copy is included in the LICENSE file that
   13  * accompanied this code).
   14  *
   15  * You should have received a copy of the GNU General Public License version
   16  * 2 along with this work; if not, write to the Free Software Foundation,
   17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18  *
   19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20  * or visit www.oracle.com if you need additional information or have any
   21  * questions.
   22  *
   23  */
   24 
   25 #include "precompiled.hpp"
   26 #include "jvm.h"
   27 #include "asm/assembler.hpp"
   28 #include "asm/assembler.inline.hpp"
   29 #include "compiler/compiler_globals.hpp"
   30 #include "compiler/disassembler.hpp"

   31 #include "gc/shared/barrierSet.hpp"
   32 #include "gc/shared/barrierSetAssembler.hpp"
   33 #include "gc/shared/collectedHeap.inline.hpp"
   34 #include "gc/shared/tlab_globals.hpp"
   35 #include "interpreter/bytecodeHistogram.hpp"
   36 #include "interpreter/interpreter.hpp"
   37 #include "memory/resourceArea.hpp"
   38 #include "memory/universe.hpp"
   39 #include "oops/accessDecorators.hpp"
   40 #include "oops/compressedOops.inline.hpp"
   41 #include "oops/klass.inline.hpp"
   42 #include "prims/methodHandles.hpp"
   43 #include "runtime/continuation.hpp"
   44 #include "runtime/flags/flagSetting.hpp"
   45 #include "runtime/interfaceSupport.inline.hpp"
   46 #include "runtime/javaThread.hpp"
   47 #include "runtime/jniHandles.hpp"
   48 #include "runtime/objectMonitor.hpp"
   49 #include "runtime/os.hpp"
   50 #include "runtime/safepoint.hpp"
   51 #include "runtime/safepointMechanism.hpp"
   52 #include "runtime/sharedRuntime.hpp"

   53 #include "runtime/stubRoutines.hpp"
   54 #include "utilities/macros.hpp"

   55 #include "crc32c.h"
   56 
   57 #ifdef PRODUCT
   58 #define BLOCK_COMMENT(str) /* nothing */
   59 #define STOP(error) stop(error)
   60 #else
   61 #define BLOCK_COMMENT(str) block_comment(str)
   62 #define STOP(error) block_comment(error); stop(error)
   63 #endif
   64 
   65 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
   66 
   67 #ifdef ASSERT
   68 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
   69 #endif
   70 
   71 static Assembler::Condition reverse[] = {
   72     Assembler::noOverflow     /* overflow      = 0x0 */ ,
   73     Assembler::overflow       /* noOverflow    = 0x1 */ ,
   74     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
   75     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

 1658   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1659   pass_arg1(this, arg_1);
 1660   pass_arg0(this, arg_0);
 1661   call_VM_leaf(entry_point, 3);
 1662 }
 1663 
 1664 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
 1665   LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
 1666   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
 1667   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
 1668   pass_arg3(this, arg_3);
 1669   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
 1670   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
 1671   pass_arg2(this, arg_2);
 1672   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1673   pass_arg1(this, arg_1);
 1674   pass_arg0(this, arg_0);
 1675   call_VM_leaf(entry_point, 3);
 1676 }
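// A minimal usage sketch (hypothetical call site; the entry point and the
// argument registers are illustrative, not taken from this file). The
// overloads above shuffle arguments into the platform calling convention
// before the tail call to call_VM_leaf(entry_point, n):
//
//   address entry = CAST_FROM_FN_PTR(address, my_leaf_entry); // hypothetical C function
//   __ call_VM_leaf(entry, rbx, rdx);  // LP64: rbx -> c_rarg0, rdx -> c_rarg1;
//                                      // 32-bit: arguments are pushed instead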
 1677 
 1678 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
 1679   pass_arg0(this, arg_0);
 1680   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1681 }
 1682 
 1683 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 1684 
 1685   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1686   pass_arg1(this, arg_1);
 1687   pass_arg0(this, arg_0);
 1688   MacroAssembler::call_VM_leaf_base(entry_point, 2);
 1689 }
 1690 
 1691 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 1692   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
 1693   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
 1694   pass_arg2(this, arg_2);
 1695   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1696   pass_arg1(this, arg_1);
 1697   pass_arg0(this, arg_0);

 2836     lea(rscratch, src);
 2837     Assembler::mulss(dst, Address(rscratch, 0));
 2838   }
 2839 }
 2840 
 2841 void MacroAssembler::null_check(Register reg, int offset) {
 2842   if (needs_explicit_null_check(offset)) {
 2843     // provoke OS NULL exception if reg = NULL by
 2844     // accessing M[reg] w/o changing any (non-CC) registers
 2845     // NOTE: cmpl is plenty here to provoke a segv
 2846     cmpptr(rax, Address(reg, 0));
 2847     // Note: should probably use testl(rax, Address(reg, 0));
 2848     //       may be shorter code (however, this version of
 2849     //       testl needs to be implemented first)
 2850   } else {
 2851     // nothing to do, (later) access of M[reg + offset]
 2852     // will provoke OS NULL exception if reg = NULL
 2853   }
 2854 }
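// A minimal sketch of the caller's contract (register and offset are
// hypothetical): for a small positive offset the later access itself faults
// on a NULL base, so null_check() emits nothing; only offsets outside the
// OS-protected page get the explicit cmpptr probe above:
//
//   __ null_check(robj, field_offset);            // no code for small offsets
//   __ movptr(rbx, Address(robj, field_offset));  // faults here if robj == NULL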
 2855 
 2856 void MacroAssembler::os_breakpoint() {
 2857   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
 2858   // (e.g., MSVC can't call ps() otherwise)
 2859   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 2860 }
 2861 
 2862 void MacroAssembler::unimplemented(const char* what) {
 2863   const char* buf = NULL;
 2864   {
 2865     ResourceMark rm;
 2866     stringStream ss;
 2867     ss.print("unimplemented: %s", what);
 2868     buf = code_string(ss.as_string());
 2869   }
 2870   stop(buf);
 2871 }
 2872 
 2873 #ifdef _LP64
 2874 #define XSTATE_BV 0x200
 2875 #endif

 3866 }
 3867 
 3868 // C++ bool manipulation
 3869 void MacroAssembler::testbool(Register dst) {
 3870   if(sizeof(bool) == 1)
 3871     testb(dst, 0xff);
 3872   else if(sizeof(bool) == 2) {
 3873     // testw implementation needed for two byte bools
 3874     ShouldNotReachHere();
 3875   } else if(sizeof(bool) == 4)
 3876     testl(dst, dst);
 3877   else
 3878     // unsupported
 3879     ShouldNotReachHere();
 3880 }
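// A minimal usage sketch (the bool location and label are hypothetical):
// testbool() sets ZF from the low byte, so notZero means "true":
//
//   __ movzbl(rax, Address(rscratch1, 0));   // rscratch1 points at a C++ bool
//   __ testbool(rax);
//   __ jcc(Assembler::notZero, L_flag_set);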
 3881 
 3882 void MacroAssembler::testptr(Register dst, Register src) {
 3883   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
 3884 }
 3885 
 3886 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
 3887 void MacroAssembler::tlab_allocate(Register thread, Register obj,
 3888                                    Register var_size_in_bytes,
 3889                                    int con_size_in_bytes,
 3890                                    Register t1,
 3891                                    Register t2,
 3892                                    Label& slow_case) {
 3893   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 3894   bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
 3895 }
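// A minimal usage sketch (fixed 16-byte allocation; register choices are
// illustrative). The barrier-set implementation bumps the TLAB top and falls
// through on success:
//
//   Label slow;
//   __ tlab_allocate(r15_thread, rax /* obj */, noreg, 16, rbx, rcx, slow);
//   // ... initialize header and fields ...; 'slow' handles the refill path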
 3896 
 3897 RegSet MacroAssembler::call_clobbered_gp_registers() {
 3898   RegSet regs;
 3899 #ifdef _LP64
 3900   regs += RegSet::of(rax, rcx, rdx);
 3901 #ifndef WINDOWS
 3902   regs += RegSet::of(rsi, rdi);
 3903 #endif
 3904   regs += RegSet::range(r8, r11);
 3905 #else

 4118     // clear topmost word (no jump would be needed if conditional assignment worked here)
 4119     movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
 4120     // index could be 0 now, must check again
 4121     jcc(Assembler::zero, done);
 4122     bind(even);
 4123   }
 4124 #endif // !_LP64
 4125   // initialize remaining object fields: index is a multiple of 2 now
 4126   {
 4127     Label loop;
 4128     bind(loop);
 4129     movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
 4130     NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
 4131     decrement(index);
 4132     jcc(Assembler::notZero, loop);
 4133   }
 4134 
 4135   bind(done);
 4136 }
 4137 
 4138 // Look up the method for a megamorphic invokeinterface call.
 4139 // The target method is determined by <intf_klass, itable_index>.
 4140 // The receiver klass is in recv_klass.
 4141 // On success, the result will be in method_result, and execution falls through.
 4142 // On failure, execution transfers to the given label.
 4143 void MacroAssembler::lookup_interface_method(Register recv_klass,
 4144                                              Register intf_klass,
 4145                                              RegisterOrConstant itable_index,
 4146                                              Register method_result,
 4147                                              Register scan_temp,
 4148                                              Label& L_no_such_interface,
 4149                                              bool return_method) {
 4150   assert_different_registers(recv_klass, intf_klass, scan_temp);
 4151   assert_different_registers(method_result, intf_klass, scan_temp);
 4152   assert(recv_klass != method_result || !return_method,
 4153          "recv_klass can be destroyed when method isn't needed");
 4154 
 4155   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
 4156          "caller must use same register for non-constant itable index as for method");
 4157 

 4466   } else {
 4467     Label L;
 4468     jccb(negate_condition(cc), L);
 4469     movl(dst, src);
 4470     bind(L);
 4471   }
 4472 }
 4473 
 4474 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
 4475   if (VM_Version::supports_cmov()) {
 4476     cmovl(cc, dst, src);
 4477   } else {
 4478     Label L;
 4479     jccb(negate_condition(cc), L);
 4480     movl(dst, src);
 4481     bind(L);
 4482   }
 4483 }
 4484 
 4485 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 4486   if (!VerifyOops) return;

 4487 
 4488   BLOCK_COMMENT("verify_oop {");
 4489 #ifdef _LP64
 4490   push(rscratch1);
 4491 #endif
 4492   push(rax);                          // save rax
 4493   push(reg);                          // pass register argument
 4494 
 4495   // Pass register number to verify_oop_subroutine
 4496   const char* b = NULL;
 4497   {
 4498     ResourceMark rm;
 4499     stringStream ss;
 4500     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
 4501     b = code_string(ss.as_string());
 4502   }
 4503   ExternalAddress buffer((address) b);
 4504   pushptr(buffer.addr(), rscratch1);
 4505 
 4506   // call indirectly to solve generation ordering problem

 4528   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
 4529   int stackElementSize = Interpreter::stackElementSize;
 4530   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
 4531 #ifdef ASSERT
 4532   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
 4533   assert(offset1 - offset == stackElementSize, "correct arithmetic");
 4534 #endif
 4535   Register             scale_reg    = noreg;
 4536   Address::ScaleFactor scale_factor = Address::no_scale;
 4537   if (arg_slot.is_constant()) {
 4538     offset += arg_slot.as_constant() * stackElementSize;
 4539   } else {
 4540     scale_reg    = arg_slot.as_register();
 4541     scale_factor = Address::times(stackElementSize);
 4542   }
 4543   offset += wordSize;           // return PC is on stack
 4544   return Address(rsp, scale_reg, scale_factor, offset);
 4545 }
 4546 
 4547 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 4548   if (!VerifyOops) return;

 4549 
 4550 #ifdef _LP64
 4551   push(rscratch1);
 4552 #endif
 4553   push(rax); // save rax,
 4554   // addr may contain rsp so we will have to adjust it based on the push
 4555   // we just did (and on 64 bit we do two pushes)
 4556   // NOTE: the 64-bit code used to do movq(addr, rax), i.e. store rax into
 4557   //       addr, which is backwards from what was intended.
 4558   if (addr.uses(rsp)) {
 4559     lea(rax, addr);
 4560     pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
 4561   } else {
 4562     pushptr(addr);
 4563   }
 4564 
 4565   // Pass register number to verify_oop_subroutine
 4566   const char* b = NULL;
 4567   {
 4568     ResourceMark rm;

 5015 
 5016 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
 5017   // get mirror
 5018   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 5019   load_method_holder(mirror, method);
 5020   movptr(mirror, Address(mirror, mirror_offset));
 5021   resolve_oop_handle(mirror, tmp);
 5022 }
 5023 
 5024 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
 5025   load_method_holder(rresult, rmethod);
 5026   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
 5027 }
 5028 
 5029 void MacroAssembler::load_method_holder(Register holder, Register method) {
 5030   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
 5031   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
 5032   movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
 5033 }
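// The chain above, spelled out for reference:
//   Method* -> ConstMethod* -> ConstantPool* -> pool holder (InstanceKlass*)
// A minimal usage sketch (hypothetical registers):
//
//   __ load_method_holder(rax, rbx);  // rbx holds a Method*, rax gets its InstanceKlass*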
 5034 
 5035 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
 5036   assert_different_registers(src, tmp);
 5037   assert_different_registers(dst, tmp);
 5038 #ifdef _LP64
 5039   if (UseCompressedClassPointers) {
 5040     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5041     decode_klass_not_null(dst, tmp);
 5042   } else
 5043 #endif
 5044     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5045 }
 5046 
 5047 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
 5048   assert_different_registers(src, tmp);
 5049   assert_different_registers(dst, tmp);
 5050 #ifdef _LP64
 5051   if (UseCompressedClassPointers) {
 5052     encode_klass_not_null(src, tmp);
 5053     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5054   } else
 5055 #endif
 5056     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5057 }
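// A minimal usage sketch (registers and label are hypothetical): load an
// object's Klass* and compare it against an expected klass; with compressed
// class pointers, load_klass() decodes the narrow value via the tmp register:
//
//   __ load_klass(rscratch1, rax, rscratch2);  // rax holds an oop
//   __ cmpptr(rscratch1, rbx);                 // rbx holds the expected Klass*
//   __ jcc(Assembler::notEqual, L_miss);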
 5058 
 5059 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
 5060                                     Register tmp1, Register thread_tmp) {
 5061   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5062   decorators = AccessInternal::decorator_fixup(decorators);
 5063   bool as_raw = (decorators & AS_RAW) != 0;
 5064   if (as_raw) {
 5065     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
 5066   } else {
 5067     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
 5068   }
 5069 }
 5070 
 5071 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
 5072                                      Register tmp1, Register tmp2, Register tmp3) {
 5073   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5074   decorators = AccessInternal::decorator_fixup(decorators);
 5075   bool as_raw = (decorators & AS_RAW) != 0;
 5076   if (as_raw) {
 5077     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
 5078   } else {
 5079     bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
 5080   }
 5081 }
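// A minimal usage sketch (operands are hypothetical): AS_RAW forces the plain
// BarrierSetAssembler code with no GC barriers, while the default path lets
// the active barrier set emit pre/post barriers as needed:
//
//   __ access_store_at(T_OBJECT, IN_HEAP | AS_RAW, Address(rdx, 0), rax,
//                      noreg, noreg, noreg);   // raw, barrier-free oop store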
 5082 
 5083 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
 5084                                    Register thread_tmp, DecoratorSet decorators) {
 5085   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
 5086 }
 5087 
 5088 // Doesn't do verification, generates fixed size code
 5089 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 5090                                             Register thread_tmp, DecoratorSet decorators) {
 5091   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
 5092 }
 5093 
 5094 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
 5095                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
 5096   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3);
 5097 }
 5098 
 5099 // Used for storing NULLs.
 5100 void MacroAssembler::store_heap_oop_null(Address dst) {
 5101   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
 5102 }
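// A minimal usage sketch (field offsets and registers are hypothetical):
// store an oop through the full IN_HEAP barrier path, then clear a second
// field with the NULL-store shortcut:
//
//   __ store_heap_oop(Address(rdx, 24), rax, rbx, rcx, r8);
//   __ store_heap_oop_null(Address(rdx, 32));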

 5402 
 5403 void MacroAssembler::reinit_heapbase() {
 5404   if (UseCompressedOops) {
 5405     if (Universe::heap() != NULL) {
 5406       if (CompressedOops::base() == NULL) {
 5407         MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
 5408       } else {
 5409         mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
 5410       }
 5411     } else {
 5412       movptr(r12_heapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
 5413     }
 5414   }
 5415 }
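// For reference, the decode that r12_heapbase supports is
//   oop = base + (narrow_oop << shift)
// where r12_heapbase caches 'base'; it is zeroed above for zero-based
// compressed oops (base == NULL).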
 5416 
 5417 #endif // _LP64
 5418 
 5419 #if COMPILER2_OR_JVMCI
 5420 
 5421 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
 5422 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 5423   // cnt - number of qwords (8-byte words).
 5424   // base - start address, qword aligned.
 5425   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
 5426   bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
 5427   if (use64byteVector) {
 5428     vpxor(xtmp, xtmp, xtmp, AVX_512bit);
 5429   } else if (MaxVectorSize >= 32) {
 5430     vpxor(xtmp, xtmp, xtmp, AVX_256bit);
 5431   } else {
 5432     pxor(xtmp, xtmp);
 5433   }
 5434   jmp(L_zero_64_bytes);
 5435 
 5436   BIND(L_loop);
 5437   if (MaxVectorSize >= 32) {
 5438     fill64(base, 0, xtmp, use64byteVector);
 5439   } else {
 5440     movdqu(Address(base,  0), xtmp);
 5441     movdqu(Address(base, 16), xtmp);
 5442     movdqu(Address(base, 32), xtmp);
 5443     movdqu(Address(base, 48), xtmp);
 5444   }
 5445   addptr(base, 64);
 5446 
 5447   BIND(L_zero_64_bytes);
 5448   subptr(cnt, 8);
 5449   jccb(Assembler::greaterEqual, L_loop);
 5450 
 5451   // Copy trailing 64 bytes
 5452   if (use64byteVector) {
 5453     addptr(cnt, 8);
 5454     jccb(Assembler::equal, L_end);
 5455     fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true);
 5456     jmp(L_end);
 5457   } else {
 5458     addptr(cnt, 4);
 5459     jccb(Assembler::less, L_tail);
 5460     if (MaxVectorSize >= 32) {
 5461       vmovdqu(Address(base, 0), xtmp);
 5462     } else {
 5463       movdqu(Address(base,  0), xtmp);
 5464       movdqu(Address(base, 16), xtmp);
 5465     }
 5466   }
 5467   addptr(base, 32);
 5468   subptr(cnt, 4);
 5469 
 5470   BIND(L_tail);
 5471   addptr(cnt, 4);
 5472   jccb(Assembler::lessEqual, L_end);
 5473   if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
 5474     fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp);
 5475   } else {
 5476     decrement(cnt);
 5477 
 5478     BIND(L_sloop);
 5479     movq(Address(base, 0), xtmp);
 5480     addptr(base, 8);
 5481     decrement(cnt);
 5482     jccb(Assembler::greaterEqual, L_sloop);
 5483   }
 5484   BIND(L_end);
 5485 }
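// Shape of the emitted code, traced for reference with cnt = 13 qwords,
// MaxVectorSize >= 32 and use64byteVector == false:
//   main loop : one 64-byte store (8 qwords), cnt 13 -> 5
//   trailing  : one 32-byte store (4 qwords), cnt  5 -> 1
//   scalar    : one  8-byte store (1 qword),  cnt  1 -> 0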
 5486 
 5487 // Clearing constant sized memory using YMM/ZMM registers.
 5488 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 5489   assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
 5490   bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
 5491 
 5492   int vector64_count = (cnt & (~0x7)) >> 3;
 5493   cnt = cnt & 0x7;
 5494   const int fill64_per_loop = 4;
 5495   const int max_unrolled_fill64 = 8;
 5496 
 5497   // 64 byte initialization loop.
 5498   vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
 5499   int start64 = 0;
 5500   if (vector64_count > max_unrolled_fill64) {
 5501     Label LOOP;
 5502     Register index = rtmp;
 5503 
 5504     start64 = vector64_count - (vector64_count % fill64_per_loop);
 5505 
 5506     movl(index, 0);

 5556         break;
 5557       case 7:
 5558         if (use64byteVector) {
 5559           movl(rtmp, 0x7F);
 5560           kmovwl(mask, rtmp);
 5561           evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
 5562         } else {
 5563           evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
 5564           movl(rtmp, 0x7);
 5565           kmovwl(mask, rtmp);
 5566           evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
 5567         }
 5568         break;
 5569       default:
 5570         fatal("Unexpected length : %d\n", cnt);
 5571         break;
 5572     }
 5573   }
 5574 }
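// For reference: a constant cnt is split into vector64_count = cnt / 8 full
// 64-byte stores (fully unrolled up to max_unrolled_fill64, otherwise looped
// fill64_per_loop at a time) plus a 0..7 qword tail handled by the switch
// above using masked stores.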
 5575 
 5576 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
 5577                                bool is_large, KRegister mask) {
 5578   // cnt      - number of qwords (8-byte words).
 5579   // base     - start address, qword aligned.
 5580   // is_large - if optimizers know cnt is larger than InitArrayShortSize
 5581   assert(base==rdi, "base register must be edi for rep stos");
 5582   assert(tmp==rax,   "tmp register must be eax for rep stos");
 5583   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
 5584   assert(InitArrayShortSize % BytesPerLong == 0,
 5585     "InitArrayShortSize should be the multiple of BytesPerLong");
 5586 
 5587   Label DONE;
 5588   if (!is_large || !UseXMMForObjInit) {
 5589     xorptr(tmp, tmp);
 5590   }
 5591 
 5592   if (!is_large) {
 5593     Label LOOP, LONG;
 5594     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
 5595     jccb(Assembler::greater, LONG);
 5596 
 5597     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
 5598 
 5599     decrement(cnt);
 5600     jccb(Assembler::negative, DONE); // Zero length
 5601 
 5602     // Use individual pointer-sized stores for small counts:
 5603     BIND(LOOP);
 5604     movptr(Address(base, cnt, Address::times_ptr), tmp);
 5605     decrement(cnt);
 5606     jccb(Assembler::greaterEqual, LOOP);
 5607     jmpb(DONE);
 5608 
 5609     BIND(LONG);
 5610   }
 5611 
 5612   // Use longer rep-prefixed ops for non-small counts:
 5613   if (UseFastStosb) {
 5614     shlptr(cnt, 3); // convert to number of bytes
 5615     rep_stosb();
 5616   } else if (UseXMMForObjInit) {
 5617     xmm_clear_mem(base, cnt, tmp, xtmp, mask);
 5618   } else {
 5619     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
 5620     rep_stos();
 5621   }
 5622 
 5623   BIND(DONE);
 5624 }
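// Dispatch summary for reference (cnt in qwords on entry):
//   cnt <= InitArrayShortSize/8 : unrolled pointer-sized stores (LOOP above)
//   UseFastStosb                : byte count via shl 3, then rep stosb
//   UseXMMForObjInit            : vectorized path (xmm_clear_mem)
//   otherwise                   : rep stos (dword count on 32-bit)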
 5625 
 5626 #endif //COMPILER2_OR_JVMCI
 5627 
 5628 
 5629 void MacroAssembler::generate_fill(BasicType t, bool aligned,
 5630                                    Register to, Register value, Register count,
 5631                                    Register rtmp, XMMRegister xtmp) {
 5632   ShortBranchVerifier sbv(this);
 5633   assert_different_registers(to, value, count, rtmp);
 5634   Label L_exit;
 5635   Label L_fill_2_bytes, L_fill_4_bytes;
 5636 
 5637 #if defined(COMPILER2) && defined(_LP64)

   11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12  * version 2 for more details (a copy is included in the LICENSE file that
   13  * accompanied this code).
   14  *
   15  * You should have received a copy of the GNU General Public License version
   16  * 2 along with this work; if not, write to the Free Software Foundation,
   17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18  *
   19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20  * or visit www.oracle.com if you need additional information or have any
   21  * questions.
   22  *
   23  */
   24 
   25 #include "precompiled.hpp"
   26 #include "jvm.h"
   27 #include "asm/assembler.hpp"
   28 #include "asm/assembler.inline.hpp"
   29 #include "compiler/compiler_globals.hpp"
   30 #include "compiler/disassembler.hpp"
   31 #include "ci/ciInlineKlass.hpp"
   32 #include "gc/shared/barrierSet.hpp"
   33 #include "gc/shared/barrierSetAssembler.hpp"
   34 #include "gc/shared/collectedHeap.inline.hpp"
   35 #include "gc/shared/tlab_globals.hpp"
   36 #include "interpreter/bytecodeHistogram.hpp"
   37 #include "interpreter/interpreter.hpp"
   38 #include "memory/resourceArea.hpp"
   39 #include "memory/universe.hpp"
   40 #include "oops/accessDecorators.hpp"
   41 #include "oops/compressedOops.inline.hpp"
   42 #include "oops/klass.inline.hpp"
   43 #include "prims/methodHandles.hpp"
   44 #include "runtime/continuation.hpp"
   45 #include "runtime/flags/flagSetting.hpp"
   46 #include "runtime/interfaceSupport.inline.hpp"
   47 #include "runtime/javaThread.hpp"
   48 #include "runtime/jniHandles.hpp"
   49 #include "runtime/objectMonitor.hpp"
   50 #include "runtime/os.hpp"
   51 #include "runtime/safepoint.hpp"
   52 #include "runtime/safepointMechanism.hpp"
   53 #include "runtime/sharedRuntime.hpp"
   54 #include "runtime/signature_cc.hpp"
   55 #include "runtime/stubRoutines.hpp"
   56 #include "utilities/macros.hpp"
   57 #include "vmreg_x86.inline.hpp"
   58 #include "crc32c.h"
   59 #ifdef COMPILER2
   60 #include "opto/output.hpp"
   61 #endif
   62 
   63 #ifdef PRODUCT
   64 #define BLOCK_COMMENT(str) /* nothing */
   65 #define STOP(error) stop(error)
   66 #else
   67 #define BLOCK_COMMENT(str) block_comment(str)
   68 #define STOP(error) block_comment(error); stop(error)
   69 #endif
   70 
   71 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
   72 
   73 #ifdef ASSERT
   74 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
   75 #endif
   76 
   77 static Assembler::Condition reverse[] = {
   78     Assembler::noOverflow     /* overflow      = 0x0 */ ,
   79     Assembler::overflow       /* noOverflow    = 0x1 */ ,
   80     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
   81     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

 1664   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1665   pass_arg1(this, arg_1);
 1666   pass_arg0(this, arg_0);
 1667   call_VM_leaf(entry_point, 3);
 1668 }
 1669 
 1670 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
 1671   LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
 1672   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
 1673   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
 1674   pass_arg3(this, arg_3);
 1675   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
 1676   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
 1677   pass_arg2(this, arg_2);
 1678   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1679   pass_arg1(this, arg_1);
 1680   pass_arg0(this, arg_0);
 1681   call_VM_leaf(entry_point, 3);
 1682 }
 1683 
 1684 void MacroAssembler::super_call_VM_leaf(address entry_point) {
 1685   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1686 }
 1687 
 1688 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
 1689   pass_arg0(this, arg_0);
 1690   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1691 }
 1692 
 1693 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 1694 
 1695   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1696   pass_arg1(this, arg_1);
 1697   pass_arg0(this, arg_0);
 1698   MacroAssembler::call_VM_leaf_base(entry_point, 2);
 1699 }
 1700 
 1701 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 1702   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
 1703   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
 1704   pass_arg2(this, arg_2);
 1705   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1706   pass_arg1(this, arg_1);
 1707   pass_arg0(this, arg_0);

 2846     lea(rscratch, src);
 2847     Assembler::mulss(dst, Address(rscratch, 0));
 2848   }
 2849 }
 2850 
 2851 void MacroAssembler::null_check(Register reg, int offset) {
 2852   if (needs_explicit_null_check(offset)) {
 2853     // provoke OS NULL exception if reg = NULL by
 2854     // accessing M[reg] w/o changing any (non-CC) registers
 2855     // NOTE: cmpl is plenty here to provoke a segv
 2856     cmpptr(rax, Address(reg, 0));
 2857     // Note: should probably use testl(rax, Address(reg, 0));
 2858     //       may be shorter code (however, this version of
 2859     //       testl needs to be implemented first)
 2860   } else {
 2861     // nothing to do, (later) access of M[reg + offset]
 2862     // will provoke OS NULL exception if reg = NULL
 2863   }
 2864 }
 2865 
 2866 void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
 2867   andptr(markword, markWord::inline_type_mask_in_place);
 2868   cmpptr(markword, markWord::inline_type_pattern);
 2869   jcc(Assembler::equal, is_inline_type);
 2870 }
 2871 
 2872 void MacroAssembler::test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type) {
 2873   movl(temp_reg, Address(klass, Klass::access_flags_offset()));
 2874   testl(temp_reg, JVM_ACC_VALUE);
 2875   jcc(Assembler::notZero, is_inline_type);
 2876 }
 2877 
 2878 void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type) {
 2879   testptr(object, object);
 2880   jcc(Assembler::zero, not_inline_type);
 2881   const int is_inline_type_mask = markWord::inline_type_pattern;
 2882   movptr(tmp, Address(object, oopDesc::mark_offset_in_bytes()));
 2883   andptr(tmp, is_inline_type_mask);
 2884   cmpptr(tmp, is_inline_type_mask);
 2885   jcc(Assembler::notEqual, not_inline_type);
 2886 }
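// A minimal usage sketch (register and label are hypothetical): branch to a
// slow path unless the possibly-NULL receiver is an inline-type instance:
//
//   Label L_not_inline;
//   __ test_oop_is_not_inline_type(rax, rscratch1, L_not_inline);
//   // ... fast path for inline types ...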
 2887 
 2888 void MacroAssembler::test_klass_is_empty_inline_type(Register klass, Register temp_reg, Label& is_empty_inline_type) {
 2889 #ifdef ASSERT
 2890   {
 2891     Label done_check;
 2892     test_klass_is_inline_type(klass, temp_reg, done_check);
 2893     stop("test_klass_is_empty_inline_type with non inline type klass");
 2894     bind(done_check);
 2895   }
 2896 #endif
 2897   movl(temp_reg, Address(klass, InstanceKlass::misc_flags_offset()));
 2898   testl(temp_reg, InstanceKlass::misc_flag_is_empty_inline_type());
 2899   jcc(Assembler::notZero, is_empty_inline_type);
 2900 }
 2901 
 2902 void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
 2903   movl(temp_reg, flags);
 2904   shrl(temp_reg, ConstantPoolCacheEntry::is_null_free_inline_type_shift);
 2905   andl(temp_reg, 0x1);
 2906   testl(temp_reg, temp_reg);
 2907   jcc(Assembler::notZero, is_null_free_inline_type);
 2908 }
 2909 
 2910 void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
 2911   movl(temp_reg, flags);
 2912   shrl(temp_reg, ConstantPoolCacheEntry::is_null_free_inline_type_shift);
 2913   andl(temp_reg, 0x1);
 2914   testl(temp_reg, temp_reg);
 2915   jcc(Assembler::zero, not_null_free_inline_type);
 2916 }
 2917 
 2918 void MacroAssembler::test_field_is_inlined(Register flags, Register temp_reg, Label& is_inlined) {
 2919   movl(temp_reg, flags);
 2920   shrl(temp_reg, ConstantPoolCacheEntry::is_inlined_shift);
 2921   andl(temp_reg, 0x1);
 2922   testl(temp_reg, temp_reg);
 2923   jcc(Assembler::notZero, is_inlined);
 2924 }
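// A minimal usage sketch (hypothetical; assumes rcx points at the resolved
// ConstantPoolCacheEntry): the three field-flag helpers above each isolate
// one bit of the entry's flags word:
//
//   __ movl(rax, Address(rcx, in_bytes(ConstantPoolCacheEntry::flags_offset())));
//   Label L_inlined;
//   __ test_field_is_inlined(rax, rscratch1, L_inlined);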
 2925 
 2926 void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
 2927   Label test_mark_word;
 2928   // load mark word
 2929   movptr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes()));
 2930   // check displaced
 2931   testl(temp_reg, markWord::unlocked_value);
 2932   jccb(Assembler::notZero, test_mark_word);
 2933   // slow path use klass prototype
 2934   push(rscratch1);
 2935   load_prototype_header(temp_reg, oop, rscratch1);
 2936   pop(rscratch1);
 2937 
 2938   bind(test_mark_word);
 2939   testl(temp_reg, test_bit);
 2940   jcc((jmp_set) ? Assembler::notZero : Assembler::zero, jmp_label);
 2941 }
 2942 
 2943 void MacroAssembler::test_flattened_array_oop(Register oop, Register temp_reg,
 2944                                               Label& is_flattened_array) {
 2945 #ifdef _LP64
 2946   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flattened_array);
 2947 #else
 2948   load_klass(temp_reg, oop, noreg);
 2949   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2950   test_flattened_array_layout(temp_reg, is_flattened_array);
 2951 #endif
 2952 }
 2953 
 2954 void MacroAssembler::test_non_flattened_array_oop(Register oop, Register temp_reg,
 2955                                                   Label& is_non_flattened_array) {
 2956 #ifdef _LP64
 2957   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flattened_array);
 2958 #else
 2959   load_klass(temp_reg, oop, noreg);
 2960   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2961   test_non_flattened_array_layout(temp_reg, is_non_flattened_array);
 2962 #endif
 2963 }
 2964 
 2965 void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) {
 2966 #ifdef _LP64
 2967   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array);
 2968 #else
 2969   load_klass(temp_reg, oop, noreg);
 2970   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2971   test_null_free_array_layout(temp_reg, is_null_free_array);
 2972 #endif
 2973 }
 2974 
 2975 void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array) {
 2976 #ifdef _LP64
 2977   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array);
 2978 #else
 2979   load_klass(temp_reg, oop, noreg);
 2980   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2981   test_non_null_free_array_layout(temp_reg, is_non_null_free_array);
 2982 #endif
 2983 }
 2984 
 2985 void MacroAssembler::test_flattened_array_layout(Register lh, Label& is_flattened_array) {
 2986   testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
 2987   jcc(Assembler::notZero, is_flattened_array);
 2988 }
 2989 
 2990 void MacroAssembler::test_non_flattened_array_layout(Register lh, Label& is_non_flattened_array) {
 2991   testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
 2992   jcc(Assembler::zero, is_non_flattened_array);
 2993 }
 2994 
 2995 void MacroAssembler::test_null_free_array_layout(Register lh, Label& is_null_free_array) {
 2996   testl(lh, Klass::_lh_null_free_array_bit_inplace);
 2997   jcc(Assembler::notZero, is_null_free_array);
 2998 }
 2999 
 3000 void MacroAssembler::test_non_null_free_array_layout(Register lh, Label& is_non_null_free_array) {
 3001   testl(lh, Klass::_lh_null_free_array_bit_inplace);
 3002   jcc(Assembler::zero, is_non_null_free_array);
 3003 }
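// Summary of the layout-helper bits tested above, for reference:
//   _lh_array_tag_flat_value_bit_inplace set -> flattened (flat value) array
//   _lh_null_free_array_bit_inplace      set -> null-free array
// Each positive/negative helper pair branches on notZero/zero respectively.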
 3004 
 3005 
 3006 void MacroAssembler::os_breakpoint() {
 3007   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
 3008   // (e.g., MSVC can't call ps() otherwise)
 3009   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 3010 }
 3011 
 3012 void MacroAssembler::unimplemented(const char* what) {
 3013   const char* buf = NULL;
 3014   {
 3015     ResourceMark rm;
 3016     stringStream ss;
 3017     ss.print("unimplemented: %s", what);
 3018     buf = code_string(ss.as_string());
 3019   }
 3020   stop(buf);
 3021 }
 3022 
 3023 #ifdef _LP64
 3024 #define XSTATE_BV 0x200
 3025 #endif

 4016 }
 4017 
 4018 // C++ bool manipulation
 4019 void MacroAssembler::testbool(Register dst) {
 4020   if(sizeof(bool) == 1)
 4021     testb(dst, 0xff);
 4022   else if(sizeof(bool) == 2) {
 4023     // testw implementation needed for two byte bools
 4024     ShouldNotReachHere();
 4025   } else if(sizeof(bool) == 4)
 4026     testl(dst, dst);
 4027   else
 4028     // unsupported
 4029     ShouldNotReachHere();
 4030 }
 4031 
 4032 void MacroAssembler::testptr(Register dst, Register src) {
 4033   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
 4034 }
 4035 
 4036 // Object / value buffer allocation...
 4037 //
 4038 // Kills klass and rsi on LP64
 4039 void MacroAssembler::allocate_instance(Register klass, Register new_obj,
 4040                                        Register t1, Register t2,
 4041                                        bool clear_fields, Label& alloc_failed)
 4042 {
 4043   Label done, initialize_header, initialize_object, slow_case, slow_case_no_pop;
 4044   Register layout_size = t1;
 4045   assert(new_obj == rax, "needs to be rax");
 4046   assert_different_registers(klass, new_obj, t1, t2);
 4047 
 4048   // get instance_size in InstanceKlass (scaled to a count of bytes)
 4049   movl(layout_size, Address(klass, Klass::layout_helper_offset()));
 4050   // test to see if it has a finalizer or is malformed in some way
 4051   testl(layout_size, Klass::_lh_instance_slow_path_bit);
 4052   jcc(Assembler::notZero, slow_case_no_pop);
 4053 
 4054   // Allocate the instance:
 4055   //  If TLAB is enabled:
 4056   //    Try to allocate in the TLAB.
 4057   //    If fails, go to the slow path.
 4058   //  Else If inline contiguous allocations are enabled:
 4059   //    Try to allocate in eden.
 4060   //    If fails due to heap end, go to slow path.
 4061   //
 4062   //  If TLAB is enabled OR inline contiguous is enabled:
 4063   //    Initialize the allocation.
 4064   //    Exit.
 4065   //
 4066   //  Go to slow path.
 4067 
 4068   push(klass);
 4069   const Register thread = LP64_ONLY(r15_thread) NOT_LP64(klass);
 4070 #ifndef _LP64
 4071   if (UseTLAB) {
 4072     get_thread(thread);
 4073   }
 4074 #endif // _LP64
 4075 
 4076   if (UseTLAB) {
 4077     tlab_allocate(thread, new_obj, layout_size, 0, klass, t2, slow_case);
 4078     if (ZeroTLAB || (!clear_fields)) {
 4079       // the fields have been already cleared
 4080       jmp(initialize_header);
 4081     } else {
 4082       // initialize both the header and fields
 4083       jmp(initialize_object);
 4084     }
 4085   } else {
 4086     jmp(slow_case);
 4087   }
 4088 
 4089   // If UseTLAB is true, the object was allocated above and still needs to be
 4090   // initialized. Otherwise, skip and go to the slow path.
 4091   if (UseTLAB) {
 4092     if (clear_fields) {
 4093       // The object is initialized before the header.  If the object size is
 4094       // zero, go directly to the header initialization.
 4095       bind(initialize_object);
 4096       decrement(layout_size, sizeof(oopDesc));
 4097       jcc(Assembler::zero, initialize_header);
 4098 
 4099       // Initialize topmost object field, divide size by 8, check if odd and
 4100       // test if zero.
 4101       Register zero = klass;
 4102       xorl(zero, zero);    // use zero reg to clear memory (shorter code)
 4103       shrl(layout_size, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
 4104 
 4105   #ifdef ASSERT
 4106       // make sure instance_size was multiple of 8
 4107       Label L;
 4108       // Ignore partial flag stall after shrl() since it is debug VM
 4109       jcc(Assembler::carryClear, L);
 4110       stop("object size is not multiple of 2 - adjust this code");
 4111       bind(L);
 4112       // must be > 0, no extra check needed here
 4113   #endif
 4114 
 4115       // initialize remaining object fields: instance_size was a multiple of 8
 4116       {
 4117         Label loop;
 4118         bind(loop);
 4119         movptr(Address(new_obj, layout_size, Address::times_8, sizeof(oopDesc) - 1*oopSize), zero);
 4120         NOT_LP64(movptr(Address(new_obj, layout_size, Address::times_8, sizeof(oopDesc) - 2*oopSize), zero));
 4121         decrement(layout_size);
 4122         jcc(Assembler::notZero, loop);
 4123       }
 4124     } // clear_fields
 4125 
 4126     // initialize object header only.
 4127     bind(initialize_header);
 4128     pop(klass);
 4129     Register mark_word = t2;
 4130     movptr(mark_word, Address(klass, Klass::prototype_header_offset()));
 4131     movptr(Address(new_obj, oopDesc::mark_offset_in_bytes()), mark_word);
 4132 #ifdef _LP64
 4133     xorl(rsi, rsi);                 // use zero reg to clear memory (shorter code)
 4134     store_klass_gap(new_obj, rsi);  // zero klass gap for compressed oops
 4135 #endif
 4136     movptr(t2, klass);         // preserve klass
 4137     store_klass(new_obj, t2, rscratch1);  // src klass reg is potentially compressed
 4138 
 4139     jmp(done);
 4140   }
 4141 
 4142   bind(slow_case);
 4143   pop(klass);
 4144   bind(slow_case_no_pop);
 4145   jmp(alloc_failed);
 4146 
 4147   bind(done);
 4148 }
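// A minimal usage sketch (register assignment is illustrative; new_obj must
// be rax, as asserted above):
//
//   Label L_alloc_failed;
//   __ allocate_instance(rbx /* klass */, rax /* new_obj */, rcx, rdx,
//                        true /* clear_fields */, L_alloc_failed);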
 4149 
 4150 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
 4151 void MacroAssembler::tlab_allocate(Register thread, Register obj,
 4152                                    Register var_size_in_bytes,
 4153                                    int con_size_in_bytes,
 4154                                    Register t1,
 4155                                    Register t2,
 4156                                    Label& slow_case) {
 4157   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 4158   bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
 4159 }
 4160 
 4161 RegSet MacroAssembler::call_clobbered_gp_registers() {
 4162   RegSet regs;
 4163 #ifdef _LP64
 4164   regs += RegSet::of(rax, rcx, rdx);
 4165 #ifndef WINDOWS
 4166   regs += RegSet::of(rsi, rdi);
 4167 #endif
 4168   regs += RegSet::range(r8, r11);
 4169 #else

 4382     // clear topmost word (no jump would be needed if conditional assignment worked here)
 4383     movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
 4384     // index could be 0 now, must check again
 4385     jcc(Assembler::zero, done);
 4386     bind(even);
 4387   }
 4388 #endif // !_LP64
 4389   // initialize remaining object fields: index is a multiple of 2 now
 4390   {
 4391     Label loop;
 4392     bind(loop);
 4393     movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
 4394     NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
 4395     decrement(index);
 4396     jcc(Assembler::notZero, loop);
 4397   }
 4398 
 4399   bind(done);
 4400 }
 4401 
 4402 void MacroAssembler::get_inline_type_field_klass(Register klass, Register index, Register inline_klass) {
 4403   movptr(inline_klass, Address(klass, InstanceKlass::inline_type_field_klasses_offset()));
 4404 #ifdef ASSERT
 4405   {
 4406     Label done;
 4407     cmpptr(inline_klass, 0);
 4408     jcc(Assembler::notEqual, done);
 4409     stop("get_inline_type_field_klass contains no inline klass");
 4410     bind(done);
 4411   }
 4412 #endif
 4413   movptr(inline_klass, Address(inline_klass, index, Address::times_ptr));
 4414 }
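// A minimal usage sketch (registers and the field index are hypothetical):
//
//   __ movl(rcx, 0);                               // index of the field
//   __ get_inline_type_field_klass(rbx, rcx, rax); // rax <- InlineKlass* for field 0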
 4415 
 4416 void MacroAssembler::get_default_value_oop(Register inline_klass, Register temp_reg, Register obj) {
 4417 #ifdef ASSERT
 4418   {
 4419     Label done_check;
 4420     test_klass_is_inline_type(inline_klass, temp_reg, done_check);
 4421     stop("get_default_value_oop from non inline type klass");
 4422     bind(done_check);
 4423   }
 4424 #endif
 4425   Register offset = temp_reg;
 4426   // Getting the offset of the pre-allocated default value
 4427   movptr(offset, Address(inline_klass, in_bytes(InstanceKlass::adr_inlineklass_fixed_block_offset())));
 4428   movl(offset, Address(offset, in_bytes(InlineKlass::default_value_offset_offset())));
 4429 
 4430   // Getting the mirror
 4431   movptr(obj, Address(inline_klass, in_bytes(Klass::java_mirror_offset())));
 4432   resolve_oop_handle(obj, inline_klass);
 4433 
 4434   // Getting the pre-allocated default value from the mirror
 4435   Address field(obj, offset, Address::times_1);
 4436   load_heap_oop(obj, field);
 4437 }
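// C-ish summary of the sequence above, for reference:
//   offset = inline_klass->adr_inlineklass_fixed_block->default_value_offset;
//   obj    = inline_klass->java_mirror();        // OopHandle resolved in place
//   obj    = *(oop*)((address)obj + offset);     // the pre-allocated default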
 4438 
 4439 void MacroAssembler::get_empty_inline_type_oop(Register inline_klass, Register temp_reg, Register obj) {
 4440 #ifdef ASSERT
 4441   {
 4442     Label done_check;
 4443     test_klass_is_empty_inline_type(inline_klass, temp_reg, done_check);
 4444     stop("get_empty_value from non-empty inline klass");
 4445     bind(done_check);
 4446   }
 4447 #endif
 4448   get_default_value_oop(inline_klass, temp_reg, obj);
 4449 }
 4450 
 4451 
 4452 // Look up the method for a megamorphic invokeinterface call.
 4453 // The target method is determined by <intf_klass, itable_index>.
 4454 // The receiver klass is in recv_klass.
 4455 // On success, the result will be in method_result, and execution falls through.
 4456 // On failure, execution transfers to the given label.
 4457 void MacroAssembler::lookup_interface_method(Register recv_klass,
 4458                                              Register intf_klass,
 4459                                              RegisterOrConstant itable_index,
 4460                                              Register method_result,
 4461                                              Register scan_temp,
 4462                                              Label& L_no_such_interface,
 4463                                              bool return_method) {
 4464   assert_different_registers(recv_klass, intf_klass, scan_temp);
 4465   assert_different_registers(method_result, intf_klass, scan_temp);
 4466   assert(recv_klass != method_result || !return_method,
 4467          "recv_klass can be destroyed when method isn't needed");
 4468 
 4469   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
 4470          "caller must use same register for non-constant itable index as for method");
 4471 

 4780   } else {
 4781     Label L;
 4782     jccb(negate_condition(cc), L);
 4783     movl(dst, src);
 4784     bind(L);
 4785   }
 4786 }
 4787 
 4788 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
 4789   if (VM_Version::supports_cmov()) {
 4790     cmovl(cc, dst, src);
 4791   } else {
 4792     Label L;
 4793     jccb(negate_condition(cc), L);
 4794     movl(dst, src);
 4795     bind(L);
 4796   }
 4797 }
 4798 
 4799 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 4800   if (!VerifyOops || VerifyAdapterSharing) {
 4801   // The address of the code string pushed below confuses VerifyAdapterSharing
 4802     // because it may differ between otherwise equivalent adapters.
 4803     return;
 4804   }
 4805 
 4806   BLOCK_COMMENT("verify_oop {");
 4807 #ifdef _LP64
 4808   push(rscratch1);
 4809 #endif
 4810   push(rax);                          // save rax
 4811   push(reg);                          // pass register argument
 4812 
 4813   // Pass register number to verify_oop_subroutine
 4814   const char* b = NULL;
 4815   {
 4816     ResourceMark rm;
 4817     stringStream ss;
 4818     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
 4819     b = code_string(ss.as_string());
 4820   }
 4821   ExternalAddress buffer((address) b);
 4822   pushptr(buffer.addr(), rscratch1);
 4823 
 4824   // call indirectly to solve generation ordering problem

 4846   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
 4847   int stackElementSize = Interpreter::stackElementSize;
 4848   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
 4849 #ifdef ASSERT
 4850   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
 4851   assert(offset1 - offset == stackElementSize, "correct arithmetic");
 4852 #endif
 4853   Register             scale_reg    = noreg;
 4854   Address::ScaleFactor scale_factor = Address::no_scale;
 4855   if (arg_slot.is_constant()) {
 4856     offset += arg_slot.as_constant() * stackElementSize;
 4857   } else {
 4858     scale_reg    = arg_slot.as_register();
 4859     scale_factor = Address::times(stackElementSize);
 4860   }
 4861   offset += wordSize;           // return PC is on stack
 4862   return Address(rsp, scale_reg, scale_factor, offset);
 4863 }
 4864 
 4865 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 4866   if (!VerifyOops || VerifyAdapterSharing) {
 4867   // The address of the code string pushed below confuses VerifyAdapterSharing
 4868     // because it may differ between otherwise equivalent adapters.
 4869     return;
 4870   }
 4871 
 4872 #ifdef _LP64
 4873   push(rscratch1);
 4874 #endif
 4875   push(rax); // save rax,
 4876   // addr may contain rsp so we will have to adjust it based on the push
 4877   // we just did (and on 64 bit we do two pushes)
 4878   // NOTE: the 64-bit code used to do movq(addr, rax), i.e. store rax into
 4879   //       addr, which is backwards from what was intended.
 4880   if (addr.uses(rsp)) {
 4881     lea(rax, addr);
 4882     pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
 4883   } else {
 4884     pushptr(addr);
 4885   }
 4886 
 4887   // Pass register number to verify_oop_subroutine
 4888   const char* b = NULL;
 4889   {
 4890     ResourceMark rm;

 5337 
 5338 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
 5339   // get mirror
 5340   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 5341   load_method_holder(mirror, method);
 5342   movptr(mirror, Address(mirror, mirror_offset));
 5343   resolve_oop_handle(mirror, tmp);
 5344 }
 5345 
 5346 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
 5347   load_method_holder(rresult, rmethod);
 5348   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
 5349 }
 5350 
 5351 void MacroAssembler::load_method_holder(Register holder, Register method) {
 5352   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
 5353   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
 5354   movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
 5355 }
 5356 
 5357 void MacroAssembler::load_metadata(Register dst, Register src) {
 5358   if (UseCompressedClassPointers) {
 5359     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5360   } else {
 5361     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5362   }
 5363 }
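// Note: unlike load_klass() below, load_metadata() leaves a narrow class
// pointer undecoded, so its result is only usable for raw-bit comparisons,
// not for dereferencing.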
 5364 
 5365 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
 5366   assert_different_registers(src, tmp);
 5367   assert_different_registers(dst, tmp);
 5368 #ifdef _LP64
 5369   if (UseCompressedClassPointers) {
 5370     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5371     decode_klass_not_null(dst, tmp);
 5372   } else
 5373 #endif
 5374   movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5375 }
 5376 
 5377 void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) {
 5378   load_klass(dst, src, tmp);
 5379   movptr(dst, Address(dst, Klass::prototype_header_offset()));
 5380 }
 5381 
 5382 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
 5383   assert_different_registers(src, tmp);
 5384   assert_different_registers(dst, tmp);
 5385 #ifdef _LP64
 5386   if (UseCompressedClassPointers) {
 5387     encode_klass_not_null(src, tmp);
 5388     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5389   } else
 5390 #endif
 5391     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5392 }
 5393 
 5394 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
 5395                                     Register tmp1, Register thread_tmp) {
 5396   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5397   decorators = AccessInternal::decorator_fixup(decorators);
 5398   bool as_raw = (decorators & AS_RAW) != 0;
 5399   if (as_raw) {
 5400     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
 5401   } else {
 5402     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
 5403   }
 5404 }
 5405 
 5406 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
 5407                                      Register tmp1, Register tmp2, Register tmp3) {
 5408   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5409   decorators = AccessInternal::decorator_fixup(decorators);
 5410   bool as_raw = (decorators & AS_RAW) != 0;
 5411   if (as_raw) {
 5412     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
 5413   } else {
 5414     bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
 5415   }
 5416 }
 5417 
 5418 void MacroAssembler::access_value_copy(DecoratorSet decorators, Register src, Register dst,
 5419                                        Register inline_klass) {
 5420   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5421   bs->value_copy(this, decorators, src, dst, inline_klass);
 5422 }
 5423 
 5424 void MacroAssembler::first_field_offset(Register inline_klass, Register offset) {
 5425   movptr(offset, Address(inline_klass, InstanceKlass::adr_inlineklass_fixed_block_offset()));
 5426   movl(offset, Address(offset, InlineKlass::first_field_offset_offset()));
 5427 }
 5428 
 5429 void MacroAssembler::data_for_oop(Register oop, Register data, Register inline_klass) {
 5430   // ((address) (void*) o) + vk->first_field_offset();
 5431   Register offset = (data == oop) ? rscratch1 : data;
 5432   first_field_offset(inline_klass, offset);
 5433   if (data == oop) {
 5434     addptr(data, offset);
 5435   } else {
 5436     lea(data, Address(oop, offset));
 5437   }
 5438 }
 5439 
 5440 void MacroAssembler::data_for_value_array_index(Register array, Register array_klass,
 5441                                                 Register index, Register data) {
 5442   assert(index != rcx, "index needs to shift by rcx");
 5443   assert_different_registers(array, array_klass, index);
 5444   assert_different_registers(rcx, array, index);
 5445 
 5446   // array->base() + (index << Klass::layout_helper_log2_element_size(lh));
 5447   movl(rcx, Address(array_klass, Klass::layout_helper_offset()));
 5448 
 5449   // Klass::layout_helper_log2_element_size(lh)
 5450   // (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask;
 5451   shrl(rcx, Klass::_lh_log2_element_size_shift);
 5452   andl(rcx, Klass::_lh_log2_element_size_mask);
 5453   shlptr(index); // index << rcx
 5454 
 5455   lea(data, Address(array, index, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_PRIMITIVE_OBJECT)));
 5456 }
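// Worked example for reference: if the layout helper encodes
// log2_element_size == 4 (16-byte flat elements), then index 3 yields
//   data = array + base_offset_in_bytes(T_PRIMITIVE_OBJECT) + (3 << 4)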
 5457 
 5458 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
 5459                                    Register thread_tmp, DecoratorSet decorators) {
 5460   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
 5461 }
 5462 
 5463 // Doesn't do verification, generates fixed size code
 5464 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 5465                                             Register thread_tmp, DecoratorSet decorators) {
 5466   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
 5467 }
 5468 
 5469 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
 5470                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
 5471   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3);
 5472 }
 5473 
 5474 // Used for storing NULLs.
 5475 void MacroAssembler::store_heap_oop_null(Address dst) {
 5476   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
 5477 }

 5777 
 5778 void MacroAssembler::reinit_heapbase() {
 5779   if (UseCompressedOops) {
 5780     if (Universe::heap() != NULL) {
 5781       if (CompressedOops::base() == NULL) {
 5782         MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
 5783       } else {
 5784         mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
 5785       }
 5786     } else {
 5787       movptr(r12_heapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
 5788     }
 5789   }
 5790 }
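// Background (illustrative, not emitted code): with compressed oops a narrow
// oop n decodes roughly as
//   oop = CompressedOops::base() + ((uintptr_t)n << CompressedOops::shift());
// with a NULL base meaning zero-based compressed oops. Keeping r12_heapbase in
// sync with the current base (or zero) is what lets compiled code decode with
// a single lea/shift.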
 5791 
 5792 #endif // _LP64
 5793 
 5794 #if COMPILER2_OR_JVMCI
 5795 
 5796 // Clear (fill with 'val') 'cnt' qwords of memory starting at 'base', using XMM/YMM/ZMM registers
 5797 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, KRegister mask) {
 5798   // cnt - number of qwords (8-byte words).
 5799   // base - start address, qword aligned.
 5800   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
 5801   bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
 5802   if (use64byteVector) {
 5803     evpbroadcastq(xtmp, val, AVX_512bit);
 5804   } else if (MaxVectorSize >= 32) {
 5805     movdq(xtmp, val);
 5806     punpcklqdq(xtmp, xtmp);
 5807     vinserti128_high(xtmp, xtmp);
 5808   } else {
 5809     movdq(xtmp, val);
 5810     punpcklqdq(xtmp, xtmp);
 5811   }
 5812   jmp(L_zero_64_bytes);
 5813 
 5814   BIND(L_loop);
 5815   if (MaxVectorSize >= 32) {
 5816     fill64(base, 0, xtmp, use64byteVector);
 5817   } else {
 5818     movdqu(Address(base,  0), xtmp);
 5819     movdqu(Address(base, 16), xtmp);
 5820     movdqu(Address(base, 32), xtmp);
 5821     movdqu(Address(base, 48), xtmp);
 5822   }
 5823   addptr(base, 64);
 5824 
 5825   BIND(L_zero_64_bytes);
 5826   subptr(cnt, 8);
 5827   jccb(Assembler::greaterEqual, L_loop);
 5828 
 5829   // Fill trailing bytes (at most 64)
 5830   if (use64byteVector) {
 5831     addptr(cnt, 8);
 5832     jccb(Assembler::equal, L_end);
 5833     fill64_masked(3, base, 0, xtmp, mask, cnt, val, true);
 5834     jmp(L_end);
 5835   } else {
 5836     addptr(cnt, 4);
 5837     jccb(Assembler::less, L_tail);
 5838     if (MaxVectorSize >= 32) {
 5839       vmovdqu(Address(base, 0), xtmp);
 5840     } else {
 5841       movdqu(Address(base,  0), xtmp);
 5842       movdqu(Address(base, 16), xtmp);
 5843     }
 5844   }
 5845   addptr(base, 32);
 5846   subptr(cnt, 4);
 5847 
 5848   BIND(L_tail);
 5849   addptr(cnt, 4);
 5850   jccb(Assembler::lessEqual, L_end);
 5851   if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
 5852     fill32_masked(3, base, 0, xtmp, mask, cnt, val);
 5853   } else {
 5854     decrement(cnt);
 5855 
 5856     BIND(L_sloop);
 5857     movq(Address(base, 0), xtmp);
 5858     addptr(base, 8);
 5859     decrement(cnt);
 5860     jccb(Assembler::greaterEqual, L_sloop);
 5861   }
 5862   BIND(L_end);
 5863 }
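// Control flow of xmm_clear_mem above (illustrative summary): the main loop
// stores 64 bytes per iteration while cnt >= 8 qwords; the 0..7 remaining
// qwords are then written either by one masked 64-byte store (AVX-512 path),
// or by an optional 32-byte step followed by a masked 32-byte store (AVX-512VL)
// or a single-qword loop.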
 5864 
 5865 int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
 5866   assert(InlineTypeReturnedAsFields, "inline types should be returned as fields");
 5867   // An inline type might be returned. If fields are in registers we
 5868   // need to allocate an inline type instance and initialize it with
 5869   // the values of the fields.
 5870   Label skip;
 5871   // We only need a new buffered inline type if a buffered one was not returned
 5872   testptr(rax, 1);
 5873   jcc(Assembler::zero, skip);
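  // (Illustrative summary of the convention assumed here: if bit 0 of rax is
  //  clear, rax already holds a buffered inline type oop and nothing more is
  //  needed; if it is set, the field values are in registers and, in the
  //  interpreter case, rax holds the InlineKlass* tagged with 0x01.)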
 5874   int call_offset = -1;
 5875 
 5876 #ifdef _LP64
 5877   // The following code is similar to allocate_instance but has some slight differences:
 5878   // the object size is never zero and is sometimes constant, and storing the klass
 5879   // pointer after allocation is unnecessary if vk != NULL. allocate_instance cannot exploit these.
 5880   Label slow_case;
 5881   // 1. Try to allocate a new buffered inline instance either from TLAB or eden space
 5882   mov(rscratch1, rax); // save rax for slow_case since *_allocate may corrupt it if allocation fails
 5883   if (vk != NULL) {
 5884     // Called from C1, where the return type is statically known.
 5885     movptr(rbx, (intptr_t)vk->get_InlineKlass());
 5886     jint obj_size = vk->layout_helper();
 5887     assert(obj_size != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
 5888     if (UseTLAB) {
 5889       tlab_allocate(r15_thread, rax, noreg, obj_size, r13, r14, slow_case);
 5890     } else {
 5891       jmp(slow_case);
 5892     }
 5893   } else {
 5894     // Called from the interpreter. RAX contains ((the InlineKlass* of the return type) | 0x01)
 5895     mov(rbx, rax);
 5896     andptr(rbx, -2);
 5897     movl(r14, Address(rbx, Klass::layout_helper_offset()));
 5898     if (UseTLAB) {
 5899       tlab_allocate(r15_thread, rax, r14, 0, r13, r14, slow_case);
 5900     } else {
 5901       jmp(slow_case);
 5902     }
 5903   }
 5904   if (UseTLAB) {
 5905     // 2. Initialize buffered inline instance header
 5906     Register buffer_obj = rax;
 5907     movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::inline_type_prototype().value());
 5908     xorl(r13, r13);
 5909     store_klass_gap(buffer_obj, r13);
 5910     if (vk == NULL) {
 5911       // store_klass corrupts rbx(klass), so save it in r13 for later use (interpreter case only).
 5912       mov(r13, rbx);
 5913     }
 5914     store_klass(buffer_obj, rbx, rscratch1);
 5915     // 3. Initialize its fields with an inline class specific handler
 5916     if (vk != NULL) {
 5917       call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
 5918     } else {
 5919       movptr(rbx, Address(r13, InstanceKlass::adr_inlineklass_fixed_block_offset()));
 5920       movptr(rbx, Address(rbx, InlineKlass::pack_handler_offset()));
 5921       call(rbx);
 5922     }
 5923     jmp(skip);
 5924   }
 5925   bind(slow_case);
 5926   // We failed to allocate a new inline type; fall back to a runtime
 5927   // call. Some oop fields may be live in registers, but we cannot
 5928   // tell which. The runtime call will take care of preserving them
 5929   // across a GC if one occurs.
 5930   mov(rax, rscratch1);
 5931 #endif
 5932 
 5933   if (from_interpreter) {
 5934     super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
 5935   } else {
 5936     call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
 5937     call_offset = offset();
 5938   }
 5939 
 5940   bind(skip);
 5941   return call_offset;
 5942 }
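// (Contract note, illustrative: call_offset is only set on the compiled-code
//  path above; when from_interpreter is true the function returns -1.)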
 5943 
 5944 // Move a value between registers/stack slots and update the reg_state
 5945 bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
 5946   assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
 5947   if (reg_state[to->value()] == reg_written) {
 5948     return true; // Already written
 5949   }
 5950   if (from != to && bt != T_VOID) {
 5951     if (reg_state[to->value()] == reg_readonly) {
 5952       return false; // Not yet writable
 5953     }
 5954     if (from->is_reg()) {
 5955       if (to->is_reg()) {
 5956         if (from->is_XMMRegister()) {
 5957           if (bt == T_DOUBLE) {
 5958             movdbl(to->as_XMMRegister(), from->as_XMMRegister());
 5959           } else {
 5960             assert(bt == T_FLOAT, "must be float");
 5961             movflt(to->as_XMMRegister(), from->as_XMMRegister());
 5962           }
 5963         } else {
 5964           movq(to->as_Register(), from->as_Register());
 5965         }
 5966       } else {
 5967         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 5968         Address to_addr = Address(rsp, st_off);
 5969         if (from->is_XMMRegister()) {
 5970           if (bt == T_DOUBLE) {
 5971             movdbl(to_addr, from->as_XMMRegister());
 5972           } else {
 5973             assert(bt == T_FLOAT, "must be float");
 5974             movflt(to_addr, from->as_XMMRegister());
 5975           }
 5976         } else {
 5977           movq(to_addr, from->as_Register());
 5978         }
 5979       }
 5980     } else {
 5981       Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize);
 5982       if (to->is_reg()) {
 5983         if (to->is_XMMRegister()) {
 5984           if (bt == T_DOUBLE) {
 5985             movdbl(to->as_XMMRegister(), from_addr);
 5986           } else {
 5987             assert(bt == T_FLOAT, "must be float");
 5988             movflt(to->as_XMMRegister(), from_addr);
 5989           }
 5990         } else {
 5991           movq(to->as_Register(), from_addr);
 5992         }
 5993       } else {
 5994         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 5995         movq(r13, from_addr);
 5996         movq(Address(rsp, st_off), r13);
 5997       }
 5998     }
 5999   }
 6000   // Update register states
 6001   reg_state[from->value()] = reg_writable;
 6002   reg_state[to->value()] = reg_written;
 6003   return true;
 6004 }
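// Illustrative summary of the reg_state bookkeeping above: a completed move
// marks the source slot reg_writable (its value has been consumed) and the
// destination reg_written; a move into a reg_readonly destination is refused
// and retried later, once the destination's own value has been moved out.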
 6005 
 6006 // Calculate the extra stack space required for packing or unpacking inline
 6007 // args and adjust the stack pointer
 6008 int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
 6009   // Two additional slots to account for the return address
 6010   int sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size;
 6011   sp_inc = align_up(sp_inc, StackAlignmentInBytes);
 6012   // Save the return address, adjust the stack (make sure it is properly
 6013   // 16-byte aligned) and copy the return address to the new top of the stack.
 6014   // The stack will be repaired on return (see MacroAssembler::remove_frame).
 6015   assert(sp_inc > 0, "sanity");
 6016   pop(r13);
 6017   subptr(rsp, sp_inc);
 6018   push(r13);
 6019   return sp_inc;
 6020 }
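// Worked example (illustrative): with args_on_stack == 3 and 4-byte VMReg
// stack slots, sp_inc = (3 + 2) * 4 = 20, which align_up rounds to 32 for
// 16-byte stack alignment; the pop/subptr/push sequence then relocates the
// return address to the new top of stack.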
 6021 
 6022 // Read all fields from an inline type buffer and store the field values in registers/stack slots.
 6023 bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
 6024                                           VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
 6025                                           RegState reg_state[]) {
 6026   assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
 6027   assert(from->is_valid(), "source must be valid");
 6028   bool progress = false;
 6029 #ifdef ASSERT
 6030   const int start_offset = offset();
 6031 #endif
 6032 
 6033   Label L_null, L_notNull;
 6034   // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for)
 6035   Register tmp1 = r10;
 6036   Register tmp2 = r13;
 6037   Register fromReg = noreg;
 6038   ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, -1);
 6039   bool done = true;
 6040   bool mark_done = true;
 6041   VMReg toReg;
 6042   BasicType bt;
 6043   // Check if argument requires a null check
 6044   bool null_check = false;
 6045   VMReg nullCheckReg;
 6046   while (stream.next(nullCheckReg, bt)) {
 6047     if (sig->at(stream.sig_index())._offset == -1) {
 6048       null_check = true;
 6049       break;
 6050     }
 6051   }
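  // (A signature entry with _offset == -1 marks the IsInit slot of a nullable
  //  inline type argument; its presence requires a null check on the incoming
  //  buffer oop -- illustrative summary.)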
 6052   stream.reset(sig_index, to_index);
 6053   while (stream.next(toReg, bt)) {
 6054     assert(toReg->is_valid(), "destination must be valid");
 6055     int idx = (int)toReg->value();
 6056     if (reg_state[idx] == reg_readonly) {
 6057       if (idx != from->value()) {
 6058         mark_done = false;
 6059       }
 6060       done = false;
 6061       continue;
 6062     } else if (reg_state[idx] == reg_written) {
 6063       continue;
 6064     }
 6065     assert(reg_state[idx] == reg_writable, "must be writable");
 6066     reg_state[idx] = reg_written;
 6067     progress = true;
 6068 
 6069     if (fromReg == noreg) {
 6070       if (from->is_reg()) {
 6071         fromReg = from->as_Register();
 6072       } else {
 6073         int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6074         movq(tmp1, Address(rsp, st_off));
 6075         fromReg = tmp1;
 6076       }
 6077       if (null_check) {
 6078         // Nullable inline type argument, emit null check
 6079         testptr(fromReg, fromReg);
 6080         jcc(Assembler::zero, L_null);
 6081       }
 6082     }
 6083     int off = sig->at(stream.sig_index())._offset;
 6084     if (off == -1) {
 6085       assert(null_check, "Missing null check");
 6086       if (toReg->is_stack()) {
 6087         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6088         movq(Address(rsp, st_off), 1);
 6089       } else {
 6090         movq(toReg->as_Register(), 1);
 6091       }
 6092       continue;
 6093     }
 6094     assert(off > 0, "offset in object should be positive");
 6095     Address fromAddr = Address(fromReg, off);
 6096     if (!toReg->is_XMMRegister()) {
 6097       Register dst = toReg->is_stack() ? tmp2 : toReg->as_Register();
 6098       if (is_reference_type(bt)) {
 6099         load_heap_oop(dst, fromAddr);
 6100       } else {
 6101         bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
 6102         load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
 6103       }
 6104       if (toReg->is_stack()) {
 6105         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6106         movq(Address(rsp, st_off), dst);
 6107       }
 6108     } else if (bt == T_DOUBLE) {
 6109       movdbl(toReg->as_XMMRegister(), fromAddr);
 6110     } else {
 6111       assert(bt == T_FLOAT, "must be float");
 6112       movflt(toReg->as_XMMRegister(), fromAddr);
 6113     }
 6114   }
 6115   if (progress && null_check) {
 6116     if (done) {
 6117       jmp(L_notNull);
 6118       bind(L_null);
 6119       // Set IsInit field to zero to signal that the argument is null.
 6120       // Also set all oop fields to zero to make the GC happy.
 6121       stream.reset(sig_index, to_index);
 6122       while (stream.next(toReg, bt)) {
 6123         if (sig->at(stream.sig_index())._offset == -1 ||
 6124             bt == T_OBJECT || bt == T_ARRAY) {
 6125           if (toReg->is_stack()) {
 6126             int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6127             movq(Address(rsp, st_off), 0);
 6128           } else {
 6129             xorq(toReg->as_Register(), toReg->as_Register());
 6130           }
 6131         }
 6132       }
 6133       bind(L_notNull);
 6134     } else {
 6135       bind(L_null);
 6136     }
 6137   }
 6138 
 6139   sig_index = stream.sig_index();
 6140   to_index = stream.regs_index();
 6141 
 6142   if (mark_done && reg_state[from->value()] != reg_written) {
 6143     // This is okay because no one else will write to that slot
 6144     reg_state[from->value()] = reg_writable;
 6145   }
 6146   from_index--;
 6147   assert(progress || (start_offset == offset()), "should not emit code");
 6148   return done;
 6149 }
 6150 
 6151 bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
 6152                                         VMRegPair* from, int from_count, int& from_index, VMReg to,
 6153                                         RegState reg_state[], Register val_array) {
 6154   assert(sig->at(sig_index)._bt == T_PRIMITIVE_OBJECT, "should be at end delimiter");
 6155   assert(to->is_valid(), "destination must be valid");
 6156 
 6157   if (reg_state[to->value()] == reg_written) {
 6158     skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
 6159     return true; // Already written
 6160   }
 6161 
 6162   // TODO 8284443 Isn't it an issue if the code below uses r14 as a tmp while it contains a spilled value?
 6163   // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for).
 6164   Register val_obj_tmp = r11;
 6165   Register from_reg_tmp = r14;
 6166   Register tmp1 = r10;
 6167   Register tmp2 = r13;
 6168   Register tmp3 = rbx;
 6169   Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();
 6170 
 6171   assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);
 6172 
 6173   if (reg_state[to->value()] == reg_readonly) {
 6174     if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
 6175       skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
 6176       return false; // Not yet writable
 6177     }
 6178     val_obj = val_obj_tmp;
 6179   }
 6180 
 6181   int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_PRIMITIVE_OBJECT);
 6182   load_heap_oop(val_obj, Address(val_array, index));
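  // Worked example (illustrative, assuming a 16-byte T_OBJECT array header and
  // 8-byte oop elements): vtarg_index == 2 gives index = 16 + 2 * 8 = 32, the
  // byte offset of the third pre-allocated buffer oop in val_array.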
 6183 
 6184   ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
 6185   VMReg fromReg;
 6186   BasicType bt;
 6187   Label L_null;
 6188   while (stream.next(fromReg, bt)) {
 6189     assert(fromReg->is_valid(), "source must be valid");
 6190     reg_state[fromReg->value()] = reg_writable;
 6191 
 6192     int off = sig->at(stream.sig_index())._offset;
 6193     if (off == -1) {
 6194       // Nullable inline type argument, emit null check
 6195       Label L_notNull;
 6196       if (fromReg->is_stack()) {
 6197         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6198         testb(Address(rsp, ld_off), 1);
 6199       } else {
 6200         testb(fromReg->as_Register(), 1);
 6201       }
 6202       jcc(Assembler::notZero, L_notNull);
 6203       movptr(val_obj, 0);
 6204       jmp(L_null);
 6205       bind(L_notNull);
 6206       continue;
 6207     }
 6208 
 6209     assert(off > 0, "offset in object should be positive");
 6210     size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
 6211 
 6212     Address dst(val_obj, off);
 6213     if (!fromReg->is_XMMRegister()) {
 6214       Register src;
 6215       if (fromReg->is_stack()) {
 6216         src = from_reg_tmp;
 6217         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6218         load_sized_value(src, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
 6219       } else {
 6220         src = fromReg->as_Register();
 6221       }
 6222       assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
 6223       if (is_reference_type(bt)) {
 6224         store_heap_oop(dst, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
 6225       } else {
 6226         store_sized_value(dst, src, size_in_bytes);
 6227       }
 6228     } else if (bt == T_DOUBLE) {
 6229       movdbl(dst, fromReg->as_XMMRegister());
 6230     } else {
 6231       assert(bt == T_FLOAT, "must be float");
 6232       movflt(dst, fromReg->as_XMMRegister());
 6233     }
 6234   }
 6235   bind(L_null);
 6236   sig_index = stream.sig_index();
 6237   from_index = stream.regs_index();
 6238 
 6239   assert(reg_state[to->value()] == reg_writable, "must have already been read");
 6240   bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);
 6241   assert(success, "to register must be writable");
 6242   return true;
 6243 }
 6244 
 6245 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
 6246   return reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
 6247 }
 6248 
 6249 void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
 6250   assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 6251   if (needs_stack_repair) {
 6252     movq(rbp, Address(rsp, initial_framesize));
 6253     // The stack increment resides just below the saved rbp
 6254     addq(rsp, Address(rsp, initial_framesize - wordSize));
 6255   } else {
 6256     if (initial_framesize > 0) {
 6257       addq(rsp, initial_framesize);
 6258     }
 6259     pop(rbp);
 6260   }
 6261 }
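// Frame shape assumed above when needs_stack_repair is true (illustrative):
//   [rsp + initial_framesize]            saved rbp
//   [rsp + initial_framesize - wordSize] sp_inc recorded at frame setup,
//                                        added back to rsp to repair the stack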
 6262 
 6263 // Clearing constant-sized memory using YMM/ZMM registers.
 6264 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 6265   assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
 6266   bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
 6267 
 6268   int vector64_count = (cnt & (~0x7)) >> 3;
 6269   cnt = cnt & 0x7;
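  // Worked example (illustrative): cnt == 23 qwords yields
  // vector64_count = (23 & ~0x7) >> 3 = 2 full 64-byte stores, with
  // cnt = 23 & 0x7 = 7 trailing qwords left for the masked tail below.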
 6270   const int fill64_per_loop = 4;
 6271   const int max_unrolled_fill64 = 8;
 6272 
 6273   // 64 byte initialization loop.
 6274   vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
 6275   int start64 = 0;
 6276   if (vector64_count > max_unrolled_fill64) {
 6277     Label LOOP;
 6278     Register index = rtmp;
 6279 
 6280     start64 = vector64_count - (vector64_count % fill64_per_loop);
 6281 
 6282     movl(index, 0);

 6332         break;
 6333       case 7:
 6334         if (use64byteVector) {
 6335           movl(rtmp, 0x7F);
 6336           kmovwl(mask, rtmp);
 6337           evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
 6338         } else {
 6339           evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
 6340           movl(rtmp, 0x7);
 6341           kmovwl(mask, rtmp);
 6342           evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
 6343         }
 6344         break;
 6345       default:
 6346         fatal("Unexpected length: %d\n", cnt);
 6347         break;
 6348     }
 6349   }
 6350 }
 6351 
 6352 void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp,
 6353                                bool is_large, bool word_copy_only, KRegister mask) {
 6354   // cnt      - number of qwords (8-byte words).
 6355   // base     - start address, qword aligned.
 6356   // is_large - set when the compiler knows cnt is larger than InitArrayShortSize
 6357   assert(base == rdi, "base register must be edi for rep stos");
 6358   assert(val == rax,  "val register must be eax for rep stos");
 6359   assert(cnt == rcx,  "cnt register must be ecx for rep stos");
 6360   assert(InitArrayShortSize % BytesPerLong == 0,
 6361     "InitArrayShortSize should be a multiple of BytesPerLong");
 6362 
 6363   Label DONE;



 6364 
 6365   if (!is_large) {
 6366     Label LOOP, LONG;
 6367     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
 6368     jccb(Assembler::greater, LONG);
 6369 
 6370     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
 6371 
 6372     decrement(cnt);
 6373     jccb(Assembler::negative, DONE); // Zero length
 6374 
 6375     // Use individual pointer-sized stores for small counts:
 6376     BIND(LOOP);
 6377     movptr(Address(base, cnt, Address::times_ptr), val);
 6378     decrement(cnt);
 6379     jccb(Assembler::greaterEqual, LOOP);
 6380     jmpb(DONE);
 6381 
 6382     BIND(LONG);
 6383   }
 6384 
 6385   // Use longer rep-prefixed ops for non-small counts:
 6386   if (UseFastStosb && !word_copy_only) {
 6387     shlptr(cnt, 3); // convert to number of bytes
 6388     rep_stosb();
 6389   } else if (UseXMMForObjInit) {
 6390     xmm_clear_mem(base, cnt, val, xtmp, mask);
 6391   } else {
 6392     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
 6393     rep_stos();
 6394   }
 6395 
 6396   BIND(DONE);
 6397 }
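// Note on the three strategies above (illustrative): rep stosb writes cnt * 8
// bytes one byte at a time and is fastest on CPUs with fast string operations
// (UseFastStosb); plain rep stos writes pointer-sized words, hence the extra
// shlptr to double the count on 32-bit; xmm_clear_mem uses explicit vector
// stores instead of string ops.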
 6398 
 6399 #endif //COMPILER2_OR_JVMCI
 6400 
 6401 
 6402 void MacroAssembler::generate_fill(BasicType t, bool aligned,
 6403                                    Register to, Register value, Register count,
 6404                                    Register rtmp, XMMRegister xtmp) {
 6405   ShortBranchVerifier sbv(this);
 6406   assert_different_registers(to, value, count, rtmp);
 6407   Label L_exit;
 6408   Label L_fill_2_bytes, L_fill_4_bytes;
 6409 
 6410 #if defined(COMPILER2) && defined(_LP64)