src/hotspot/cpu/x86/macroAssembler_x86.cpp
   10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12  * version 2 for more details (a copy is included in the LICENSE file that
   13  * accompanied this code).
   14  *
   15  * You should have received a copy of the GNU General Public License version
   16  * 2 along with this work; if not, write to the Free Software Foundation,
   17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18  *
   19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20  * or visit www.oracle.com if you need additional information or have any
   21  * questions.
   22  *
   23  */
   24 
   25 #include "precompiled.hpp"
   26 #include "asm/assembler.hpp"
   27 #include "asm/assembler.inline.hpp"
   28 #include "compiler/compiler_globals.hpp"
   29 #include "compiler/disassembler.hpp"
   30 #include "crc32c.h"
   31 #include "gc/shared/barrierSet.hpp"
   32 #include "gc/shared/barrierSetAssembler.hpp"
   33 #include "gc/shared/collectedHeap.inline.hpp"
   34 #include "gc/shared/tlab_globals.hpp"
   35 #include "interpreter/bytecodeHistogram.hpp"
   36 #include "interpreter/interpreter.hpp"
   37 #include "jvm.h"
   38 #include "memory/resourceArea.hpp"
   39 #include "memory/universe.hpp"
   40 #include "oops/accessDecorators.hpp"
   41 #include "oops/compressedOops.inline.hpp"
   42 #include "oops/klass.inline.hpp"
   43 #include "prims/methodHandles.hpp"
   44 #include "runtime/continuation.hpp"
   45 #include "runtime/flags/flagSetting.hpp"
   46 #include "runtime/interfaceSupport.inline.hpp"
   47 #include "runtime/javaThread.hpp"
   48 #include "runtime/jniHandles.hpp"
   49 #include "runtime/objectMonitor.hpp"
   50 #include "runtime/os.hpp"
   51 #include "runtime/safepoint.hpp"
   52 #include "runtime/safepointMechanism.hpp"
   53 #include "runtime/sharedRuntime.hpp"
   54 #include "runtime/stubRoutines.hpp"
   55 #include "utilities/macros.hpp"
   56 
   57 #ifdef PRODUCT
   58 #define BLOCK_COMMENT(str) /* nothing */
   59 #define STOP(error) stop(error)
   60 #else
   61 #define BLOCK_COMMENT(str) block_comment(str)
   62 #define STOP(error) block_comment(error); stop(error)
   63 #endif
   64 
   65 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
   66 
   67 #ifdef ASSERT
   68 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
   69 #endif
   70 
   71 static Assembler::Condition reverse[] = {
   72     Assembler::noOverflow     /* overflow      = 0x0 */ ,
   73     Assembler::overflow       /* noOverflow    = 0x1 */ ,
   74     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
   75     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

 1671   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1672   pass_arg1(this, arg_1);
 1673   pass_arg0(this, arg_0);
 1674   call_VM_leaf(entry_point, 3);
 1675 }
 1676 
 1677 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
 1678   LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
 1679   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
 1680   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
 1681   pass_arg3(this, arg_3);
 1682   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
 1683   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
 1684   pass_arg2(this, arg_2);
 1685   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1686   pass_arg1(this, arg_1);
 1687   pass_arg0(this, arg_0);
 1688   call_VM_leaf(entry_point, 3);
 1689 }
 1690 
 1691 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
 1692   pass_arg0(this, arg_0);
 1693   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1694 }
 1695 
 1696 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 1697 
 1698   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1699   pass_arg1(this, arg_1);
 1700   pass_arg0(this, arg_0);
 1701   MacroAssembler::call_VM_leaf_base(entry_point, 2);
 1702 }
 1703 
 1704 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 1705   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
 1706   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
 1707   pass_arg2(this, arg_2);
 1708   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1709   pass_arg1(this, arg_1);
 1710   pass_arg0(this, arg_0);

 2849     lea(rscratch, src);
 2850     Assembler::mulss(dst, Address(rscratch, 0));
 2851   }
 2852 }
 2853 
 2854 void MacroAssembler::null_check(Register reg, int offset) {
 2855   if (needs_explicit_null_check(offset)) {
 2856     // provoke OS NULL exception if reg = NULL by
 2857     // accessing M[reg] w/o changing any (non-CC) registers
 2858     // NOTE: cmpl is plenty here to provoke a segv
 2859     cmpptr(rax, Address(reg, 0));
 2860     // Note: should probably use testl(rax, Address(reg, 0));
 2861     //       may be shorter code (however, this version of
 2862     //       testl needs to be implemented first)
 2863   } else {
 2864     // nothing to do, (later) access of M[reg + offset]
 2865     // will provoke OS NULL exception if reg = NULL
 2866   }
 2867 }
 2868 
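For context on the branch above: whether a null check can stay implicit is a question of offset range. A minimal sketch of the predicate, assuming the usual page-protection scheme (the real needs_explicit_null_check() also handles compressed-oop heap bases):

    // Sketch only -- 'page_size' stands in for the os::vm_page_size() query.
    static bool needs_explicit_null_check_sketch(intptr_t offset, intptr_t page_size) {
      // An access at [reg + offset] with reg == NULL faults on its own only
      // while it lands in the protected page at address zero; anything past
      // it could hit valid memory, so an explicit probe is required.
      return offset < 0 || offset >= page_size;
    }
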
 2869 void MacroAssembler::os_breakpoint() {
 2870   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
 2871   // (e.g., MSVC can't call ps() otherwise)
 2872   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 2873 }
 2874 
 2875 void MacroAssembler::unimplemented(const char* what) {
 2876   const char* buf = NULL;
 2877   {
 2878     ResourceMark rm;
 2879     stringStream ss;
 2880     ss.print("unimplemented: %s", what);
 2881     buf = code_string(ss.as_string());
 2882   }
 2883   stop(buf);
 2884 }
 2885 
 2886 #ifdef _LP64
 2887 #define XSTATE_BV 0x200
 2888 #endif

 3915 }
 3916 
 3917 // C++ bool manipulation
 3918 void MacroAssembler::testbool(Register dst) {
 3919   if(sizeof(bool) == 1)
 3920     testb(dst, 0xff);
 3921   else if(sizeof(bool) == 2) {
 3922     // testw implementation needed for two byte bools
 3923     ShouldNotReachHere();
 3924   } else if(sizeof(bool) == 4)
 3925     testl(dst, dst);
 3926   else
 3927     // unsupported
 3928     ShouldNotReachHere();
 3929 }
 3930 
 3931 void MacroAssembler::testptr(Register dst, Register src) {
 3932   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
 3933 }
 3934 
 3935 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
 3936 void MacroAssembler::tlab_allocate(Register thread, Register obj,
 3937                                    Register var_size_in_bytes,
 3938                                    int con_size_in_bytes,
 3939                                    Register t1,
 3940                                    Register t2,
 3941                                    Label& slow_case) {
 3942   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 3943   bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
 3944 }
 3945 
 3946 RegSet MacroAssembler::call_clobbered_gp_registers() {
 3947   RegSet regs;
 3948 #ifdef _LP64
 3949   regs += RegSet::of(rax, rcx, rdx);
 3950 #ifndef WINDOWS
 3951   regs += RegSet::of(rsi, rdi);
 3952 #endif
 3953   regs += RegSet::range(r8, r11);
 3954 #else

 4167     // clear topmost word (no jump would be needed if conditional assignment worked here)
 4168     movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
 4169     // index could be 0 now, must check again
 4170     jcc(Assembler::zero, done);
 4171     bind(even);
 4172   }
 4173 #endif // !_LP64
 4174   // initialize remaining object fields: index is a multiple of 2 now
 4175   {
 4176     Label loop;
 4177     bind(loop);
 4178     movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
 4179     NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
 4180     decrement(index);
 4181     jcc(Assembler::notZero, loop);
 4182   }
 4183 
 4184   bind(done);
 4185 }
 4186 
 4187 // Look up the method for a megamorphic invokeinterface call.
 4188 // The target method is determined by <intf_klass, itable_index>.
 4189 // The receiver klass is in recv_klass.
 4190 // On success, the result will be in method_result, and execution falls through.
 4191 // On failure, execution transfers to the given label.
 4192 void MacroAssembler::lookup_interface_method(Register recv_klass,
 4193                                              Register intf_klass,
 4194                                              RegisterOrConstant itable_index,
 4195                                              Register method_result,
 4196                                              Register scan_temp,
 4197                                              Label& L_no_such_interface,
 4198                                              bool return_method) {
 4199   assert_different_registers(recv_klass, intf_klass, scan_temp);
 4200   assert_different_registers(method_result, intf_klass, scan_temp);
 4201   assert(recv_klass != method_result || !return_method,
 4202          "recv_klass can be destroyed when method isn't needed");
 4203 
 4204   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
 4205          "caller must use same register for non-constant itable index as for method");
 4206 

 4515   } else {
 4516     Label L;
 4517     jccb(negate_condition(cc), L);
 4518     movl(dst, src);
 4519     bind(L);
 4520   }
 4521 }
 4522 
 4523 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
 4524   if (VM_Version::supports_cmov()) {
 4525     cmovl(cc, dst, src);
 4526   } else {
 4527     Label L;
 4528     jccb(negate_condition(cc), L);
 4529     movl(dst, src);
 4530     bind(L);
 4531   }
 4532 }
 4533 
 4534 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 4535   if (!VerifyOops) return;
 4536 
 4537   BLOCK_COMMENT("verify_oop {");
 4538 #ifdef _LP64
 4539   push(rscratch1);
 4540 #endif
 4541   push(rax);                          // save rax
 4542   push(reg);                          // pass register argument
 4543 
 4544   // Pass register number to verify_oop_subroutine
 4545   const char* b = NULL;
 4546   {
 4547     ResourceMark rm;
 4548     stringStream ss;
 4549     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
 4550     b = code_string(ss.as_string());
 4551   }
 4552   ExternalAddress buffer((address) b);
 4553   pushptr(buffer.addr(), rscratch1);
 4554 
 4555   // call indirectly to solve generation ordering problem

 4577   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
 4578   int stackElementSize = Interpreter::stackElementSize;
 4579   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
 4580 #ifdef ASSERT
 4581   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
 4582   assert(offset1 - offset == stackElementSize, "correct arithmetic");
 4583 #endif
 4584   Register             scale_reg    = noreg;
 4585   Address::ScaleFactor scale_factor = Address::no_scale;
 4586   if (arg_slot.is_constant()) {
 4587     offset += arg_slot.as_constant() * stackElementSize;
 4588   } else {
 4589     scale_reg    = arg_slot.as_register();
 4590     scale_factor = Address::times(stackElementSize);
 4591   }
 4592   offset += wordSize;           // return PC is on stack
 4593   return Address(rsp, scale_reg, scale_factor, offset);
 4594 }
 4595 
 4596 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 4597   if (!VerifyOops) return;
 4598 
 4599 #ifdef _LP64
 4600   push(rscratch1);
 4601 #endif
 4602   push(rax); // save rax,
 4603   // addr may contain rsp so we will have to adjust it based on the push
 4604   // we just did (and on 64 bit we do two pushes)
 4605   // NOTE: the 64-bit code seemed to have had a bug: it did movq(addr, rax),
 4606   // which stored rax into addr, the reverse of what was intended.
 4607   if (addr.uses(rsp)) {
 4608     lea(rax, addr);
 4609     pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
 4610   } else {
 4611     pushptr(addr);
 4612   }
 4613 
 4614   // Pass register number to verify_oop_subroutine
 4615   const char* b = NULL;
 4616   {
 4617     ResourceMark rm;

 5064 
 5065 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
 5066   // get mirror
 5067   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 5068   load_method_holder(mirror, method);
 5069   movptr(mirror, Address(mirror, mirror_offset));
 5070   resolve_oop_handle(mirror, tmp);
 5071 }
 5072 
 5073 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
 5074   load_method_holder(rresult, rmethod);
 5075   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
 5076 }
 5077 
 5078 void MacroAssembler::load_method_holder(Register holder, Register method) {
 5079   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
 5080   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
 5081   movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
 5082 }
 5083 
 5084 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
 5085   assert_different_registers(src, tmp);
 5086   assert_different_registers(dst, tmp);
 5087 #ifdef _LP64
 5088   if (UseCompressedClassPointers) {
 5089     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5090     decode_klass_not_null(dst, tmp);
 5091   } else
 5092 #endif
 5093     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5094 }
 5095 
 5096 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
 5097   assert_different_registers(src, tmp);
 5098   assert_different_registers(dst, tmp);
 5099 #ifdef _LP64
 5100   if (UseCompressedClassPointers) {
 5101     encode_klass_not_null(src, tmp);
 5102     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5103   } else
 5104 #endif
 5105     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5106 }
 5107 
 5108 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
 5109                                     Register tmp1, Register thread_tmp) {
 5110   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5111   decorators = AccessInternal::decorator_fixup(decorators, type);
 5112   bool as_raw = (decorators & AS_RAW) != 0;
 5113   if (as_raw) {
 5114     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
 5115   } else {
 5116     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
 5117   }
 5118 }
 5119 
 5120 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
 5121                                      Register tmp1, Register tmp2, Register tmp3) {
 5122   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5123   decorators = AccessInternal::decorator_fixup(decorators, type);
 5124   bool as_raw = (decorators & AS_RAW) != 0;
 5125   if (as_raw) {
 5126     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
 5127   } else {
 5128     bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
 5129   }
 5130 }
 5131 
 5132 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
 5133                                    Register thread_tmp, DecoratorSet decorators) {
 5134   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
 5135 }
 5136 
 5137 // Doesn't do verification, generates fixed size code
 5138 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 5139                                             Register thread_tmp, DecoratorSet decorators) {
 5140   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
 5141 }
 5142 
 5143 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
 5144                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
 5145   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
 5146 }
 5147 
 5148 // Used for storing NULLs.
 5149 void MacroAssembler::store_heap_oop_null(Address dst) {
 5150   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
 5151 }

 5451 
 5452 void MacroAssembler::reinit_heapbase() {
 5453   if (UseCompressedOops) {
 5454     if (Universe::heap() != NULL) {
 5455       if (CompressedOops::base() == NULL) {
 5456         MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
 5457       } else {
 5458         mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
 5459       }
 5460     } else {
 5461       movptr(r12_heapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
 5462     }
 5463   }
 5464 }
 5465 
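As a reminder of what the cached r12_heapbase enables, compressed-oop decoding in heap-based mode reduces to (sketch):

    // oop = base + (narrow << shift); compiled code keeps 'base' in r12.
    static inline uintptr_t decode_heap_oop_sketch(uint32_t narrow,
                                                   uintptr_t base, int shift) {
      return base + ((uintptr_t)narrow << shift);
    }
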
 5466 #endif // _LP64
 5467 
 5468 #if COMPILER2_OR_JVMCI
 5469 
 5470 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
 5471 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 5472   // cnt - number of qwords (8-byte words).
 5473   // base - start address, qword aligned.
 5474   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
 5475   bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
 5476   if (use64byteVector) {
 5477     vpxor(xtmp, xtmp, xtmp, AVX_512bit);
 5478   } else if (MaxVectorSize >= 32) {
 5479     vpxor(xtmp, xtmp, xtmp, AVX_256bit);
 5480   } else {
 5481     pxor(xtmp, xtmp);
 5482   }
 5483   jmp(L_zero_64_bytes);
 5484 
 5485   BIND(L_loop);
 5486   if (MaxVectorSize >= 32) {
 5487     fill64(base, 0, xtmp, use64byteVector);
 5488   } else {
 5489     movdqu(Address(base,  0), xtmp);
 5490     movdqu(Address(base, 16), xtmp);
 5491     movdqu(Address(base, 32), xtmp);
 5492     movdqu(Address(base, 48), xtmp);
 5493   }
 5494   addptr(base, 64);
 5495 
 5496   BIND(L_zero_64_bytes);
 5497   subptr(cnt, 8);
 5498   jccb(Assembler::greaterEqual, L_loop);
 5499 
 5500   // Copy trailing 64 bytes
 5501   if (use64byteVector) {
 5502     addptr(cnt, 8);
 5503     jccb(Assembler::equal, L_end);
 5504     fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true);
 5505     jmp(L_end);
 5506   } else {
 5507     addptr(cnt, 4);
 5508     jccb(Assembler::less, L_tail);
 5509     if (MaxVectorSize >= 32) {
 5510       vmovdqu(Address(base, 0), xtmp);
 5511     } else {
 5512       movdqu(Address(base,  0), xtmp);
 5513       movdqu(Address(base, 16), xtmp);
 5514     }
 5515   }
 5516   addptr(base, 32);
 5517   subptr(cnt, 4);
 5518 
 5519   BIND(L_tail);
 5520   addptr(cnt, 4);
 5521   jccb(Assembler::lessEqual, L_end);
 5522   if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
 5523     fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp);
 5524   } else {
 5525     decrement(cnt);
 5526 
 5527     BIND(L_sloop);
 5528     movq(Address(base, 0), xtmp);
 5529     addptr(base, 8);
 5530     decrement(cnt);
 5531     jccb(Assembler::greaterEqual, L_sloop);
 5532   }
 5533   BIND(L_end);
 5534 }
 5535 
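A C-level sketch of the control flow above, simplified to the SSE zeroing path (the 32-byte middle step and the AVX-512 masked tails are omitted):

    static void xmm_clear_mem_sketch(uint64_t* base, size_t cnt /* qwords */) {
      while (cnt >= 8) {                    // L_loop: 64 bytes per iteration
        for (int i = 0; i < 8; i++) base[i] = 0;
        base += 8;
        cnt  -= 8;
      }
      while (cnt > 0) {                     // L_sloop: one qword at a time
        *base++ = 0;
        cnt--;
      }
    }
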
 5536 // Clearing constant sized memory using YMM/ZMM registers.
 5537 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 5538   assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
 5539   bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
 5540 
 5541   int vector64_count = (cnt & (~0x7)) >> 3;
 5542   cnt = cnt & 0x7;
 5543   const int fill64_per_loop = 4;
 5544   const int max_unrolled_fill64 = 8;
 5545 
 5546   // 64 byte initialization loop.
 5547   vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
 5548   int start64 = 0;
 5549   if (vector64_count > max_unrolled_fill64) {
 5550     Label LOOP;
 5551     Register index = rtmp;
 5552 
 5553     start64 = vector64_count - (vector64_count % fill64_per_loop);
 5554 
 5555     movl(index, 0);

 5605         break;
 5606       case 7:
 5607         if (use64byteVector) {
 5608           movl(rtmp, 0x7F);
 5609           kmovwl(mask, rtmp);
 5610           evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
 5611         } else {
 5612           evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
 5613           movl(rtmp, 0x7);
 5614           kmovwl(mask, rtmp);
 5615           evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
 5616         }
 5617         break;
 5618       default:
 5619         fatal("Unexpected length: %d\n", cnt);
 5620         break;
 5621     }
 5622   }
 5623 }
 5624 
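A worked example of the split computed above, for cnt = 19 qwords (152 bytes):

    // vector64_count = (19 & ~0x7) >> 3 = 2   -> two full 64-byte stores
    // cnt            =  19 &  0x7      = 3   -> 24-byte tail, handled by case 3
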
 5625 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
 5626                                bool is_large, KRegister mask) {
 5627   // cnt      - number of qwords (8-byte words).
 5628   // base     - start address, qword aligned.
 5629   // is_large - true if the compiler knows cnt is larger than InitArrayShortSize
 5630   assert(base==rdi, "base register must be edi for rep stos");
 5631   assert(tmp==rax,   "tmp register must be eax for rep stos");
 5632   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
 5633   assert(InitArrayShortSize % BytesPerLong == 0,
 5634     "InitArrayShortSize should be a multiple of BytesPerLong");
 5635 
 5636   Label DONE;
 5637   if (!is_large || !UseXMMForObjInit) {
 5638     xorptr(tmp, tmp);
 5639   }
 5640 
 5641   if (!is_large) {
 5642     Label LOOP, LONG;
 5643     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
 5644     jccb(Assembler::greater, LONG);
 5645 
 5646     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
 5647 
 5648     decrement(cnt);
 5649     jccb(Assembler::negative, DONE); // Zero length
 5650 
 5651     // Use individual pointer-sized stores for small counts:
 5652     BIND(LOOP);
 5653     movptr(Address(base, cnt, Address::times_ptr), tmp);
 5654     decrement(cnt);
 5655     jccb(Assembler::greaterEqual, LOOP);
 5656     jmpb(DONE);
 5657 
 5658     BIND(LONG);
 5659   }
 5660 
 5661   // Use longer rep-prefixed ops for non-small counts:
 5662   if (UseFastStosb) {
 5663     shlptr(cnt, 3); // convert to number of bytes
 5664     rep_stosb();
 5665   } else if (UseXMMForObjInit) {
 5666     xmm_clear_mem(base, cnt, tmp, xtmp, mask);
 5667   } else {
 5668     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
 5669     rep_stos();
 5670   }
 5671 
 5672   BIND(DONE);
 5673 }
 5674 
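For reference, the rep-prefixed fallbacks rely on the x86 string-store semantics; what each path amounts to on LP64 (tmp/rax was zeroed above, so both write zeros):

    // UseFastStosb:  shl rcx, 3 ; rep stosb   -- store AL to [rdi], rcx times
    // default:                    rep stosq   -- store RAX to [rdi], rcx times
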
 5675 #endif //COMPILER2_OR_JVMCI
 5676 
 5677 
 5678 void MacroAssembler::generate_fill(BasicType t, bool aligned,
 5679                                    Register to, Register value, Register count,
 5680                                    Register rtmp, XMMRegister xtmp) {
 5681   ShortBranchVerifier sbv(this);
 5682   assert_different_registers(to, value, count, rtmp);
 5683   Label L_exit;
 5684   Label L_fill_2_bytes, L_fill_4_bytes;
 5685 
 5686 #if defined(COMPILER2) && defined(_LP64)

src/hotspot/cpu/x86/macroAssembler_x86.cpp (patched version)

   10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12  * version 2 for more details (a copy is included in the LICENSE file that
   13  * accompanied this code).
   14  *
   15  * You should have received a copy of the GNU General Public License version
   16  * 2 along with this work; if not, write to the Free Software Foundation,
   17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18  *
   19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20  * or visit www.oracle.com if you need additional information or have any
   21  * questions.
   22  *
   23  */
   24 
   25 #include "precompiled.hpp"
   26 #include "asm/assembler.hpp"
   27 #include "asm/assembler.inline.hpp"
   28 #include "compiler/compiler_globals.hpp"
   29 #include "compiler/disassembler.hpp"
   30 #include "ci/ciInlineKlass.hpp"
   31 #include "crc32c.h"
   32 #include "gc/shared/barrierSet.hpp"
   33 #include "gc/shared/barrierSetAssembler.hpp"
   34 #include "gc/shared/collectedHeap.inline.hpp"
   35 #include "gc/shared/tlab_globals.hpp"
   36 #include "interpreter/bytecodeHistogram.hpp"
   37 #include "interpreter/interpreter.hpp"
   38 #include "jvm.h"
   39 #include "memory/resourceArea.hpp"
   40 #include "memory/universe.hpp"
   41 #include "oops/accessDecorators.hpp"
   42 #include "oops/compressedOops.inline.hpp"
   43 #include "oops/klass.inline.hpp"
   44 #include "prims/methodHandles.hpp"
   45 #include "runtime/continuation.hpp"
   46 #include "runtime/flags/flagSetting.hpp"
   47 #include "runtime/interfaceSupport.inline.hpp"
   48 #include "runtime/javaThread.hpp"
   49 #include "runtime/jniHandles.hpp"
   50 #include "runtime/objectMonitor.hpp"
   51 #include "runtime/os.hpp"
   52 #include "runtime/safepoint.hpp"
   53 #include "runtime/safepointMechanism.hpp"
   54 #include "runtime/sharedRuntime.hpp"
   55 #include "runtime/signature_cc.hpp"
   56 #include "runtime/stubRoutines.hpp"
   57 #include "utilities/macros.hpp"
   58 #include "vmreg_x86.inline.hpp"
   59 #ifdef COMPILER2
   60 #include "opto/output.hpp"
   61 #endif
   62 
   63 #ifdef PRODUCT
   64 #define BLOCK_COMMENT(str) /* nothing */
   65 #define STOP(error) stop(error)
   66 #else
   67 #define BLOCK_COMMENT(str) block_comment(str)
   68 #define STOP(error) block_comment(error); stop(error)
   69 #endif
   70 
   71 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
   72 
   73 #ifdef ASSERT
   74 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
   75 #endif
   76 
   77 static Assembler::Condition reverse[] = {
   78     Assembler::noOverflow     /* overflow      = 0x0 */ ,
   79     Assembler::overflow       /* noOverflow    = 0x1 */ ,
   80     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
   81     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

 1677   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1678   pass_arg1(this, arg_1);
 1679   pass_arg0(this, arg_0);
 1680   call_VM_leaf(entry_point, 3);
 1681 }
 1682 
 1683 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
 1684   LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
 1685   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
 1686   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
 1687   pass_arg3(this, arg_3);
 1688   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
 1689   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
 1690   pass_arg2(this, arg_2);
 1691   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1692   pass_arg1(this, arg_1);
 1693   pass_arg0(this, arg_0);
 1694   call_VM_leaf(entry_point, 3);
 1695 }
 1696 
 1697 void MacroAssembler::super_call_VM_leaf(address entry_point) {
 1698   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1699 }
 1700 
 1701 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
 1702   pass_arg0(this, arg_0);
 1703   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1704 }
 1705 
 1706 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 1707 
 1708   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1709   pass_arg1(this, arg_1);
 1710   pass_arg0(this, arg_0);
 1711   MacroAssembler::call_VM_leaf_base(entry_point, 2);
 1712 }
 1713 
 1714 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 1715   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
 1716   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
 1717   pass_arg2(this, arg_2);
 1718   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
 1719   pass_arg1(this, arg_1);
 1720   pass_arg0(this, arg_0);

 2859     lea(rscratch, src);
 2860     Assembler::mulss(dst, Address(rscratch, 0));
 2861   }
 2862 }
 2863 
 2864 void MacroAssembler::null_check(Register reg, int offset) {
 2865   if (needs_explicit_null_check(offset)) {
 2866     // provoke OS NULL exception if reg = NULL by
 2867     // accessing M[reg] w/o changing any (non-CC) registers
 2868     // NOTE: cmpl is plenty here to provoke a segv
 2869     cmpptr(rax, Address(reg, 0));
 2870     // Note: should probably use testl(rax, Address(reg, 0));
 2871     //       may be shorter code (however, this version of
 2872     //       testl needs to be implemented first)
 2873   } else {
 2874     // nothing to do, (later) access of M[reg + offset]
 2875     // will provoke OS NULL exception if reg = NULL
 2876   }
 2877 }
 2878 
 2879 void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
 2880   andptr(markword, markWord::inline_type_mask_in_place);
 2881   cmpptr(markword, markWord::inline_type_pattern);
 2882   jcc(Assembler::equal, is_inline_type);
 2883 }
 2884 
 2885 void MacroAssembler::test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type) {
 2886   movl(temp_reg, Address(klass, Klass::access_flags_offset()));
 2887   testl(temp_reg, JVM_ACC_VALUE);
 2888   jcc(Assembler::notZero, is_inline_type);
 2889 }
 2890 
 2891 void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type) {
 2892   testptr(object, object);
 2893   jcc(Assembler::zero, not_inline_type);
 2894   const int is_inline_type_mask = markWord::inline_type_pattern;
 2895   movptr(tmp, Address(object, oopDesc::mark_offset_in_bytes()));
 2896   andptr(tmp, is_inline_type_mask);
 2897   cmpptr(tmp, is_inline_type_mask);
 2898   jcc(Assembler::notEqual, not_inline_type);
 2899 }
 2900 
 2901 void MacroAssembler::test_klass_is_empty_inline_type(Register klass, Register temp_reg, Label& is_empty_inline_type) {
 2902 #ifdef ASSERT
 2903   {
 2904     Label done_check;
 2905     test_klass_is_inline_type(klass, temp_reg, done_check);
 2906     stop("test_klass_is_empty_inline_type with non inline type klass");
 2907     bind(done_check);
 2908   }
 2909 #endif
 2910   movl(temp_reg, Address(klass, InstanceKlass::misc_status_offset()));
 2911   testl(temp_reg, InstanceKlassMiscStatus::is_empty_inline_type_value());
 2912   jcc(Assembler::notZero, is_empty_inline_type);
 2913 }
 2914 
 2915 void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
 2916   movl(temp_reg, flags);
 2917   shrl(temp_reg, ConstantPoolCacheEntry::is_null_free_inline_type_shift);
 2918   andl(temp_reg, 0x1);
 2919   testl(temp_reg, temp_reg);
 2920   jcc(Assembler::notZero, is_null_free_inline_type);
 2921 }
 2922 
 2923 void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
 2924   movl(temp_reg, flags);
 2925   shrl(temp_reg, ConstantPoolCacheEntry::is_null_free_inline_type_shift);
 2926   andl(temp_reg, 0x1);
 2927   testl(temp_reg, temp_reg);
 2928   jcc(Assembler::zero, not_null_free_inline_type);
 2929 }
 2930 
 2931 void MacroAssembler::test_field_is_inlined(Register flags, Register temp_reg, Label& is_inlined) {
 2932   movl(temp_reg, flags);
 2933   shrl(temp_reg, ConstantPoolCacheEntry::is_inlined_shift);
 2934   andl(temp_reg, 0x1);
 2935   testl(temp_reg, temp_reg);
 2936   jcc(Assembler::notZero, is_inlined);
 2937 }
 2938 
 2939 void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
 2940   Label test_mark_word;
 2941   // load mark word
 2942   movptr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes()));
 2943   // check for a displaced (locked) mark word
 2944   testl(temp_reg, markWord::unlocked_value);
 2945   jccb(Assembler::notZero, test_mark_word);
 2946   // slow path: load the klass prototype header instead
 2947   push(rscratch1);
 2948   load_prototype_header(temp_reg, oop, rscratch1);
 2949   pop(rscratch1);
 2950 
 2951   bind(test_mark_word);
 2952   testl(temp_reg, test_bit);
 2953   jcc((jmp_set) ? Assembler::notZero : Assembler::zero, jmp_label);
 2954 }
 2955 
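A sketch of the fallback logic just emitted: the tested bit lives in the mark word, but a locked (displaced) mark word does not carry it, so the klass prototype header is consulted instead.

    static bool test_oop_prototype_bit_sketch(uintptr_t mark, uintptr_t prototype_header,
                                              uintptr_t unlocked_value,  // markWord::unlocked_value
                                              int32_t test_bit) {
      uintptr_t w = (mark & unlocked_value) ? mark : prototype_header;
      return (w & test_bit) != 0;  // jmp_set selects which outcome takes jmp_label
    }
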
 2956 void MacroAssembler::test_flattened_array_oop(Register oop, Register temp_reg,
 2957                                               Label& is_flattened_array) {
 2958 #ifdef _LP64
 2959   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flattened_array);
 2960 #else
 2961   load_klass(temp_reg, oop, noreg);
 2962   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2963   test_flattened_array_layout(temp_reg, is_flattened_array);
 2964 #endif
 2965 }
 2966 
 2967 void MacroAssembler::test_non_flattened_array_oop(Register oop, Register temp_reg,
 2968                                                   Label& is_non_flattened_array) {
 2969 #ifdef _LP64
 2970   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flattened_array);
 2971 #else
 2972   load_klass(temp_reg, oop, noreg);
 2973   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2974   test_non_flattened_array_layout(temp_reg, is_non_flattened_array);
 2975 #endif
 2976 }
 2977 
 2978 void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) {
 2979 #ifdef _LP64
 2980   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array);
 2981 #else
 2982   load_klass(temp_reg, oop, noreg);
 2983   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2984   test_null_free_array_layout(temp_reg, is_null_free_array);
 2985 #endif
 2986 }
 2987 
 2988 void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array) {
 2989 #ifdef _LP64
 2990   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array);
 2991 #else
 2992   load_klass(temp_reg, oop, noreg);
 2993   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2994   test_non_null_free_array_layout(temp_reg, is_non_null_free_array);
 2995 #endif
 2996 }
 2997 
 2998 void MacroAssembler::test_flattened_array_layout(Register lh, Label& is_flattened_array) {
 2999   testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
 3000   jcc(Assembler::notZero, is_flattened_array);
 3001 }
 3002 
 3003 void MacroAssembler::test_non_flattened_array_layout(Register lh, Label& is_non_flattened_array) {
 3004   testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
 3005   jcc(Assembler::zero, is_non_flattened_array);
 3006 }
 3007 
 3008 void MacroAssembler::test_null_free_array_layout(Register lh, Label& is_null_free_array) {
 3009   testl(lh, Klass::_lh_null_free_array_bit_inplace);
 3010   jcc(Assembler::notZero, is_null_free_array);
 3011 }
 3012 
 3013 void MacroAssembler::test_non_null_free_array_layout(Register lh, Label& is_non_null_free_array) {
 3014   testl(lh, Klass::_lh_null_free_array_bit_inplace);
 3015   jcc(Assembler::zero, is_non_null_free_array);
 3016 }
 3017 
 3018 
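The checks above (mark-word pattern, klass access flag, layout-helper bits) all reduce to a single mask test; the mark-word variant in miniature, with the markWord constants abstracted out:

    // 'mask' and 'pattern' stand for markWord::inline_type_mask_in_place
    // and markWord::inline_type_pattern.
    static bool is_inline_type_mark_sketch(uintptr_t mark, uintptr_t mask, uintptr_t pattern) {
      return (mark & mask) == pattern;
    }
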
 3019 void MacroAssembler::os_breakpoint() {
 3020   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
 3021   // (e.g., MSVC can't call ps() otherwise)
 3022   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 3023 }
 3024 
 3025 void MacroAssembler::unimplemented(const char* what) {
 3026   const char* buf = NULL;
 3027   {
 3028     ResourceMark rm;
 3029     stringStream ss;
 3030     ss.print("unimplemented: %s", what);
 3031     buf = code_string(ss.as_string());
 3032   }
 3033   stop(buf);
 3034 }
 3035 
 3036 #ifdef _LP64
 3037 #define XSTATE_BV 0x200
 3038 #endif

 4065 }
 4066 
 4067 // C++ bool manipulation
 4068 void MacroAssembler::testbool(Register dst) {
 4069   if(sizeof(bool) == 1)
 4070     testb(dst, 0xff);
 4071   else if(sizeof(bool) == 2) {
 4072     // testw implementation needed for two byte bools
 4073     ShouldNotReachHere();
 4074   } else if(sizeof(bool) == 4)
 4075     testl(dst, dst);
 4076   else
 4077     // unsupported
 4078     ShouldNotReachHere();
 4079 }
 4080 
 4081 void MacroAssembler::testptr(Register dst, Register src) {
 4082   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
 4083 }
 4084 
 4085 // Object / value buffer allocation...
 4086 //
 4087 // Kills klass and rsi on LP64
 4088 void MacroAssembler::allocate_instance(Register klass, Register new_obj,
 4089                                        Register t1, Register t2,
 4090                                        bool clear_fields, Label& alloc_failed)
 4091 {
 4092   Label done, initialize_header, initialize_object, slow_case, slow_case_no_pop;
 4093   Register layout_size = t1;
 4094   assert(new_obj == rax, "needs to be rax");
 4095   assert_different_registers(klass, new_obj, t1, t2);
 4096 
 4097   // get instance_size in InstanceKlass (scaled to a count of bytes)
 4098   movl(layout_size, Address(klass, Klass::layout_helper_offset()));
 4099   // test to see if it has a finalizer or is malformed in some way
 4100   testl(layout_size, Klass::_lh_instance_slow_path_bit);
 4101   jcc(Assembler::notZero, slow_case_no_pop);
 4102 
 4103   // Allocate the instance:
 4104   //  If TLAB is enabled:
 4105   //    Try to allocate in the TLAB.
 4106   //    If fails, go to the slow path.
 4107   //  Else If inline contiguous allocations are enabled:
 4108   //    Try to allocate in eden.
 4109   //    If fails due to heap end, go to slow path.
 4110   //
 4111   //  If TLAB is enabled OR inline contiguous is enabled:
 4112   //    Initialize the allocation.
 4113   //    Exit.
 4114   //
 4115   //  Go to slow path.
 4116 
 4117   push(klass);
 4118   const Register thread = LP64_ONLY(r15_thread) NOT_LP64(klass);
 4119 #ifndef _LP64
 4120   if (UseTLAB) {
 4121     get_thread(thread);
 4122   }
 4123 #endif // _LP64
 4124 
 4125   if (UseTLAB) {
 4126     tlab_allocate(thread, new_obj, layout_size, 0, klass, t2, slow_case);
 4127     if (ZeroTLAB || (!clear_fields)) {
 4128       // the fields have been already cleared
 4129       jmp(initialize_header);
 4130     } else {
 4131       // initialize both the header and fields
 4132       jmp(initialize_object);
 4133     }
 4134   } else {
 4135     jmp(slow_case);
 4136   }
 4137 
 4138   // If UseTLAB is true, the object was allocated above and still needs to be initialized.
 4139   // Otherwise, skip and go to the slow path.
 4140   if (UseTLAB) {
 4141     if (clear_fields) {
 4142       // The object's fields are initialized before its header.  If the object
 4143       // size is zero, go directly to the header initialization.
 4144       bind(initialize_object);
 4145       decrement(layout_size, sizeof(oopDesc));
 4146       jcc(Assembler::zero, initialize_header);
 4147 
 4148       // Initialize topmost object field, divide size by 8, check if odd and
 4149       // test if zero.
 4150       Register zero = klass;
 4151       xorl(zero, zero);    // use zero reg to clear memory (shorter code)
 4152       shrl(layout_size, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
 4153 
 4154   #ifdef ASSERT
 4155       // make sure instance_size was multiple of 8
 4156       Label L;
 4157       // Ignore partial flag stall after shrl() since it is debug VM
 4158       jcc(Assembler::carryClear, L);
 4159       stop("object size is not multiple of 2 - adjust this code");
 4160       bind(L);
 4161       // must be > 0, no extra check needed here
 4162   #endif
 4163 
 4164       // initialize remaining object fields: instance_size was a multiple of 8
 4165       {
 4166         Label loop;
 4167         bind(loop);
 4168         movptr(Address(new_obj, layout_size, Address::times_8, sizeof(oopDesc) - 1*oopSize), zero);
 4169         NOT_LP64(movptr(Address(new_obj, layout_size, Address::times_8, sizeof(oopDesc) - 2*oopSize), zero));
 4170         decrement(layout_size);
 4171         jcc(Assembler::notZero, loop);
 4172       }
 4173     } // clear_fields
 4174 
 4175     // initialize object header only.
 4176     bind(initialize_header);
 4177     pop(klass);
 4178     Register mark_word = t2;
 4179     movptr(mark_word, Address(klass, Klass::prototype_header_offset()));
 4180     movptr(Address(new_obj, oopDesc::mark_offset_in_bytes ()), mark_word);
 4181 #ifdef _LP64
 4182     xorl(rsi, rsi);                 // use zero reg to clear memory (shorter code)
 4183     store_klass_gap(new_obj, rsi);  // zero klass gap for compressed oops
 4184 #endif
 4185     movptr(t2, klass);         // preserve klass
 4186     store_klass(new_obj, t2, rscratch1);  // src klass reg is potentially compressed
 4187 
 4188     jmp(done);
 4189   }
 4190 
 4191   bind(slow_case);
 4192   pop(klass);
 4193   bind(slow_case_no_pop);
 4194   jmp(alloc_failed);
 4195 
 4196   bind(done);
 4197 }
 4198 
 4199 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
 4200 void MacroAssembler::tlab_allocate(Register thread, Register obj,
 4201                                    Register var_size_in_bytes,
 4202                                    int con_size_in_bytes,
 4203                                    Register t1,
 4204                                    Register t2,
 4205                                    Label& slow_case) {
 4206   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 4207   bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
 4208 }
 4209 
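tlab_allocate defers to the GC's BarrierSetAssembler; the fast path it emits is classic bump-pointer allocation. A sketch with illustrative names (not the real ThreadLocalAllocBuffer API):

    static void* tlab_allocate_sketch(char** tlab_top, char* tlab_end, size_t size) {
      char* obj = *tlab_top;
      if (obj + size > tlab_end) return NULL;  // branch to slow_case
      *tlab_top = obj + size;                  // bump the TLAB top
      return obj;
    }
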
 4210 RegSet MacroAssembler::call_clobbered_gp_registers() {
 4211   RegSet regs;
 4212 #ifdef _LP64
 4213   regs += RegSet::of(rax, rcx, rdx);
 4214 #ifndef WINDOWS
 4215   regs += RegSet::of(rsi, rdi);
 4216 #endif
 4217   regs += RegSet::range(r8, r11);
 4218 #else

 4431     // clear topmost word (no jump would be needed if conditional assignment worked here)
 4432     movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
 4433     // index could be 0 now, must check again
 4434     jcc(Assembler::zero, done);
 4435     bind(even);
 4436   }
 4437 #endif // !_LP64
 4438   // initialize remaining object fields: index is a multiple of 2 now
 4439   {
 4440     Label loop;
 4441     bind(loop);
 4442     movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
 4443     NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
 4444     decrement(index);
 4445     jcc(Assembler::notZero, loop);
 4446   }
 4447 
 4448   bind(done);
 4449 }
 4450 
 4451 void MacroAssembler::get_inline_type_field_klass(Register klass, Register index, Register inline_klass) {
 4452   movptr(inline_klass, Address(klass, InstanceKlass::inline_type_field_klasses_offset()));
 4453 #ifdef ASSERT
 4454   {
 4455     Label done;
 4456     cmpptr(inline_klass, 0);
 4457     jcc(Assembler::notEqual, done);
 4458     stop("get_inline_type_field_klass contains no inline klass");
 4459     bind(done);
 4460   }
 4461 #endif
 4462   movptr(inline_klass, Address(inline_klass, index, Address::times_ptr));
 4463 }
 4464 
 4465 void MacroAssembler::get_default_value_oop(Register inline_klass, Register temp_reg, Register obj) {
 4466 #ifdef ASSERT
 4467   {
 4468     Label done_check;
 4469     test_klass_is_inline_type(inline_klass, temp_reg, done_check);
 4470     stop("get_default_value_oop from non inline type klass");
 4471     bind(done_check);
 4472   }
 4473 #endif
 4474   Register offset = temp_reg;
 4475   // Getting the offset of the pre-allocated default value
 4476   movptr(offset, Address(inline_klass, in_bytes(InstanceKlass::adr_inlineklass_fixed_block_offset())));
 4477   movl(offset, Address(offset, in_bytes(InlineKlass::default_value_offset_offset())));
 4478 
 4479   // Getting the mirror
 4480   movptr(obj, Address(inline_klass, in_bytes(Klass::java_mirror_offset())));
 4481   resolve_oop_handle(obj, inline_klass);
 4482 
 4483   // Getting the pre-allocated default value from the mirror
 4484   Address field(obj, offset, Address::times_1);
 4485   load_heap_oop(obj, field);
 4486 }
 4487 
 4488 void MacroAssembler::get_empty_inline_type_oop(Register inline_klass, Register temp_reg, Register obj) {
 4489 #ifdef ASSERT
 4490   {
 4491     Label done_check;
 4492     test_klass_is_empty_inline_type(inline_klass, temp_reg, done_check);
 4493     stop("get_empty_value from non-empty inline klass");
 4494     bind(done_check);
 4495   }
 4496 #endif
 4497   get_default_value_oop(inline_klass, temp_reg, obj);
 4498 }
 4499 
 4500 
 4501 // Look up the method for a megamorphic invokeinterface call.
 4502 // The target method is determined by <intf_klass, itable_index>.
 4503 // The receiver klass is in recv_klass.
 4504 // On success, the result will be in method_result, and execution falls through.
 4505 // On failure, execution transfers to the given label.
 4506 void MacroAssembler::lookup_interface_method(Register recv_klass,
 4507                                              Register intf_klass,
 4508                                              RegisterOrConstant itable_index,
 4509                                              Register method_result,
 4510                                              Register scan_temp,
 4511                                              Label& L_no_such_interface,
 4512                                              bool return_method) {
 4513   assert_different_registers(recv_klass, intf_klass, scan_temp);
 4514   assert_different_registers(method_result, intf_klass, scan_temp);
 4515   assert(recv_klass != method_result || !return_method,
 4516          "recv_klass can be destroyed when method isn't needed");
 4517 
 4518   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
 4519          "caller must use same register for non-constant itable index as for method");
 4520 

 4829   } else {
 4830     Label L;
 4831     jccb(negate_condition(cc), L);
 4832     movl(dst, src);
 4833     bind(L);
 4834   }
 4835 }
 4836 
 4837 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
 4838   if (VM_Version::supports_cmov()) {
 4839     cmovl(cc, dst, src);
 4840   } else {
 4841     Label L;
 4842     jccb(negate_condition(cc), L);
 4843     movl(dst, src);
 4844     bind(L);
 4845   }
 4846 }
 4847 
 4848 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 4849   if (!VerifyOops || VerifyAdapterSharing) {
 4850     // The address of the code string pushed below confuses VerifyAdapterSharing,
 4851     // because it may differ between otherwise equivalent adapters.
 4852     return;
 4853   }
 4854 
 4855   BLOCK_COMMENT("verify_oop {");
 4856 #ifdef _LP64
 4857   push(rscratch1);
 4858 #endif
 4859   push(rax);                          // save rax
 4860   push(reg);                          // pass register argument
 4861 
 4862   // Pass register number to verify_oop_subroutine
 4863   const char* b = NULL;
 4864   {
 4865     ResourceMark rm;
 4866     stringStream ss;
 4867     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
 4868     b = code_string(ss.as_string());
 4869   }
 4870   ExternalAddress buffer((address) b);
 4871   pushptr(buffer.addr(), rscratch1);
 4872 
 4873   // call indirectly to solve generation ordering problem

 4895   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
 4896   int stackElementSize = Interpreter::stackElementSize;
 4897   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
 4898 #ifdef ASSERT
 4899   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
 4900   assert(offset1 - offset == stackElementSize, "correct arithmetic");
 4901 #endif
 4902   Register             scale_reg    = noreg;
 4903   Address::ScaleFactor scale_factor = Address::no_scale;
 4904   if (arg_slot.is_constant()) {
 4905     offset += arg_slot.as_constant() * stackElementSize;
 4906   } else {
 4907     scale_reg    = arg_slot.as_register();
 4908     scale_factor = Address::times(stackElementSize);
 4909   }
 4910   offset += wordSize;           // return PC is on stack
 4911   return Address(rsp, scale_reg, scale_factor, offset);
 4912 }
 4913 
 4914 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 4915   if (!VerifyOops || VerifyAdapterSharing) {
 4916     // The address of the code string pushed below confuses VerifyAdapterSharing,
 4917     // because it may differ between otherwise equivalent adapters.
 4918     return;
 4919   }
 4920 
 4921 #ifdef _LP64
 4922   push(rscratch1);
 4923 #endif
 4924   push(rax); // save rax,
 4925   // addr may contain rsp so we will have to adjust it based on the push
 4926   // we just did (and on 64 bit we do two pushes)
 4927   // NOTE: the 64-bit code seemed to have had a bug: it did movq(addr, rax),
 4928   // which stored rax into addr, the reverse of what was intended.
 4929   if (addr.uses(rsp)) {
 4930     lea(rax, addr);
 4931     pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
 4932   } else {
 4933     pushptr(addr);
 4934   }
 4935 
 4936   // Pass register number to verify_oop_subroutine
 4937   const char* b = NULL;
 4938   {
 4939     ResourceMark rm;

 5386 
 5387 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
 5388   // get mirror
 5389   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 5390   load_method_holder(mirror, method);
 5391   movptr(mirror, Address(mirror, mirror_offset));
 5392   resolve_oop_handle(mirror, tmp);
 5393 }
 5394 
 5395 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
 5396   load_method_holder(rresult, rmethod);
 5397   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
 5398 }
 5399 
 5400 void MacroAssembler::load_method_holder(Register holder, Register method) {
 5401   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
 5402   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
 5403   movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
 5404 }
 5405 
 5406 void MacroAssembler::load_metadata(Register dst, Register src) {
 5407   if (UseCompressedClassPointers) {
 5408     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5409   } else {
 5410     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5411   }
 5412 }
 5413 
 5414 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
 5415   assert_different_registers(src, tmp);
 5416   assert_different_registers(dst, tmp);
 5417 #ifdef _LP64
 5418   if (UseCompressedClassPointers) {
 5419     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5420     decode_klass_not_null(dst, tmp);
 5421   } else
 5422 #endif
 5423   movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5424 }
 5425 
 5426 void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) {
 5427   load_klass(dst, src, tmp);
 5428   movptr(dst, Address(dst, Klass::prototype_header_offset()));
 5429 }
 5430 
 5431 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
 5432   assert_different_registers(src, tmp);
 5433   assert_different_registers(dst, tmp);
 5434 #ifdef _LP64
 5435   if (UseCompressedClassPointers) {
 5436     encode_klass_not_null(src, tmp);
 5437     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5438   } else
 5439 #endif
 5440     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5441 }
 5442 
 5443 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
 5444                                     Register tmp1, Register thread_tmp) {
 5445   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5446   decorators = AccessInternal::decorator_fixup(decorators, type);
 5447   bool as_raw = (decorators & AS_RAW) != 0;
 5448   if (as_raw) {
 5449     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
 5450   } else {
 5451     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
 5452   }
 5453 }
 5454 
 5455 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
 5456                                      Register tmp1, Register tmp2, Register tmp3) {
 5457   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5458   decorators = AccessInternal::decorator_fixup(decorators, type);
 5459   bool as_raw = (decorators & AS_RAW) != 0;
 5460   if (as_raw) {
 5461     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
 5462   } else {
 5463     bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
 5464   }
 5465 }
 5466 
 5467 void MacroAssembler::access_value_copy(DecoratorSet decorators, Register src, Register dst,
 5468                                        Register inline_klass) {
 5469   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5470   bs->value_copy(this, decorators, src, dst, inline_klass);
 5471 }
 5472 
 5473 void MacroAssembler::first_field_offset(Register inline_klass, Register offset) {
 5474   movptr(offset, Address(inline_klass, InstanceKlass::adr_inlineklass_fixed_block_offset()));
 5475   movl(offset, Address(offset, InlineKlass::first_field_offset_offset()));
 5476 }
 5477 
 5478 void MacroAssembler::data_for_oop(Register oop, Register data, Register inline_klass) {
 5479   // ((address) (void*) o) + vk->first_field_offset();
 5480   Register offset = (data == oop) ? rscratch1 : data;
 5481   first_field_offset(inline_klass, offset);
 5482   if (data == oop) {
 5483     addptr(data, offset);
 5484   } else {
 5485     lea(data, Address(oop, offset));
 5486   }
 5487 }
 5488 
 5489 void MacroAssembler::data_for_value_array_index(Register array, Register array_klass,
 5490                                                 Register index, Register data) {
 5491   assert(index != rcx, "index needs to shift by rcx");
 5492   assert_different_registers(array, array_klass, index);
 5493   assert_different_registers(rcx, array, index);
 5494 
 5495   // array->base() + (index << Klass::layout_helper_log2_element_size(lh));
 5496   movl(rcx, Address(array_klass, Klass::layout_helper_offset()));
 5497 
 5498   // Klass::layout_helper_log2_element_size(lh)
 5499   // (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask;
 5500   shrl(rcx, Klass::_lh_log2_element_size_shift);
 5501   andl(rcx, Klass::_lh_log2_element_size_mask);
 5502   shlptr(index); // index << rcx
 5503 
 5504   lea(data, Address(array, index, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_PRIMITIVE_OBJECT)));
 5505 }
 5506 
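In plain terms, the address computed above is (sketch):

    // data = array + base_offset + (index << log2_element_size), where
    // log2_element_size is decoded from the klass layout helper and
    // base_offset is arrayOopDesc::base_offset_in_bytes(T_PRIMITIVE_OBJECT).
    static char* value_element_addr_sketch(char* array, size_t base_offset,
                                           size_t index, int log2_element_size) {
      return array + base_offset + ((size_t)index << log2_element_size);
    }
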
 5507 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
 5508                                    Register thread_tmp, DecoratorSet decorators) {
 5509   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
 5510 }
 5511 
 5512 // Doesn't do verification, generates fixed size code
 5513 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 5514                                             Register thread_tmp, DecoratorSet decorators) {
 5515   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
 5516 }
 5517 
 5518 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
 5519                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
 5520   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
 5521 }
 5522 
 5523 // Used for storing NULLs.
 5524 void MacroAssembler::store_heap_oop_null(Address dst) {
 5525   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
 5526 }

 5826 
 5827 void MacroAssembler::reinit_heapbase() {
 5828   if (UseCompressedOops) {
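          // r12 is reserved as the heap base register: zero it in unscaled mode,
          // otherwise load the base as an immediate, or indirectly if the heap
          // has not been initialized yet.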
 5829     if (Universe::heap() != NULL) {
 5830       if (CompressedOops::base() == NULL) {
 5831         MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
 5832       } else {
 5833         mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
 5834       }
 5835     } else {
 5836       movptr(r12_heapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
 5837     }
 5838   }
 5839 }
 5840 
 5841 #endif // _LP64
 5842 
 5843 #if COMPILER2_OR_JVMCI
 5844 
 5845 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
 5846 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, KRegister mask) {
 5847   // cnt - number of qwords (8-byte words).
 5848   // base - start address, qword aligned.
 5849   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
 5850   bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
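        // Broadcast the 64-bit fill value into every lane of xtmp, at the widest
        // vector width available (512, 256, or 128 bits).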
 5851   if (use64byteVector) {
 5852     evpbroadcastq(xtmp, val, AVX_512bit);
 5853   } else if (MaxVectorSize >= 32) {
 5854     movdq(xtmp, val);
 5855     punpcklqdq(xtmp, xtmp);
 5856     vinserti128_high(xtmp, xtmp);
 5857   } else {
 5858     movdq(xtmp, val);
 5859     punpcklqdq(xtmp, xtmp);
 5860   }
 5861   jmp(L_zero_64_bytes);
 5862 
 5863   BIND(L_loop);
 5864   if (MaxVectorSize >= 32) {
 5865     fill64(base, 0, xtmp, use64byteVector);
 5866   } else {
 5867     movdqu(Address(base,  0), xtmp);
 5868     movdqu(Address(base, 16), xtmp);
 5869     movdqu(Address(base, 32), xtmp);
 5870     movdqu(Address(base, 48), xtmp);
 5871   }
 5872   addptr(base, 64);
 5873 
 5874   BIND(L_zero_64_bytes);
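        // Stay in the 64-byte loop while at least 8 qwords (64 bytes) remain.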
 5875   subptr(cnt, 8);
 5876   jccb(Assembler::greaterEqual, L_loop);
 5877 
 5878   // Fill the trailing bytes (< 64) left over from the loop
 5879   if (use64byteVector) {
 5880     addptr(cnt, 8);
 5881     jccb(Assembler::equal, L_end);
 5882     fill64_masked(3, base, 0, xtmp, mask, cnt, val, true);
 5883     jmp(L_end);
 5884   } else {
 5885     addptr(cnt, 4);
 5886     jccb(Assembler::less, L_tail);
 5887     if (MaxVectorSize >= 32) {
 5888       vmovdqu(Address(base, 0), xtmp);
 5889     } else {
 5890       movdqu(Address(base,  0), xtmp);
 5891       movdqu(Address(base, 16), xtmp);
 5892     }
 5893   }
 5894   addptr(base, 32);
 5895   subptr(cnt, 4);
 5896 
 5897   BIND(L_tail);
 5898   addptr(cnt, 4);
 5899   jccb(Assembler::lessEqual, L_end);
 5900   if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
 5901     fill32_masked(3, base, 0, xtmp, mask, cnt, val);
 5902   } else {
 5903     decrement(cnt);
 5904 
 5905     BIND(L_sloop);
 5906     movq(Address(base, 0), xtmp);
 5907     addptr(base, 8);
 5908     decrement(cnt);
 5909     jccb(Assembler::greaterEqual, L_sloop);
 5910   }
 5911   BIND(L_end);
 5912 }
 5913 
 5914 int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
 5915   assert(InlineTypeReturnedAsFields, "inline types should be returned as fields");
 5916   // An inline type might be returned. If fields are in registers we
 5917   // need to allocate an inline type instance and initialize it with
 5918   // the values of the fields.
 5919   Label skip;
 5920   // We only need to allocate a new buffered inline type if one was not already
        // returned, i.e. if rax holds the tagged InlineKlass* rather than a buffered oop.
 5921   testptr(rax, 1);
 5922   jcc(Assembler::zero, skip);
 5923   int call_offset = -1;
 5924 
 5925 #ifdef _LP64
 5926   // The following code is similar to allocate_instance but has some slight differences,
 5927   // e.g. the object size is never zero and is sometimes a compile-time constant, and
 5928   // storing the klass pointer after allocation is unnecessary if vk != NULL; allocate_instance cannot exploit these.
 5929   Label slow_case;
 5930   // 1. Try to allocate a new buffered inline instance either from TLAB or eden space
 5931   mov(rscratch1, rax); // save rax for slow_case since *_allocate may corrupt it if allocation fails
 5932   if (vk != NULL) {
 5933     // Called from C1, where the return type is statically known.
 5934     movptr(rbx, (intptr_t)vk->get_InlineKlass());
 5935     jint obj_size = vk->layout_helper();
 5936     assert(obj_size != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
 5937     if (UseTLAB) {
 5938       tlab_allocate(r15_thread, rax, noreg, obj_size, r13, r14, slow_case);
 5939     } else {
 5940       jmp(slow_case);
 5941     }
 5942   } else {
 5943     // Called from the interpreter. RAX contains ((the InlineKlass* of the return type) | 0x01)
 5944     mov(rbx, rax);
 5945     andptr(rbx, -2);
 5946     movl(r14, Address(rbx, Klass::layout_helper_offset()));
 5947     if (UseTLAB) {
 5948       tlab_allocate(r15_thread, rax, r14, 0, r13, r14, slow_case);
 5949     } else {
 5950       jmp(slow_case);
 5951     }
 5952   }
 5953   if (UseTLAB) {
 5954     // 2. Initialize buffered inline instance header
 5955     Register buffer_obj = rax;
 5956     movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::inline_type_prototype().value());
 5957     xorl(r13, r13);
 5958     store_klass_gap(buffer_obj, r13);
 5959     if (vk == NULL) {
 5960       // store_klass corrupts rbx (the klass), so save it in r13 for later use (interpreter case only).
 5961       mov(r13, rbx);
 5962     }
 5963     store_klass(buffer_obj, rbx, rscratch1);
 5964     // 3. Initialize its fields with an inline class specific handler
 5965     if (vk != NULL) {
 5966       call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
 5967     } else {
 5968       movptr(rbx, Address(r13, InstanceKlass::adr_inlineklass_fixed_block_offset()));
 5969       movptr(rbx, Address(rbx, InlineKlass::pack_handler_offset()));
 5970       call(rbx);
 5971     }
 5972     jmp(skip);
 5973   }
 5974   bind(slow_case);
 5975   // We failed to allocate a new inline type buffer, so fall back to a runtime
 5976   // call. Some oop fields may be live in registers but we can't tell which;
 5977   // the runtime call will take care of preserving them across a GC if one
 5978   // occurs.
 5979   mov(rax, rscratch1);
 5980 #endif
 5981 
 5982   if (from_interpreter) {
 5983     super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
 5984   } else {
 5985     call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
 5986     call_offset = offset();
 5987   }
 5988 
 5989   bind(skip);
 5990   return call_offset;
 5991 }
 5992 
 5993 // Move a value between registers/stack slots and update the reg_state
 5994 bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
 5995   assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
 5996   if (reg_state[to->value()] == reg_written) {
 5997     return true; // Already written
 5998   }
 5999   if (from != to && bt != T_VOID) {
 6000     if (reg_state[to->value()] == reg_readonly) {
 6001       return false; // Not yet writable
 6002     }
 6003     if (from->is_reg()) {
 6004       if (to->is_reg()) {
 6005         if (from->is_XMMRegister()) {
 6006           if (bt == T_DOUBLE) {
 6007             movdbl(to->as_XMMRegister(), from->as_XMMRegister());
 6008           } else {
 6009             assert(bt == T_FLOAT, "must be float");
 6010             movflt(to->as_XMMRegister(), from->as_XMMRegister());
 6011           }
 6012         } else {
 6013           movq(to->as_Register(), from->as_Register());
 6014         }
 6015       } else {
 6016         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6017         Address to_addr = Address(rsp, st_off);
 6018         if (from->is_XMMRegister()) {
 6019           if (bt == T_DOUBLE) {
 6020             movdbl(to_addr, from->as_XMMRegister());
 6021           } else {
 6022             assert(bt == T_FLOAT, "must be float");
 6023             movflt(to_addr, from->as_XMMRegister());
 6024           }
 6025         } else {
 6026           movq(to_addr, from->as_Register());
 6027         }
 6028       }
 6029     } else {
 6030       Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize);
 6031       if (to->is_reg()) {
 6032         if (to->is_XMMRegister()) {
 6033           if (bt == T_DOUBLE) {
 6034             movdbl(to->as_XMMRegister(), from_addr);
 6035           } else {
 6036             assert(bt == T_FLOAT, "must be float");
 6037             movflt(to->as_XMMRegister(), from_addr);
 6038           }
 6039         } else {
 6040           movq(to->as_Register(), from_addr);
 6041         }
 6042       } else {
 6043         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6044         movq(r13, from_addr);
 6045         movq(Address(rsp, st_off), r13);
 6046       }
 6047     }
 6048   }
 6049   // Update register states
 6050   reg_state[from->value()] = reg_writable;
 6051   reg_state[to->value()] = reg_written;
 6052   return true;
 6053 }
 6054 
 6055 // Calculate the extra stack space required for packing or unpacking inline
 6056 // args and adjust the stack pointer
 6057 int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
 6058   // Two additional 4-byte stack slots to account for the return address (one word)
 6059   int sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size;
 6060   sp_inc = align_up(sp_inc, StackAlignmentInBytes);
 6061   // Save the return address, adjust the stack (make sure it is properly
 6062   // 16-byte aligned) and copy the return address to the new top of the stack.
 6063   // The stack will be repaired on return (see MacroAssembler::remove_frame).
 6064   assert(sp_inc > 0, "sanity");
 6065   pop(r13);
 6066   subptr(rsp, sp_inc);
 6067   push(r13);
 6068   return sp_inc;
 6069 }
 6070 
 6071 // Read all fields from an inline type buffer and store the field values in registers/stack slots.
 6072 bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
 6073                                           VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
 6074                                           RegState reg_state[]) {
 6075   assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
 6076   assert(from->is_valid(), "source must be valid");
 6077   bool progress = false;
 6078 #ifdef ASSERT
 6079   const int start_offset = offset();
 6080 #endif
 6081 
 6082   Label L_null, L_notNull;
 6083   // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for)
 6084   Register tmp1 = r10;
 6085   Register tmp2 = r13;
 6086   Register fromReg = noreg;
 6087   ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, -1);
 6088   bool done = true;
 6089   bool mark_done = true;
 6090   VMReg toReg;
 6091   BasicType bt;
 6092   // Check if argument requires a null check
 6093   bool null_check = false;
 6094   VMReg nullCheckReg;
 6095   while (stream.next(nullCheckReg, bt)) {
 6096     if (sig->at(stream.sig_index())._offset == -1) {
 6097       null_check = true;
 6098       break;
 6099     }
 6100   }
 6101   stream.reset(sig_index, to_index);
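        // Second pass: actually move the field values to their destinations.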
 6102   while (stream.next(toReg, bt)) {
 6103     assert(toReg->is_valid(), "destination must be valid");
 6104     int idx = (int)toReg->value();
 6105     if (reg_state[idx] == reg_readonly) {
 6106       if (idx != from->value()) {
 6107         mark_done = false;
 6108       }
 6109       done = false;
 6110       continue;
 6111     } else if (reg_state[idx] == reg_written) {
 6112       continue;
 6113     }
 6114     assert(reg_state[idx] == reg_writable, "must be writable");
 6115     reg_state[idx] = reg_written;
 6116     progress = true;
 6117 
 6118     if (fromReg == noreg) {
 6119       if (from->is_reg()) {
 6120         fromReg = from->as_Register();
 6121       } else {
 6122         int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6123         movq(tmp1, Address(rsp, st_off));
 6124         fromReg = tmp1;
 6125       }
 6126       if (null_check) {
 6127         // Nullable inline type argument, emit null check
 6128         testptr(fromReg, fromReg);
 6129         jcc(Assembler::zero, L_null);
 6130       }
 6131     }
 6132     int off = sig->at(stream.sig_index())._offset;
 6133     if (off == -1) {
 6134       assert(null_check, "Missing null check");
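            // This is the IsInit field: store 1 to mark the argument as non-null
            // (the null path below stores 0 instead).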
 6135       if (toReg->is_stack()) {
 6136         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6137         movq(Address(rsp, st_off), 1);
 6138       } else {
 6139         movq(toReg->as_Register(), 1);
 6140       }
 6141       continue;
 6142     }
 6143     assert(off > 0, "offset in object should be positive");
 6144     Address fromAddr = Address(fromReg, off);
 6145     if (!toReg->is_XMMRegister()) {
 6146       Register dst = toReg->is_stack() ? tmp2 : toReg->as_Register();
 6147       if (is_reference_type(bt)) {
 6148         load_heap_oop(dst, fromAddr);
 6149       } else {
 6150         bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
 6151         load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
 6152       }
 6153       if (toReg->is_stack()) {
 6154         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6155         movq(Address(rsp, st_off), dst);
 6156       }
 6157     } else if (bt == T_DOUBLE) {
 6158       movdbl(toReg->as_XMMRegister(), fromAddr);
 6159     } else {
 6160       assert(bt == T_FLOAT, "must be float");
 6161       movflt(toReg->as_XMMRegister(), fromAddr);
 6162     }
 6163   }
 6164   if (progress && null_check) {
 6165     if (done) {
 6166       jmp(L_notNull);
 6167       bind(L_null);
 6168       // Set IsInit field to zero to signal that the argument is null.
 6169       // Also set all oop fields to zero to make the GC happy.
 6170       stream.reset(sig_index, to_index);
 6171       while (stream.next(toReg, bt)) {
 6172         if (sig->at(stream.sig_index())._offset == -1 ||
 6173             bt == T_OBJECT || bt == T_ARRAY) {
 6174           if (toReg->is_stack()) {
 6175             int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6176             movq(Address(rsp, st_off), 0);
 6177           } else {
 6178             xorq(toReg->as_Register(), toReg->as_Register());
 6179           }
 6180         }
 6181       }
 6182       bind(L_notNull);
 6183     } else {
 6184       bind(L_null);
 6185     }
 6186   }
 6187 
 6188   sig_index = stream.sig_index();
 6189   to_index = stream.regs_index();
 6190 
 6191   if (mark_done && reg_state[from->value()] != reg_written) {
 6192     // This is okay because no one else will write to that slot
 6193     reg_state[from->value()] = reg_writable;
 6194   }
 6195   from_index--;
 6196   assert(progress || (start_offset == offset()), "should not emit code");
 6197   return done;
 6198 }
 6199 
 6200 bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
 6201                                         VMRegPair* from, int from_count, int& from_index, VMReg to,
 6202                                         RegState reg_state[], Register val_array) {
 6203   assert(sig->at(sig_index)._bt == T_PRIMITIVE_OBJECT, "should be at end delimiter");
 6204   assert(to->is_valid(), "destination must be valid");
 6205 
 6206   if (reg_state[to->value()] == reg_written) {
 6207     skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
 6208     return true; // Already written
 6209   }
 6210 
 6211   // TODO 8284443 Isn't it an issue if the code below uses r14 as a tmp while it holds a spilled value?
 6212   // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for).
 6213   Register val_obj_tmp = r11;
 6214   Register from_reg_tmp = r14;
 6215   Register tmp1 = r10;
 6216   Register tmp2 = r13;
 6217   Register tmp3 = rbx;
 6218   Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();
 6219 
 6220   assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);
 6221 
 6222   if (reg_state[to->value()] == reg_readonly) {
 6223     if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
 6224       skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
 6225       return false; // Not yet writable
 6226     }
 6227     val_obj = val_obj_tmp;
 6228   }
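        // Load the preallocated buffered value object for this argument from
        // the value array.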
 6229 
 6230   int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_PRIMITIVE_OBJECT);
 6231   load_heap_oop(val_obj, Address(val_array, index));
 6232 
 6233   ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
 6234   VMReg fromReg;
 6235   BasicType bt;
 6236   Label L_null;
 6237   while (stream.next(fromReg, bt)) {
 6238     assert(fromReg->is_valid(), "source must be valid");
 6239     reg_state[fromReg->value()] = reg_writable;
 6240 
 6241     int off = sig->at(stream.sig_index())._offset;
 6242     if (off == -1) {
 6243       // Nullable inline type argument, emit null check
 6244       Label L_notNull;
 6245       if (fromReg->is_stack()) {
 6246         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6247         testb(Address(rsp, ld_off), 1);
 6248       } else {
 6249         testb(fromReg->as_Register(), 1);
 6250       }
 6251       jcc(Assembler::notZero, L_notNull);
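          // The argument is null: pass a null reference and skip the field stores.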
 6252       movptr(val_obj, 0);
 6253       jmp(L_null);
 6254       bind(L_notNull);
 6255       continue;
 6256     }
 6257 
 6258     assert(off > 0, "offset in object should be positive");
 6259     size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
 6260 
 6261     Address dst(val_obj, off);
 6262     if (!fromReg->is_XMMRegister()) {
 6263       Register src;
 6264       if (fromReg->is_stack()) {
 6265         src = from_reg_tmp;
 6266         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6267         load_sized_value(src, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
 6268       } else {
 6269         src = fromReg->as_Register();
 6270       }
 6271       assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
 6272       if (is_reference_type(bt)) {
 6273         store_heap_oop(dst, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
 6274       } else {
 6275         store_sized_value(dst, src, size_in_bytes);
 6276       }
 6277     } else if (bt == T_DOUBLE) {
 6278       movdbl(dst, fromReg->as_XMMRegister());
 6279     } else {
 6280       assert(bt == T_FLOAT, "must be float");
 6281       movflt(dst, fromReg->as_XMMRegister());
 6282     }
 6283   }
 6284   bind(L_null);
 6285   sig_index = stream.sig_index();
 6286   from_index = stream.regs_index();
 6287 
 6288   assert(reg_state[to->value()] == reg_writable, "must have already been read");
 6289   bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);
 6290   assert(success, "'to' register must be writable");
 6291   return true;
 6292 }
 6293 
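      // r14 and xmm8 serve as temporary spill locations while shuffling scalarized
      // inline type arguments (see the tmp register notes in the helpers above).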
 6294 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
 6295   return reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
 6296 }
 6297 
 6298 void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
 6299   assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 6300   if (needs_stack_repair) {
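          // The frame was dynamically extended, e.g. for scalarized inline type
          // args (see extend_stack_for_inline_args), so the frame size must be
          // read back from the frame itself.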
 6301     movq(rbp, Address(rsp, initial_framesize));
 6302     // The stack increment resides just below the saved rbp
 6303     addq(rsp, Address(rsp, initial_framesize - wordSize));
 6304   } else {
 6305     if (initial_framesize > 0) {
 6306       addq(rsp, initial_framesize);
 6307     }
 6308     pop(rbp);
 6309   }
 6310 }
 6311 
 6312 // Clear constant-sized memory using YMM/ZMM registers.
 6313 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 6314   assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
 6315   bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
 6316 
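        // Split cnt into a multiple of 8 qwords, cleared with 64-byte stores,
        // and a remainder of up to 7 qwords handled by the tail code.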
 6317   int vector64_count = (cnt & (~0x7)) >> 3;
 6318   cnt = cnt & 0x7;
 6319   const int fill64_per_loop = 4;
 6320   const int max_unrolled_fill64 = 8;
 6321 
 6322   // 64 byte initialization loop.
 6323   vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
 6324   int start64 = 0;
 6325   if (vector64_count > max_unrolled_fill64) {
 6326     Label LOOP;
 6327     Register index = rtmp;
 6328 
 6329     start64 = vector64_count - (vector64_count % fill64_per_loop);
 6330 
 6331     movl(index, 0);

 6381         break;
 6382       case 7:
 6383         if (use64byteVector) {
 6384           movl(rtmp, 0x7F);
 6385           kmovwl(mask, rtmp);
 6386           evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
 6387         } else {
 6388           evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
 6389           movl(rtmp, 0x7);
 6390           kmovwl(mask, rtmp);
 6391           evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
 6392         }
 6393         break;
 6394       default:
 6395         fatal("Unexpected length: %d", cnt);
 6396         break;
 6397     }
 6398   }
 6399 }
 6400 
 6401 void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp,
 6402                                bool is_large, bool word_copy_only, KRegister mask) {
 6403   // cnt      - number of qwords (8-byte words).
 6404   // base     - start address, qword aligned.
 6405   // is_large - whether the compiler knows that cnt is larger than InitArrayShortSize
 6406   assert(base==rdi, "base register must be edi for rep stos");
 6407   assert(val==rax,   "val register must be eax for rep stos");
 6408   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
 6409   assert(InitArrayShortSize % BytesPerLong == 0,
 6410     "InitArrayShortSize must be a multiple of BytesPerLong");
 6411 
 6412   Label DONE;



 6413 
 6414   if (!is_large) {
 6415     Label LOOP, LONG;
 6416     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
 6417     jccb(Assembler::greater, LONG);
 6418 
 6419     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
 6420 
 6421     decrement(cnt);
 6422     jccb(Assembler::negative, DONE); // Zero length
 6423 
 6424     // Use individual pointer-sized stores for small counts:
 6425     BIND(LOOP);
 6426     movptr(Address(base, cnt, Address::times_ptr), val);
 6427     decrement(cnt);
 6428     jccb(Assembler::greaterEqual, LOOP);
 6429     jmpb(DONE);
 6430 
 6431     BIND(LONG);
 6432   }
 6433 
 6434   // Use longer rep-prefixed ops for non-small counts:
 6435   if (UseFastStosb && !word_copy_only) {
 6436     shlptr(cnt, 3); // convert to number of bytes
 6437     rep_stosb();
 6438   } else if (UseXMMForObjInit) {
 6439     xmm_clear_mem(base, cnt, val, xtmp, mask);
 6440   } else {
 6441     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
 6442     rep_stos();
 6443   }
 6444 
 6445   BIND(DONE);
 6446 }
 6447 
 6448 #endif //COMPILER2_OR_JVMCI
 6449 
 6450 
 6451 void MacroAssembler::generate_fill(BasicType t, bool aligned,
 6452                                    Register to, Register value, Register count,
 6453                                    Register rtmp, XMMRegister xtmp) {
 6454   ShortBranchVerifier sbv(this);
 6455   assert_different_registers(to, value, count, rtmp);
 6456   Label L_exit;
 6457   Label L_fill_2_bytes, L_fill_4_bytes;
 6458 
 6459 #if defined(COMPILER2) && defined(_LP64)