src/hotspot/cpu/x86/macroAssembler_x86.cpp (old)

   11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12  * version 2 for more details (a copy is included in the LICENSE file that
   13  * accompanied this code).
   14  *
   15  * You should have received a copy of the GNU General Public License version
   16  * 2 along with this work; if not, write to the Free Software Foundation,
   17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18  *
   19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20  * or visit www.oracle.com if you need additional information or have any
   21  * questions.
   22  *
   23  */
   24 
   25 #include "asm/assembler.hpp"
   26 #include "asm/assembler.inline.hpp"
   27 #include "code/aotCodeCache.hpp"
   28 #include "code/compiledIC.hpp"
   29 #include "compiler/compiler_globals.hpp"
   30 #include "compiler/disassembler.hpp"
   31 #include "crc32c.h"
   32 #include "gc/shared/barrierSet.hpp"
   33 #include "gc/shared/barrierSetAssembler.hpp"
   34 #include "gc/shared/collectedHeap.inline.hpp"
   35 #include "gc/shared/tlab_globals.hpp"
   36 #include "interpreter/bytecodeHistogram.hpp"
   37 #include "interpreter/interpreter.hpp"
   38 #include "interpreter/interpreterRuntime.hpp"
   39 #include "jvm.h"
   40 #include "memory/resourceArea.hpp"
   41 #include "memory/universe.hpp"
   42 #include "oops/accessDecorators.hpp"
   43 #include "oops/compressedKlass.inline.hpp"
   44 #include "oops/compressedOops.inline.hpp"
   45 #include "oops/klass.inline.hpp"
   46 #include "prims/methodHandles.hpp"
   47 #include "runtime/continuation.hpp"
   48 #include "runtime/interfaceSupport.inline.hpp"
   49 #include "runtime/javaThread.hpp"
   50 #include "runtime/jniHandles.hpp"
   51 #include "runtime/objectMonitor.hpp"
   52 #include "runtime/os.hpp"
   53 #include "runtime/safepoint.hpp"
   54 #include "runtime/safepointMechanism.hpp"
   55 #include "runtime/sharedRuntime.hpp"
   56 #include "runtime/stubRoutines.hpp"
   57 #include "utilities/checkedCast.hpp"
   58 #include "utilities/macros.hpp"
   59 
   60 #ifdef PRODUCT
   61 #define BLOCK_COMMENT(str) /* nothing */
   62 #define STOP(error) stop(error)
   63 #else
   64 #define BLOCK_COMMENT(str) block_comment(str)
   65 #define STOP(error) block_comment(error); stop(error)
   66 #endif
   67 
   68 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
   69 
   70 #ifdef ASSERT
   71 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
   72 #endif
   73 
   74 static const Assembler::Condition reverse[] = {
   75     Assembler::noOverflow     /* overflow      = 0x0 */ ,
   76     Assembler::overflow       /* noOverflow    = 0x1 */ ,
   77     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
   78     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

 1270 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 1271   assert_different_registers(arg_0, c_rarg1, c_rarg2);
 1272   assert_different_registers(arg_1, c_rarg2);
 1273   pass_arg2(this, arg_2);
 1274   pass_arg1(this, arg_1);
 1275   pass_arg0(this, arg_0);
 1276   call_VM_leaf(entry_point, 3);
 1277 }
 1278 
 1279 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
 1280   assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3);
 1281   assert_different_registers(arg_1, c_rarg2, c_rarg3);
 1282   assert_different_registers(arg_2, c_rarg3);
 1283   pass_arg3(this, arg_3);
 1284   pass_arg2(this, arg_2);
 1285   pass_arg1(this, arg_1);
 1286   pass_arg0(this, arg_0);
 1287   call_VM_leaf(entry_point, 3);
 1288 }
 1289 
 1290 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
 1291   pass_arg0(this, arg_0);
 1292   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1293 }
 1294 
 1295 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 1296   assert_different_registers(arg_0, c_rarg1);
 1297   pass_arg1(this, arg_1);
 1298   pass_arg0(this, arg_0);
 1299   MacroAssembler::call_VM_leaf_base(entry_point, 2);
 1300 }
 1301 
 1302 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 1303   assert_different_registers(arg_0, c_rarg1, c_rarg2);
 1304   assert_different_registers(arg_1, c_rarg2);
 1305   pass_arg2(this, arg_2);
 1306   pass_arg1(this, arg_1);
 1307   pass_arg0(this, arg_0);
 1308   MacroAssembler::call_VM_leaf_base(entry_point, 3);
 1309 }

 2326     lea(rscratch, src);
 2327     Assembler::mulss(dst, Address(rscratch, 0));
 2328   }
 2329 }
 2330 
 2331 void MacroAssembler::null_check(Register reg, int offset) {
 2332   if (needs_explicit_null_check(offset)) {
 2333     // provoke OS null exception if reg is null by
 2334     // accessing M[reg] w/o changing any (non-CC) registers
 2335     // NOTE: cmpl is plenty here to provoke a segv
 2336     cmpptr(rax, Address(reg, 0));
 2337     // Note: should probably use testl(rax, Address(reg, 0));
 2338     //       may be shorter code (however, this version of
 2339     //       testl needs to be implemented first)
 2340   } else {
 2341     // nothing to do, (later) access of M[reg + offset]
 2342     // will provoke OS null exception if reg is null
 2343   }
 2344 }
 2345 
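// A minimal sketch of the decision above (the function and page size here are
// illustrative assumptions, not the HotSpot API): an access at reg+offset can
// only be folded into an implicit null check if the faulting address is
// guaranteed to land in the protected page mapped at address zero; otherwise
// the explicit cmpptr against M[reg] must be emitted first.
static bool needs_explicit_null_check_sketch(int offset) {
  const int protected_page_size = 4096;  // assumed OS page size
  return offset < 0 || offset >= protected_page_size;
}
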
 2346 void MacroAssembler::os_breakpoint() {
  2347   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
 2348   // (e.g., MSVC can't call ps() otherwise)
 2349   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 2350 }
 2351 
 2352 void MacroAssembler::unimplemented(const char* what) {
 2353   const char* buf = nullptr;
 2354   {
 2355     ResourceMark rm;
 2356     stringStream ss;
 2357     ss.print("unimplemented: %s", what);
 2358     buf = code_string(ss.as_string());
 2359   }
 2360   stop(buf);
 2361 }
 2362 
 2363 #define XSTATE_BV 0x200
 2364 
 2365 void MacroAssembler::pop_CPU_state() {

 3414 }
 3415 
 3416 // C++ bool manipulation
 3417 void MacroAssembler::testbool(Register dst) {
 3418   if(sizeof(bool) == 1)
 3419     testb(dst, 0xff);
 3420   else if(sizeof(bool) == 2) {
 3421     // testw implementation needed for two byte bools
 3422     ShouldNotReachHere();
 3423   } else if(sizeof(bool) == 4)
 3424     testl(dst, dst);
 3425   else
 3426     // unsupported
 3427     ShouldNotReachHere();
 3428 }
 3429 
 3430 void MacroAssembler::testptr(Register dst, Register src) {
 3431   testq(dst, src);
 3432 }
 3433 
 3434 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
 3435 void MacroAssembler::tlab_allocate(Register obj,
 3436                                    Register var_size_in_bytes,
 3437                                    int con_size_in_bytes,
 3438                                    Register t1,
 3439                                    Register t2,
 3440                                    Label& slow_case) {
 3441   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 3442   bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
 3443 }
 3444 
 3445 RegSet MacroAssembler::call_clobbered_gp_registers() {
 3446   RegSet regs;
 3447   regs += RegSet::of(rax, rcx, rdx);
 3448 #ifndef _WINDOWS
 3449   regs += RegSet::of(rsi, rdi);
 3450 #endif
 3451   regs += RegSet::range(r8, r11);
 3452   if (UseAPX) {
 3453     regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1));

  3617   xorptr(temp, temp);    // use zero reg to clear memory (shorter code)
 3618   if (UseIncDec) {
 3619     shrptr(index, 3);  // divide by 8/16 and set carry flag if bit 2 was set
 3620   } else {
 3621     shrptr(index, 2);  // use 2 instructions to avoid partial flag stall
 3622     shrptr(index, 1);
 3623   }
 3624 
 3625   // initialize remaining object fields: index is a multiple of 2 now
 3626   {
 3627     Label loop;
 3628     bind(loop);
 3629     movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
 3630     decrement(index);
 3631     jcc(Assembler::notZero, loop);
 3632   }
 3633 
 3634   bind(done);
 3635 }
 3636 
 3637 // Look up the method for a megamorphic invokeinterface call.
 3638 // The target method is determined by <intf_klass, itable_index>.
 3639 // The receiver klass is in recv_klass.
 3640 // On success, the result will be in method_result, and execution falls through.
 3641 // On failure, execution transfers to the given label.
 3642 void MacroAssembler::lookup_interface_method(Register recv_klass,
 3643                                              Register intf_klass,
 3644                                              RegisterOrConstant itable_index,
 3645                                              Register method_result,
 3646                                              Register scan_temp,
 3647                                              Label& L_no_such_interface,
 3648                                              bool return_method) {
 3649   assert_different_registers(recv_klass, intf_klass, scan_temp);
 3650   assert_different_registers(method_result, intf_klass, scan_temp);
 3651   assert(recv_klass != method_result || !return_method,
 3652          "recv_klass can be destroyed when method isn't needed");
 3653 
 3654   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
 3655          "caller must use same register for non-constant itable index as for method");
 3656 

 4667   } else {
 4668     Label L;
 4669     jccb(negate_condition(cc), L);
 4670     movl(dst, src);
 4671     bind(L);
 4672   }
 4673 }
 4674 
 4675 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
 4676   if (VM_Version::supports_cmov()) {
 4677     cmovl(cc, dst, src);
 4678   } else {
 4679     Label L;
 4680     jccb(negate_condition(cc), L);
 4681     movl(dst, src);
 4682     bind(L);
 4683   }
 4684 }
 4685 
 4686 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 4687   if (!VerifyOops) return;
 4688 
 4689   BLOCK_COMMENT("verify_oop {");
 4690   push(rscratch1);
 4691   push(rax);                          // save rax
 4692   push(reg);                          // pass register argument
 4693 
 4694   // Pass register number to verify_oop_subroutine
 4695   const char* b = nullptr;
 4696   {
 4697     ResourceMark rm;
 4698     stringStream ss;
 4699     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
 4700     b = code_string(ss.as_string());
 4701   }
 4702   AddressLiteral buffer((address) b, external_word_Relocation::spec_for_immediate());
 4703   pushptr(buffer.addr(), rscratch1);
 4704 
 4705   // call indirectly to solve generation ordering problem
 4706   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
 4707   call(rax);

 4726   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
 4727   int stackElementSize = Interpreter::stackElementSize;
 4728   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
 4729 #ifdef ASSERT
 4730   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
 4731   assert(offset1 - offset == stackElementSize, "correct arithmetic");
 4732 #endif
 4733   Register             scale_reg    = noreg;
 4734   Address::ScaleFactor scale_factor = Address::no_scale;
 4735   if (arg_slot.is_constant()) {
 4736     offset += arg_slot.as_constant() * stackElementSize;
 4737   } else {
 4738     scale_reg    = arg_slot.as_register();
 4739     scale_factor = Address::times(stackElementSize);
 4740   }
 4741   offset += wordSize;           // return PC is on stack
 4742   return Address(rsp, scale_reg, scale_factor, offset);
 4743 }
 4744 
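// The effective address returned above, spelled out (a sketch using the
// names from this function):
//   rsp
//   + (arg_slot.is_constant() ? arg_slot.as_constant() * stackElementSize : 0)  // folded into disp
//   + (arg_slot.is_constant() ? 0 : arg_slot.as_register() * stackElementSize)  // via SIB scale
//   + Interpreter::expr_offset_in_bytes(extra_slot_offset)
//   + wordSize                                                                  // skip the return PC
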
 4745 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 4746   if (!VerifyOops) return;
 4747 
 4748   push(rscratch1);
 4749   push(rax); // save rax,
 4750   // addr may contain rsp so we will have to adjust it based on the push
 4751   // we just did (and on 64 bit we do two pushes)
 4752   // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
 4753   // stores rax into addr which is backwards of what was intended.
 4754   if (addr.uses(rsp)) {
 4755     lea(rax, addr);
 4756     pushptr(Address(rax, 2 * BytesPerWord));
 4757   } else {
 4758     pushptr(addr);
 4759   }
 4760 
 4761   // Pass register number to verify_oop_subroutine
 4762   const char* b = nullptr;
 4763   {
 4764     ResourceMark rm;
 4765     stringStream ss;
 4766     ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);

 5120 
 5121 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
 5122   // get mirror
 5123   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 5124   load_method_holder(mirror, method);
 5125   movptr(mirror, Address(mirror, mirror_offset));
 5126   resolve_oop_handle(mirror, tmp);
 5127 }
 5128 
 5129 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
 5130   load_method_holder(rresult, rmethod);
 5131   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
 5132 }
 5133 
 5134 void MacroAssembler::load_method_holder(Register holder, Register method) {
 5135   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
 5136   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
 5137   movptr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
 5138 }
 5139 
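// A C++ analogue of the three-load chain above (these accessors exist on the
// metadata classes; shown as a sketch, not code from this file):
//   InstanceKlass* holder = method->constMethod()->constants()->pool_holder();
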
 5140 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
 5141   assert(UseCompactObjectHeaders, "expect compact object headers");
 5142   movq(dst, Address(src, oopDesc::mark_offset_in_bytes()));
 5143   shrq(dst, markWord::klass_shift);
 5144 }
 5145 
 5146 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
 5147   assert_different_registers(src, tmp);
 5148   assert_different_registers(dst, tmp);
 5149 
 5150   if (UseCompactObjectHeaders) {
 5151     load_narrow_klass_compact(dst, src);
 5152     decode_klass_not_null(dst, tmp);
 5153   } else if (UseCompressedClassPointers) {
 5154     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5155     decode_klass_not_null(dst, tmp);
 5156   } else {
 5157     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5158   }
 5159 }
 5160 
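// With compact object headers the narrow Klass* lives in the upper bits of
// the mark word, so load_narrow_klass_compact() amounts to (a sketch):
//   narrowKlass nk = obj->mark().value() >> markWord::klass_shift;
// decode_klass_not_null() then rebuilds the full Klass* from those bits.
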
 5161 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
 5162   assert(!UseCompactObjectHeaders, "not with compact headers");
 5163   assert_different_registers(src, tmp);
 5164   assert_different_registers(dst, tmp);
 5165   if (UseCompressedClassPointers) {
 5166     encode_klass_not_null(src, tmp);
 5167     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5168   } else {
 5169     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5170   }
 5171 }
 5172 
 5173 void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
 5174   if (UseCompactObjectHeaders) {
 5175     assert(tmp != noreg, "need tmp");
 5176     assert_different_registers(klass, obj, tmp);
 5177     load_narrow_klass_compact(tmp, obj);
 5178     cmpl(klass, tmp);
 5179   } else if (UseCompressedClassPointers) {
 5180     cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes()));

 5206   bool as_raw = (decorators & AS_RAW) != 0;
 5207   if (as_raw) {
 5208     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1);
 5209   } else {
 5210     bs->load_at(this, decorators, type, dst, src, tmp1);
 5211   }
 5212 }
 5213 
 5214 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
 5215                                      Register tmp1, Register tmp2, Register tmp3) {
 5216   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5217   decorators = AccessInternal::decorator_fixup(decorators, type);
 5218   bool as_raw = (decorators & AS_RAW) != 0;
 5219   if (as_raw) {
 5220     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
 5221   } else {
 5222     bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
 5223   }
 5224 }
 5225 
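// The AS_RAW branch above deliberately bypasses virtual dispatch: the
// explicitly qualified bs->BarrierSetAssembler::store_at() pins the base
// implementation, so no GC-specific barrier code is emitted. A minimal C++
// sketch of the idiom (the types here are illustrative, not HotSpot classes):
struct BaseBSA         { virtual void store_at() { /* raw move only */ } };
struct GCBSA : BaseBSA { void store_at() override { /* barriers + move */ } };
inline void dispatch(GCBSA* bs, bool as_raw) {
  if (as_raw) bs->BaseBSA::store_at();   // static call, skips barriers
  else        bs->store_at();            // virtual call, GC barriers apply
}
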
 5226 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
 5227   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1);
 5228 }
 5229 
 5230 // Doesn't do verification, generates fixed size code
 5231 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
 5232   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1);
 5233 }
 5234 
 5235 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
 5236                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
 5237   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
 5238 }
 5239 
 5240 // Used for storing nulls.
 5241 void MacroAssembler::store_heap_oop_null(Address dst) {
 5242   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
 5243 }
 5244 
 5245 void MacroAssembler::store_klass_gap(Register dst, Register src) {

 5557   Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
 5558 }
 5559 
 5560 void MacroAssembler::reinit_heapbase() {
 5561   if (UseCompressedOops) {
 5562     if (Universe::heap() != nullptr) {
 5563       if (CompressedOops::base() == nullptr) {
 5564         MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
 5565       } else {
 5566         mov64(r12_heapbase, (int64_t)CompressedOops::base());
 5567       }
 5568     } else {
 5569       movptr(r12_heapbase, ExternalAddress(CompressedOops::base_addr()));
 5570     }
 5571   }
 5572 }
 5573 
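// r12_heapbase caches CompressedOops::base() for narrow-oop decoding; the
// zero-base (xorptr) case makes decoding a pure shift-add. Conceptually
// (a sketch):
//   oop = r12_heapbase + ((uint64_t)narrow_oop << CompressedOops::shift());
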
 5574 #if COMPILER2_OR_JVMCI
 5575 
 5576 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
 5577 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 5578   // cnt - number of qwords (8-byte words).
 5579   // base - start address, qword aligned.
 5580   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
 5581   bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
 5582   if (use64byteVector) {
 5583     vpxor(xtmp, xtmp, xtmp, AVX_512bit);
 5584   } else if (MaxVectorSize >= 32) {
 5585     vpxor(xtmp, xtmp, xtmp, AVX_256bit);
 5586   } else {
 5587     pxor(xtmp, xtmp);
 5588   }
 5589   jmp(L_zero_64_bytes);
 5590 
 5591   BIND(L_loop);
 5592   if (MaxVectorSize >= 32) {
 5593     fill64(base, 0, xtmp, use64byteVector);
 5594   } else {
 5595     movdqu(Address(base,  0), xtmp);
 5596     movdqu(Address(base, 16), xtmp);
 5597     movdqu(Address(base, 32), xtmp);
 5598     movdqu(Address(base, 48), xtmp);
 5599   }
 5600   addptr(base, 64);
 5601 
 5602   BIND(L_zero_64_bytes);
 5603   subptr(cnt, 8);
 5604   jccb(Assembler::greaterEqual, L_loop);
 5605 
 5606   // Copy trailing 64 bytes
 5607   if (use64byteVector) {
 5608     addptr(cnt, 8);
 5609     jccb(Assembler::equal, L_end);
 5610     fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true);
 5611     jmp(L_end);
 5612   } else {
 5613     addptr(cnt, 4);
 5614     jccb(Assembler::less, L_tail);
 5615     if (MaxVectorSize >= 32) {
 5616       vmovdqu(Address(base, 0), xtmp);
 5617     } else {
 5618       movdqu(Address(base,  0), xtmp);
 5619       movdqu(Address(base, 16), xtmp);
 5620     }
 5621   }
 5622   addptr(base, 32);
 5623   subptr(cnt, 4);
 5624 
 5625   BIND(L_tail);
 5626   addptr(cnt, 4);
 5627   jccb(Assembler::lessEqual, L_end);
 5628   if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
 5629     fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp);
 5630   } else {
 5631     decrement(cnt);
 5632 
 5633     BIND(L_sloop);
 5634     movq(Address(base, 0), xtmp);
 5635     addptr(base, 8);
 5636     decrement(cnt);
 5637     jccb(Assembler::greaterEqual, L_sloop);
 5638   }
 5639   BIND(L_end);
 5640 }
 5641 
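// The control flow above as a scalar sketch (cnt counts 8-byte words, per the
// register contract in the comments; the 32-byte and masked tails are folded
// into the final loop here, and the helper name is illustrative):
static void clear_qwords_sketch(unsigned long long* base, unsigned long long cnt) {
  while (cnt >= 8) {                        // L_loop: one 64-byte block per pass
    for (int i = 0; i < 8; i++) base[i] = 0;
    base += 8; cnt -= 8;
  }
  while (cnt > 0) { *base++ = 0; --cnt; }   // L_sloop: trailing qword stores
}
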
 5642 // Clearing constant sized memory using YMM/ZMM registers.
 5643 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 5644   assert(UseAVX > 2 && VM_Version::supports_avx512vl(), "");
 5645   bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
 5646 
 5647   int vector64_count = (cnt & (~0x7)) >> 3;
 5648   cnt = cnt & 0x7;
 5649   const int fill64_per_loop = 4;
 5650   const int max_unrolled_fill64 = 8;
 5651 
 5652   // 64 byte initialization loop.
 5653   vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
 5654   int start64 = 0;
 5655   if (vector64_count > max_unrolled_fill64) {
 5656     Label LOOP;
 5657     Register index = rtmp;
 5658 
 5659     start64 = vector64_count - (vector64_count % fill64_per_loop);
 5660 
 5661     movl(index, 0);

 5711         break;
 5712       case 7:
 5713         if (use64byteVector) {
 5714           movl(rtmp, 0x7F);
 5715           kmovwl(mask, rtmp);
 5716           evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
 5717         } else {
 5718           evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
 5719           movl(rtmp, 0x7);
 5720           kmovwl(mask, rtmp);
 5721           evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
 5722         }
 5723         break;
 5724       default:
 5725         fatal("Unexpected length : %d\n",cnt);
 5726         break;
 5727     }
 5728   }
 5729 }
 5730 
 5731 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
 5732                                bool is_large, KRegister mask) {
 5733   // cnt      - number of qwords (8-byte words).
 5734   // base     - start address, qword aligned.
 5735   // is_large - if optimizers know cnt is larger than InitArrayShortSize
 5736   assert(base==rdi, "base register must be edi for rep stos");
 5737   assert(tmp==rax,   "tmp register must be eax for rep stos");
 5738   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
 5739   assert(InitArrayShortSize % BytesPerLong == 0,
 5740     "InitArrayShortSize should be the multiple of BytesPerLong");
 5741 
 5742   Label DONE;
 5743   if (!is_large || !UseXMMForObjInit) {
 5744     xorptr(tmp, tmp);
 5745   }
 5746 
 5747   if (!is_large) {
 5748     Label LOOP, LONG;
 5749     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
 5750     jccb(Assembler::greater, LONG);
 5751 
 5752     decrement(cnt);
 5753     jccb(Assembler::negative, DONE); // Zero length
 5754 
 5755     // Use individual pointer-sized stores for small counts:
 5756     BIND(LOOP);
 5757     movptr(Address(base, cnt, Address::times_ptr), tmp);
 5758     decrement(cnt);
 5759     jccb(Assembler::greaterEqual, LOOP);
 5760     jmpb(DONE);
 5761 
 5762     BIND(LONG);
 5763   }
 5764 
 5765   // Use longer rep-prefixed ops for non-small counts:
 5766   if (UseFastStosb) {
 5767     shlptr(cnt, 3); // convert to number of bytes
 5768     rep_stosb();
 5769   } else if (UseXMMForObjInit) {
 5770     xmm_clear_mem(base, cnt, tmp, xtmp, mask);
 5771   } else {
 5772     rep_stos();
 5773   }
 5774 
 5775   BIND(DONE);
 5776 }
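
// The rep-prefixed fallbacks rely on the fixed register contract asserted at
// the top of this function (rdi = destination, rax = fill value, rcx = count).
// Their semantics, as a sketch:
//   rep stosq:  while (rcx--) { *(uint64_t*)rdi = rax;          rdi += 8; }
//   rep stosb:  while (rcx--) { *(uint8_t*)rdi  = (uint8_t)rax; rdi += 1; }
// which is why the UseFastStosb path first converts the qword count to a
// byte count with shlptr(cnt, 3).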
 5777 
 5778 #endif //COMPILER2_OR_JVMCI
 5779 
 5780 
 5781 void MacroAssembler::generate_fill(BasicType t, bool aligned,
 5782                                    Register to, Register value, Register count,
 5783                                    Register rtmp, XMMRegister xtmp) {
 5784   ShortBranchVerifier sbv(this);
 5785   assert_different_registers(to, value, count, rtmp);
 5786   Label L_exit;
 5787   Label L_fill_2_bytes, L_fill_4_bytes;
 5788 
 5789 #if defined(COMPILER2)
 5790   if(MaxVectorSize >=32 &&

 9607 
 9608   // Load top.
 9609   movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
 9610 
 9611   // Check if the lock-stack is full.
 9612   cmpl(top, LockStack::end_offset());
 9613   jcc(Assembler::greaterEqual, slow);
 9614 
 9615   // Check for recursion.
 9616   cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
 9617   jcc(Assembler::equal, push);
 9618 
 9619   // Check header for monitor (0b10).
 9620   testptr(reg_rax, markWord::monitor_value);
 9621   jcc(Assembler::notZero, slow);
 9622 
 9623   // Try to lock. Transition lock bits 0b01 => 0b00
 9624   movptr(tmp, reg_rax);
 9625   andptr(tmp, ~(int32_t)markWord::unlocked_value);
 9626   orptr(reg_rax, markWord::unlocked_value);
 9627   lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
 9628   jcc(Assembler::notEqual, slow);
 9629 
 9630   // Restore top, CAS clobbers register.
 9631   movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
 9632 
 9633   bind(push);
 9634   // After successful lock, push object on lock-stack.
 9635   movptr(Address(thread, top), obj);
 9636   incrementl(top, oopSize);
 9637   movl(Address(thread, JavaThread::lock_stack_top_offset()), top);
 9638 }
 9639 
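// The CAS above attempts the mark-word lock-bit transition 0b01 (unlocked)
// -> 0b00 (fast-locked). In mark-word terms (a sketch):
//   expected = mark |  markWord::unlocked_value;   // must still read 0b01
//   desired  = mark & ~markWord::unlocked_value;   // lock bits become 0b00
//   if (!CAS(&obj->mark, expected, desired)) goto slow;
// On success the lock is recorded by pushing obj onto the per-thread
// lock-stack rather than by storing an owner in the mark word.
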
 9640 // Implements lightweight-unlocking.
 9641 //
 9642 // obj: the object to be unlocked
 9643 // reg_rax: rax
 9644 // thread: the thread
 9645 // tmp: a temporary register
 9646 void MacroAssembler::lightweight_unlock(Register obj, Register reg_rax, Register tmp, Label& slow) {

src/hotspot/cpu/x86/macroAssembler_x86.cpp (new)

   11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12  * version 2 for more details (a copy is included in the LICENSE file that
   13  * accompanied this code).
   14  *
   15  * You should have received a copy of the GNU General Public License version
   16  * 2 along with this work; if not, write to the Free Software Foundation,
   17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18  *
   19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20  * or visit www.oracle.com if you need additional information or have any
   21  * questions.
   22  *
   23  */
   24 
   25 #include "asm/assembler.hpp"
   26 #include "asm/assembler.inline.hpp"
   27 #include "code/aotCodeCache.hpp"
   28 #include "code/compiledIC.hpp"
   29 #include "compiler/compiler_globals.hpp"
   30 #include "compiler/disassembler.hpp"
   31 #include "ci/ciInlineKlass.hpp"
   32 #include "crc32c.h"
   33 #include "gc/shared/barrierSet.hpp"
   34 #include "gc/shared/barrierSetAssembler.hpp"
   35 #include "gc/shared/collectedHeap.inline.hpp"
   36 #include "gc/shared/tlab_globals.hpp"
   37 #include "interpreter/bytecodeHistogram.hpp"
   38 #include "interpreter/interpreter.hpp"
   39 #include "interpreter/interpreterRuntime.hpp"
   40 #include "jvm.h"
   41 #include "memory/resourceArea.hpp"
   42 #include "memory/universe.hpp"
   43 #include "oops/accessDecorators.hpp"
   44 #include "oops/compressedKlass.inline.hpp"
   45 #include "oops/compressedOops.inline.hpp"
   46 #include "oops/klass.inline.hpp"
   47 #include "oops/resolvedFieldEntry.hpp"
   48 #include "prims/methodHandles.hpp"
   49 #include "runtime/continuation.hpp"
   50 #include "runtime/interfaceSupport.inline.hpp"
   51 #include "runtime/javaThread.hpp"
   52 #include "runtime/jniHandles.hpp"
   53 #include "runtime/objectMonitor.hpp"
   54 #include "runtime/os.hpp"
   55 #include "runtime/safepoint.hpp"
   56 #include "runtime/safepointMechanism.hpp"
   57 #include "runtime/sharedRuntime.hpp"
   58 #include "runtime/signature_cc.hpp"
   59 #include "runtime/stubRoutines.hpp"
   60 #include "utilities/checkedCast.hpp"
   61 #include "utilities/macros.hpp"
   62 #include "vmreg_x86.inline.hpp"
   63 #ifdef COMPILER2
   64 #include "opto/output.hpp"
   65 #endif
   66 
   67 #ifdef PRODUCT
   68 #define BLOCK_COMMENT(str) /* nothing */
   69 #define STOP(error) stop(error)
   70 #else
   71 #define BLOCK_COMMENT(str) block_comment(str)
   72 #define STOP(error) block_comment(error); stop(error)
   73 #endif
   74 
   75 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
   76 
   77 #ifdef ASSERT
   78 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
   79 #endif
   80 
   81 static const Assembler::Condition reverse[] = {
   82     Assembler::noOverflow     /* overflow      = 0x0 */ ,
   83     Assembler::overflow       /* noOverflow    = 0x1 */ ,
   84     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
   85     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

 1277 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 1278   assert_different_registers(arg_0, c_rarg1, c_rarg2);
 1279   assert_different_registers(arg_1, c_rarg2);
 1280   pass_arg2(this, arg_2);
 1281   pass_arg1(this, arg_1);
 1282   pass_arg0(this, arg_0);
 1283   call_VM_leaf(entry_point, 3);
 1284 }
 1285 
 1286 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
 1287   assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3);
 1288   assert_different_registers(arg_1, c_rarg2, c_rarg3);
 1289   assert_different_registers(arg_2, c_rarg3);
 1290   pass_arg3(this, arg_3);
 1291   pass_arg2(this, arg_2);
 1292   pass_arg1(this, arg_1);
 1293   pass_arg0(this, arg_0);
 1294   call_VM_leaf(entry_point, 3);
 1295 }
 1296 
 1297 void MacroAssembler::super_call_VM_leaf(address entry_point) {
 1298   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1299 }
 1300 
 1301 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
 1302   pass_arg0(this, arg_0);
 1303   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 1304 }
 1305 
 1306 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 1307   assert_different_registers(arg_0, c_rarg1);
 1308   pass_arg1(this, arg_1);
 1309   pass_arg0(this, arg_0);
 1310   MacroAssembler::call_VM_leaf_base(entry_point, 2);
 1311 }
 1312 
 1313 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 1314   assert_different_registers(arg_0, c_rarg1, c_rarg2);
 1315   assert_different_registers(arg_1, c_rarg2);
 1316   pass_arg2(this, arg_2);
 1317   pass_arg1(this, arg_1);
 1318   pass_arg0(this, arg_0);
 1319   MacroAssembler::call_VM_leaf_base(entry_point, 3);
 1320 }

 2337     lea(rscratch, src);
 2338     Assembler::mulss(dst, Address(rscratch, 0));
 2339   }
 2340 }
 2341 
 2342 void MacroAssembler::null_check(Register reg, int offset) {
 2343   if (needs_explicit_null_check(offset)) {
 2344     // provoke OS null exception if reg is null by
 2345     // accessing M[reg] w/o changing any (non-CC) registers
 2346     // NOTE: cmpl is plenty here to provoke a segv
 2347     cmpptr(rax, Address(reg, 0));
 2348     // Note: should probably use testl(rax, Address(reg, 0));
 2349     //       may be shorter code (however, this version of
 2350     //       testl needs to be implemented first)
 2351   } else {
 2352     // nothing to do, (later) access of M[reg + offset]
 2353     // will provoke OS null exception if reg is null
 2354   }
 2355 }
 2356 
 2357 void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
 2358   andptr(markword, markWord::inline_type_mask_in_place);
 2359   cmpptr(markword, markWord::inline_type_pattern);
 2360   jcc(Assembler::equal, is_inline_type);
 2361 }
 2362 
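// The predicate above in one line (a sketch in mark-word terms, using the
// Valhalla markWord constants referenced by the code):
//   is_inline_type(mark) := (mark & inline_type_mask_in_place) == inline_type_pattern
// Note that the helper clobbers `markword`, since andptr() operates in place.
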
 2363 void MacroAssembler::test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type) {
 2364   load_unsigned_short(temp_reg, Address(klass, Klass::access_flags_offset()));
 2365   testl(temp_reg, JVM_ACC_IDENTITY);
 2366   jcc(Assembler::zero, is_inline_type);
 2367 }
 2368 
 2369 void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null) {
 2370   if (can_be_null) {
 2371     testptr(object, object);
 2372     jcc(Assembler::zero, not_inline_type);
 2373   }
 2374   const int is_inline_type_mask = markWord::inline_type_pattern;
 2375   movptr(tmp, Address(object, oopDesc::mark_offset_in_bytes()));
 2376   andptr(tmp, is_inline_type_mask);
 2377   cmpptr(tmp, is_inline_type_mask);
 2378   jcc(Assembler::notEqual, not_inline_type);
 2379 }
 2380 
 2381 void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
 2382   movl(temp_reg, flags);
 2383   testl(temp_reg, 1 << ResolvedFieldEntry::is_null_free_inline_type_shift);
 2384   jcc(Assembler::notEqual, is_null_free_inline_type);
 2385 }
 2386 
 2387 void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
 2388   movl(temp_reg, flags);
 2389   testl(temp_reg, 1 << ResolvedFieldEntry::is_null_free_inline_type_shift);
 2390   jcc(Assembler::equal, not_null_free_inline_type);
 2391 }
 2392 
 2393 void MacroAssembler::test_field_is_flat(Register flags, Register temp_reg, Label& is_flat) {
 2394   movl(temp_reg, flags);
 2395   testl(temp_reg, 1 << ResolvedFieldEntry::is_flat_shift);
 2396   jcc(Assembler::notEqual, is_flat);
 2397 }
 2398 
 2399 void MacroAssembler::test_field_has_null_marker(Register flags, Register temp_reg, Label& has_null_marker) {
 2400   movl(temp_reg, flags);
 2401   testl(temp_reg, 1 << ResolvedFieldEntry::has_null_marker_shift);
 2402   jcc(Assembler::notEqual, has_null_marker);
 2403 }
 2404 
 2405 void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
 2406   Label test_mark_word;
 2407   // load mark word
 2408   movptr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes()));
 2409   // check displaced
 2410   testl(temp_reg, markWord::unlocked_value);
 2411   jccb(Assembler::notZero, test_mark_word);
 2412   // slow path use klass prototype
 2413   push(rscratch1);
 2414   load_prototype_header(temp_reg, oop, rscratch1);
 2415   pop(rscratch1);
 2416 
 2417   bind(test_mark_word);
 2418   testl(temp_reg, test_bit);
 2419   jcc((jmp_set) ? Assembler::notZero : Assembler::zero, jmp_label);
 2420 }
 2421 
 2422 void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg,
 2423                                          Label& is_flat_array) {
 2424 #ifdef _LP64
 2425   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flat_array);
 2426 #else
 2427   load_klass(temp_reg, oop, noreg);
 2428   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2429   test_flat_array_layout(temp_reg, is_flat_array);
 2430 #endif
 2431 }
 2432 
 2433 void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg,
 2434                                              Label& is_non_flat_array) {
 2435 #ifdef _LP64
 2436   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flat_array);
 2437 #else
 2438   load_klass(temp_reg, oop, noreg);
 2439   movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
 2440   test_non_flat_array_layout(temp_reg, is_non_flat_array);
 2441 #endif
 2442 }
 2443 
 2444 void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label&is_null_free_array) {
 2445 #ifdef _LP64
 2446   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array);
 2447 #else
 2448   Unimplemented();
 2449 #endif
 2450 }
 2451 
 2452 void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array) {
 2453 #ifdef _LP64
 2454   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array);
 2455 #else
 2456   Unimplemented();
 2457 #endif
 2458 }
 2459 
 2460 void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) {
 2461   testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
 2462   jcc(Assembler::notZero, is_flat_array);
 2463 }
 2464 
 2465 void MacroAssembler::test_non_flat_array_layout(Register lh, Label& is_non_flat_array) {
 2466   testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
 2467   jcc(Assembler::zero, is_non_flat_array);
 2468 }
 2469 
 2470 void MacroAssembler::os_breakpoint() {
  2471   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
 2472   // (e.g., MSVC can't call ps() otherwise)
 2473   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 2474 }
 2475 
 2476 void MacroAssembler::unimplemented(const char* what) {
 2477   const char* buf = nullptr;
 2478   {
 2479     ResourceMark rm;
 2480     stringStream ss;
 2481     ss.print("unimplemented: %s", what);
 2482     buf = code_string(ss.as_string());
 2483   }
 2484   stop(buf);
 2485 }
 2486 
 2487 #define XSTATE_BV 0x200
 2488 
 2489 void MacroAssembler::pop_CPU_state() {

 3538 }
 3539 
 3540 // C++ bool manipulation
 3541 void MacroAssembler::testbool(Register dst) {
 3542   if(sizeof(bool) == 1)
 3543     testb(dst, 0xff);
 3544   else if(sizeof(bool) == 2) {
 3545     // testw implementation needed for two byte bools
 3546     ShouldNotReachHere();
 3547   } else if(sizeof(bool) == 4)
 3548     testl(dst, dst);
 3549   else
 3550     // unsupported
 3551     ShouldNotReachHere();
 3552 }
 3553 
 3554 void MacroAssembler::testptr(Register dst, Register src) {
 3555   testq(dst, src);
 3556 }
 3557 
 3558 // Object / value buffer allocation...
 3559 //
 3560 // Kills klass and rsi on LP64
 3561 void MacroAssembler::allocate_instance(Register klass, Register new_obj,
 3562                                        Register t1, Register t2,
 3563                                        bool clear_fields, Label& alloc_failed)
 3564 {
 3565   Label done, initialize_header, initialize_object, slow_case, slow_case_no_pop;
 3566   Register layout_size = t1;
 3567   assert(new_obj == rax, "needs to be rax");
 3568   assert_different_registers(klass, new_obj, t1, t2);
 3569 
 3570   // get instance_size in InstanceKlass (scaled to a count of bytes)
 3571   movl(layout_size, Address(klass, Klass::layout_helper_offset()));
 3572   // test to see if it is malformed in some way
 3573   testl(layout_size, Klass::_lh_instance_slow_path_bit);
 3574   jcc(Assembler::notZero, slow_case_no_pop);
 3575 
 3576   // Allocate the instance:
 3577   //  If TLAB is enabled:
 3578   //    Try to allocate in the TLAB.
 3579   //    If fails, go to the slow path.
 3580   //  Else If inline contiguous allocations are enabled:
 3581   //    Try to allocate in eden.
 3582   //    If fails due to heap end, go to slow path.
 3583   //
 3584   //  If TLAB is enabled OR inline contiguous is enabled:
 3585   //    Initialize the allocation.
 3586   //    Exit.
 3587   //
 3588   //  Go to slow path.
 3589 
 3590   push(klass);
 3591   if (UseTLAB) {
 3592     tlab_allocate(new_obj, layout_size, 0, klass, t2, slow_case);
 3593     if (ZeroTLAB || (!clear_fields)) {
 3594       // the fields have been already cleared
 3595       jmp(initialize_header);
 3596     } else {
 3597       // initialize both the header and fields
 3598       jmp(initialize_object);
 3599     }
 3600   } else {
 3601     jmp(slow_case);
 3602   }
 3603 
  3604   // If UseTLAB is true, the object was allocated above and still needs to be initialized.
  3605   // Otherwise, skip straight to the slow path.
 3606   if (UseTLAB) {
 3607     if (clear_fields) {
  3608       // The object's fields are initialized before its header.  If the object size is
 3609       // zero, go directly to the header initialization.
 3610       bind(initialize_object);
 3611       if (UseCompactObjectHeaders) {
 3612         assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned");
 3613         decrement(layout_size, oopDesc::base_offset_in_bytes());
 3614       } else {
 3615         decrement(layout_size, sizeof(oopDesc));
 3616       }
 3617       jcc(Assembler::zero, initialize_header);
 3618 
 3619       // Initialize topmost object field, divide size by 8, check if odd and
 3620       // test if zero.
 3621       Register zero = klass;
 3622       xorl(zero, zero);    // use zero reg to clear memory (shorter code)
 3623       shrl(layout_size, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
 3624 
 3625   #ifdef ASSERT
 3626       // make sure instance_size was multiple of 8
 3627       Label L;
 3628       // Ignore partial flag stall after shrl() since it is debug VM
 3629       jcc(Assembler::carryClear, L);
 3630       stop("object size is not multiple of 2 - adjust this code");
 3631       bind(L);
 3632       // must be > 0, no extra check needed here
 3633   #endif
 3634 
 3635       // initialize remaining object fields: instance_size was a multiple of 8
 3636       {
 3637         Label loop;
 3638         bind(loop);
 3639         int header_size_bytes = oopDesc::header_size() * HeapWordSize;
 3640         assert(is_aligned(header_size_bytes, BytesPerLong), "oop header size must be 8-byte-aligned");
 3641         movptr(Address(new_obj, layout_size, Address::times_8, header_size_bytes - 1*oopSize), zero);
 3642         decrement(layout_size);
 3643         jcc(Assembler::notZero, loop);
 3644       }
 3645     } // clear_fields
 3646 
 3647     // initialize object header only.
 3648     bind(initialize_header);
 3649     if (UseCompactObjectHeaders || EnableValhalla) {
 3650       pop(klass);
 3651       Register mark_word = t2;
 3652       movptr(mark_word, Address(klass, Klass::prototype_header_offset()));
 3653       movptr(Address(new_obj, oopDesc::mark_offset_in_bytes ()), mark_word);
 3654     } else {
 3655      movptr(Address(new_obj, oopDesc::mark_offset_in_bytes()),
 3656             (intptr_t)markWord::prototype().value()); // header
 3657      pop(klass);   // get saved klass back in the register.
 3658     }
 3659     if (!UseCompactObjectHeaders) {
 3660       xorl(rsi, rsi);                 // use zero reg to clear memory (shorter code)
 3661       store_klass_gap(new_obj, rsi);  // zero klass gap for compressed oops
 3662       movptr(t2, klass);         // preserve klass
 3663       store_klass(new_obj, t2, rscratch1);  // src klass reg is potentially compressed
 3664     }
 3665     jmp(done);
 3666   }
 3667 
 3668   bind(slow_case);
 3669   pop(klass);
 3670   bind(slow_case_no_pop);
 3671   jmp(alloc_failed);
 3672 
 3673   bind(done);
 3674 }
 3675 
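// Background for the layout_helper test at the top of allocate_instance: for
// instance klasses, Klass::layout_helper() is a positive byte size whose low
// bit (_lh_instance_slow_path_bit) flags classes that must allocate in the
// runtime. As a sketch:
//   if (lh & Klass::_lh_instance_slow_path_bit) goto slow_case;
//   // otherwise lh is already the exact instance size in bytes
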
 3676 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
 3677 void MacroAssembler::tlab_allocate(Register obj,
 3678                                    Register var_size_in_bytes,
 3679                                    int con_size_in_bytes,
 3680                                    Register t1,
 3681                                    Register t2,
 3682                                    Label& slow_case) {
 3683   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 3684   bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
 3685 }
 3686 
 3687 RegSet MacroAssembler::call_clobbered_gp_registers() {
 3688   RegSet regs;
 3689   regs += RegSet::of(rax, rcx, rdx);
 3690 #ifndef _WINDOWS
 3691   regs += RegSet::of(rsi, rdi);
 3692 #endif
 3693   regs += RegSet::range(r8, r11);
 3694   if (UseAPX) {
 3695     regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1));

  3859   xorptr(temp, temp);    // use zero reg to clear memory (shorter code)
 3860   if (UseIncDec) {
 3861     shrptr(index, 3);  // divide by 8/16 and set carry flag if bit 2 was set
 3862   } else {
 3863     shrptr(index, 2);  // use 2 instructions to avoid partial flag stall
 3864     shrptr(index, 1);
 3865   }
 3866 
 3867   // initialize remaining object fields: index is a multiple of 2 now
 3868   {
 3869     Label loop;
 3870     bind(loop);
 3871     movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
 3872     decrement(index);
 3873     jcc(Assembler::notZero, loop);
 3874   }
 3875 
 3876   bind(done);
 3877 }
 3878 
 3879 void MacroAssembler::get_inline_type_field_klass(Register holder_klass, Register index, Register inline_klass) {
 3880   inline_layout_info(holder_klass, index, inline_klass);
 3881   movptr(inline_klass, Address(inline_klass, InlineLayoutInfo::klass_offset()));
 3882 }
 3883 
 3884 void MacroAssembler::inline_layout_info(Register holder_klass, Register index, Register layout_info) {
 3885   movptr(layout_info, Address(holder_klass, InstanceKlass::inline_layout_info_array_offset()));
 3886 #ifdef ASSERT
 3887   {
 3888     Label done;
 3889     cmpptr(layout_info, 0);
 3890     jcc(Assembler::notEqual, done);
 3891     stop("inline_layout_info_array is null");
 3892     bind(done);
 3893   }
 3894 #endif
 3895 
 3896   InlineLayoutInfo array[2];
 3897   int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements
 3898   if (is_power_of_2(size)) {
 3899     shll(index, log2i_exact(size)); // Scale index by power of 2
 3900   } else {
 3901     imull(index, index, size); // Scale the index to be the entry index * array_element_size
 3902   }
 3903   lea(layout_info, Address(layout_info, index, Address::times_1, Array<InlineLayoutInfo>::base_offset_in_bytes()));
 3904 }
 3905 
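// The entry address formed above, as plain arithmetic (a sketch using the
// names from this function):
//   layout_info = holder_klass->inline_layout_info_array
//               + Array<InlineLayoutInfo>::base_offset_in_bytes()
//               + index * sizeof(InlineLayoutInfo)
// The multiply is strength-reduced to shll() when the element size is a
// power of two and kept as imull() otherwise.
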
 3906 // Look up the method for a megamorphic invokeinterface call.
 3907 // The target method is determined by <intf_klass, itable_index>.
 3908 // The receiver klass is in recv_klass.
 3909 // On success, the result will be in method_result, and execution falls through.
 3910 // On failure, execution transfers to the given label.
 3911 void MacroAssembler::lookup_interface_method(Register recv_klass,
 3912                                              Register intf_klass,
 3913                                              RegisterOrConstant itable_index,
 3914                                              Register method_result,
 3915                                              Register scan_temp,
 3916                                              Label& L_no_such_interface,
 3917                                              bool return_method) {
 3918   assert_different_registers(recv_klass, intf_klass, scan_temp);
 3919   assert_different_registers(method_result, intf_klass, scan_temp);
 3920   assert(recv_klass != method_result || !return_method,
 3921          "recv_klass can be destroyed when method isn't needed");
 3922 
 3923   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
 3924          "caller must use same register for non-constant itable index as for method");
 3925 

 4936   } else {
 4937     Label L;
 4938     jccb(negate_condition(cc), L);
 4939     movl(dst, src);
 4940     bind(L);
 4941   }
 4942 }
 4943 
 4944 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
 4945   if (VM_Version::supports_cmov()) {
 4946     cmovl(cc, dst, src);
 4947   } else {
 4948     Label L;
 4949     jccb(negate_condition(cc), L);
 4950     movl(dst, src);
 4951     bind(L);
 4952   }
 4953 }
 4954 
 4955 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 4956   if (!VerifyOops || VerifyAdapterSharing) {
  4957     // The address of the code string pushed below confuses VerifyAdapterSharing
 4958     // because it may differ between otherwise equivalent adapters.
 4959     return;
 4960   }
 4961 
 4962   BLOCK_COMMENT("verify_oop {");
 4963   push(rscratch1);
 4964   push(rax);                          // save rax
 4965   push(reg);                          // pass register argument
 4966 
 4967   // Pass register number to verify_oop_subroutine
 4968   const char* b = nullptr;
 4969   {
 4970     ResourceMark rm;
 4971     stringStream ss;
 4972     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
 4973     b = code_string(ss.as_string());
 4974   }
 4975   AddressLiteral buffer((address) b, external_word_Relocation::spec_for_immediate());
 4976   pushptr(buffer.addr(), rscratch1);
 4977 
 4978   // call indirectly to solve generation ordering problem
 4979   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
 4980   call(rax);

 4999   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
 5000   int stackElementSize = Interpreter::stackElementSize;
 5001   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
 5002 #ifdef ASSERT
 5003   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
 5004   assert(offset1 - offset == stackElementSize, "correct arithmetic");
 5005 #endif
 5006   Register             scale_reg    = noreg;
 5007   Address::ScaleFactor scale_factor = Address::no_scale;
 5008   if (arg_slot.is_constant()) {
 5009     offset += arg_slot.as_constant() * stackElementSize;
 5010   } else {
 5011     scale_reg    = arg_slot.as_register();
 5012     scale_factor = Address::times(stackElementSize);
 5013   }
 5014   offset += wordSize;           // return PC is on stack
 5015   return Address(rsp, scale_reg, scale_factor, offset);
 5016 }
 5017 
 5018 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 5019   if (!VerifyOops || VerifyAdapterSharing) {
  5020     // The address of the code string pushed below confuses VerifyAdapterSharing
 5021     // because it may differ between otherwise equivalent adapters.
 5022     return;
 5023   }
 5024 
 5025   push(rscratch1);
 5026   push(rax); // save rax,
 5027   // addr may contain rsp so we will have to adjust it based on the push
 5028   // we just did (and on 64 bit we do two pushes)
 5029   // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
 5030   // stores rax into addr which is backwards of what was intended.
 5031   if (addr.uses(rsp)) {
 5032     lea(rax, addr);
 5033     pushptr(Address(rax, 2 * BytesPerWord));
 5034   } else {
 5035     pushptr(addr);
 5036   }
 5037 
 5038   // Pass register number to verify_oop_subroutine
 5039   const char* b = nullptr;
 5040   {
 5041     ResourceMark rm;
 5042     stringStream ss;
 5043     ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);

 5397 
 5398 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
 5399   // get mirror
 5400   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 5401   load_method_holder(mirror, method);
 5402   movptr(mirror, Address(mirror, mirror_offset));
 5403   resolve_oop_handle(mirror, tmp);
 5404 }
 5405 
 5406 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
 5407   load_method_holder(rresult, rmethod);
 5408   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
 5409 }
 5410 
 5411 void MacroAssembler::load_method_holder(Register holder, Register method) {
 5412   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
 5413   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
 5414   movptr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
 5415 }
 5416 
 5417 void MacroAssembler::load_metadata(Register dst, Register src) {
 5418   if (UseCompactObjectHeaders) {
 5419     load_narrow_klass_compact(dst, src);
 5420   } else if (UseCompressedClassPointers) {
 5421     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5422   } else {
 5423     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5424   }
 5425 }
 5426 
 5427 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
 5428   assert(UseCompactObjectHeaders, "expect compact object headers");
 5429   movq(dst, Address(src, oopDesc::mark_offset_in_bytes()));
 5430   shrq(dst, markWord::klass_shift);
 5431 }
 5432 
 5433 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
 5434   assert_different_registers(src, tmp);
 5435   assert_different_registers(dst, tmp);
 5436 
 5437   if (UseCompactObjectHeaders) {
 5438     load_narrow_klass_compact(dst, src);
 5439     decode_klass_not_null(dst, tmp);
 5440   } else if (UseCompressedClassPointers) {
 5441     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5442     decode_klass_not_null(dst, tmp);
 5443   } else {
 5444     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
 5445   }
 5446 }
 5447 
 5448 void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) {
 5449   load_klass(dst, src, tmp);
 5450   movptr(dst, Address(dst, Klass::prototype_header_offset()));
 5451 }
 5452 
 5453 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
 5454   assert(!UseCompactObjectHeaders, "not with compact headers");
 5455   assert_different_registers(src, tmp);
 5456   assert_different_registers(dst, tmp);
 5457   if (UseCompressedClassPointers) {
 5458     encode_klass_not_null(src, tmp);
 5459     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5460   } else {
 5461     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 5462   }
 5463 }
 5464 
 5465 void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
 5466   if (UseCompactObjectHeaders) {
 5467     assert(tmp != noreg, "need tmp");
 5468     assert_different_registers(klass, obj, tmp);
 5469     load_narrow_klass_compact(tmp, obj);
 5470     cmpl(klass, tmp);
 5471   } else if (UseCompressedClassPointers) {
 5472     cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes()));

 5498   bool as_raw = (decorators & AS_RAW) != 0;
 5499   if (as_raw) {
 5500     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1);
 5501   } else {
 5502     bs->load_at(this, decorators, type, dst, src, tmp1);
 5503   }
 5504 }
 5505 
 5506 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
 5507                                      Register tmp1, Register tmp2, Register tmp3) {
 5508   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5509   decorators = AccessInternal::decorator_fixup(decorators, type);
 5510   bool as_raw = (decorators & AS_RAW) != 0;
 5511   if (as_raw) {
 5512     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
 5513   } else {
 5514     bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
 5515   }
 5516 }
 5517 
 5518 void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst,
 5519                                      Register inline_layout_info) {
 5520   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 5521   bs->flat_field_copy(this, decorators, src, dst, inline_layout_info);
 5522 }
 5523 
 5524 void MacroAssembler::payload_offset(Register inline_klass, Register offset) {
 5525   movptr(offset, Address(inline_klass, InstanceKlass::adr_inlineklass_fixed_block_offset()));
 5526   movl(offset, Address(offset, InlineKlass::payload_offset_offset()));
 5527 }
 5528 
 5529 void MacroAssembler::payload_addr(Register oop, Register data, Register inline_klass) {
 5530   // ((address) (void*) o) + vk->payload_offset();
 5531   Register offset = (data == oop) ? rscratch1 : data;
 5532   payload_offset(inline_klass, offset);
 5533   if (data == oop) {
 5534     addptr(data, offset);
 5535   } else {
 5536     lea(data, Address(oop, offset));
 5537   }
 5538 }
 5539 
 5540 void MacroAssembler::data_for_value_array_index(Register array, Register array_klass,
 5541                                                 Register index, Register data) {
 5542   assert(index != rcx, "index needs to shift by rcx");
 5543   assert_different_registers(array, array_klass, index);
 5544   assert_different_registers(rcx, array, index);
 5545 
 5546   // array->base() + (index << Klass::layout_helper_log2_element_size(lh));
 5547   movl(rcx, Address(array_klass, Klass::layout_helper_offset()));
 5548 
 5549   // Klass::layout_helper_log2_element_size(lh)
 5550   // (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask;
 5551   shrl(rcx, Klass::_lh_log2_element_size_shift);
 5552   andl(rcx, Klass::_lh_log2_element_size_mask);
 5553   shlptr(index); // index << rcx
 5554 
 5555   lea(data, Address(array, index, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_FLAT_ELEMENT)));
 5556 }
 5557 
 5558 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
 5559   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1);
 5560 }
 5561 
 5562 // Doesn't do verification, generates fixed-size code
 5563 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
 5564   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1);
 5565 }
 5566 
 5567 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
 5568                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
 5569   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
 5570 }
 5571 
 5572 // Used for storing nulls.
 5573 void MacroAssembler::store_heap_oop_null(Address dst) {
 5574   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
 5575 }
 5576 
 5577 void MacroAssembler::store_klass_gap(Register dst, Register src) {

 5889   Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
 5890 }
 5891 
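      // Roughly: r12_heapbase caches CompressedOops::base(). While the heap is
      // not yet created the base may still change, so it is reloaded indirectly.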
 5892 void MacroAssembler::reinit_heapbase() {
 5893   if (UseCompressedOops) {
 5894     if (Universe::heap() != nullptr) {
 5895       if (CompressedOops::base() == nullptr) {
 5896         MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
 5897       } else {
 5898         mov64(r12_heapbase, (int64_t)CompressedOops::base());
 5899       }
 5900     } else {
 5901       movptr(r12_heapbase, ExternalAddress(CompressedOops::base_addr()));
 5902     }
 5903   }
 5904 }
 5905 
 5906 #if COMPILER2_OR_JVMCI
 5907 
 5908 // Clear memory of size 'cnt' qwords, starting at 'base', using XMM/YMM/ZMM registers.
 5909 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, KRegister mask) {
 5910   // cnt - number of qwords (8-byte words).
 5911   // base - start address, qword aligned.
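        // Strategy (sketch): broadcast 'val' into xtmp, store 64 bytes per loop
        // iteration while cnt >= 8 qwords, then fill the remainder with a 32-byte
        // chunk and a qword tail (or one masked 64-byte store on AVX-512).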
 5912   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
 5913   bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
 5914   if (use64byteVector) {
 5915     evpbroadcastq(xtmp, val, AVX_512bit);
 5916   } else if (MaxVectorSize >= 32) {
 5917     movdq(xtmp, val);
 5918     punpcklqdq(xtmp, xtmp);
 5919     vinserti128_high(xtmp, xtmp);
 5920   } else {
 5921     movdq(xtmp, val);
 5922     punpcklqdq(xtmp, xtmp);
 5923   }
 5924   jmp(L_zero_64_bytes);
 5925 
 5926   BIND(L_loop);
 5927   if (MaxVectorSize >= 32) {
 5928     fill64(base, 0, xtmp, use64byteVector);
 5929   } else {
 5930     movdqu(Address(base,  0), xtmp);
 5931     movdqu(Address(base, 16), xtmp);
 5932     movdqu(Address(base, 32), xtmp);
 5933     movdqu(Address(base, 48), xtmp);
 5934   }
 5935   addptr(base, 64);
 5936 
 5937   BIND(L_zero_64_bytes);
 5938   subptr(cnt, 8);
 5939   jccb(Assembler::greaterEqual, L_loop);
 5940 
 5941   // Fill the trailing bytes (fewer than 64)
 5942   if (use64byteVector) {
 5943     addptr(cnt, 8);
 5944     jccb(Assembler::equal, L_end);
 5945     fill64_masked(3, base, 0, xtmp, mask, cnt, val, true);
 5946     jmp(L_end);
 5947   } else {
 5948     addptr(cnt, 4);
 5949     jccb(Assembler::less, L_tail);
 5950     if (MaxVectorSize >= 32) {
 5951       vmovdqu(Address(base, 0), xtmp);
 5952     } else {
 5953       movdqu(Address(base,  0), xtmp);
 5954       movdqu(Address(base, 16), xtmp);
 5955     }
 5956   }
 5957   addptr(base, 32);
 5958   subptr(cnt, 4);
 5959 
 5960   BIND(L_tail);
 5961   addptr(cnt, 4);
 5962   jccb(Assembler::lessEqual, L_end);
 5963   if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
 5964     fill32_masked(3, base, 0, xtmp, mask, cnt, val);
 5965   } else {
 5966     decrement(cnt);
 5967 
 5968     BIND(L_sloop);
 5969     movq(Address(base, 0), xtmp);
 5970     addptr(base, 8);
 5971     decrement(cnt);
 5972     jccb(Assembler::greaterEqual, L_sloop);
 5973   }
 5974   BIND(L_end);
 5975 }
 5976 
 5977 int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
 5978   assert(InlineTypeReturnedAsFields, "should only be used when inline types are returned as fields");
 5979   // An inline type might be returned. If fields are in registers we
 5980   // need to allocate an inline type instance and initialize it with
 5981   // the values of the fields.
 5982   Label skip;
 5983   // We only need a new buffered inline type if one was not already returned (bit 0 of rax set)
 5984   testptr(rax, 1);
 5985   jcc(Assembler::zero, skip);
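        // Calling convention sketch: bit 0 of rax distinguishes the two cases --
        //   rax & 1 == 0: rax already holds a buffered oop, nothing to do;
        //   rax & 1 == 1: fields are in registers and rax = ((InlineKlass*) | 1).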
 5986   int call_offset = -1;
 5987 
 5988 #ifdef _LP64
 5989   // The following code is similar to allocate_instance but has some slight differences,
 5990   // e.g. the object size is never zero and is sometimes a compile-time constant, and storing
 5991   // the klass pointer after allocation is unnecessary if vk != nullptr. allocate_instance is not aware of these.
 5992   Label slow_case;
 5993   // 1. Try to allocate a new buffered inline instance either from TLAB or eden space
 5994   mov(rscratch1, rax); // save rax for slow_case since *_allocate may corrupt it if allocation fails
 5995   if (vk != nullptr) {
 5996     // Called from C1, where the return type is statically known.
 5997     movptr(rbx, (intptr_t)vk->get_InlineKlass());
 5998     jint lh = vk->layout_helper();
 5999     assert(lh != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
 6000     if (UseTLAB && !Klass::layout_helper_needs_slow_path(lh)) {
 6001       tlab_allocate(rax, noreg, lh, r13, r14, slow_case);
 6002     } else {
 6003       jmp(slow_case);
 6004     }
 6005   } else {
 6006     // Called from the interpreter. RAX contains ((the InlineKlass* of the return type) | 0x01)
 6007     mov(rbx, rax);
 6008     andptr(rbx, -2);
 6009     if (UseTLAB) {
 6010       movl(r14, Address(rbx, Klass::layout_helper_offset()));
 6011       testl(r14, Klass::_lh_instance_slow_path_bit);
 6012       jcc(Assembler::notZero, slow_case);
 6013       tlab_allocate(rax, r14, 0, r13, r14, slow_case);
 6014     } else {
 6015       jmp(slow_case);
 6016     }
 6017   }
 6018   if (UseTLAB) {
 6019     // 2. Initialize buffered inline instance header
 6020     Register buffer_obj = rax;
 6021     if (UseCompactObjectHeaders) {
 6022       Register mark_word = r13;
 6023       movptr(mark_word, Address(rbx, Klass::prototype_header_offset()));
 6024       movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), mark_word);
 6025     } else {
 6026       movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::inline_type_prototype().value());
 6027       xorl(r13, r13);
 6028       store_klass_gap(buffer_obj, r13);
 6029       if (vk == nullptr) {
 6030         // store_klass corrupts rbx(klass), so save it in r13 for later use (interpreter case only).
 6031         mov(r13, rbx);
 6032       }
 6033       store_klass(buffer_obj, rbx, rscratch1);
 6034     }
 6035     // 3. Initialize its fields with an inline class specific handler
 6036     if (vk != nullptr) {
 6037       call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
 6038     } else {
 6039       movptr(rbx, Address(r13, InstanceKlass::adr_inlineklass_fixed_block_offset()));
 6040       movptr(rbx, Address(rbx, InlineKlass::pack_handler_offset()));
 6041       call(rbx);
 6042     }
 6043     jmp(skip);
 6044   }
 6045   bind(slow_case);
 6046   // We failed to allocate a new inline type, fall back to a runtime
 6047   // call. Some oop fields may be live in registers but we can't
 6048   // tell. The runtime call will take care of preserving them
 6049   // across a GC if one occurs.
 6050   mov(rax, rscratch1);
 6051 #endif
 6052 
 6053   if (from_interpreter) {
 6054     super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
 6055   } else {
 6056     call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
 6057     call_offset = offset();
 6058   }
 6059 
 6060   bind(skip);
 6061   return call_offset;
 6062 }
 6063 
 6064 // Move a value between registers/stack slots and update the reg_state
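      // reg_state is a per-VMReg tri-state (sketch):
      //   reg_readonly - still holds an unconsumed source value, must not be clobbered
      //   reg_writable - its old value has been consumed, safe to overwrite
      //   reg_written  - final value already placed, further writes are skipped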
 6065 bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
 6066   assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
 6067   if (reg_state[to->value()] == reg_written) {
 6068     return true; // Already written
 6069   }
 6070   if (from != to && bt != T_VOID) {
 6071     if (reg_state[to->value()] == reg_readonly) {
 6072       return false; // Not yet writable
 6073     }
 6074     if (from->is_reg()) {
 6075       if (to->is_reg()) {
 6076         if (from->is_XMMRegister()) {
 6077           if (bt == T_DOUBLE) {
 6078             movdbl(to->as_XMMRegister(), from->as_XMMRegister());
 6079           } else {
 6080             assert(bt == T_FLOAT, "must be float");
 6081             movflt(to->as_XMMRegister(), from->as_XMMRegister());
 6082           }
 6083         } else {
 6084           movq(to->as_Register(), from->as_Register());
 6085         }
 6086       } else {
 6087         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6088         Address to_addr = Address(rsp, st_off);
 6089         if (from->is_XMMRegister()) {
 6090           if (bt == T_DOUBLE) {
 6091             movdbl(to_addr, from->as_XMMRegister());
 6092           } else {
 6093             assert(bt == T_FLOAT, "must be float");
 6094             movflt(to_addr, from->as_XMMRegister());
 6095           }
 6096         } else {
 6097           movq(to_addr, from->as_Register());
 6098         }
 6099       }
 6100     } else {
 6101       Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize);
 6102       if (to->is_reg()) {
 6103         if (to->is_XMMRegister()) {
 6104           if (bt == T_DOUBLE) {
 6105             movdbl(to->as_XMMRegister(), from_addr);
 6106           } else {
 6107             assert(bt == T_FLOAT, "must be float");
 6108             movflt(to->as_XMMRegister(), from_addr);
 6109           }
 6110         } else {
 6111           movq(to->as_Register(), from_addr);
 6112         }
 6113       } else {
 6114         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6115         movq(r13, from_addr);
 6116         movq(Address(rsp, st_off), r13);
 6117       }
 6118     }
 6119   }
 6120   // Update register states
 6121   reg_state[from->value()] = reg_writable;
 6122   reg_state[to->value()] = reg_written;
 6123   return true;
 6124 }
 6125 
 6126 // Calculate the extra stack space required for packing or unpacking inline
 6127 // args and adjust the stack pointer
 6128 int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
 6129   // Two additional slots to account for the return address
 6130   int sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size;
 6131   sp_inc = align_up(sp_inc, StackAlignmentInBytes);
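        // Worked example (assuming 4-byte stack slots and 16-byte stack alignment):
        //   args_on_stack = 5 -> sp_inc = (5 + 2) * 4 = 28, aligned up to 32 bytes.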
 6132   // Save the return address, adjust the stack (make sure it is properly
 6133   // 16-byte aligned) and copy the return address to the new top of the stack.
 6134   // The stack will be repaired on return (see MacroAssembler::remove_frame).
 6135   assert(sp_inc > 0, "sanity");
 6136   pop(r13);
 6137   subptr(rsp, sp_inc);
 6138   push(r13);
 6139   return sp_inc;
 6140 }
 6141 
 6142 // Read all fields from an inline type buffer and store the field values in registers/stack slots.
 6143 bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
 6144                                           VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
 6145                                           RegState reg_state[]) {
 6146   assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
 6147   assert(from->is_valid(), "source must be valid");
 6148   bool progress = false;
 6149 #ifdef ASSERT
 6150   const int start_offset = offset();
 6151 #endif
 6152 
 6153   Label L_null, L_notNull;
 6154   // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for)
 6155   Register tmp1 = r10;
 6156   Register tmp2 = r13;
 6157   Register fromReg = noreg;
 6158   ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, -1);
 6159   bool done = true;
 6160   bool mark_done = true;
 6161   VMReg toReg;
 6162   BasicType bt;
 6163   // Check if argument requires a null check
 6164   bool null_check = false;
 6165   VMReg nullCheckReg;
 6166   while (stream.next(nullCheckReg, bt)) {
 6167     if (sig->at(stream.sig_index())._offset == -1) {
 6168       null_check = true;
 6169       break;
 6170     }
 6171   }
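        // Sketch: a SigEntry with _offset == -1 denotes the null marker of a nullable
        // inline type argument; if one is present, a null check on the buffer is emitted.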
 6172   stream.reset(sig_index, to_index);
 6173   while (stream.next(toReg, bt)) {
 6174     assert(toReg->is_valid(), "destination must be valid");
 6175     int idx = (int)toReg->value();
 6176     if (reg_state[idx] == reg_readonly) {
 6177       if (idx != from->value()) {
 6178         mark_done = false;
 6179       }
 6180       done = false;
 6181       continue;
 6182     } else if (reg_state[idx] == reg_written) {
 6183       continue;
 6184     }
 6185     assert(reg_state[idx] == reg_writable, "must be writable");
 6186     reg_state[idx] = reg_written;
 6187     progress = true;
 6188 
 6189     if (fromReg == noreg) {
 6190       if (from->is_reg()) {
 6191         fromReg = from->as_Register();
 6192       } else {
 6193         int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6194         movq(tmp1, Address(rsp, st_off));
 6195         fromReg = tmp1;
 6196       }
 6197       if (null_check) {
 6198         // Nullable inline type argument, emit null check
 6199         testptr(fromReg, fromReg);
 6200         jcc(Assembler::zero, L_null);
 6201       }
 6202     }
 6203     int off = sig->at(stream.sig_index())._offset;
 6204     if (off == -1) {
 6205       assert(null_check, "Missing null check at");
 6206       if (toReg->is_stack()) {
 6207         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6208         movq(Address(rsp, st_off), 1);
 6209       } else {
 6210         movq(toReg->as_Register(), 1);
 6211       }
 6212       continue;
 6213     }
 6214     assert(off > 0, "offset in object should be positive");
 6215     Address fromAddr = Address(fromReg, off);
 6216     if (!toReg->is_XMMRegister()) {
 6217       Register dst = toReg->is_stack() ? tmp2 : toReg->as_Register();
 6218       if (is_reference_type(bt)) {
 6219         load_heap_oop(dst, fromAddr);
 6220       } else {
 6221         bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
 6222         load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
 6223       }
 6224       if (toReg->is_stack()) {
 6225         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6226         movq(Address(rsp, st_off), dst);
 6227       }
 6228     } else if (bt == T_DOUBLE) {
 6229       movdbl(toReg->as_XMMRegister(), fromAddr);
 6230     } else {
 6231       assert(bt == T_FLOAT, "must be float");
 6232       movflt(toReg->as_XMMRegister(), fromAddr);
 6233     }
 6234   }
 6235   if (progress && null_check) {
 6236     if (done) {
 6237       jmp(L_notNull);
 6238       bind(L_null);
 6239       // Set null marker to zero to signal that the argument is null.
 6240       // Also set all oop fields to zero to make the GC happy.
 6241       stream.reset(sig_index, to_index);
 6242       while (stream.next(toReg, bt)) {
 6243         if (sig->at(stream.sig_index())._offset == -1 ||
 6244             bt == T_OBJECT || bt == T_ARRAY) {
 6245           if (toReg->is_stack()) {
 6246             int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6247             movq(Address(rsp, st_off), 0);
 6248           } else {
 6249             xorq(toReg->as_Register(), toReg->as_Register());
 6250           }
 6251         }
 6252       }
 6253       bind(L_notNull);
 6254     } else {
 6255       bind(L_null);
 6256     }
 6257   }
 6258 
 6259   sig_index = stream.sig_index();
 6260   to_index = stream.regs_index();
 6261 
 6262   if (mark_done && reg_state[from->value()] != reg_written) {
 6263     // This is okay because no one else will write to that slot
 6264     reg_state[from->value()] = reg_writable;
 6265   }
 6266   from_index--;
 6267   assert(progress || (start_offset == offset()), "should not emit code");
 6268   return done;
 6269 }
 6270 
 6271 bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
 6272                                         VMRegPair* from, int from_count, int& from_index, VMReg to,
 6273                                         RegState reg_state[], Register val_array) {
 6274   assert(sig->at(sig_index)._bt == T_METADATA, "should be at delimiter");
 6275   assert(to->is_valid(), "destination must be valid");
 6276 
 6277   if (reg_state[to->value()] == reg_written) {
 6278     skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
 6279     return true; // Already written
 6280   }
 6281 
 6282   // TODO 8284443 Isn't it an issue if the code below uses r14 as a tmp while it contains a spilled value?
 6283   // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for).
 6284   Register val_obj_tmp = r11;
 6285   Register from_reg_tmp = r14;
 6286   Register tmp1 = r10;
 6287   Register tmp2 = r13;
 6288   Register tmp3 = rbx;
 6289   Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();
 6290 
 6291   assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);
 6292 
 6293   if (reg_state[to->value()] == reg_readonly) {
 6294     if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
 6295       skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
 6296       return false; // Not yet writable
 6297     }
 6298     val_obj = val_obj_tmp;
 6299   }
 6300 
 6301   int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_OBJECT);
 6302   load_heap_oop(val_obj, Address(val_array, index));
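        // Roughly: val_obj = val_array[vtarg_index], the pre-allocated buffer that
        // the scalarized field values are packed into below.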
 6303 
 6304   ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
 6305   VMReg fromReg;
 6306   BasicType bt;
 6307   Label L_null;
 6308   while (stream.next(fromReg, bt)) {
 6309     assert(fromReg->is_valid(), "source must be valid");
 6310     reg_state[fromReg->value()] = reg_writable;
 6311 
 6312     int off = sig->at(stream.sig_index())._offset;
 6313     if (off == -1) {
 6314       // Nullable inline type argument, emit null check
 6315       Label L_notNull;
 6316       if (fromReg->is_stack()) {
 6317         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6318         testb(Address(rsp, ld_off), 1);
 6319       } else {
 6320         testb(fromReg->as_Register(), 1);
 6321       }
 6322       jcc(Assembler::notZero, L_notNull);
 6323       movptr(val_obj, 0);
 6324       jmp(L_null);
 6325       bind(L_notNull);
 6326       continue;
 6327     }
 6328 
 6329     assert(off > 0, "offset in object should be positive");
 6330     size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
 6331 
 6332     Address dst(val_obj, off);
 6333     if (!fromReg->is_XMMRegister()) {
 6334       Register src;
 6335       if (fromReg->is_stack()) {
 6336         src = from_reg_tmp;
 6337         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
 6338         load_sized_value(src, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
 6339       } else {
 6340         src = fromReg->as_Register();
 6341       }
 6342       assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
 6343       if (is_reference_type(bt)) {
 6344         store_heap_oop(dst, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
 6345       } else {
 6346         store_sized_value(dst, src, size_in_bytes);
 6347       }
 6348     } else if (bt == T_DOUBLE) {
 6349       movdbl(dst, fromReg->as_XMMRegister());
 6350     } else {
 6351       assert(bt == T_FLOAT, "must be float");
 6352       movflt(dst, fromReg->as_XMMRegister());
 6353     }
 6354   }
 6355   bind(L_null);
 6356   sig_index = stream.sig_index();
 6357   from_index = stream.regs_index();
 6358 
 6359   assert(reg_state[to->value()] == reg_writable, "must have already been read");
 6360   bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);
 6361   assert(success, "to register must be writable");
 6362   return true;
 6363 }
 6364 
 6365 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
 6366   return reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
 6367 }
 6368 
 6369 void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
 6370   assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 6371   if (needs_stack_repair) {
 6372     movq(rbp, Address(rsp, initial_framesize));
 6373     // The stack increment resides just below the saved rbp
 6374     addq(rsp, Address(rsp, initial_framesize - wordSize));
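        // Frame layout with stack repair (sketch):
        //   [rsp + initial_framesize]            saved rbp
        //   [rsp + initial_framesize - wordSize] sp_inc recorded at frame setup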
 6375   } else {
 6376     if (initial_framesize > 0) {
 6377       addq(rsp, initial_framesize);
 6378     }
 6379     pop(rbp);
 6380   }
 6381 }
 6382 
 6383 // Clearing constant sized memory using YMM/ZMM registers.
 6384 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
 6385   assert(UseAVX > 2 && VM_Version::supports_avx512vl(), "");
 6386   bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
 6387 
 6388   int vector64_count = (cnt & (~0x7)) >> 3;
 6389   cnt = cnt & 0x7;
 6390   const int fill64_per_loop = 4;
 6391   const int max_unrolled_fill64 = 8;
 6392 
 6393   // 64-byte initialization loop.
 6394   vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
 6395   int start64 = 0;
 6396   if (vector64_count > max_unrolled_fill64) {
 6397     Label LOOP;
 6398     Register index = rtmp;
 6399 
 6400     start64 = vector64_count - (vector64_count % fill64_per_loop);
 6401 
 6402     movl(index, 0);

 6452         break;
 6453       case 7:
 6454         if (use64byteVector) {
 6455           movl(rtmp, 0x7F);
 6456           kmovwl(mask, rtmp);
 6457           evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
 6458         } else {
 6459           evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
 6460           movl(rtmp, 0x7);
 6461           kmovwl(mask, rtmp);
 6462           evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
 6463         }
 6464         break;
 6465       default:
 6466         fatal("Unexpected length : %d\n",cnt);
 6467         break;
 6468     }
 6469   }
 6470 }
 6471 
 6472 void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp,
 6473                                bool is_large, bool word_copy_only, KRegister mask) {
 6474   // cnt      - number of qwords (8-byte words).
 6475   // base     - start address, qword aligned.
 6476   // is_large - set if the optimizer knows cnt is larger than InitArrayShortSize
 6477   assert(base == rdi, "base register must be rdi for rep stos");
 6478   assert(val == rax,  "val register must be rax for rep stos");
 6479   assert(cnt == rcx,  "cnt register must be rcx for rep stos");
 6480   assert(InitArrayShortSize % BytesPerLong == 0,
 6481     "InitArrayShortSize should be a multiple of BytesPerLong");
 6482 
 6483   Label DONE;



 6484 
 6485   if (!is_large) {
 6486     Label LOOP, LONG;
 6487     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
 6488     jccb(Assembler::greater, LONG);
 6489 
 6490     decrement(cnt);
 6491     jccb(Assembler::negative, DONE); // Zero length
 6492 
 6493     // Use individual pointer-sized stores for small counts:
 6494     BIND(LOOP);
 6495     movptr(Address(base, cnt, Address::times_ptr), val);
 6496     decrement(cnt);
 6497     jccb(Assembler::greaterEqual, LOOP);
 6498     jmpb(DONE);
 6499 
 6500     BIND(LONG);
 6501   }
 6502 
 6503   // Use longer rep-prefixed ops for non-small counts:
 6504   if (UseFastStosb && !word_copy_only) {
 6505     shlptr(cnt, 3); // convert to number of bytes
 6506     rep_stosb();
 6507   } else if (UseXMMForObjInit) {
 6508     xmm_clear_mem(base, cnt, val, xtmp, mask);
 6509   } else {
 6510     rep_stos();
 6511   }
 6512 
 6513   BIND(DONE);
 6514 }
 6515 
 6516 #endif //COMPILER2_OR_JVMCI
 6517 
 6518 
 6519 void MacroAssembler::generate_fill(BasicType t, bool aligned,
 6520                                    Register to, Register value, Register count,
 6521                                    Register rtmp, XMMRegister xtmp) {
 6522   ShortBranchVerifier sbv(this);
 6523   assert_different_registers(to, value, count, rtmp);
 6524   Label L_exit;
 6525   Label L_fill_2_bytes, L_fill_4_bytes;
 6526 
 6527 #if defined(COMPILER2)
 6528   if(MaxVectorSize >=32 &&

10345 
10346   // Load top.
10347   movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
10348 
10349   // Check if the lock-stack is full.
10350   cmpl(top, LockStack::end_offset());
10351   jcc(Assembler::greaterEqual, slow);
10352 
10353   // Check for recursion.
10354   cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
10355   jcc(Assembler::equal, push);
10356 
10357   // Check header for monitor (0b10).
10358   testptr(reg_rax, markWord::monitor_value);
10359   jcc(Assembler::notZero, slow);
10360 
10361   // Try to lock. Transition lock bits 0b01 => 0b00
10362   movptr(tmp, reg_rax);
10363   andptr(tmp, ~(int32_t)markWord::unlocked_value);
10364   orptr(reg_rax, markWord::unlocked_value);
10365   if (EnableValhalla) {
10366     // Mask the inline_type bit so that we go to the slow path if the object is an inline type
10367     andptr(reg_rax, ~((int) markWord::inline_type_bit_in_place));
10368   }
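        // Mark word low-bit sketch: 0b01 = unlocked, 0b00 = fast-locked, 0b10 = monitor;
        // the CAS below attempts the 0b01 -> 0b00 transition.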
10369   lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
10370   jcc(Assembler::notEqual, slow);
10371 
10372   // Restore top, CAS clobbers register.
10373   movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
10374 
10375   bind(push);
10376   // After successful lock, push object on lock-stack.
10377   movptr(Address(thread, top), obj);
10378   incrementl(top, oopSize);
10379   movl(Address(thread, JavaThread::lock_stack_top_offset()), top);
10380 }
10381 
10382 // Implements lightweight-unlocking.
10383 //
10384 // obj: the object to be unlocked
10385 // reg_rax: rax
10386 // thread: the thread
10387 // tmp: a temporary register
10388 void MacroAssembler::lightweight_unlock(Register obj, Register reg_rax, Register tmp, Label& slow) {
< prev index next >