11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/assembler.hpp"
26 #include "asm/assembler.inline.hpp"
27 #include "code/aotCodeCache.hpp"
28 #include "code/compiledIC.hpp"
29 #include "compiler/compiler_globals.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "crc32c.h"
32 #include "gc/shared/barrierSet.hpp"
33 #include "gc/shared/barrierSetAssembler.hpp"
34 #include "gc/shared/collectedHeap.inline.hpp"
35 #include "gc/shared/tlab_globals.hpp"
36 #include "interpreter/bytecodeHistogram.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "interpreter/interpreterRuntime.hpp"
39 #include "jvm.h"
40 #include "memory/resourceArea.hpp"
41 #include "memory/universe.hpp"
42 #include "oops/accessDecorators.hpp"
43 #include "oops/compressedKlass.inline.hpp"
44 #include "oops/compressedOops.inline.hpp"
45 #include "oops/klass.inline.hpp"
46 #include "prims/methodHandles.hpp"
47 #include "runtime/continuation.hpp"
48 #include "runtime/interfaceSupport.inline.hpp"
49 #include "runtime/javaThread.hpp"
50 #include "runtime/jniHandles.hpp"
51 #include "runtime/objectMonitor.hpp"
52 #include "runtime/os.hpp"
53 #include "runtime/safepoint.hpp"
54 #include "runtime/safepointMechanism.hpp"
55 #include "runtime/sharedRuntime.hpp"
56 #include "runtime/stubRoutines.hpp"
57 #include "utilities/checkedCast.hpp"
58 #include "utilities/macros.hpp"
59
60 #ifdef PRODUCT
61 #define BLOCK_COMMENT(str) /* nothing */
62 #define STOP(error) stop(error)
63 #else
64 #define BLOCK_COMMENT(str) block_comment(str)
65 #define STOP(error) block_comment(error); stop(error)
66 #endif
67
68 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
69
70 #ifdef ASSERT
71 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
72 #endif
73
74 static const Assembler::Condition reverse[] = {
75 Assembler::noOverflow /* overflow = 0x0 */ ,
76 Assembler::overflow /* noOverflow = 0x1 */ ,
77 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
78 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
1286 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1287 assert_different_registers(arg_0, c_rarg1, c_rarg2);
1288 assert_different_registers(arg_1, c_rarg2);
1289 pass_arg2(this, arg_2);
1290 pass_arg1(this, arg_1);
1291 pass_arg0(this, arg_0);
1292 call_VM_leaf(entry_point, 3);
1293 }
1294
1295 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
1296 assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3);
1297 assert_different_registers(arg_1, c_rarg2, c_rarg3);
1298 assert_different_registers(arg_2, c_rarg3);
1299 pass_arg3(this, arg_3);
1300 pass_arg2(this, arg_2);
1301 pass_arg1(this, arg_1);
1302 pass_arg0(this, arg_0);
1303 call_VM_leaf(entry_point, 3);
1304 }
1305
1306 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
1307 pass_arg0(this, arg_0);
1308 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1309 }
1310
1311 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1312 assert_different_registers(arg_0, c_rarg1);
1313 pass_arg1(this, arg_1);
1314 pass_arg0(this, arg_0);
1315 MacroAssembler::call_VM_leaf_base(entry_point, 2);
1316 }
1317
1318 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1319 assert_different_registers(arg_0, c_rarg1, c_rarg2);
1320 assert_different_registers(arg_1, c_rarg2);
1321 pass_arg2(this, arg_2);
1322 pass_arg1(this, arg_1);
1323 pass_arg0(this, arg_0);
1324 MacroAssembler::call_VM_leaf_base(entry_point, 3);
1325 }
2339 lea(rscratch, src);
2340 Assembler::mulss(dst, Address(rscratch, 0));
2341 }
2342 }
2343
2344 void MacroAssembler::null_check(Register reg, int offset) {
2345 if (needs_explicit_null_check(offset)) {
2346 // provoke OS null exception if reg is null by
2347 // accessing M[reg] w/o changing any (non-CC) registers
2348 // NOTE: cmpl is plenty here to provoke a segv
2349 cmpptr(rax, Address(reg, 0));
2350 // Note: should probably use testl(rax, Address(reg, 0));
2351 // may be shorter code (however, this version of
2352 // testl needs to be implemented first)
2353 } else {
2354 // nothing to do, (later) access of M[reg + offset]
2355 // will provoke OS null exception if reg is null
2356 }
2357 }
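// Illustrative use (hypothetical caller, not part of this file): a stub that is about to
// read a field at a small, known offset typically relies on the implicit check, e.g.
//   __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());
//   __ movptr(rbx, Address(rcx_recv, oopDesc::klass_offset_in_bytes()));
// Here needs_explicit_null_check() returns false, so no code is emitted and the movptr
// itself raises the SIGSEGV that the signal handler maps to a NullPointerException; only
// offsets beyond the protected first page force the explicit cmpptr probe above.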
2358
2359 void MacroAssembler::os_breakpoint() {
2360 // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
2361 // (e.g., MSVC can't call ps() otherwise)
2362 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
2363 }
2364
2365 void MacroAssembler::unimplemented(const char* what) {
2366 const char* buf = nullptr;
2367 {
2368 ResourceMark rm;
2369 stringStream ss;
2370 ss.print("unimplemented: %s", what);
2371 buf = code_string(ss.as_string());
2372 }
2373 stop(buf);
2374 }
2375
2376 #define XSTATE_BV 0x200
2377
2378 void MacroAssembler::pop_CPU_state() {
3421 }
3422
3423 // C++ bool manipulation
3424 void MacroAssembler::testbool(Register dst) {
3425 if(sizeof(bool) == 1)
3426 testb(dst, 0xff);
3427 else if(sizeof(bool) == 2) {
3428 // testw implementation needed for two byte bools
3429 ShouldNotReachHere();
3430 } else if(sizeof(bool) == 4)
3431 testl(dst, dst);
3432 else
3433 // unsupported
3434 ShouldNotReachHere();
3435 }
3436
3437 void MacroAssembler::testptr(Register dst, Register src) {
3438 testq(dst, src);
3439 }
3440
3441 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
3442 void MacroAssembler::tlab_allocate(Register obj,
3443 Register var_size_in_bytes,
3444 int con_size_in_bytes,
3445 Register t1,
3446 Register t2,
3447 Label& slow_case) {
3448 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3449 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
3450 }
3451
3452 RegSet MacroAssembler::call_clobbered_gp_registers() {
3453 RegSet regs;
3454 regs += RegSet::of(rax, rcx, rdx);
3455 #ifndef _WINDOWS
3456 regs += RegSet::of(rsi, rdi);
3457 #endif
3458 regs += RegSet::range(r8, r11);
3459 if (UseAPX) {
3460 regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1));
3624 xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
3625 if (UseIncDec) {
3626 shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
3627 } else {
3628 shrptr(index, 2); // use 2 instructions to avoid partial flag stall
3629 shrptr(index, 1);
3630 }
3631
3632 // initialize remaining object fields: index is a multiple of 2 now
3633 {
3634 Label loop;
3635 bind(loop);
3636 movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
3637 decrement(index);
3638 jcc(Assembler::notZero, loop);
3639 }
3640
3641 bind(done);
3642 }
3643
3644 // Look up the method for a megamorphic invokeinterface call.
3645 // The target method is determined by <intf_klass, itable_index>.
3646 // The receiver klass is in recv_klass.
3647 // On success, the result will be in method_result, and execution falls through.
3648 // On failure, execution transfers to the given label.
3649 void MacroAssembler::lookup_interface_method(Register recv_klass,
3650 Register intf_klass,
3651 RegisterOrConstant itable_index,
3652 Register method_result,
3653 Register scan_temp,
3654 Label& L_no_such_interface,
3655 bool return_method) {
3656 assert_different_registers(recv_klass, intf_klass, scan_temp);
3657 assert_different_registers(method_result, intf_klass, scan_temp);
3658 assert(recv_klass != method_result || !return_method,
3659 "recv_klass can be destroyed when method isn't needed");
3660
3661 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3662 "caller must use same register for non-constant itable index as for method");
3663
3892
3893 // Hacked jcc, which "knows" that L_fallthrough, at least, is in
3894 // range of a jccb. If this routine grows larger, reconsider at
3895 // least some of these.
3896 #define local_jcc(assembler_cond, label) \
3897 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
3898 else jcc( assembler_cond, label) /*omit semi*/
3899
3900 // Hacked jmp, which may only be used just before L_fallthrough.
3901 #define final_jmp(label) \
3902 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
3903 else jmp(label) /*omit semi*/
3904
3905 // If the pointers are equal, we are done (e.g., String[] elements).
3906 // This self-check enables sharing of secondary supertype arrays among
3907 // non-primary types such as array-of-interface. Otherwise, each such
3908 // type would need its own customized SSA.
3909 // We move this check to the front of the fast path because many
3910 // type checks are in fact trivially successful in this manner,
3911 // so we get a nicely predicted branch right at the start of the check.
3912 cmpptr(sub_klass, super_klass);
3913 local_jcc(Assembler::equal, *L_success);
3914
3915 // Check the supertype display:
3916 if (must_load_sco) {
3917 // Positive movl does right thing on LP64.
3918 movl(temp_reg, super_check_offset_addr);
3919 super_check_offset = RegisterOrConstant(temp_reg);
3920 }
3921 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
3922 cmpptr(super_klass, super_check_addr); // load displayed supertype
3923
3924 // This check has worked decisively for primary supers.
3925 // Secondary supers are sought in the super_cache ('super_cache_addr').
3926 // (Secondary supers are interfaces and very deeply nested subtypes.)
3927 // This works in the same check above because of a tricky aliasing
3928 // between the super_cache and the primary super display elements.
3929 // (The 'super_check_addr' can address either, as the case requires.)
3930 // Note that the cache is updated below if it does not help us find
3931 // what we need immediately.
4674 } else {
4675 Label L;
4676 jccb(negate_condition(cc), L);
4677 movl(dst, src);
4678 bind(L);
4679 }
4680 }
4681
4682 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
4683 if (VM_Version::supports_cmov()) {
4684 cmovl(cc, dst, src);
4685 } else {
4686 Label L;
4687 jccb(negate_condition(cc), L);
4688 movl(dst, src);
4689 bind(L);
4690 }
4691 }
4692
4693 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
4694 if (!VerifyOops) return;
4695
4696 BLOCK_COMMENT("verify_oop {");
4697 push(rscratch1);
4698 push(rax); // save rax
4699 push(reg); // pass register argument
4700
4701 // Pass register number to verify_oop_subroutine
4702 const char* b = nullptr;
4703 {
4704 ResourceMark rm;
4705 stringStream ss;
4706 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
4707 b = code_string(ss.as_string());
4708 }
4709 AddressLiteral buffer((address) b, external_word_Relocation::spec_for_immediate());
4710 pushptr(buffer.addr(), rscratch1);
4711
4712 // call indirectly to solve generation ordering problem
4713 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4714 call(rax);
4733 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
4734 int stackElementSize = Interpreter::stackElementSize;
4735 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
4736 #ifdef ASSERT
4737 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
4738 assert(offset1 - offset == stackElementSize, "correct arithmetic");
4739 #endif
4740 Register scale_reg = noreg;
4741 Address::ScaleFactor scale_factor = Address::no_scale;
4742 if (arg_slot.is_constant()) {
4743 offset += arg_slot.as_constant() * stackElementSize;
4744 } else {
4745 scale_reg = arg_slot.as_register();
4746 scale_factor = Address::times(stackElementSize);
4747 }
4748 offset += wordSize; // return PC is on stack
4749 return Address(rsp, scale_reg, scale_factor, offset);
4750 }
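// Worked example (illustrative values): with stackElementSize == wordSize == 8 and
// extra_slot_offset == 0, a constant arg_slot of 2 produces
//   offset = Interpreter::expr_offset_in_bytes(0) + 2*8 + 8   // trailing +8 skips the return PC
// and the result is Address(rsp, noreg, no_scale, offset); a register arg_slot instead
// yields Address(rsp, arg_slot, Address::times_8, Interpreter::expr_offset_in_bytes(0) + 8).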
4751
4752 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
4753 if (!VerifyOops) return;
4754
4755 push(rscratch1);
4756 push(rax); // save rax,
4757 // addr may contain rsp so we will have to adjust it based on the push
4758 // we just did (and on 64 bit we do two pushes)
4759 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
4760 // stores rax into addr which is backwards of what was intended.
4761 if (addr.uses(rsp)) {
4762 lea(rax, addr);
4763 pushptr(Address(rax, 2 * BytesPerWord));
4764 } else {
4765 pushptr(addr);
4766 }
4767
4768 // Pass register number to verify_oop_subroutine
4769 const char* b = nullptr;
4770 {
4771 ResourceMark rm;
4772 stringStream ss;
4773 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
5127
5128 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
5129 // get mirror
5130 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
5131 load_method_holder(mirror, method);
5132 movptr(mirror, Address(mirror, mirror_offset));
5133 resolve_oop_handle(mirror, tmp);
5134 }
5135
5136 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5137 load_method_holder(rresult, rmethod);
5138 movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5139 }
5140
5141 void MacroAssembler::load_method_holder(Register holder, Register method) {
5142 movptr(holder, Address(method, Method::const_offset())); // ConstMethod*
5143 movptr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5144 movptr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5145 }
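// The three dependent loads above walk the metadata chain
//   Method* -> ConstMethod* -> ConstantPool* -> pool holder (InstanceKlass*),
// so a hypothetical caller that also needs the holder's mirror can continue with e.g.
//   __ load_method_holder(rax, rbx_method);
//   __ movptr(rax, Address(rax, in_bytes(Klass::java_mirror_offset())));
// which is essentially what load_mirror() above does before resolving the OopHandle.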
5146
5147 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5148 assert(UseCompactObjectHeaders, "expect compact object headers");
5149 movq(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5150 shrq(dst, markWord::klass_shift);
5151 }
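// Sketch of the compact-header layout assumed here: with UseCompactObjectHeaders the
// narrow Klass* is stored in the upper bits of the 64-bit mark word, so
//   narrow_klass = mark >> markWord::klass_shift
// and load_klass()/cmp_klass() below either decode it via decode_klass_not_null() or
// compare the narrow values directly.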
5152
5153 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
5154 assert_different_registers(src, tmp);
5155 assert_different_registers(dst, tmp);
5156
5157 if (UseCompactObjectHeaders) {
5158 load_narrow_klass_compact(dst, src);
5159 decode_klass_not_null(dst, tmp);
5160 } else if (UseCompressedClassPointers) {
5161 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5162 decode_klass_not_null(dst, tmp);
5163 } else {
5164 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5165 }
5166 }
5167
5168 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
5169 assert(!UseCompactObjectHeaders, "not with compact headers");
5170 assert_different_registers(src, tmp);
5171 assert_different_registers(dst, tmp);
5172 if (UseCompressedClassPointers) {
5173 encode_klass_not_null(src, tmp);
5174 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5175 } else {
5176 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5177 }
5178 }
5179
5180 void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
5181 if (UseCompactObjectHeaders) {
5182 assert(tmp != noreg, "need tmp");
5183 assert_different_registers(klass, obj, tmp);
5184 load_narrow_klass_compact(tmp, obj);
5185 cmpl(klass, tmp);
5186 } else if (UseCompressedClassPointers) {
5187 cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
5213 bool as_raw = (decorators & AS_RAW) != 0;
5214 if (as_raw) {
5215 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1);
5216 } else {
5217 bs->load_at(this, decorators, type, dst, src, tmp1);
5218 }
5219 }
5220
5221 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
5222 Register tmp1, Register tmp2, Register tmp3) {
5223 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
5224 decorators = AccessInternal::decorator_fixup(decorators, type);
5225 bool as_raw = (decorators & AS_RAW) != 0;
5226 if (as_raw) {
5227 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5228 } else {
5229 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5230 }
5231 }
5232
5233 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
5234 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1);
5235 }
5236
5237 // Doesn't do verification, generates fixed size code
5238 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
5239 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1);
5240 }
5241
5242 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
5243 Register tmp2, Register tmp3, DecoratorSet decorators) {
5244 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
5245 }
5246
5247 // Used for storing nulls.
5248 void MacroAssembler::store_heap_oop_null(Address dst) {
5249 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
5250 }
5251
5252 void MacroAssembler::store_klass_gap(Register dst, Register src) {
5572 Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
5573 }
5574
5575 void MacroAssembler::reinit_heapbase() {
5576 if (UseCompressedOops) {
5577 if (Universe::heap() != nullptr) {
5578 if (CompressedOops::base() == nullptr) {
5579 MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
5580 } else {
5581 mov64(r12_heapbase, (int64_t)CompressedOops::base());
5582 }
5583 } else {
5584 movptr(r12_heapbase, ExternalAddress(CompressedOops::base_addr()));
5585 }
5586 }
5587 }
5588
5589 #if COMPILER2_OR_JVMCI
5590
5591 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
5592 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5593 // cnt - number of qwords (8-byte words).
5594 // base - start address, qword aligned.
5595 Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
5596 bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
5597 if (use64byteVector) {
5598 vpxor(xtmp, xtmp, xtmp, AVX_512bit);
5599 } else if (MaxVectorSize >= 32) {
5600 vpxor(xtmp, xtmp, xtmp, AVX_256bit);
5601 } else {
5602 pxor(xtmp, xtmp);
5603 }
5604 jmp(L_zero_64_bytes);
5605
5606 BIND(L_loop);
5607 if (MaxVectorSize >= 32) {
5608 fill64(base, 0, xtmp, use64byteVector);
5609 } else {
5610 movdqu(Address(base, 0), xtmp);
5611 movdqu(Address(base, 16), xtmp);
5612 movdqu(Address(base, 32), xtmp);
5613 movdqu(Address(base, 48), xtmp);
5614 }
5615 addptr(base, 64);
5616
5617 BIND(L_zero_64_bytes);
5618 subptr(cnt, 8);
5619 jccb(Assembler::greaterEqual, L_loop);
5620
5621 // Copy trailing 64 bytes
5622 if (use64byteVector) {
5623 addptr(cnt, 8);
5624 jccb(Assembler::equal, L_end);
5625 fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true);
5626 jmp(L_end);
5627 } else {
5628 addptr(cnt, 4);
5629 jccb(Assembler::less, L_tail);
5630 if (MaxVectorSize >= 32) {
5631 vmovdqu(Address(base, 0), xtmp);
5632 } else {
5633 movdqu(Address(base, 0), xtmp);
5634 movdqu(Address(base, 16), xtmp);
5635 }
5636 }
5637 addptr(base, 32);
5638 subptr(cnt, 4);
5639
5640 BIND(L_tail);
5641 addptr(cnt, 4);
5642 jccb(Assembler::lessEqual, L_end);
5643 if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
5644 fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp);
5645 } else {
5646 decrement(cnt);
5647
5648 BIND(L_sloop);
5649 movq(Address(base, 0), xtmp);
5650 addptr(base, 8);
5651 decrement(cnt);
5652 jccb(Assembler::greaterEqual, L_sloop);
5653 }
5654 BIND(L_end);
5655 }
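// Worked example (illustrative, MaxVectorSize >= 32 and no 64-byte vectors): for cnt == 13
// qwords the loop above clears one 64-byte block (cnt 13 -> 5 -> -3, loop exits), the
// trailing code adds 4 (cnt == 1, not negative) and issues one 32-byte store, subtracts 4
// (cnt == -3), and the tail adds 4 again (cnt == 1), so the final qword is written either
// by a masked 32-byte store (AVX-512VL) or by the scalar movq loop: 8 + 4 + 1 == 13.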
5656
5657 // Clearing constant sized memory using YMM/ZMM registers.
5658 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5659 assert(UseAVX > 2 && VM_Version::supports_avx512vl(), "");
5660 bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
5661
5662 int vector64_count = (cnt & (~0x7)) >> 3;
5663 cnt = cnt & 0x7;
5664 const int fill64_per_loop = 4;
5665 const int max_unrolled_fill64 = 8;
5666
5667 // 64 byte initialization loop.
5668 vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
5669 int start64 = 0;
5670 if (vector64_count > max_unrolled_fill64) {
5671 Label LOOP;
5672 Register index = rtmp;
5673
5674 start64 = vector64_count - (vector64_count % fill64_per_loop);
5675
5676 movl(index, 0);
5726 break;
5727 case 7:
5728 if (use64byteVector) {
5729 movl(rtmp, 0x7F);
5730 kmovwl(mask, rtmp);
5731 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
5732 } else {
5733 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
5734 movl(rtmp, 0x7);
5735 kmovwl(mask, rtmp);
5736 evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
5737 }
5738 break;
5739 default:
5740 fatal("Unexpected length : %d\n",cnt);
5741 break;
5742 }
5743 }
5744 }
5745
5746 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
5747 bool is_large, KRegister mask) {
5748 // cnt - number of qwords (8-byte words).
5749 // base - start address, qword aligned.
5750 // is_large - if optimizers know cnt is larger than InitArrayShortSize
5751 assert(base==rdi, "base register must be edi for rep stos");
5752 assert(tmp==rax, "tmp register must be eax for rep stos");
5753 assert(cnt==rcx, "cnt register must be ecx for rep stos");
5754 assert(InitArrayShortSize % BytesPerLong == 0,
5755 "InitArrayShortSize should be the multiple of BytesPerLong");
5756
5757 Label DONE;
5758 if (!is_large || !UseXMMForObjInit) {
5759 xorptr(tmp, tmp);
5760 }
5761
5762 if (!is_large) {
5763 Label LOOP, LONG;
5764 cmpptr(cnt, InitArrayShortSize/BytesPerLong);
5765 jccb(Assembler::greater, LONG);
5766
5767 decrement(cnt);
5768 jccb(Assembler::negative, DONE); // Zero length
5769
5770 // Use individual pointer-sized stores for small counts:
5771 BIND(LOOP);
5772 movptr(Address(base, cnt, Address::times_ptr), tmp);
5773 decrement(cnt);
5774 jccb(Assembler::greaterEqual, LOOP);
5775 jmpb(DONE);
5776
5777 BIND(LONG);
5778 }
5779
5780 // Use longer rep-prefixed ops for non-small counts:
5781 if (UseFastStosb) {
5782 shlptr(cnt, 3); // convert to number of bytes
5783 rep_stosb();
5784 } else if (UseXMMForObjInit) {
5785 xmm_clear_mem(base, cnt, tmp, xtmp, mask);
5786 } else {
5787 rep_stos();
5788 }
5789
5790 BIND(DONE);
5791 }
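// Strategy summary of the routine above (a sketch, not a separate API): small counts
// (at most InitArrayShortSize/BytesPerLong, only tested when !is_large) use the short
// pointer-store loop; larger counts use "rep stosb" when UseFastStosb (cnt is first
// shifted from qwords to bytes), the XMM/YMM/ZMM path when UseXMMForObjInit, and the
// plain rep-stos path otherwise. The fixed rdi/rcx/rax register assignments asserted at
// entry match the implicit operands of the rep-string instructions.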
5792
5793 #endif //COMPILER2_OR_JVMCI
5794
5795
5796 void MacroAssembler::generate_fill(BasicType t, bool aligned,
5797 Register to, Register value, Register count,
5798 Register rtmp, XMMRegister xtmp) {
5799 ShortBranchVerifier sbv(this);
5800 assert_different_registers(to, value, count, rtmp);
5801 Label L_exit;
5802 Label L_fill_2_bytes, L_fill_4_bytes;
5803
5804 #if defined(COMPILER2)
5805 if(MaxVectorSize >=32 &&
9685
9686 // Load top.
9687 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
9688
9689 // Check if the lock-stack is full.
9690 cmpl(top, LockStack::end_offset());
9691 jcc(Assembler::greaterEqual, slow);
9692
9693 // Check for recursion.
9694 cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
9695 jcc(Assembler::equal, push);
9696
9697 // Check header for monitor (0b10).
9698 testptr(reg_rax, markWord::monitor_value);
9699 jcc(Assembler::notZero, slow);
9700
9701 // Try to lock. Transition lock bits 0b01 => 0b00
9702 movptr(tmp, reg_rax);
9703 andptr(tmp, ~(int32_t)markWord::unlocked_value);
9704 orptr(reg_rax, markWord::unlocked_value);
9705 lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
9706 jcc(Assembler::notEqual, slow);
9707
9708 // Restore top, CAS clobbers register.
9709 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
9710
9711 bind(push);
9712 // After successful lock, push object on lock-stack.
9713 movptr(Address(thread, top), obj);
9714 incrementl(top, oopSize);
9715 movl(Address(thread, JavaThread::lock_stack_top_offset()), top);
9716 }
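// Sketch of the fast-lock transition above (assuming the usual mark word encoding): an
// unlocked header ends in 0b01, so the code builds
//   tmp = mark & ~unlocked_value   // desired header, low bits 0b00 (fast-locked)
//   rax = mark |  unlocked_value   // expected header, low bits 0b01
// and the lock-prefixed cmpxchg only succeeds if the object is still unlocked. Recursive
// locking is detected earlier by comparing obj with the previous lock-stack entry, and a
// monitor header (0b10) or a lost race falls through to the slow path.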
9717
9718 // Implements lightweight-unlocking.
9719 //
9720 // obj: the object to be unlocked
9721 // reg_rax: rax
9722 // thread: the thread
9723 // tmp: a temporary register
9724 void MacroAssembler::lightweight_unlock(Register obj, Register reg_rax, Register tmp, Label& slow) {
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/assembler.hpp"
26 #include "asm/assembler.inline.hpp"
27 #include "code/aotCodeCache.hpp"
28 #include "code/compiledIC.hpp"
29 #include "compiler/compiler_globals.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "ci/ciInlineKlass.hpp"
32 #include "crc32c.h"
33 #include "gc/shared/barrierSet.hpp"
34 #include "gc/shared/barrierSetAssembler.hpp"
35 #include "gc/shared/collectedHeap.inline.hpp"
36 #include "gc/shared/tlab_globals.hpp"
37 #include "interpreter/bytecodeHistogram.hpp"
38 #include "interpreter/interpreter.hpp"
39 #include "interpreter/interpreterRuntime.hpp"
40 #include "jvm.h"
41 #include "memory/resourceArea.hpp"
42 #include "memory/universe.hpp"
43 #include "oops/accessDecorators.hpp"
44 #include "oops/compressedKlass.inline.hpp"
45 #include "oops/compressedOops.inline.hpp"
46 #include "oops/klass.inline.hpp"
47 #include "oops/resolvedFieldEntry.hpp"
48 #include "prims/methodHandles.hpp"
49 #include "runtime/continuation.hpp"
50 #include "runtime/interfaceSupport.inline.hpp"
51 #include "runtime/javaThread.hpp"
52 #include "runtime/jniHandles.hpp"
53 #include "runtime/objectMonitor.hpp"
54 #include "runtime/os.hpp"
55 #include "runtime/safepoint.hpp"
56 #include "runtime/safepointMechanism.hpp"
57 #include "runtime/sharedRuntime.hpp"
58 #include "runtime/signature_cc.hpp"
59 #include "runtime/stubRoutines.hpp"
60 #include "utilities/checkedCast.hpp"
61 #include "utilities/macros.hpp"
62 #include "vmreg_x86.inline.hpp"
63 #ifdef COMPILER2
64 #include "opto/output.hpp"
65 #endif
66
67 #ifdef PRODUCT
68 #define BLOCK_COMMENT(str) /* nothing */
69 #define STOP(error) stop(error)
70 #else
71 #define BLOCK_COMMENT(str) block_comment(str)
72 #define STOP(error) block_comment(error); stop(error)
73 #endif
74
75 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
76
77 #ifdef ASSERT
78 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
79 #endif
80
81 static const Assembler::Condition reverse[] = {
82 Assembler::noOverflow /* overflow = 0x0 */ ,
83 Assembler::overflow /* noOverflow = 0x1 */ ,
84 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
85 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
1293 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1294 assert_different_registers(arg_0, c_rarg1, c_rarg2);
1295 assert_different_registers(arg_1, c_rarg2);
1296 pass_arg2(this, arg_2);
1297 pass_arg1(this, arg_1);
1298 pass_arg0(this, arg_0);
1299 call_VM_leaf(entry_point, 3);
1300 }
1301
1302 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
1303 assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3);
1304 assert_different_registers(arg_1, c_rarg2, c_rarg3);
1305 assert_different_registers(arg_2, c_rarg3);
1306 pass_arg3(this, arg_3);
1307 pass_arg2(this, arg_2);
1308 pass_arg1(this, arg_1);
1309 pass_arg0(this, arg_0);
1310 call_VM_leaf(entry_point, 3);
1311 }
1312
1313 void MacroAssembler::super_call_VM_leaf(address entry_point) {
1314 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1315 }
1316
1317 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
1318 pass_arg0(this, arg_0);
1319 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1320 }
1321
1322 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1323 assert_different_registers(arg_0, c_rarg1);
1324 pass_arg1(this, arg_1);
1325 pass_arg0(this, arg_0);
1326 MacroAssembler::call_VM_leaf_base(entry_point, 2);
1327 }
1328
1329 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1330 assert_different_registers(arg_0, c_rarg1, c_rarg2);
1331 assert_different_registers(arg_1, c_rarg2);
1332 pass_arg2(this, arg_2);
1333 pass_arg1(this, arg_1);
1334 pass_arg0(this, arg_0);
1335 MacroAssembler::call_VM_leaf_base(entry_point, 3);
1336 }
2350 lea(rscratch, src);
2351 Assembler::mulss(dst, Address(rscratch, 0));
2352 }
2353 }
2354
2355 void MacroAssembler::null_check(Register reg, int offset) {
2356 if (needs_explicit_null_check(offset)) {
2357 // provoke OS null exception if reg is null by
2358 // accessing M[reg] w/o changing any (non-CC) registers
2359 // NOTE: cmpl is plenty here to provoke a segv
2360 cmpptr(rax, Address(reg, 0));
2361 // Note: should probably use testl(rax, Address(reg, 0));
2362 // may be shorter code (however, this version of
2363 // testl needs to be implemented first)
2364 } else {
2365 // nothing to do, (later) access of M[reg + offset]
2366 // will provoke OS null exception if reg is null
2367 }
2368 }
2369
2370 void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
2371 andptr(markword, markWord::inline_type_mask_in_place);
2372 cmpptr(markword, markWord::inline_type_pattern);
2373 jcc(Assembler::equal, is_inline_type);
2374 }
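// Illustrative use (hypothetical register/label names): a caller that already holds the
// mark word can branch on the inline-type pattern directly, e.g.
//   __ movptr(rscratch1, Address(robj, oopDesc::mark_offset_in_bytes()));
//   __ test_markword_is_inline_type(rscratch1, L_is_inline_type);
// Note that the helper clobbers the mark word register (it masks it in place), which is
// why test_oop_is_not_inline_type() below copies the mark word into a temp first.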
2375
2376 void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null) {
2377 if (can_be_null) {
2378 testptr(object, object);
2379 jcc(Assembler::zero, not_inline_type);
2380 }
2381 const int is_inline_type_mask = markWord::inline_type_pattern;
2382 movptr(tmp, Address(object, oopDesc::mark_offset_in_bytes()));
2383 andptr(tmp, is_inline_type_mask);
2384 cmpptr(tmp, is_inline_type_mask);
2385 jcc(Assembler::notEqual, not_inline_type);
2386 }
2387
2388 void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
2389 movl(temp_reg, flags);
2390 testl(temp_reg, 1 << ResolvedFieldEntry::is_null_free_inline_type_shift);
2391 jcc(Assembler::notEqual, is_null_free_inline_type);
2392 }
2393
2394 void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
2395 movl(temp_reg, flags);
2396 testl(temp_reg, 1 << ResolvedFieldEntry::is_null_free_inline_type_shift);
2397 jcc(Assembler::equal, not_null_free_inline_type);
2398 }
2399
2400 void MacroAssembler::test_field_is_flat(Register flags, Register temp_reg, Label& is_flat) {
2401 movl(temp_reg, flags);
2402 testl(temp_reg, 1 << ResolvedFieldEntry::is_flat_shift);
2403 jcc(Assembler::notEqual, is_flat);
2404 }
2405
2406 void MacroAssembler::test_field_has_null_marker(Register flags, Register temp_reg, Label& has_null_marker) {
2407 movl(temp_reg, flags);
2408 testl(temp_reg, 1 << ResolvedFieldEntry::has_null_marker_shift);
2409 jcc(Assembler::notEqual, has_null_marker);
2410 }
2411
2412 void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
2413 Label test_mark_word;
2414 // load mark word
2415 movptr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes()));
2416 // check whether the header is unlocked (i.e. the mark word is not displaced)
2417 testl(temp_reg, markWord::unlocked_value);
2418 jccb(Assembler::notZero, test_mark_word);
2419 // slow path: load the prototype header from the klass
2420 push(rscratch1);
2421 load_prototype_header(temp_reg, oop, rscratch1);
2422 pop(rscratch1);
2423
2424 bind(test_mark_word);
2425 testl(temp_reg, test_bit);
2426 jcc((jmp_set) ? Assembler::notZero : Assembler::zero, jmp_label);
2427 }
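// Summary sketch of the helper above: while the mark word is unlocked (bit 0b01 set) its
// prototype bits are valid and are tested in place; otherwise the bits are re-read from
// the klass prototype header. The array queries below are thin wrappers, e.g.
//   test_flat_array_oop(oop, tmp, L)
// expands to test_oop_prototype_bit(oop, tmp, markWord::flat_array_bit_in_place, true, L).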
2428
2429 void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg,
2430 Label& is_flat_array) {
2431 #ifdef _LP64
2432 test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flat_array);
2433 #else
2434 load_klass(temp_reg, oop, noreg);
2435 movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
2436 test_flat_array_layout(temp_reg, is_flat_array);
2437 #endif
2438 }
2439
2440 void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg,
2441 Label& is_non_flat_array) {
2442 #ifdef _LP64
2443 test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flat_array);
2444 #else
2445 load_klass(temp_reg, oop, noreg);
2446 movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
2447 test_non_flat_array_layout(temp_reg, is_non_flat_array);
2448 #endif
2449 }
2450
2451 void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label&is_null_free_array) {
2452 #ifdef _LP64
2453 test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array);
2454 #else
2455 Unimplemented();
2456 #endif
2457 }
2458
2459 void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array) {
2460 #ifdef _LP64
2461 test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array);
2462 #else
2463 Unimplemented();
2464 #endif
2465 }
2466
2467 void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) {
2468 testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
2469 jcc(Assembler::notZero, is_flat_array);
2470 }
2471
2472 void MacroAssembler::test_non_flat_array_layout(Register lh, Label& is_non_flat_array) {
2473 testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
2474 jcc(Assembler::zero, is_non_flat_array);
2475 }
2476
2477 void MacroAssembler::os_breakpoint() {
2478 // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
2479 // (e.g., MSVC can't call ps() otherwise)
2480 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
2481 }
2482
2483 void MacroAssembler::unimplemented(const char* what) {
2484 const char* buf = nullptr;
2485 {
2486 ResourceMark rm;
2487 stringStream ss;
2488 ss.print("unimplemented: %s", what);
2489 buf = code_string(ss.as_string());
2490 }
2491 stop(buf);
2492 }
2493
2494 #define XSTATE_BV 0x200
2495
2496 void MacroAssembler::pop_CPU_state() {
3539 }
3540
3541 // C++ bool manipulation
3542 void MacroAssembler::testbool(Register dst) {
3543 if(sizeof(bool) == 1)
3544 testb(dst, 0xff);
3545 else if(sizeof(bool) == 2) {
3546 // testw implementation needed for two byte bools
3547 ShouldNotReachHere();
3548 } else if(sizeof(bool) == 4)
3549 testl(dst, dst);
3550 else
3551 // unsupported
3552 ShouldNotReachHere();
3553 }
3554
3555 void MacroAssembler::testptr(Register dst, Register src) {
3556 testq(dst, src);
3557 }
3558
3559 // Object / value buffer allocation...
3560 //
3561 // Kills klass and rsi on LP64
3562 void MacroAssembler::allocate_instance(Register klass, Register new_obj,
3563 Register t1, Register t2,
3564 bool clear_fields, Label& alloc_failed)
3565 {
3566 Label done, initialize_header, initialize_object, slow_case, slow_case_no_pop;
3567 Register layout_size = t1;
3568 assert(new_obj == rax, "needs to be rax");
3569 assert_different_registers(klass, new_obj, t1, t2);
3570
3571 // get instance_size in InstanceKlass (scaled to a count of bytes)
3572 movl(layout_size, Address(klass, Klass::layout_helper_offset()));
3573 // test to see if it is malformed in some way
3574 testl(layout_size, Klass::_lh_instance_slow_path_bit);
3575 jcc(Assembler::notZero, slow_case_no_pop);
3576
3577 // Allocate the instance:
3578 // If TLAB is enabled:
3579 // Try to allocate in the TLAB.
3580 // If fails, go to the slow path.
3581 // Else If inline contiguous allocations are enabled:
3582 // Try to allocate in eden.
3583 // If fails due to heap end, go to slow path.
3584 //
3585 // If TLAB is enabled OR inline contiguous is enabled:
3586 // Initialize the allocation.
3587 // Exit.
3588 //
3589 // Go to slow path.
3590
3591 push(klass);
3592 if (UseTLAB) {
3593 tlab_allocate(new_obj, layout_size, 0, klass, t2, slow_case);
3594 if (ZeroTLAB || (!clear_fields)) {
3595 // the fields have been already cleared
3596 jmp(initialize_header);
3597 } else {
3598 // initialize both the header and fields
3599 jmp(initialize_object);
3600 }
3601 } else {
3602 jmp(slow_case);
3603 }
3604
3605 // If UseTLAB is true, the object was allocated above and still needs to be initialized.
3606 // Otherwise, skip initialization and go to the slow path.
3607 if (UseTLAB) {
3608 if (clear_fields) {
3609 // The object is initialized before the header. If the object size is
3610 // zero, go directly to the header initialization.
3611 bind(initialize_object);
3612 if (UseCompactObjectHeaders) {
3613 assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned");
3614 decrement(layout_size, oopDesc::base_offset_in_bytes());
3615 } else {
3616 decrement(layout_size, sizeof(oopDesc));
3617 }
3618 jcc(Assembler::zero, initialize_header);
3619
3620 // Initialize topmost object field, divide size by 8, check if odd and
3621 // test if zero.
3622 Register zero = klass;
3623 xorl(zero, zero); // use zero reg to clear memory (shorter code)
3624 shrl(layout_size, LogBytesPerLong); // divide by BytesPerLong (8) and set carry flag if odd
3625
3626 #ifdef ASSERT
3627 // make sure instance_size was multiple of 8
3628 Label L;
3629 // Ignore partial flag stall after shrl() since it is debug VM
3630 jcc(Assembler::carryClear, L);
3631 stop("object size is not multiple of 2 - adjust this code");
3632 bind(L);
3633 // must be > 0, no extra check needed here
3634 #endif
3635
3636 // initialize remaining object fields: instance_size was a multiple of 8
3637 {
3638 Label loop;
3639 bind(loop);
3640 int header_size_bytes = oopDesc::header_size() * HeapWordSize;
3641 assert(is_aligned(header_size_bytes, BytesPerLong), "oop header size must be 8-byte-aligned");
3642 movptr(Address(new_obj, layout_size, Address::times_8, header_size_bytes - 1*oopSize), zero);
3643 decrement(layout_size);
3644 jcc(Assembler::notZero, loop);
3645 }
3646 } // clear_fields
3647
3648 // initialize object header only.
3649 bind(initialize_header);
3650 if (UseCompactObjectHeaders || EnableValhalla) {
3651 pop(klass);
3652 Register mark_word = t2;
3653 movptr(mark_word, Address(klass, Klass::prototype_header_offset()));
3654 movptr(Address(new_obj, oopDesc::mark_offset_in_bytes ()), mark_word);
3655 } else {
3656 movptr(Address(new_obj, oopDesc::mark_offset_in_bytes()),
3657 (intptr_t)markWord::prototype().value()); // header
3658 pop(klass); // get saved klass back in the register.
3659 }
3660 if (!UseCompactObjectHeaders) {
3661 xorl(rsi, rsi); // use zero reg to clear memory (shorter code)
3662 store_klass_gap(new_obj, rsi); // zero klass gap for compressed oops
3663 movptr(t2, klass); // preserve klass
3664 store_klass(new_obj, t2, rscratch1); // src klass reg is potentially compressed
3665 }
3666 jmp(done);
3667 }
3668
3669 bind(slow_case);
3670 pop(klass);
3671 bind(slow_case_no_pop);
3672 jmp(alloc_failed);
3673
3674 bind(done);
3675 }
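// Illustrative call shape (hypothetical registers and label, not part of this file):
//   __ allocate_instance(rcx_klass, rax, rbx, rdx, /*clear_fields=*/ true, L_slow);
//   // rax now holds the initialized instance; L_slow falls back to the runtime allocation
// Note the constraints encoded above: new_obj must be rax, the klass register is
// pushed/popped around the TLAB allocation, and rsi is clobbered when zeroing the klass gap.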
3676
3677 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
3678 void MacroAssembler::tlab_allocate(Register obj,
3679 Register var_size_in_bytes,
3680 int con_size_in_bytes,
3681 Register t1,
3682 Register t2,
3683 Label& slow_case) {
3684 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3685 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
3686 }
3687
3688 RegSet MacroAssembler::call_clobbered_gp_registers() {
3689 RegSet regs;
3690 regs += RegSet::of(rax, rcx, rdx);
3691 #ifndef _WINDOWS
3692 regs += RegSet::of(rsi, rdi);
3693 #endif
3694 regs += RegSet::range(r8, r11);
3695 if (UseAPX) {
3696 regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1));
3860 xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
3861 if (UseIncDec) {
3862 shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
3863 } else {
3864 shrptr(index, 2); // use 2 instructions to avoid partial flag stall
3865 shrptr(index, 1);
3866 }
3867
3868 // initialize remaining object fields: index is a multiple of 2 now
3869 {
3870 Label loop;
3871 bind(loop);
3872 movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
3873 decrement(index);
3874 jcc(Assembler::notZero, loop);
3875 }
3876
3877 bind(done);
3878 }
3879
3880 void MacroAssembler::get_inline_type_field_klass(Register holder_klass, Register index, Register inline_klass) {
3881 inline_layout_info(holder_klass, index, inline_klass);
3882 movptr(inline_klass, Address(inline_klass, InlineLayoutInfo::klass_offset()));
3883 }
3884
3885 void MacroAssembler::inline_layout_info(Register holder_klass, Register index, Register layout_info) {
3886 movptr(layout_info, Address(holder_klass, InstanceKlass::inline_layout_info_array_offset()));
3887 #ifdef ASSERT
3888 {
3889 Label done;
3890 cmpptr(layout_info, 0);
3891 jcc(Assembler::notEqual, done);
3892 stop("inline_layout_info_array is null");
3893 bind(done);
3894 }
3895 #endif
3896
3897 InlineLayoutInfo array[2];
3898 int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements
3899 if (is_power_of_2(size)) {
3900 shll(index, log2i_exact(size)); // Scale index by power of 2
3901 } else {
3902 imull(index, index, size); // Scale the index to be the entry index * array_element_size
3903 }
3904 lea(layout_info, Address(layout_info, index, Address::times_1, Array<InlineLayoutInfo>::base_offset_in_bytes()));
3905 }
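// Worked example (illustrative): if sizeof(InlineLayoutInfo) happens to be a power of two,
// say 8, the index is scaled with a single shll(index, 3); otherwise imull scales by the
// exact element size. The resulting entry address is
//   inline_layout_info_array + Array<InlineLayoutInfo>::base_offset_in_bytes() + index*size
// and get_inline_type_field_klass() above just adds one more load at
// InlineLayoutInfo::klass_offset() on top of it.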
3906
3907 // Look up the method for a megamorphic invokeinterface call.
3908 // The target method is determined by <intf_klass, itable_index>.
3909 // The receiver klass is in recv_klass.
3910 // On success, the result will be in method_result, and execution falls through.
3911 // On failure, execution transfers to the given label.
3912 void MacroAssembler::lookup_interface_method(Register recv_klass,
3913 Register intf_klass,
3914 RegisterOrConstant itable_index,
3915 Register method_result,
3916 Register scan_temp,
3917 Label& L_no_such_interface,
3918 bool return_method) {
3919 assert_different_registers(recv_klass, intf_klass, scan_temp);
3920 assert_different_registers(method_result, intf_klass, scan_temp);
3921 assert(recv_klass != method_result || !return_method,
3922 "recv_klass can be destroyed when method isn't needed");
3923
3924 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3925 "caller must use same register for non-constant itable index as for method");
3926
4155
4156 // Hacked jcc, which "knows" that L_fallthrough, at least, is in
4157 // range of a jccb. If this routine grows larger, reconsider at
4158 // least some of these.
4159 #define local_jcc(assembler_cond, label) \
4160 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
4161 else jcc( assembler_cond, label) /*omit semi*/
4162
4163 // Hacked jmp, which may only be used just before L_fallthrough.
4164 #define final_jmp(label) \
4165 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
4166 else jmp(label) /*omit semi*/
4167
4168 // If the pointers are equal, we are done (e.g., String[] elements).
4169 // This self-check enables sharing of secondary supertype arrays among
4170 // non-primary types such as array-of-interface. Otherwise, each such
4171 // type would need its own customized SSA.
4172 // We move this check to the front of the fast path because many
4173 // type checks are in fact trivially successful in this manner,
4174 // so we get a nicely predicted branch right at the start of the check.
4175 // TODO 8370341 For a direct pointer comparison, we need the refined array klass pointer
4176 cmpptr(sub_klass, super_klass);
4177 local_jcc(Assembler::equal, *L_success);
4178
4179 // Check the supertype display:
4180 if (must_load_sco) {
4181 // Positive movl does right thing on LP64.
4182 movl(temp_reg, super_check_offset_addr);
4183 super_check_offset = RegisterOrConstant(temp_reg);
4184 }
4185 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
4186 cmpptr(super_klass, super_check_addr); // load displayed supertype
4187
4188 // This check has worked decisively for primary supers.
4189 // Secondary supers are sought in the super_cache ('super_cache_addr').
4190 // (Secondary supers are interfaces and very deeply nested subtypes.)
4191 // This works in the same check above because of a tricky aliasing
4192 // between the super_cache and the primary super display elements.
4193 // (The 'super_check_addr' can address either, as the case requires.)
4194 // Note that the cache is updated below if it does not help us find
4195 // what we need immediately.
4938 } else {
4939 Label L;
4940 jccb(negate_condition(cc), L);
4941 movl(dst, src);
4942 bind(L);
4943 }
4944 }
4945
4946 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
4947 if (VM_Version::supports_cmov()) {
4948 cmovl(cc, dst, src);
4949 } else {
4950 Label L;
4951 jccb(negate_condition(cc), L);
4952 movl(dst, src);
4953 bind(L);
4954 }
4955 }
4956
4957 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
4958 if (!VerifyOops || VerifyAdapterSharing) {
4959 // The address of the code string below confuses VerifyAdapterSharing
4960 // because it may differ between otherwise equivalent adapters.
4961 return;
4962 }
4963
4964 BLOCK_COMMENT("verify_oop {");
4965 push(rscratch1);
4966 push(rax); // save rax
4967 push(reg); // pass register argument
4968
4969 // Pass register number to verify_oop_subroutine
4970 const char* b = nullptr;
4971 {
4972 ResourceMark rm;
4973 stringStream ss;
4974 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
4975 b = code_string(ss.as_string());
4976 }
4977 AddressLiteral buffer((address) b, external_word_Relocation::spec_for_immediate());
4978 pushptr(buffer.addr(), rscratch1);
4979
4980 // call indirectly to solve generation ordering problem
4981 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4982 call(rax);
5001 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
5002 int stackElementSize = Interpreter::stackElementSize;
5003 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
5004 #ifdef ASSERT
5005 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
5006 assert(offset1 - offset == stackElementSize, "correct arithmetic");
5007 #endif
5008 Register scale_reg = noreg;
5009 Address::ScaleFactor scale_factor = Address::no_scale;
5010 if (arg_slot.is_constant()) {
5011 offset += arg_slot.as_constant() * stackElementSize;
5012 } else {
5013 scale_reg = arg_slot.as_register();
5014 scale_factor = Address::times(stackElementSize);
5015 }
5016 offset += wordSize; // return PC is on stack
5017 return Address(rsp, scale_reg, scale_factor, offset);
5018 }
5019
5020 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
5021 if (!VerifyOops || VerifyAdapterSharing) {
5022 // The address of the code string below confuses VerifyAdapterSharing
5023 // because it may differ between otherwise equivalent adapters.
5024 return;
5025 }
5026
5027 push(rscratch1);
5028 push(rax); // save rax,
5029 // addr may contain rsp so we will have to adjust it based on the push
5030 // we just did (and on 64 bit we do two pushes)
5031 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
5032 // stores rax into addr which is backwards of what was intended.
5033 if (addr.uses(rsp)) {
5034 lea(rax, addr);
5035 pushptr(Address(rax, 2 * BytesPerWord));
5036 } else {
5037 pushptr(addr);
5038 }
5039
5040 // Pass register number to verify_oop_subroutine
5041 const char* b = nullptr;
5042 {
5043 ResourceMark rm;
5044 stringStream ss;
5045 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
5399
5400 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
5401 // get mirror
5402 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
5403 load_method_holder(mirror, method);
5404 movptr(mirror, Address(mirror, mirror_offset));
5405 resolve_oop_handle(mirror, tmp);
5406 }
5407
5408 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5409 load_method_holder(rresult, rmethod);
5410 movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5411 }
5412
5413 void MacroAssembler::load_method_holder(Register holder, Register method) {
5414 movptr(holder, Address(method, Method::const_offset())); // ConstMethod*
5415 movptr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5416 movptr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5417 }
5418
5419 void MacroAssembler::load_metadata(Register dst, Register src) {
5420 if (UseCompactObjectHeaders) {
5421 load_narrow_klass_compact(dst, src);
5422 } else if (UseCompressedClassPointers) {
5423 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5424 } else {
5425 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5426 }
5427 }
5428
5429 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5430 assert(UseCompactObjectHeaders, "expect compact object headers");
5431 movq(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5432 shrq(dst, markWord::klass_shift);
5433 }
5434
5435 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
5436 assert_different_registers(src, tmp);
5437 assert_different_registers(dst, tmp);
5438
5439 if (UseCompactObjectHeaders) {
5440 load_narrow_klass_compact(dst, src);
5441 decode_klass_not_null(dst, tmp);
5442 } else if (UseCompressedClassPointers) {
5443 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5444 decode_klass_not_null(dst, tmp);
5445 } else {
5446 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5447 }
5448 }
5449
5450 void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) {
5451 load_klass(dst, src, tmp);
5452 movptr(dst, Address(dst, Klass::prototype_header_offset()));
5453 }
5454
5455 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
5456 assert(!UseCompactObjectHeaders, "not with compact headers");
5457 assert_different_registers(src, tmp);
5458 assert_different_registers(dst, tmp);
5459 if (UseCompressedClassPointers) {
5460 encode_klass_not_null(src, tmp);
5461 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5462 } else {
5463 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5464 }
5465 }
5466
5467 void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
5468 if (UseCompactObjectHeaders) {
5469 assert(tmp != noreg, "need tmp");
5470 assert_different_registers(klass, obj, tmp);
5471 load_narrow_klass_compact(tmp, obj);
5472 cmpl(klass, tmp);
5473 } else if (UseCompressedClassPointers) {
5474 cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
5500 bool as_raw = (decorators & AS_RAW) != 0;
5501 if (as_raw) {
5502 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1);
5503 } else {
5504 bs->load_at(this, decorators, type, dst, src, tmp1);
5505 }
5506 }
5507
5508 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
5509 Register tmp1, Register tmp2, Register tmp3) {
5510 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
5511 decorators = AccessInternal::decorator_fixup(decorators, type);
5512 bool as_raw = (decorators & AS_RAW) != 0;
5513 if (as_raw) {
5514 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5515 } else {
5516 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5517 }
5518 }
5519
5520 void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst,
5521 Register inline_layout_info) {
5522 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
5523 bs->flat_field_copy(this, decorators, src, dst, inline_layout_info);
5524 }
5525
5526 void MacroAssembler::payload_offset(Register inline_klass, Register offset) {
5527 movptr(offset, Address(inline_klass, InstanceKlass::adr_inlineklass_fixed_block_offset()));
5528 movl(offset, Address(offset, InlineKlass::payload_offset_offset()));
5529 }
5530
5531 void MacroAssembler::payload_addr(Register oop, Register data, Register inline_klass) {
5532 // ((address) (void*) o) + vk->payload_offset();
5533 Register offset = (data == oop) ? rscratch1 : data;
5534 payload_offset(inline_klass, offset);
5535 if (data == oop) {
5536 addptr(data, offset);
5537 } else {
5538 lea(data, Address(oop, offset));
5539 }
5540 }
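// Sketch of the computed address (assumed layout): payload_addr() produces
//   data = (address)oop + inline_klass->payload_offset()
// i.e. the start of the buffered value's field block. When data aliases oop the offset is
// staged in rscratch1 and added in place; otherwise a single lea forms the sum directly.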
5541
5542 void MacroAssembler::data_for_value_array_index(Register array, Register array_klass,
5543 Register index, Register data) {
5544 assert(index != rcx, "index needs to shift by rcx");
5545 assert_different_registers(array, array_klass, index);
5546 assert_different_registers(rcx, array, index);
5547
5548 // array->base() + (index << Klass::layout_helper_log2_element_size(lh));
5549 movl(rcx, Address(array_klass, Klass::layout_helper_offset()));
5550
5551 // Klass::layout_helper_log2_element_size(lh)
5552 // (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask;
5553 shrl(rcx, Klass::_lh_log2_element_size_shift);
5554 andl(rcx, Klass::_lh_log2_element_size_mask);
5555 shlptr(index); // index << rcx
5556
5557 lea(data, Address(array, index, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_FLAT_ELEMENT)));
5558 }
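// Worked example (illustrative): for a flat array whose layout helper encodes a log2
// element size of 4 (16-byte elements), index 3 becomes 3 << 4 == 48 and the result is
//   data = array + arrayOopDesc::base_offset_in_bytes(T_FLAT_ELEMENT) + 48
// The shift count must be in cl, which is why index may not be rcx and rcx is clobbered.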
5559
5560 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
5561 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1);
5562 }
5563
5564 // Doesn't do verification, generates fixed size code
5565 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
5566 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1);
5567 }
5568
5569 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
5570 Register tmp2, Register tmp3, DecoratorSet decorators) {
5571 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
5572 }
5573
5574 // Used for storing nulls.
5575 void MacroAssembler::store_heap_oop_null(Address dst) {
5576 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
5577 }
5578
5579 void MacroAssembler::store_klass_gap(Register dst, Register src) {
5899 Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
5900 }
5901
5902 void MacroAssembler::reinit_heapbase() {
5903 if (UseCompressedOops) {
5904 if (Universe::heap() != nullptr) {
5905 if (CompressedOops::base() == nullptr) {
5906 MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
5907 } else {
5908 mov64(r12_heapbase, (int64_t)CompressedOops::base());
5909 }
5910 } else {
5911 movptr(r12_heapbase, ExternalAddress(CompressedOops::base_addr()));
5912 }
5913 }
5914 }
5915
5916 #if COMPILER2_OR_JVMCI
5917
5918 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
5919 void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, KRegister mask) {
5920 // cnt - number of qwords (8-byte words).
5921 // base - start address, qword aligned.
5922 Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
5923 bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
5924 if (use64byteVector) {
5925 evpbroadcastq(xtmp, val, AVX_512bit);
5926 } else if (MaxVectorSize >= 32) {
5927 movdq(xtmp, val);
5928 punpcklqdq(xtmp, xtmp);
5929 vinserti128_high(xtmp, xtmp);
5930 } else {
5931 movdq(xtmp, val);
5932 punpcklqdq(xtmp, xtmp);
5933 }
5934 jmp(L_zero_64_bytes);
5935
5936 BIND(L_loop);
5937 if (MaxVectorSize >= 32) {
5938 fill64(base, 0, xtmp, use64byteVector);
5939 } else {
5940 movdqu(Address(base, 0), xtmp);
5941 movdqu(Address(base, 16), xtmp);
5942 movdqu(Address(base, 32), xtmp);
5943 movdqu(Address(base, 48), xtmp);
5944 }
5945 addptr(base, 64);
5946
5947 BIND(L_zero_64_bytes);
5948 subptr(cnt, 8);
5949 jccb(Assembler::greaterEqual, L_loop);
5950
5951 // Clear the trailing bytes (fewer than 8 qwords remain)
5952 if (use64byteVector) {
5953 addptr(cnt, 8);
5954 jccb(Assembler::equal, L_end);
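// Clear the remaining cnt (< 8) qwords with a single masked store
// (the shift argument of 3 indicates 8-byte elements).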
5955 fill64_masked(3, base, 0, xtmp, mask, cnt, val, true);
5956 jmp(L_end);
5957 } else {
5958 addptr(cnt, 4);
5959 jccb(Assembler::less, L_tail);
5960 if (MaxVectorSize >= 32) {
5961 vmovdqu(Address(base, 0), xtmp);
5962 } else {
5963 movdqu(Address(base, 0), xtmp);
5964 movdqu(Address(base, 16), xtmp);
5965 }
5966 }
5967 addptr(base, 32);
5968 subptr(cnt, 4);
5969
5970 BIND(L_tail);
5971 addptr(cnt, 4);
5972 jccb(Assembler::lessEqual, L_end);
5973 if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
5974 fill32_masked(3, base, 0, xtmp, mask, cnt, val);
5975 } else {
5976 decrement(cnt);
5977
5978 BIND(L_sloop);
5979 movq(Address(base, 0), xtmp);
5980 addptr(base, 8);
5981 decrement(cnt);
5982 jccb(Assembler::greaterEqual, L_sloop);
5983 }
5984 BIND(L_end);
5985 }
5986
5987 int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
5988 assert(InlineTypeReturnedAsFields, "InlineTypeReturnedAsFields must be enabled");
5989 // An inline type might be returned. If fields are in registers we
5990 // need to allocate a buffered inline type instance and initialize it
5991 // with the values of the fields.
5992 Label skip;
5993 // We only need a new buffered inline type if one was not already returned (i.e. the low bit of rax is set)
5994 testptr(rax, 1);
5995 jcc(Assembler::zero, skip);
5996 int call_offset = -1;
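// call_offset records the offset of the runtime call emitted on the compiled path below;
// it stays -1 if that call is not reached or when called from the interpreter.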
5997
5998 #ifdef _LP64
5999 // The following code is similar to allocate_instance but has some slight differences,
6000 // e.g. the object size is never zero and is sometimes a constant; storing the klass pointer
6001 // after allocation is not necessary if vk != nullptr, etc. allocate_instance is not aware of these.
6002 Label slow_case;
6003 // 1. Try to allocate a new buffered inline instance either from TLAB or eden space
6004 mov(rscratch1, rax); // save rax for the slow case since *_allocate may clobber it if allocation fails
6005 if (vk != nullptr) {
6006 // Called from C1, where the return type is statically known.
6007 movptr(rbx, (intptr_t)vk->get_InlineKlass());
6008 jint lh = vk->layout_helper();
6009 assert(lh != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
6010 if (UseTLAB && !Klass::layout_helper_needs_slow_path(lh)) {
6011 tlab_allocate(rax, noreg, lh, r13, r14, slow_case);
6012 } else {
6013 jmp(slow_case);
6014 }
6015 } else {
6016 // Called from the interpreter. RAX contains ((the InlineKlass* of the return type) | 0x01)
6017 mov(rbx, rax);
6018 andptr(rbx, -2); // clear the tag bit to recover the InlineKlass*
6019 if (UseTLAB) {
6020 movl(r14, Address(rbx, Klass::layout_helper_offset()));
6021 testl(r14, Klass::_lh_instance_slow_path_bit);
6022 jcc(Assembler::notZero, slow_case);
6023 tlab_allocate(rax, r14, 0, r13, r14, slow_case);
6024 } else {
6025 jmp(slow_case);
6026 }
6027 }
6028 if (UseTLAB) {
6029 // 2. Initialize buffered inline instance header
6030 Register buffer_obj = rax;
6031 Register klass = rbx;
6032 if (UseCompactObjectHeaders) {
6033 Register mark_word = r13;
6034 movptr(mark_word, Address(klass, Klass::prototype_header_offset()));
6035 movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), mark_word);
6036 } else {
6037 movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::inline_type_prototype().value());
6038 xorl(r13, r13);
6039 store_klass_gap(buffer_obj, r13);
6040 if (vk == nullptr) {
6041 // store_klass corrupts rbx (klass), so save it in r13 for later use (interpreter case only).
6042 mov(r13, klass);
6043 }
6044 store_klass(buffer_obj, klass, rscratch1);
6045 klass = r13;
6046 }
6047 // 3. Initialize its fields with an inline class specific handler
6048 if (vk != nullptr) {
6049 call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
6050 } else {
6051 movptr(rbx, Address(klass, InstanceKlass::adr_inlineklass_fixed_block_offset()));
6052 movptr(rbx, Address(rbx, InlineKlass::pack_handler_offset()));
6053 call(rbx);
6054 }
6055 jmp(skip);
6056 }
6057 bind(slow_case);
6058 // We failed to allocate a new inline type, so fall back to a runtime
6059 // call. Some oop fields may be live in registers but we can't
6060 // tell which, so the runtime call will take care of preserving them
6061 // across a GC if one occurs.
6062 mov(rax, rscratch1);
6063 #endif
6064
6065 if (from_interpreter) {
6066 super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
6067 } else {
6068 call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
6069 call_offset = offset();
6070 }
6071
6072 bind(skip);
6073 return call_offset;
6074 }
6075
6076 // Move a value between registers/stack slots and update the reg_state
6077 bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
6078 assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
6079 if (reg_state[to->value()] == reg_written) {
6080 return true; // Already written
6081 }
6082 if (from != to && bt != T_VOID) {
6083 if (reg_state[to->value()] == reg_readonly) {
6084 return false; // Not yet writable
6085 }
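// Four cases below: reg -> reg, reg -> stack, stack -> reg, and stack -> stack
// (the last one goes through r13 as a temporary).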
6086 if (from->is_reg()) {
6087 if (to->is_reg()) {
6088 if (from->is_XMMRegister()) {
6089 if (bt == T_DOUBLE) {
6090 movdbl(to->as_XMMRegister(), from->as_XMMRegister());
6091 } else {
6092 assert(bt == T_FLOAT, "must be float");
6093 movflt(to->as_XMMRegister(), from->as_XMMRegister());
6094 }
6095 } else {
6096 movq(to->as_Register(), from->as_Register());
6097 }
6098 } else {
6099 int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
6100 Address to_addr = Address(rsp, st_off);
6101 if (from->is_XMMRegister()) {
6102 if (bt == T_DOUBLE) {
6103 movdbl(to_addr, from->as_XMMRegister());
6104 } else {
6105 assert(bt == T_FLOAT, "must be float");
6106 movflt(to_addr, from->as_XMMRegister());
6107 }
6108 } else {
6109 movq(to_addr, from->as_Register());
6110 }
6111 }
6112 } else {
6113 Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize);
6114 if (to->is_reg()) {
6115 if (to->is_XMMRegister()) {
6116 if (bt == T_DOUBLE) {
6117 movdbl(to->as_XMMRegister(), from_addr);
6118 } else {
6119 assert(bt == T_FLOAT, "must be float");
6120 movflt(to->as_XMMRegister(), from_addr);
6121 }
6122 } else {
6123 movq(to->as_Register(), from_addr);
6124 }
6125 } else {
6126 int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
6127 movq(r13, from_addr);
6128 movq(Address(rsp, st_off), r13);
6129 }
6130 }
6131 }
6132 // Update register states
6133 reg_state[from->value()] = reg_writable;
6134 reg_state[to->value()] = reg_written;
6135 return true;
6136 }
6137
6138 // Calculate the extra stack space required for packing or unpacking inline
6139 // args and adjust the stack pointer
6140 int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
6141 // Two additional 4-byte stack slots to account for the return address
6142 int sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size;
6143 sp_inc = align_up(sp_inc, StackAlignmentInBytes);
6144 // Save the return address, adjust the stack (make sure it is properly
6145 // 16-byte aligned) and copy the return address to the new top of the stack.
6146 // The stack will be repaired on return (see MacroAssembler::remove_frame).
6147 assert(sp_inc > 0, "sanity");
6148 pop(r13);
6149 subptr(rsp, sp_inc);
6150 push(r13);
6151 return sp_inc;
6152 }
6153
6154 // Read all fields from an inline type buffer and store the field values in registers/stack slots.
6155 bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
6156 VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
6157 RegState reg_state[]) {
6158 assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
6159 assert(from->is_valid(), "source must be valid");
6160 bool progress = false;
6161 #ifdef ASSERT
6162 const int start_offset = offset();
6163 #endif
6164
6165 Label L_null, L_notNull;
6166 // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for)
6167 Register tmp1 = r10;
6168 Register tmp2 = r13;
6169 Register fromReg = noreg;
6170 ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, -1);
6171 bool done = true;
6172 bool mark_done = true;
6173 VMReg toReg;
6174 BasicType bt;
6175 // Check if argument requires a null check
6176 bool null_check = false;
6177 VMReg nullCheckReg;
6178 while (stream.next(nullCheckReg, bt)) {
6179 if (sig->at(stream.sig_index())._offset == -1) {
6180 null_check = true;
6181 break;
6182 }
6183 }
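// A signature entry with offset -1 is the null marker: its presence means the argument
// is nullable and a null check must be emitted before reading any fields.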
6184 stream.reset(sig_index, to_index);
6185 while (stream.next(toReg, bt)) {
6186 assert(toReg->is_valid(), "destination must be valid");
6187 int idx = (int)toReg->value();
6188 if (reg_state[idx] == reg_readonly) {
6189 if (idx != from->value()) {
6190 mark_done = false;
6191 }
6192 done = false;
6193 continue;
6194 } else if (reg_state[idx] == reg_written) {
6195 continue;
6196 }
6197 assert(reg_state[idx] == reg_writable, "must be writable");
6198 reg_state[idx] = reg_written;
6199 progress = true;
6200
6201 if (fromReg == noreg) {
6202 if (from->is_reg()) {
6203 fromReg = from->as_Register();
6204 } else {
6205 int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
6206 movq(tmp1, Address(rsp, st_off));
6207 fromReg = tmp1;
6208 }
6209 if (null_check) {
6210 // Nullable inline type argument, emit null check
6211 testptr(fromReg, fromReg);
6212 jcc(Assembler::zero, L_null);
6213 }
6214 }
6215 int off = sig->at(stream.sig_index())._offset;
6216 if (off == -1) {
6217 assert(null_check, "Missing null check");
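// This is the non-null path, so store 1 into the null marker's destination to signal
// a non-null argument.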
6218 if (toReg->is_stack()) {
6219 int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
6220 movq(Address(rsp, st_off), 1);
6221 } else {
6222 movq(toReg->as_Register(), 1);
6223 }
6224 continue;
6225 }
6226 assert(off > 0, "offset in object should be positive");
6227 Address fromAddr = Address(fromReg, off);
6228 if (!toReg->is_XMMRegister()) {
6229 Register dst = toReg->is_stack() ? tmp2 : toReg->as_Register();
6230 if (is_reference_type(bt)) {
6231 load_heap_oop(dst, fromAddr);
6232 } else {
6233 bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
6234 load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
6235 }
6236 if (toReg->is_stack()) {
6237 int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
6238 movq(Address(rsp, st_off), dst);
6239 }
6240 } else if (bt == T_DOUBLE) {
6241 movdbl(toReg->as_XMMRegister(), fromAddr);
6242 } else {
6243 assert(bt == T_FLOAT, "must be float");
6244 movflt(toReg->as_XMMRegister(), fromAddr);
6245 }
6246 }
6247 if (progress && null_check) {
6248 if (done) {
6249 jmp(L_notNull);
6250 bind(L_null);
6251 // Set null marker to zero to signal that the argument is null.
6252 // Also set all oop fields to zero to make the GC happy.
6253 stream.reset(sig_index, to_index);
6254 while (stream.next(toReg, bt)) {
6255 if (sig->at(stream.sig_index())._offset == -1 ||
6256 bt == T_OBJECT || bt == T_ARRAY) {
6257 if (toReg->is_stack()) {
6258 int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
6259 movq(Address(rsp, st_off), 0);
6260 } else {
6261 xorq(toReg->as_Register(), toReg->as_Register());
6262 }
6263 }
6264 }
6265 bind(L_notNull);
6266 } else {
6267 bind(L_null);
6268 }
6269 }
6270
6271 sig_index = stream.sig_index();
6272 to_index = stream.regs_index();
6273
6274 if (mark_done && reg_state[from->value()] != reg_written) {
6275 // This is okay because no one else will write to that slot
6276 reg_state[from->value()] = reg_writable;
6277 }
6278 from_index--;
6279 assert(progress || (start_offset == offset()), "should not emit code");
6280 return done;
6281 }
6282
6283 bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
6284 VMRegPair* from, int from_count, int& from_index, VMReg to,
6285 RegState reg_state[], Register val_array) {
6286 assert(sig->at(sig_index)._bt == T_METADATA, "should be at delimiter");
6287 assert(to->is_valid(), "destination must be valid");
6288
6289 if (reg_state[to->value()] == reg_written) {
6290 skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
6291 return true; // Already written
6292 }
6293
6294 // TODO 8284443 Isn't it an issue if below code uses r14 as tmp when it contains a spilled value?
6295 // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for).
6296 Register val_obj_tmp = r11;
6297 Register from_reg_tmp = r14;
6298 Register tmp1 = r10;
6299 Register tmp2 = r13;
6300 Register tmp3 = rbx;
6301 Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();
6302
6303 assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);
6304
6305 if (reg_state[to->value()] == reg_readonly) {
6306 if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
6307 skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
6308 return false; // Not yet writable
6309 }
6310 val_obj = val_obj_tmp;
6311 }
6312
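// Load the value object for this argument from val_array (an oop array indexed by vtarg_index).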
6313 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_OBJECT);
6314 load_heap_oop(val_obj, Address(val_array, index));
6315
6316 ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
6317 VMReg fromReg;
6318 BasicType bt;
6319 Label L_null;
6320 while (stream.next(fromReg, bt)) {
6321 assert(fromReg->is_valid(), "source must be valid");
6322 reg_state[fromReg->value()] = reg_writable;
6323
6324 int off = sig->at(stream.sig_index())._offset;
6325 if (off == -1) {
6326 // Nullable inline type argument, emit null check
6327 Label L_notNull;
6328 if (fromReg->is_stack()) {
6329 int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
6330 testb(Address(rsp, ld_off), 1);
6331 } else {
6332 testb(fromReg->as_Register(), 1);
6333 }
6334 jcc(Assembler::notZero, L_notNull);
6335 movptr(val_obj, 0);
6336 jmp(L_null);
6337 bind(L_notNull);
6338 continue;
6339 }
6340
6341 assert(off > 0, "offset in object should be positive");
6342 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
6343
6344 // Pack the scalarized field into the value object.
6345 Address dst(val_obj, off);
6346 if (!fromReg->is_XMMRegister()) {
6347 Register src;
6348 if (fromReg->is_stack()) {
6349 src = from_reg_tmp;
6350 int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
6351 load_sized_value(src, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
6352 } else {
6353 src = fromReg->as_Register();
6354 }
6355 assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
6356 if (is_reference_type(bt)) {
6357 store_heap_oop(dst, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
6358 } else {
6359 store_sized_value(dst, src, size_in_bytes);
6360 }
6361 } else if (bt == T_DOUBLE) {
6362 movdbl(dst, fromReg->as_XMMRegister());
6363 } else {
6364 assert(bt == T_FLOAT, "must be float");
6365 movflt(dst, fromReg->as_XMMRegister());
6366 }
6367 }
6368 bind(L_null);
6369 sig_index = stream.sig_index();
6370 from_index = stream.regs_index();
6371
6372 assert(reg_state[to->value()] == reg_writable, "must have already been read");
6373 bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);
6374 assert(success, "to register must be writable");
6375 return true;
6376 }
6377
6378 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
6379 return reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
6380 }
6381
6382 void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
6383 assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
6384 if (needs_stack_repair) {
6385 // TODO 8284443 Add a comment drawing the frame like in Aarch64's version of MacroAssembler::remove_frame
6386 movq(rbp, Address(rsp, initial_framesize));
6387 // The stack increment resides just below the saved rbp
6388 addq(rsp, Address(rsp, initial_framesize - wordSize));
6389 } else {
6390 if (initial_framesize > 0) {
6391 addq(rsp, initial_framesize);
6392 }
6393 pop(rbp);
6394 }
6395 }
6396
6397 // Clearing constant sized memory using YMM/ZMM registers.
6398 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
6399 assert(UseAVX > 2 && VM_Version::supports_avx512vl(), "AVX512VL support is required");
6400 bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
6401
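// cnt is given in qwords; each 64-byte vector store covers 8 of them, so split the count
// into full 64-byte chunks plus a remainder of up to 7 qwords.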
6402 int vector64_count = (cnt & (~0x7)) >> 3;
6403 cnt = cnt & 0x7;
6404 const int fill64_per_loop = 4;
6405 const int max_unrolled_fill64 = 8;
6406
6407 // 64 byte initialization loop.
6408 vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
6409 int start64 = 0;
6410 if (vector64_count > max_unrolled_fill64) {
6411 Label LOOP;
6412 Register index = rtmp;
6413
6414 start64 = vector64_count - (vector64_count % fill64_per_loop);
6415
6416 movl(index, 0);
6466 break;
6467 case 7:
6468 if (use64byteVector) {
6469 movl(rtmp, 0x7F);
6470 kmovwl(mask, rtmp);
6471 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
6472 } else {
6473 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
6474 movl(rtmp, 0x7);
6475 kmovwl(mask, rtmp);
6476 evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
6477 }
6478 break;
6479 default:
6480 fatal("Unexpected length: %d\n", cnt);
6481 break;
6482 }
6483 }
6484 }
6485
6486 void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp,
6487 bool is_large, bool word_copy_only, KRegister mask) {
6488 // cnt - number of qwords (8-byte words).
6489 // base - start address, qword aligned.
6490 // is_large - set when the compiler knows cnt is larger than InitArrayShortSize
6491 assert(base==rdi, "base register must be rdi for rep stos");
6492 assert(val==rax, "val register must be rax for rep stos");
6493 assert(cnt==rcx, "cnt register must be rcx for rep stos");
6494 assert(InitArrayShortSize % BytesPerLong == 0,
6495 "InitArrayShortSize should be the multiple of BytesPerLong");
6496
6497 Label DONE;
6498
6499 if (!is_large) {
6500 Label LOOP, LONG;
6501 cmpptr(cnt, InitArrayShortSize/BytesPerLong);
6502 jccb(Assembler::greater, LONG);
6503
6504 decrement(cnt);
6505 jccb(Assembler::negative, DONE); // Zero length
6506
6507 // Use individual pointer-sized stores for small counts:
6508 BIND(LOOP);
6509 movptr(Address(base, cnt, Address::times_ptr), val);
6510 decrement(cnt);
6511 jccb(Assembler::greaterEqual, LOOP);
6512 jmpb(DONE);
6513
6514 BIND(LONG);
6515 }
6516
6517 // Use longer rep-prefixed ops for non-small counts:
6518 if (UseFastStosb && !word_copy_only) {
6519 shlptr(cnt, 3); // convert to number of bytes
6520 rep_stosb();
6521 } else if (UseXMMForObjInit) {
6522 xmm_clear_mem(base, cnt, val, xtmp, mask);
6523 } else {
6524 rep_stos();
6525 }
6526
6527 BIND(DONE);
6528 }
6529
6530 #endif //COMPILER2_OR_JVMCI
6531
6532
6533 void MacroAssembler::generate_fill(BasicType t, bool aligned,
6534 Register to, Register value, Register count,
6535 Register rtmp, XMMRegister xtmp) {
6536 ShortBranchVerifier sbv(this);
6537 assert_different_registers(to, value, count, rtmp);
6538 Label L_exit;
6539 Label L_fill_2_bytes, L_fill_4_bytes;
6540
6541 #if defined(COMPILER2)
6542 if (MaxVectorSize >= 32 &&
10422
10423 // Load top.
10424 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
10425
10426 // Check if the lock-stack is full.
10427 cmpl(top, LockStack::end_offset());
10428 jcc(Assembler::greaterEqual, slow);
10429
10430 // Check for recursion.
10431 cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
10432 jcc(Assembler::equal, push);
10433
10434 // Check header for monitor (0b10).
10435 testptr(reg_rax, markWord::monitor_value);
10436 jcc(Assembler::notZero, slow);
10437
10438 // Try to lock. Transition lock bits 0b01 => 0b00
10439 movptr(tmp, reg_rax);
10440 andptr(tmp, ~(int32_t)markWord::unlocked_value);
10441 orptr(reg_rax, markWord::unlocked_value);
10442 // Mask the inline_type bit so that we go to the slow path if the object is an inline type
10443 andptr(reg_rax, ~((int) markWord::inline_type_bit_in_place));
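// Clearing the bit only in the expected value makes the CAS below fail whenever the mark
// word actually has it set, so inline types (which cannot be locked) take the slow path.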
10444
10445 lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
10446 jcc(Assembler::notEqual, slow);
10447
10448 // Restore top, CAS clobbers register.
10449 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
10450
10451 bind(push);
10452 // After successful lock, push object on lock-stack.
10453 movptr(Address(thread, top), obj);
10454 incrementl(top, oopSize);
10455 movl(Address(thread, JavaThread::lock_stack_top_offset()), top);
10456 }
10457
10458 // Implements lightweight-unlocking.
10459 //
10460 // obj: the object to be unlocked
10461 // reg_rax: rax
10462 // thread: the thread
10463 // tmp: a temporary register
10464 void MacroAssembler::lightweight_unlock(Register obj, Register reg_rax, Register tmp, Label& slow) {