11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/assembler.hpp"
26 #include "asm/assembler.inline.hpp"
27 #include "code/aotCodeCache.hpp"
28 #include "code/compiledIC.hpp"
29 #include "compiler/compiler_globals.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "crc32c.h"
32 #include "gc/shared/barrierSet.hpp"
33 #include "gc/shared/barrierSetAssembler.hpp"
34 #include "gc/shared/collectedHeap.inline.hpp"
35 #include "gc/shared/tlab_globals.hpp"
36 #include "interpreter/bytecodeHistogram.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "interpreter/interpreterRuntime.hpp"
39 #include "jvm.h"
40 #include "memory/resourceArea.hpp"
41 #include "memory/universe.hpp"
42 #include "oops/accessDecorators.hpp"
43 #include "oops/compressedKlass.inline.hpp"
44 #include "oops/compressedOops.inline.hpp"
45 #include "oops/klass.inline.hpp"
46 #include "prims/methodHandles.hpp"
47 #include "runtime/continuation.hpp"
48 #include "runtime/interfaceSupport.inline.hpp"
49 #include "runtime/javaThread.hpp"
50 #include "runtime/jniHandles.hpp"
51 #include "runtime/objectMonitor.hpp"
52 #include "runtime/os.hpp"
53 #include "runtime/safepoint.hpp"
54 #include "runtime/safepointMechanism.hpp"
55 #include "runtime/sharedRuntime.hpp"
56 #include "runtime/stubRoutines.hpp"
57 #include "utilities/checkedCast.hpp"
58 #include "utilities/globalDefinitions.hpp"
59 #include "utilities/macros.hpp"
60
61 #ifdef PRODUCT
62 #define BLOCK_COMMENT(str) /* nothing */
63 #define STOP(error) stop(error)
64 #else
65 #define BLOCK_COMMENT(str) block_comment(str)
66 #define STOP(error) block_comment(error); stop(error)
67 #endif
68
69 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
70
#ifdef ASSERT
// Debug-build hook: instruction-mark checking is always enabled on x86.
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif
74
75 static const Assembler::Condition reverse[] = {
76 Assembler::noOverflow /* overflow = 0x0 */ ,
77 Assembler::overflow /* noOverflow = 0x1 */ ,
78 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
79 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
// Call a leaf runtime entry point with three register arguments.
// Arguments are moved into the C calling-convention registers
// back-to-front (pass_arg2 first), so the asserts guarantee that no
// later source register is clobbered by an earlier move.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert_different_registers(arg_0, c_rarg1, c_rarg2);
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
1293
1294 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
1295 assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3);
1296 assert_different_registers(arg_1, c_rarg2, c_rarg3);
1297 assert_different_registers(arg_2, c_rarg3);
1298 pass_arg3(this, arg_3);
1299 pass_arg2(this, arg_2);
1300 pass_arg1(this, arg_1);
1301 pass_arg0(this, arg_0);
1302 call_VM_leaf(entry_point, 3);
1303 }
1304
// Call a leaf runtime entry point with one register argument,
// bypassing the call_VM_leaf wrapper and going straight to the base.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}
1309
// Call a leaf runtime entry point with two register arguments.
// pass_arg1 runs first, so arg_0 must not live in c_rarg1 (assert).
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  assert_different_registers(arg_0, c_rarg1);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}
1316
// Call a leaf runtime entry point with three register arguments.
// Arguments are placed back-to-front; the asserts guarantee earlier
// moves do not clobber later source registers.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert_different_registers(arg_0, c_rarg1, c_rarg2);
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}
2356 lea(rscratch, src);
2357 Assembler::mulss(dst, Address(rscratch, 0));
2358 }
2359 }
2360
// Emit an explicit null check for 'reg' when the subsequent access at
// 'offset' is too far out for the implicit-null-check machinery to
// catch the fault; otherwise emit nothing and rely on the later access
// faulting at M[reg + offset].
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS null exception if reg is null by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS null exception if reg is null
  }
}
2375
// Emit a call to os::breakpoint() rather than an inline breakpoint
// instruction, for better debuggability.
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
2381
2382 void MacroAssembler::unimplemented(const char* what) {
2383 const char* buf = nullptr;
2384 {
2385 ResourceMark rm;
2386 stringStream ss;
2387 ss.print("unimplemented: %s", what);
2388 buf = code_string(ss.as_string());
2389 }
2390 stop(buf);
2391 }
2392
2393 #define XSTATE_BV 0x200
2394
2395 void MacroAssembler::pop_CPU_state() {
3493 }
3494
3495 // C++ bool manipulation
3496 void MacroAssembler::testbool(Register dst) {
3497 if(sizeof(bool) == 1)
3498 testb(dst, 0xff);
3499 else if(sizeof(bool) == 2) {
3500 // testw implementation needed for two byte bools
3501 ShouldNotReachHere();
3502 } else if(sizeof(bool) == 4)
3503 testl(dst, dst);
3504 else
3505 // unsupported
3506 ShouldNotReachHere();
3507 }
3508
// Pointer-sized register-register test (64-bit AND, discard result,
// set flags).
void MacroAssembler::testptr(Register dst, Register src) {
  testq(dst, src);
}
3512
3513 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Allocate from the current thread's TLAB by delegating to the active
// GC's BarrierSetAssembler. Defines obj; preserves var_size_in_bytes
// (t2 may alias it). On allocation failure control transfers to
// slow_case — exact register state there is defined by the barrier-set
// implementation.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
}
3523
3524 RegSet MacroAssembler::call_clobbered_gp_registers() {
3525 RegSet regs;
3526 regs += RegSet::of(rax, rcx, rdx);
3527 #ifndef _WINDOWS
3528 regs += RegSet::of(rsi, rdi);
3529 #endif
3530 regs += RegSet::range(r8, r11);
3531 if (UseAPX) {
3532 regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1));
3696 xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
3697 if (UseIncDec) {
3698 shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
3699 } else {
3700 shrptr(index, 2); // use 2 instructions to avoid partial flag stall
3701 shrptr(index, 1);
3702 }
3703
3704 // initialize remaining object fields: index is a multiple of 2 now
3705 {
3706 Label loop;
3707 bind(loop);
3708 movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
3709 decrement(index);
3710 jcc(Assembler::notZero, loop);
3711 }
3712
3713 bind(done);
3714 }
3715
3716 // Look up the method for a megamorphic invokeinterface call.
3717 // The target method is determined by <intf_klass, itable_index>.
3718 // The receiver klass is in recv_klass.
3719 // On success, the result will be in method_result, and execution falls through.
3720 // On failure, execution transfers to the given label.
3721 void MacroAssembler::lookup_interface_method(Register recv_klass,
3722 Register intf_klass,
3723 RegisterOrConstant itable_index,
3724 Register method_result,
3725 Register scan_temp,
3726 Label& L_no_such_interface,
3727 bool return_method) {
3728 assert_different_registers(recv_klass, intf_klass, scan_temp);
3729 assert_different_registers(method_result, intf_klass, scan_temp);
3730 assert(recv_klass != method_result || !return_method,
3731 "recv_klass can be destroyed when method isn't needed");
3732
3733 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3734 "caller must use same register for non-constant itable index as for method");
3735
4746 } else {
4747 Label L;
4748 jccb(negate_condition(cc), L);
4749 movl(dst, src);
4750 bind(L);
4751 }
4752 }
4753
// Conditional 32-bit register move: use CMOV when the CPU supports it,
// otherwise emit a short branch around a plain move on the negated
// condition (same net effect).
void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}
4764
4765 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
4766 if (!VerifyOops) return;
4767
4768 BLOCK_COMMENT("verify_oop {");
4769 push(rscratch1);
4770 push(rax); // save rax
4771 push(reg); // pass register argument
4772
4773 // Pass register number to verify_oop_subroutine
4774 const char* b = nullptr;
4775 {
4776 ResourceMark rm;
4777 stringStream ss;
4778 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
4779 b = code_string(ss.as_string());
4780 }
4781 AddressLiteral buffer((address) b, external_word_Relocation::spec_for_immediate());
4782 pushptr(buffer.addr(), rscratch1);
4783
4784 // call indirectly to solve generation ordering problem
4785 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4786 call(rax);
5002 // or something else. Since this is a slow path, we can optimize for code density,
5003 // and just restart the search from the beginning.
5004 jmpb(L_restart);
5005
5006 // Counter updates:
5007
5008 // Increment polymorphic counter instead of receiver slot.
5009 bind(L_polymorphic);
5010 movptr(offset, poly_count_offset);
5011 jmpb(L_count_update);
5012
5013 // Found a receiver, convert its slot offset to corresponding count offset.
5014 bind(L_found_recv);
5015 addptr(offset, receiver_to_count_step);
5016
5017 bind(L_count_update);
5018 addptr(Address(mdp, offset, Address::times_ptr), DataLayout::counter_increment);
5019 }
5020
5021 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
5022 if (!VerifyOops) return;
5023
5024 push(rscratch1);
5025 push(rax); // save rax,
5026 // addr may contain rsp so we will have to adjust it based on the push
5027 // we just did (and on 64 bit we do two pushes)
5028 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
5029 // stores rax into addr which is backwards of what was intended.
5030 if (addr.uses(rsp)) {
5031 lea(rax, addr);
5032 pushptr(Address(rax, 2 * BytesPerWord));
5033 } else {
5034 pushptr(addr);
5035 }
5036
5037 // Pass register number to verify_oop_subroutine
5038 const char* b = nullptr;
5039 {
5040 ResourceMark rm;
5041 stringStream ss;
5042 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
5396
// Load the java.lang.Class mirror of 'method''s holder klass into
// 'mirror': holder klass -> java_mirror OopHandle -> resolved oop.
// 'tmp' is used by resolve_oop_handle.
void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
  // get mirror
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  load_method_holder(mirror, method);
  movptr(mirror, Address(mirror, mirror_offset));
  resolve_oop_handle(mirror, tmp);
}
5404
// Load the ClassLoaderData of 'rmethod''s holder klass into 'rresult'.
void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
  load_method_holder(rresult, rmethod);
  movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
}
5409
// Load the InstanceKlass holding 'method' into 'holder' by chasing
// Method* -> ConstMethod* -> ConstantPool* -> pool holder.
void MacroAssembler::load_method_holder(Register holder, Register method) {
  movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
  movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
  movptr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
}
5415
// With compact object headers, extract the narrow klass pointer from
// the upper bits of 'src''s mark word into 'dst' (load mark word,
// shift right by markWord::klass_shift). Result is still narrow
// (encoded); callers must decode.
void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
  assert(UseCompactObjectHeaders, "expect compact object headers");
  movq(dst, Address(src, oopDesc::mark_offset_in_bytes()));
  shrq(dst, markWord::klass_shift);
}
5421
5422 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
5423 assert_different_registers(src, tmp);
5424 assert_different_registers(dst, tmp);
5425
5426 if (UseCompactObjectHeaders) {
5427 load_narrow_klass_compact(dst, src);
5428 decode_klass_not_null(dst, tmp);
5429 } else {
5430 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5431 decode_klass_not_null(dst, tmp);
5432 }
5433 }
5434
// Store the klass 'src' (encoded in place to its narrow form via
// 'tmp') into object 'dst''s klass field. Only valid without compact
// object headers (assert). Note: clobbers 'src' by encoding it.
void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
  assert(!UseCompactObjectHeaders, "not with compact headers");
  assert_different_registers(src, tmp);
  assert_different_registers(dst, tmp);
  encode_klass_not_null(src, tmp);
  movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
5442
// Compare narrow klass 'klass' against the klass of object 'obj',
// setting flags. With compact headers the narrow klass is first
// extracted from obj's mark word into 'tmp'; otherwise the klass field
// is compared directly from memory.
void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
  if (UseCompactObjectHeaders) {
    assert(tmp != noreg, "need tmp");
    assert_different_registers(klass, obj, tmp);
    load_narrow_klass_compact(tmp, obj);
    cmpl(klass, tmp);
  } else {
    cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
  }
}
5453
5454 void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2) {
5471 bool as_raw = (decorators & AS_RAW) != 0;
5472 if (as_raw) {
5473 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1);
5474 } else {
5475 bs->load_at(this, decorators, type, dst, src, tmp1);
5476 }
5477 }
5478
// GC-aware store of 'val' (of BasicType 'type') to 'dst'. Decorators
// are normalized via decorator_fixup; AS_RAW bypasses the virtual
// barrier dispatch and calls the base BarrierSetAssembler directly.
void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
                                     Register tmp1, Register tmp2, Register tmp3) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  decorators = AccessInternal::decorator_fixup(decorators, type);
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
  } else {
    bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
  }
}
5490
// Load a (possibly null) heap oop from 'src' into 'dst' through the
// GC access machinery (IN_HEAP plus caller-supplied decorators).
void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1);
}
5494
// Doesn't do verification, generates fixed size code
// Like load_heap_oop, but the oop is known non-null (IS_NOT_NULL).
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1);
}
5499
// Store heap oop 'val' to 'dst' through the GC access machinery,
// applying any required store barriers (IN_HEAP plus decorators).
void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
                                    Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
}
5504
// Used for storing nulls.
// (noreg as 'val' denotes a null store; no temps are needed.)
void MacroAssembler::store_heap_oop_null(Address dst) {
  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
}
5509
5510 void MacroAssembler::store_klass_gap(Register dst, Register src) {
5833 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
5834 int klass_index = oop_recorder()->find_index(k);
5835 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
5836 Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
5837 }
5838
// Reload r12_heapbase with the compressed-oops base. Once the heap is
// initialized (and we are not dumping the AOT code cache) the base is
// a known constant: zero it (shorter) or materialize it as a 64-bit
// immediate. Otherwise load it indirectly from CompressedOops'
// base-address cell so the value is picked up at runtime.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    if (Universe::heap() != nullptr && !AOTCodeCache::is_on_for_dump()) {
      if (CompressedOops::base() == nullptr) {
        MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
      } else {
        mov64(r12_heapbase, (int64_t)CompressedOops::base());
      }
    } else {
      movptr(r12_heapbase, ExternalAddress(CompressedOops::base_addr()));
    }
  }
}
5852
5853 #if COMPILER2_OR_JVMCI
5854
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
// Strategy: zero xtmp, then clear 64 bytes per main-loop iteration;
// the remainder (< 64 bytes) is handled with masked AVX-512 stores
// where available, else with 32/16/8-byte store sequences.
void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
  // cnt - number of qwords (8-byte words).
  // base - start address, qword aligned.
  Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
  bool use64byteVector = (MaxVectorSize == 64) && (CopyAVX3Threshold == 0);
  // Zero the vector register at the widest width we will store with.
  if (use64byteVector) {
    vpxor(xtmp, xtmp, xtmp, AVX_512bit);
  } else if (MaxVectorSize >= 32) {
    vpxor(xtmp, xtmp, xtmp, AVX_256bit);
  } else {
    pxor(xtmp, xtmp);
  }
  // Bottom-tested loop: jump straight to the count check.
  jmp(L_zero_64_bytes);

  BIND(L_loop);
  // Clear one 64-byte chunk per iteration.
  if (MaxVectorSize >= 32) {
    fill64(base, 0, xtmp, use64byteVector);
  } else {
    movdqu(Address(base, 0), xtmp);
    movdqu(Address(base, 16), xtmp);
    movdqu(Address(base, 32), xtmp);
    movdqu(Address(base, 48), xtmp);
  }
  addptr(base, 64);

  BIND(L_zero_64_bytes);
  subptr(cnt, 8);                     // 8 qwords == 64 bytes
  jccb(Assembler::greaterEqual, L_loop);

  // Copy trailing 64 bytes
  if (use64byteVector) {
    // Restore the remaining qword count and clear it with one masked
    // 64-byte store (nothing left if the count is exactly zero).
    addptr(cnt, 8);
    jccb(Assembler::equal, L_end);
    fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true);
    jmp(L_end);
  } else {
    // Handle a 32-byte sub-chunk first if at least 4 qwords remain.
    addptr(cnt, 4);
    jccb(Assembler::less, L_tail);
    if (MaxVectorSize >= 32) {
      vmovdqu(Address(base, 0), xtmp);
    } else {
      movdqu(Address(base, 0), xtmp);
      movdqu(Address(base, 16), xtmp);
    }
  }
  addptr(base, 32);
  subptr(cnt, 4);

  BIND(L_tail);
  addptr(cnt, 4);                     // restore remaining qword count
  jccb(Assembler::lessEqual, L_end);
  if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
    // Masked 32-byte store covers any remaining 1-3 qwords at once.
    fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp);
  } else {
    // Scalar fallback: one qword store per iteration.
    decrement(cnt);

    BIND(L_sloop);
    movq(Address(base, 0), xtmp);
    addptr(base, 8);
    decrement(cnt);
    jccb(Assembler::greaterEqual, L_sloop);
  }
  BIND(L_end);
}
5920
5921 // Clearing constant sized memory using YMM/ZMM registers.
5922 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5923 assert(UseAVX > 2 && VM_Version::supports_avx512vl(), "");
5924 bool use64byteVector = (MaxVectorSize > 32) && (CopyAVX3Threshold == 0);
5925
5926 int vector64_count = (cnt & (~0x7)) >> 3;
5927 cnt = cnt & 0x7;
5928 const int fill64_per_loop = 4;
5990 break;
5991 case 7:
5992 if (use64byteVector) {
5993 movl(rtmp, 0x7F);
5994 kmovwl(mask, rtmp);
5995 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
5996 } else {
5997 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
5998 movl(rtmp, 0x7);
5999 kmovwl(mask, rtmp);
6000 evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
6001 }
6002 break;
6003 default:
6004 fatal("Unexpected length : %d\n",cnt);
6005 break;
6006 }
6007 }
6008 }
6009
// Clear 'cnt' qwords starting at 'base'. Register choice is fixed by
// the rep-stos encoding (rdi/rax/rcx, see asserts). Small counts use a
// simple store loop; larger ones use rep stosb, the XMM path, or
// rep stosq depending on flags.
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
                               bool is_large, KRegister mask) {
  // cnt - number of qwords (8-byte words).
  // base - start address, qword aligned.
  // is_large - if optimizers know cnt is larger than InitArrayShortSize
  assert(base==rdi, "base register must be edi for rep stos");
  assert(tmp==rax,  "tmp register must be eax for rep stos");
  assert(cnt==rcx,  "cnt register must be ecx for rep stos");
  assert(InitArrayShortSize % BytesPerLong == 0,
    "InitArrayShortSize should be the multiple of BytesPerLong");

  Label DONE;
  // The XMM path zeroes its own vector register; otherwise rep stos
  // needs a zeroed rax.
  if (!is_large || !UseXMMForObjInit) {
    xorptr(tmp, tmp);
  }

  if (!is_large) {
    Label LOOP, LONG;
    cmpptr(cnt, InitArrayShortSize/BytesPerLong);
    jccb(Assembler::greater, LONG);

    decrement(cnt);
    jccb(Assembler::negative, DONE); // Zero length

    // Use individual pointer-sized stores for small counts:
    BIND(LOOP);
    movptr(Address(base, cnt, Address::times_ptr), tmp);
    decrement(cnt);
    jccb(Assembler::greaterEqual, LOOP);
    jmpb(DONE);

    BIND(LONG);
  }

  // Use longer rep-prefixed ops for non-small counts:
  if (UseFastStosb) {
    shlptr(cnt, 3); // convert to number of bytes
    rep_stosb();
  } else if (UseXMMForObjInit) {
    xmm_clear_mem(base, cnt, tmp, xtmp, mask);
  } else {
    rep_stos();
  }

  BIND(DONE);
}
6056
6057 #endif //COMPILER2_OR_JVMCI
6058
6059
6060 void MacroAssembler::generate_fill(BasicType t, bool aligned,
6061 Register to, Register value, Register count,
6062 Register rtmp, XMMRegister xtmp) {
6063 ShortBranchVerifier sbv(this);
6064 assert_different_registers(to, value, count, rtmp);
6065 Label L_exit;
6066 Label L_fill_2_bytes, L_fill_4_bytes;
6067
6068 #if defined(COMPILER2)
6069 if(MaxVectorSize >=32 &&
9948
9949 // Load top.
9950 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
9951
9952 // Check if the lock-stack is full.
9953 cmpl(top, LockStack::end_offset());
9954 jcc(Assembler::greaterEqual, slow);
9955
9956 // Check for recursion.
9957 cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
9958 jcc(Assembler::equal, push);
9959
9960 // Check header for monitor (0b10).
9961 testptr(reg_rax, markWord::monitor_value);
9962 jcc(Assembler::notZero, slow);
9963
9964 // Try to lock. Transition lock bits 0b01 => 0b00
9965 movptr(tmp, reg_rax);
9966 andptr(tmp, ~(int32_t)markWord::unlocked_value);
9967 orptr(reg_rax, markWord::unlocked_value);
9968 lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
9969 jcc(Assembler::notEqual, slow);
9970
9971 // Restore top, CAS clobbers register.
9972 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
9973
9974 bind(push);
9975 // After successful lock, push object on lock-stack.
9976 movptr(Address(thread, top), obj);
9977 incrementl(top, oopSize);
9978 movl(Address(thread, JavaThread::lock_stack_top_offset()), top);
9979 }
9980
9981 // Implements fast-unlocking.
9982 //
9983 // obj: the object to be unlocked
9984 // reg_rax: rax
9985 // thread: the thread
9986 // tmp: a temporary register
9987 void MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register tmp, Label& slow) {
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/assembler.hpp"
26 #include "asm/assembler.inline.hpp"
27 #include "code/aotCodeCache.hpp"
28 #include "code/compiledIC.hpp"
29 #include "compiler/compiler_globals.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "ci/ciInlineKlass.hpp"
32 #include "crc32c.h"
33 #include "gc/shared/barrierSet.hpp"
34 #include "gc/shared/barrierSetAssembler.hpp"
35 #include "gc/shared/collectedHeap.inline.hpp"
36 #include "gc/shared/tlab_globals.hpp"
37 #include "interpreter/bytecodeHistogram.hpp"
38 #include "interpreter/interpreter.hpp"
39 #include "interpreter/interpreterRuntime.hpp"
40 #include "jvm.h"
41 #include "memory/resourceArea.hpp"
42 #include "memory/universe.hpp"
43 #include "oops/accessDecorators.hpp"
44 #include "oops/compressedKlass.inline.hpp"
45 #include "oops/compressedOops.inline.hpp"
46 #include "oops/klass.inline.hpp"
47 #include "oops/resolvedFieldEntry.hpp"
48 #include "prims/methodHandles.hpp"
49 #include "runtime/arguments.hpp"
50 #include "runtime/continuation.hpp"
51 #include "runtime/interfaceSupport.inline.hpp"
52 #include "runtime/javaThread.hpp"
53 #include "runtime/jniHandles.hpp"
54 #include "runtime/objectMonitor.hpp"
55 #include "runtime/os.hpp"
56 #include "runtime/safepoint.hpp"
57 #include "runtime/safepointMechanism.hpp"
58 #include "runtime/sharedRuntime.hpp"
59 #include "runtime/signature_cc.hpp"
60 #include "runtime/stubRoutines.hpp"
61 #include "utilities/checkedCast.hpp"
62 #include "utilities/globalDefinitions.hpp"
63 #include "utilities/macros.hpp"
64 #include "vmreg_x86.inline.hpp"
65 #ifdef COMPILER2
66 #include "opto/output.hpp"
67 #endif
68
69 #ifdef PRODUCT
70 #define BLOCK_COMMENT(str) /* nothing */
71 #define STOP(error) stop(error)
72 #else
73 #define BLOCK_COMMENT(str) block_comment(str)
74 #define STOP(error) block_comment(error); stop(error)
75 #endif
76
77 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
78
79 #ifdef ASSERT
80 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
81 #endif
82
83 static const Assembler::Condition reverse[] = {
84 Assembler::noOverflow /* overflow = 0x0 */ ,
85 Assembler::overflow /* noOverflow = 0x1 */ ,
86 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
87 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
// Leaf runtime call with three register arguments. pass_arg2/1/0 move
// the arguments into the C argument registers back-to-front; the
// asserts ensure earlier moves cannot clobber later sources.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert_different_registers(arg_0, c_rarg1, c_rarg2);
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
1301
1302 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
1303 assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3);
1304 assert_different_registers(arg_1, c_rarg2, c_rarg3);
1305 assert_different_registers(arg_2, c_rarg3);
1306 pass_arg3(this, arg_3);
1307 pass_arg2(this, arg_2);
1308 pass_arg1(this, arg_1);
1309 pass_arg0(this, arg_0);
1310 call_VM_leaf(entry_point, 3);
1311 }
1312
1313 void MacroAssembler::super_call_VM_leaf(address entry_point) {
1314 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1315 }
1316
// Leaf runtime call with one register argument, dispatched directly to
// the base implementation.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}
1321
// Leaf runtime call with two register arguments; arg_1 is placed
// first, so arg_0 must not already occupy c_rarg1 (assert).
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  assert_different_registers(arg_0, c_rarg1);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}
1328
// Leaf runtime call with three register arguments, placed
// back-to-front; asserts guard against argument-shuffle clobbering.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert_different_registers(arg_0, c_rarg1, c_rarg2);
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}
2368 lea(rscratch, src);
2369 Assembler::mulss(dst, Address(rscratch, 0));
2370 }
2371 }
2372
// Emit an explicit null check for 'reg' if the planned access at
// 'offset' is beyond the reach of the implicit-null-check mechanism;
// otherwise emit nothing and let the later access at M[reg + offset]
// fault.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS null exception if reg is null by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS null exception if reg is null
  }
}
2387
// Branch to 'is_inline_type' if 'markword' carries the inline-type
// bit pattern. Clobbers 'markword' (masked in place).
void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
  andptr(markword, markWord::inline_type_pattern_mask);
  cmpptr(markword, markWord::inline_type_pattern);
  jcc(Assembler::equal, is_inline_type);
}
2393
// Branch to 'not_inline_type' if 'object' is null (when can_be_null)
// or its mark word does not carry the inline-type pattern. 'tmp' is
// clobbered with the masked mark word.
void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null) {
  if (can_be_null) {
    testptr(object, object);
    jcc(Assembler::zero, not_inline_type);
  }
  const int is_inline_type_mask = markWord::inline_type_pattern;
  movptr(tmp, Address(object, oopDesc::mark_offset_in_bytes()));
  andptr(tmp, is_inline_type_mask);
  cmpptr(tmp, is_inline_type_mask);
  jcc(Assembler::notEqual, not_inline_type);
}
2405
// Branch to 'is_null_free_inline_type' if the null-free-inline-type
// flag bit is set in the resolved-field-entry 'flags'. 'temp_reg' gets
// a scratch copy of flags.
void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
  movl(temp_reg, flags);
  testl(temp_reg, 1 << ResolvedFieldEntry::is_null_free_inline_type_shift);
  jcc(Assembler::notEqual, is_null_free_inline_type);
}
2411
// Branch to 'not_null_free_inline_type' if the null-free-inline-type
// flag bit is clear in 'flags'. 'temp_reg' gets a scratch copy.
void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
  movl(temp_reg, flags);
  testl(temp_reg, 1 << ResolvedFieldEntry::is_null_free_inline_type_shift);
  jcc(Assembler::equal, not_null_free_inline_type);
}
2417
// Branch to 'is_flat' if the is-flat flag bit is set in the
// resolved-field-entry 'flags'. 'temp_reg' gets a scratch copy.
void MacroAssembler::test_field_is_flat(Register flags, Register temp_reg, Label& is_flat) {
  movl(temp_reg, flags);
  testl(temp_reg, 1 << ResolvedFieldEntry::is_flat_shift);
  jcc(Assembler::notEqual, is_flat);
}
2423
// Branch to 'has_null_marker' if the null-marker flag bit is set in
// the resolved-field-entry 'flags'. 'temp_reg' gets a scratch copy.
void MacroAssembler::test_field_has_null_marker(Register flags, Register temp_reg, Label& has_null_marker) {
  movl(temp_reg, flags);
  testl(temp_reg, 1 << ResolvedFieldEntry::has_null_marker_shift);
  jcc(Assembler::notEqual, has_null_marker);
}
2429
// Test 'test_bit' against the prototype-carrying bits of 'oop''s
// header and branch to 'jmp_label' when set (jmp_set) or clear
// (!jmp_set). If the mark word is not in unlocked state, fall back to
// the klass's prototype header (slow path), preserving rscratch1
// around the helper. Clobbers 'temp_reg'.
void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
  Label test_mark_word;
  // load mark word
  movptr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes()));
  // check displaced
  testl(temp_reg, markWord::unlocked_value);
  jccb(Assembler::notZero, test_mark_word);
  // slow path use klass prototype
  push(rscratch1);
  load_prototype_header(temp_reg, oop, rscratch1);
  pop(rscratch1);

  bind(test_mark_word);
  testl(temp_reg, test_bit);
  jcc((jmp_set) ? Assembler::notZero : Assembler::zero, jmp_label);
}
2446
// Branch to 'is_flat_array' if 'oop' is a flat value array: on LP64
// via the flat-array bit in the header prototype, on 32-bit via the
// klass layout helper. Clobbers 'temp_reg'.
void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg,
                                         Label& is_flat_array) {
#ifdef _LP64
  test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flat_array);
#else
  load_klass(temp_reg, oop, noreg);
  movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
  test_flat_array_layout(temp_reg, is_flat_array);
#endif
}
2457
// Branch to 'is_non_flat_array' if 'oop' is NOT a flat value array;
// mirror image of test_flat_array_oop. Clobbers 'temp_reg'.
void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg,
                                             Label& is_non_flat_array) {
#ifdef _LP64
  test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flat_array);
#else
  load_klass(temp_reg, oop, noreg);
  movl(temp_reg, Address(temp_reg, Klass::layout_helper_offset()));
  test_non_flat_array_layout(temp_reg, is_non_flat_array);
#endif
}
2468
// Branch to 'is_null_free_array' if 'oop' is a null-free array
// (null-free bit in the header prototype). LP64 only; 32-bit is
// unimplemented. Clobbers 'temp_reg'.
void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label&is_null_free_array) {
#ifdef _LP64
  test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array);
#else
  Unimplemented();
#endif
}
2476
// Branch to 'is_non_null_free_array' if 'oop' is NOT a null-free
// array; mirror of test_null_free_array_oop. LP64 only. Clobbers
// 'temp_reg'.
void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array) {
#ifdef _LP64
  test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array);
#else
  Unimplemented();
#endif
}
2484
// Branch to 'is_flat_array' if the layout helper 'lh' has the
// flat-value array tag bit set.
void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) {
  testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
  jcc(Assembler::notZero, is_flat_array);
}
2489
// Branch to 'is_non_flat_array' if the layout helper 'lh' has the
// flat-value array tag bit clear.
void MacroAssembler::test_non_flat_array_layout(Register lh, Label& is_non_flat_array) {
  testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
  jcc(Assembler::zero, is_non_flat_array);
}
2494
// Emit a call into os::breakpoint() rather than a raw int3.
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
2500
// Emit code that stops the VM with an "unimplemented: <what>" message.
// The message is copied into the code buffer's string table (code_string)
// before the temporary ResourceMark releases the stringStream storage.
void MacroAssembler::unimplemented(const char* what) {
  const char* buf = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}
2511
2512 #define XSTATE_BV 0x200
2513
2514 void MacroAssembler::pop_CPU_state() {
3612 }
3613
3614 // C++ bool manipulation
3615 void MacroAssembler::testbool(Register dst) {
3616 if(sizeof(bool) == 1)
3617 testb(dst, 0xff);
3618 else if(sizeof(bool) == 2) {
3619 // testw implementation needed for two byte bools
3620 ShouldNotReachHere();
3621 } else if(sizeof(bool) == 4)
3622 testl(dst, dst);
3623 else
3624 // unsupported
3625 ShouldNotReachHere();
3626 }
3627
// Pointer-width register test (64-bit on this port).
void MacroAssembler::testptr(Register dst, Register src) {
  testq(dst, src);
}
3631
// Object / value buffer allocation...
//
// Allocate and initialize an instance of `klass` into `new_obj` (must be rax).
// Fast path is TLAB allocation; on any failure control transfers to
// alloc_failed. If clear_fields is true the object body is zeroed here
// (unless ZeroTLAB already guarantees zeroed memory).
//
// Kills klass and rsi on LP64
void MacroAssembler::allocate_instance(Register klass, Register new_obj,
                                       Register t1, Register t2,
                                       bool clear_fields, Label& alloc_failed)
{
  Label done, initialize_header, initialize_object, slow_case, slow_case_no_pop;
  Register layout_size = t1;
  assert(new_obj == rax, "needs to be rax");
  assert_different_registers(klass, new_obj, t1, t2);

  // get instance_size in InstanceKlass (scaled to a count of bytes)
  movl(layout_size, Address(klass, Klass::layout_helper_offset()));
  // test to see if it is malformed in some way
  testl(layout_size, Klass::_lh_instance_slow_path_bit);
  jcc(Assembler::notZero, slow_case_no_pop);

  // Allocate the instance:
  //  If TLAB is enabled:
  //    Try to allocate in the TLAB.
  //    If fails, go to the slow path.
  //  Else If inline contiguous allocations are enabled:
  //    Try to allocate in eden.
  //    If fails due to heap end, go to slow path.
  //
  //  If TLAB is enabled OR inline contiguous is enabled:
  //    Initialize the allocation.
  //    Exit.
  //
  //  Go to slow path.

  // klass is pushed so the slow path can restore it (popped at slow_case).
  push(klass);
  if (UseTLAB) {
    tlab_allocate(new_obj, layout_size, 0, klass, t2, slow_case);
    if (ZeroTLAB || (!clear_fields)) {
      // the fields have been already cleared
      jmp(initialize_header);
    } else {
      // initialize both the header and fields
      jmp(initialize_object);
    }
  } else {
    jmp(slow_case);
  }

  // If UseTLAB is true, the object is created above and there is an initialize need.
  // Otherwise, skip and go to the slow path.
  if (UseTLAB) {
    if (clear_fields) {
      // The object is initialized before the header. If the object size is
      // zero, go directly to the header initialization.
      bind(initialize_object);
      // Reduce layout_size to the number of body bytes to clear.
      if (UseCompactObjectHeaders) {
        assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned");
        decrement(layout_size, oopDesc::base_offset_in_bytes());
      } else {
        decrement(layout_size, sizeof(oopDesc));
      }
      jcc(Assembler::zero, initialize_header);

      // Initialize topmost object field, divide size by 8, check if odd and
      // test if zero.
      Register zero = klass;
      xorl(zero, zero); // use zero reg to clear memory (shorter code)
      shrl(layout_size, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd

#ifdef ASSERT
      // make sure instance_size was multiple of 8
      Label L;
      // Ignore partial flag stall after shrl() since it is debug VM
      jcc(Assembler::carryClear, L);
      stop("object size is not multiple of 2 - adjust this code");
      bind(L);
      // must be > 0, no extra check needed here
#endif

      // initialize remaining object fields: instance_size was a multiple of 8
      {
        Label loop;
        bind(loop);
        int header_size_bytes = oopDesc::header_size() * HeapWordSize;
        assert(is_aligned(header_size_bytes, BytesPerLong), "oop header size must be 8-byte-aligned");
        // Store 8 zero bytes per iteration, walking down from the top of the object.
        movptr(Address(new_obj, layout_size, Address::times_8, header_size_bytes - 1*oopSize), zero);
        decrement(layout_size);
        jcc(Assembler::notZero, loop);
      }
    } // clear_fields

    // initialize object header only.
    bind(initialize_header);
    if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) {
      // Mark word comes from the klass prototype header (it carries
      // klass/layout information in these configurations).
      pop(klass);
      Register mark_word = t2;
      movptr(mark_word, Address(klass, Klass::prototype_header_offset()));
      movptr(Address(new_obj, oopDesc::mark_offset_in_bytes ()), mark_word);
    } else {
      movptr(Address(new_obj, oopDesc::mark_offset_in_bytes()),
             (intptr_t)markWord::prototype().value()); // header
      pop(klass);   // get saved klass back in the register.
    }
    if (!UseCompactObjectHeaders) {
      xorl(rsi, rsi);                 // use zero reg to clear memory (shorter code)
      store_klass_gap(new_obj, rsi);  // zero klass gap for compressed oops
      movptr(t2, klass);              // preserve klass
      store_klass(new_obj, t2, rscratch1); // src klass reg is potentially compressed
    }
    jmp(done);
  }

  bind(slow_case);
  pop(klass);
  bind(slow_case_no_pop);
  jmp(alloc_failed);

  bind(done);
}
3749
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Thin wrapper: delegates TLAB bump-pointer allocation to the active GC's
// BarrierSetAssembler; jumps to slow_case when the TLAB cannot satisfy
// the request.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
}
3760
3761 RegSet MacroAssembler::call_clobbered_gp_registers() {
3762 RegSet regs;
3763 regs += RegSet::of(rax, rcx, rdx);
3764 #ifndef _WINDOWS
3765 regs += RegSet::of(rsi, rdi);
3766 #endif
3767 regs += RegSet::range(r8, r11);
3768 if (UseAPX) {
3769 regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1));
3933 xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
3934 if (UseIncDec) {
3935 shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
3936 } else {
3937 shrptr(index, 2); // use 2 instructions to avoid partial flag stall
3938 shrptr(index, 1);
3939 }
3940
3941 // initialize remaining object fields: index is a multiple of 2 now
3942 {
3943 Label loop;
3944 bind(loop);
3945 movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
3946 decrement(index);
3947 jcc(Assembler::notZero, loop);
3948 }
3949
3950 bind(done);
3951 }
3952
3953 void MacroAssembler::inline_layout_info(Register holder_klass, Register index, Register layout_info) {
3954 movptr(layout_info, Address(holder_klass, InstanceKlass::inline_layout_info_array_offset()));
3955 #ifdef ASSERT
3956 {
3957 Label done;
3958 cmpptr(layout_info, 0);
3959 jcc(Assembler::notEqual, done);
3960 stop("inline_layout_info_array is null");
3961 bind(done);
3962 }
3963 #endif
3964
3965 InlineLayoutInfo array[2];
3966 int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements
3967 if (is_power_of_2(size)) {
3968 shll(index, log2i_exact(size)); // Scale index by power of 2
3969 } else {
3970 imull(index, index, size); // Scale the index to be the entry index * array_element_size
3971 }
3972 lea(layout_info, Address(layout_info, index, Address::times_1, Array<InlineLayoutInfo>::base_offset_in_bytes()));
3973 }
3974
3975 // Look up the method for a megamorphic invokeinterface call.
3976 // The target method is determined by <intf_klass, itable_index>.
3977 // The receiver klass is in recv_klass.
3978 // On success, the result will be in method_result, and execution falls through.
3979 // On failure, execution transfers to the given label.
3980 void MacroAssembler::lookup_interface_method(Register recv_klass,
3981 Register intf_klass,
3982 RegisterOrConstant itable_index,
3983 Register method_result,
3984 Register scan_temp,
3985 Label& L_no_such_interface,
3986 bool return_method) {
3987 assert_different_registers(recv_klass, intf_klass, scan_temp);
3988 assert_different_registers(method_result, intf_klass, scan_temp);
3989 assert(recv_klass != method_result || !return_method,
3990 "recv_klass can be destroyed when method isn't needed");
3991
3992 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3993 "caller must use same register for non-constant itable index as for method");
3994
5005 } else {
5006 Label L;
5007 jccb(negate_condition(cc), L);
5008 movl(dst, src);
5009 bind(L);
5010 }
5011 }
5012
5013 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
5014 if (VM_Version::supports_cmov()) {
5015 cmovl(cc, dst, src);
5016 } else {
5017 Label L;
5018 jccb(negate_condition(cc), L);
5019 movl(dst, src);
5020 bind(L);
5021 }
5022 }
5023
5024 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
5025 if (!VerifyOops || VerifyAdapterSharing) {
5026 // Below address of the code string confuses VerifyAdapterSharing
5027 // because it may differ between otherwise equivalent adapters.
5028 return;
5029 }
5030
5031 BLOCK_COMMENT("verify_oop {");
5032 push(rscratch1);
5033 push(rax); // save rax
5034 push(reg); // pass register argument
5035
5036 // Pass register number to verify_oop_subroutine
5037 const char* b = nullptr;
5038 {
5039 ResourceMark rm;
5040 stringStream ss;
5041 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
5042 b = code_string(ss.as_string());
5043 }
5044 AddressLiteral buffer((address) b, external_word_Relocation::spec_for_immediate());
5045 pushptr(buffer.addr(), rscratch1);
5046
5047 // call indirectly to solve generation ordering problem
5048 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
5049 call(rax);
5265 // or something else. Since this is a slow path, we can optimize for code density,
5266 // and just restart the search from the beginning.
5267 jmpb(L_restart);
5268
5269 // Counter updates:
5270
5271 // Increment polymorphic counter instead of receiver slot.
5272 bind(L_polymorphic);
5273 movptr(offset, poly_count_offset);
5274 jmpb(L_count_update);
5275
5276 // Found a receiver, convert its slot offset to corresponding count offset.
5277 bind(L_found_recv);
5278 addptr(offset, receiver_to_count_step);
5279
5280 bind(L_count_update);
5281 addptr(Address(mdp, offset, Address::times_ptr), DataLayout::counter_increment);
5282 }
5283
5284 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
5285 if (!VerifyOops || VerifyAdapterSharing) {
5286 // Below address of the code string confuses VerifyAdapterSharing
5287 // because it may differ between otherwise equivalent adapters.
5288 return;
5289 }
5290
5291 push(rscratch1);
5292 push(rax); // save rax,
5293 // addr may contain rsp so we will have to adjust it based on the push
5294 // we just did (and on 64 bit we do two pushes)
5295 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
5296 // stores rax into addr which is backwards of what was intended.
5297 if (addr.uses(rsp)) {
5298 lea(rax, addr);
5299 pushptr(Address(rax, 2 * BytesPerWord));
5300 } else {
5301 pushptr(addr);
5302 }
5303
5304 // Pass register number to verify_oop_subroutine
5305 const char* b = nullptr;
5306 {
5307 ResourceMark rm;
5308 stringStream ss;
5309 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
5663
// Load the java.lang.Class mirror of `method`'s holder class into `mirror`:
// method -> holder InstanceKlass -> mirror OopHandle -> resolved oop.
// `tmp` is used by resolve_oop_handle.
void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
  // get mirror
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  load_method_holder(mirror, method);
  movptr(mirror, Address(mirror, mirror_offset));
  resolve_oop_handle(mirror, tmp);
}
5671
// Load the ClassLoaderData of `rmethod`'s holder class into `rresult`.
void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
  load_method_holder(rresult, rmethod);
  movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
}
5676
// Load the InstanceKlass holding `method` into `holder` by chasing
// Method* -> ConstMethod* -> ConstantPool* -> pool holder.
void MacroAssembler::load_method_holder(Register holder, Register method) {
  movptr(holder, Address(method, Method::const_offset()));          // ConstMethod*
  movptr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
  movptr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
}
5682
// Load the (still narrow/compressed) klass word of object `src` into `dst`,
// from the mark word under compact headers, else from the klass field.
// The value is NOT decoded; see load_klass for the decoded form.
void MacroAssembler::load_metadata(Register dst, Register src) {
  if (UseCompactObjectHeaders) {
    load_narrow_klass_compact(dst, src);
  } else {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  }
}
5690
// Extract the narrow klass pointer from the mark word of object `src`
// (compact object headers only): load mark, shift the klass bits down.
void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
  assert(UseCompactObjectHeaders, "expect compact object headers");
  movq(dst, Address(src, oopDesc::mark_offset_in_bytes()));
  shrq(dst, markWord::klass_shift);
}
5696
5697 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
5698 assert_different_registers(src, tmp);
5699 assert_different_registers(dst, tmp);
5700
5701 if (UseCompactObjectHeaders) {
5702 load_narrow_klass_compact(dst, src);
5703 decode_klass_not_null(dst, tmp);
5704 } else {
5705 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5706 decode_klass_not_null(dst, tmp);
5707 }
5708 }
5709
// Load the prototype mark word of `src`'s klass into `dst`.
// Clobbers dst; tmp is scratch for load_klass.
void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) {
  load_klass(dst, src, tmp);
  movptr(dst, Address(dst, Klass::prototype_header_offset()));
}
5714
// Store the klass `src` into object `dst`'s klass field, encoding it to
// narrow form first. Not valid with compact headers (klass lives in the
// mark word there). NOTE: clobbers src via encode_klass_not_null.
void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
  assert(!UseCompactObjectHeaders, "not with compact headers");
  assert_different_registers(src, tmp);
  assert_different_registers(dst, tmp);
  encode_klass_not_null(src, tmp);
  movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
5722
// Compare the narrow klass in register `klass` with the klass of object
// `obj`, setting flags for a subsequent jcc. With compact headers the
// object's narrow klass is first extracted into `tmp`.
void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
  if (UseCompactObjectHeaders) {
    assert(tmp != noreg, "need tmp");
    assert_different_registers(klass, obj, tmp);
    load_narrow_klass_compact(tmp, obj);
    cmpl(klass, tmp);
  } else {
    cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
  }
}
5733
5734 void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2) {
5751 bool as_raw = (decorators & AS_RAW) != 0;
5752 if (as_raw) {
5753 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1);
5754 } else {
5755 bs->load_at(this, decorators, type, dst, src, tmp1);
5756 }
5757 }
5758
5759 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
5760 Register tmp1, Register tmp2, Register tmp3) {
5761 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
5762 decorators = AccessInternal::decorator_fixup(decorators, type);
5763 bool as_raw = (decorators & AS_RAW) != 0;
5764 if (as_raw) {
5765 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5766 } else {
5767 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5768 }
5769 }
5770
// Copy a flat (inline-type) field from src to dst via the GC's
// BarrierSetAssembler, using inline_layout_info to describe the layout.
void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst,
                                     Register inline_layout_info) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->flat_field_copy(this, decorators, src, dst, inline_layout_info);
}
5776
// Load into `offset` the payload offset of the given InlineKlass,
// read through its adr_members indirection.
void MacroAssembler::payload_offset(Register inline_klass, Register offset) {
  movptr(offset, Address(inline_klass, InlineKlass::adr_members_offset()));
  movl(offset, Address(offset, InlineKlass::payload_offset_offset()));
}
5781
// Compute the address of the payload of value object `oop` into `data`.
// When data aliases oop, rscratch1 holds the offset and oop is adjusted
// in place; otherwise a single lea forms the address.
void MacroAssembler::payload_addr(Register oop, Register data, Register inline_klass) {
  // ((address) (void*) o) + vk->payload_offset();
  Register offset = (data == oop) ? rscratch1 : data;
  payload_offset(inline_klass, offset);
  if (data == oop) {
    addptr(data, offset);
  } else {
    lea(data, Address(oop, offset));
  }
}
5792
// Compute into `data` the address of element `index` of flat value array
// `array`, scaling by the element size taken from the klass layout helper.
// Clobbers rcx (holds the shift count) and `index` (shifted in place).
void MacroAssembler::data_for_value_array_index(Register array, Register array_klass,
                                                Register index, Register data) {
  assert(index != rcx, "index needs to shift by rcx");
  assert_different_registers(array, array_klass, index);
  assert_different_registers(rcx, array, index);

  // array->base() + (index << Klass::layout_helper_log2_element_size(lh));
  movl(rcx, Address(array_klass, Klass::layout_helper_offset()));

  // Klass::layout_helper_log2_element_size(lh)
  // (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask;
  shrl(rcx, Klass::_lh_log2_element_size_shift);
  andl(rcx, Klass::_lh_log2_element_size_mask);
  shlptr(index); // index << rcx

  lea(data, Address(array, index, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_FLAT_ELEMENT)));
}
5810
// Load (and decompress if needed) a heap oop from src into dst via the
// GC access machinery; `decorators` are merged with IN_HEAP.
void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1);
}
5814
// Doesn't do verification, generates fixed size code
// Like load_heap_oop but with IS_NOT_NULL: the stored value is known non-null.
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1);
}
5819
// Store heap oop `val` to dst via the GC access machinery (applies any
// required write barriers); tmp1-tmp3 are scratch for the barrier code.
void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
                                    Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
}
5824
// Used for storing nulls.
// noreg as the value signals a null store to the access machinery.
void MacroAssembler::store_heap_oop_null(Address dst) {
  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
}
5829
5830 void MacroAssembler::store_klass_gap(Register dst, Register src) {
6153 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
6154 int klass_index = oop_recorder()->find_index(k);
6155 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
6156 Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
6157 }
6158
// Reload r12_heapbase with the compressed-oops base. When the heap is
// already initialized (and we are not dumping the AOT code cache) the base
// is a known constant and can be materialized directly (xor for a null
// base); otherwise it must be loaded indirectly from CompressedOops'
// base cell so the value is picked up at run time.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    if (Universe::heap() != nullptr && !AOTCodeCache::is_on_for_dump()) {
      if (CompressedOops::base() == nullptr) {
        MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
      } else {
        mov64(r12_heapbase, (int64_t)CompressedOops::base());
      }
    } else {
      movptr(r12_heapbase, ExternalAddress(CompressedOops::base_addr()));
    }
  }
}
6172
// Emit code that buffers an inline type returned in registers: allocate a
// buffered instance (TLAB fast path), initialize its header, then call the
// klass's pack handler to store the field registers into it. On allocation
// failure, falls back to the store_inline_type_fields_to_buf runtime stub.
// `vk` is non-null when called from C1 with a statically known return type;
// when null (interpreter case) RAX carries (InlineKlass* | 0x01).
// Returns the code offset of the fallback call for from_interpreter == false,
// -1 otherwise.
int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
  assert(InlineTypeReturnedAsFields, "Inline types should never be returned as fields");
  // An inline type might be returned. If fields are in registers we
  // need to allocate an inline type instance and initialize it with
  // the value of the fields.
  Label skip;
  // We only need a new buffered inline type if a new one is not returned
  testptr(rax, 1);
  jcc(Assembler::zero, skip);
  int call_offset = -1;

#ifdef _LP64
  // The following code is similar to allocate_instance but has some slight differences,
  // e.g. object size is always not zero, sometimes it's constant; storing klass ptr after
  // allocating is not necessary if vk != nullptr, etc. allocate_instance is not aware of these.
  Label slow_case;
  // 1. Try to allocate a new buffered inline instance either from TLAB or eden space
  mov(rscratch1, rax); // save rax for slow_case since *_allocate may corrupt it when allocation failed
  if (vk != nullptr) {
    // Called from C1, where the return type is statically known.
    movptr(rbx, (intptr_t)vk->get_InlineKlass());
    jint lh = vk->layout_helper();
    assert(lh != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
    if (UseTLAB && !Klass::layout_helper_needs_slow_path(lh)) {
      tlab_allocate(rax, noreg, lh, r13, r14, slow_case);
    } else {
      jmp(slow_case);
    }
  } else {
    // Call from interpreter. RAX contains ((the InlineKlass* of the return type) | 0x01)
    mov(rbx, rax);
    andptr(rbx, -2); // strip the tag bit to recover the InlineKlass*
    if (UseTLAB) {
      movl(r14, Address(rbx, Klass::layout_helper_offset()));
      testl(r14, Klass::_lh_instance_slow_path_bit);
      jcc(Assembler::notZero, slow_case);
      tlab_allocate(rax, r14, 0, r13, r14, slow_case);
    } else {
      jmp(slow_case);
    }
  }
  if (UseTLAB) {
    // 2. Initialize buffered inline instance header
    Register buffer_obj = rax;
    Register klass = rbx;
    if (UseCompactObjectHeaders) {
      // Header (including klass bits) comes from the klass prototype header.
      Register mark_word = r13;
      movptr(mark_word, Address(klass, Klass::prototype_header_offset()));
      movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), mark_word);
    } else {
      movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::inline_type_prototype().value());
      xorl(r13, r13);
      store_klass_gap(buffer_obj, r13);
      if (vk == nullptr) {
        // store_klass corrupts rbx(klass), so save it in r13 for later use (interpreter case only).
        mov(r13, klass);
      }
      store_klass(buffer_obj, klass, rscratch1);
      klass = r13;
    }
    // 3. Initialize its fields with an inline class specific handler
    if (vk != nullptr) {
      call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
    } else {
      movptr(rbx, Address(klass, InlineKlass::adr_members_offset()));
      movptr(rbx, Address(rbx, InlineKlass::pack_handler_offset()));
      call(rbx);
    }
    jmp(skip);
  }
  bind(slow_case);
  // We failed to allocate a new inline type, fall back to a runtime
  // call. Some oop field may be live in some registers but we can't
  // tell. That runtime call will take care of preserving them
  // across a GC if there's one.
  mov(rax, rscratch1);
#endif

  if (from_interpreter) {
    super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
  } else {
    call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
    call_offset = offset();
  }

  bind(skip);
  return call_offset;
}
6261
// Move a value between registers/stack slots and update the reg_state
// Handles all four direction combinations (reg->reg, reg->stack,
// stack->reg, stack->stack; the last goes through r13). XMM registers are
// used for T_FLOAT/T_DOUBLE, GPRs for everything else. Returns true if the
// destination was (or already had been) written; false if the destination
// is still read-only and the move must be retried later.
bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
  assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
  if (reg_state[to->value()] == reg_written) {
    return true; // Already written
  }
  if (from != to && bt != T_VOID) {
    if (reg_state[to->value()] == reg_readonly) {
      return false; // Not yet writable
    }
    if (from->is_reg()) {
      if (to->is_reg()) {
        if (from->is_XMMRegister()) {
          if (bt == T_DOUBLE) {
            movdbl(to->as_XMMRegister(), from->as_XMMRegister());
          } else {
            assert(bt == T_FLOAT, "must be float");
            movflt(to->as_XMMRegister(), from->as_XMMRegister());
          }
        } else {
          movq(to->as_Register(), from->as_Register());
        }
      } else {
        // Register -> stack slot. wordSize skips the saved return address.
        int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        Address to_addr = Address(rsp, st_off);
        if (from->is_XMMRegister()) {
          if (bt == T_DOUBLE) {
            movdbl(to_addr, from->as_XMMRegister());
          } else {
            assert(bt == T_FLOAT, "must be float");
            movflt(to_addr, from->as_XMMRegister());
          }
        } else {
          movq(to_addr, from->as_Register());
        }
      }
    } else {
      Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize);
      if (to->is_reg()) {
        if (to->is_XMMRegister()) {
          if (bt == T_DOUBLE) {
            movdbl(to->as_XMMRegister(), from_addr);
          } else {
            assert(bt == T_FLOAT, "must be float");
            movflt(to->as_XMMRegister(), from_addr);
          }
        } else {
          movq(to->as_Register(), from_addr);
        }
      } else {
        // Stack slot -> stack slot, bounced through r13.
        int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        movq(r13, from_addr);
        movq(Address(rsp, st_off), r13);
      }
    }
  }
  // Update register states
  reg_state[from->value()] = reg_writable;
  reg_state[to->value()] = reg_written;
  return true;
}
6323
// Calculate the extra stack space required for packing or unpacking inline
// args and adjust the stack pointer (see MacroAssembler::remove_frame).
// Returns the total frame growth in bytes (extension + saved rbp).
// NOTE(review): the assert implies callers always pass args_on_stack > 0 —
// confirm against call sites.
int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
  int sp_inc = args_on_stack * VMRegImpl::stack_slot_size;
  sp_inc = align_up(sp_inc, StackAlignmentInBytes);
  assert(sp_inc > 0, "sanity");
  // Two additional slots to account for return address
  sp_inc += 2 * VMRegImpl::stack_slot_size;

  push(rbp);
  subptr(rsp, sp_inc);
#ifdef ASSERT
  // Poison the two return-address slots so stale reads are caught in debug builds.
  movl(Address(rsp, 0), badRegWordVal);
  movl(Address(rsp, VMRegImpl::stack_slot_size), badRegWordVal);
#endif
  return sp_inc + wordSize; // account for rbp space
}
6341
// Read all fields from an inline type buffer and store the field values in registers/stack slots.
// `from` holds the buffered oop (register or stack slot); destinations are
// streamed from the scalarized signature. Destinations still marked
// reg_readonly are skipped and the call reports done=false so the caller
// can retry after they free up. For nullable arguments (an entry with
// _offset == -1 marks the null-marker slot) a null check is emitted and,
// when the buffer is null, all destinations are zeroed to produce the
// canonical flat-null representation. Returns true once every destination
// has been written. Uses r10/r13 as scratch (r14 is reserved for spilling).
bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
                                          VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
                                          RegState reg_state[]) {
  assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
  assert(from->is_valid(), "source must be valid");
  bool progress = false;
#ifdef ASSERT
  const int start_offset = offset();
#endif

  Label L_null, L_notNull;
  // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for)
  Register tmp1 = r10;
  Register tmp2 = r13;
  Register fromReg = noreg;
  ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, true);
  bool done = true;
  bool mark_done = true;
  VMReg toReg;
  BasicType bt;
  // Check if argument requires a null check
  bool null_check = false;
  VMReg nullCheckReg;
  while (stream.next(nullCheckReg, bt)) {
    if (sig->at(stream.sig_index())._offset == -1) {
      null_check = true;
      break;
    }
  }
  stream.reset(sig_index, to_index);
  while (stream.next(toReg, bt)) {
    assert(toReg->is_valid(), "destination must be valid");
    int idx = (int)toReg->value();
    if (reg_state[idx] == reg_readonly) {
      // Destination still holds a live input value; retry later.
      if (idx != from->value()) {
        mark_done = false;
      }
      done = false;
      continue;
    } else if (reg_state[idx] == reg_written) {
      continue;
    }
    assert(reg_state[idx] == reg_writable, "must be writable");
    reg_state[idx] = reg_written;
    progress = true;

    if (fromReg == noreg) {
      // Materialize the source buffer oop once, on the first field written.
      if (from->is_reg()) {
        fromReg = from->as_Register();
      } else {
        int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        movq(tmp1, Address(rsp, st_off));
        fromReg = tmp1;
      }
      if (null_check) {
        // Nullable inline type argument, emit null check
        testptr(fromReg, fromReg);
        jcc(Assembler::zero, L_null);
      }
    }
    int off = sig->at(stream.sig_index())._offset;
    if (off == -1) {
      // Null-marker slot: buffer is known non-null here, so write 1.
      assert(null_check, "Missing null check at");
      if (toReg->is_stack()) {
        int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        movq(Address(rsp, st_off), 1);
      } else {
        movq(toReg->as_Register(), 1);
      }
      continue;
    }
    if (sig->at(stream.sig_index())._vt_oop) {
      // Pass the buffer oop itself through unchanged.
      if (toReg->is_stack()) {
        int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        movq(Address(rsp, st_off), fromReg);
      } else {
        movq(toReg->as_Register(), fromReg);
      }
      continue;
    }
    assert(off > 0, "offset in object should be positive");
    Address fromAddr = Address(fromReg, off);
    if (!toReg->is_XMMRegister()) {
      Register dst = toReg->is_stack() ? tmp2 : toReg->as_Register();
      if (is_reference_type(bt)) {
        load_heap_oop(dst, fromAddr);
      } else {
        bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
        load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
      }
      if (toReg->is_stack()) {
        int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        movq(Address(rsp, st_off), dst);
      }
    } else if (bt == T_DOUBLE) {
      movdbl(toReg->as_XMMRegister(), fromAddr);
    } else {
      assert(bt == T_FLOAT, "must be float");
      movflt(toReg->as_XMMRegister(), fromAddr);
    }
  }
  if (progress && null_check) {
    if (done) {
      jmp(L_notNull);
      bind(L_null);
      // Set null marker to zero to signal that the argument is null.
      // Also set all fields to zero since the runtime requires a canonical
      // representation of a flat null.
      stream.reset(sig_index, to_index);
      while (stream.next(toReg, bt)) {
        if (toReg->is_stack()) {
          int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
          movq(Address(rsp, st_off), 0);
        } else if (toReg->is_XMMRegister()) {
          xorps(toReg->as_XMMRegister(), toReg->as_XMMRegister());
        } else {
          xorl(toReg->as_Register(), toReg->as_Register());
        }
      }
      bind(L_notNull);
    } else {
      // Not done yet: L_null is rebound on the next pass; just resolve it here.
      bind(L_null);
    }
  }

  sig_index = stream.sig_index();
  to_index = stream.regs_index();

  if (mark_done && reg_state[from->value()] != reg_written) {
    // This is okay because no one else will write to that slot
    reg_state[from->value()] = reg_writable;
  }
  from_index--;
  assert(progress || (start_offset == offset()), "should not emit code");
  return done;
}
6479
// Pack the scalarized fields of one inline type argument (a run of entries in
// 'sig' starting at the T_METADATA delimiter at 'sig_index', with sources in
// 'from') into a buffered value object, and place the buffer oop in 'to'.
// 'val_array' holds an oop array of pre-allocated buffers; the buffer for this
// argument is at index 'vtarg_index'. 'reg_state' tracks which argument
// registers/slots have been read (reg_readonly), consumed (reg_writable), or
// filled with their final value (reg_written).
// Returns true if the argument was packed (or had already been written),
// false if 'to' is still reg_readonly and the caller must retry later.
bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
                                        VMRegPair* from, int from_count, int& from_index, VMReg to,
                                        RegState reg_state[], Register val_array) {
  assert(sig->at(sig_index)._bt == T_METADATA, "should be at delimiter");
  assert(to->is_valid(), "destination must be valid");

  if (reg_state[to->value()] == reg_written) {
    // Destination already holds its final value; just advance the cursors.
    skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
    return true; // Already written
  }

  // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for).
  Register val_obj_tmp = r11;
  Register from_reg_tmp = r14;
  Register tmp1 = r10;
  Register tmp2 = r13;
  Register tmp3 = rbx;
  Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();

  assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);

  if (reg_state[to->value()] == reg_readonly) {
    if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
      // 'to' still holds an unread argument that is not one of our own source
      // fields, so we cannot clobber it yet.
      skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
      return false; // Not yet writable
    }
    // 'to' is one of our own sources: build the result in a temp register and
    // move it into 'to' at the end (by then 'to' will have been consumed).
    val_obj = val_obj_tmp;
  }

  ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
  VMReg fromReg;
  BasicType bt;
  Label L_null;
  while (stream.next(fromReg, bt)) {
    assert(fromReg->is_valid(), "source must be valid");
    // The source has been consumed; it may be overwritten by later shuffling.
    reg_state[fromReg->value()] = reg_writable;

    int off = sig->at(stream.sig_index())._offset;
    if (off == -1) {
      // Nullable inline type argument, emit null check
      // (a set low bit in the marker means the argument is non-null).
      Label L_notNull;
      if (fromReg->is_stack()) {
        // NOTE(review): '+ wordSize' presumably skips the return address slot
        // pushed above the incoming stack arguments — confirm against caller.
        int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        testb(Address(rsp, ld_off), 1);
      } else {
        testb(fromReg->as_Register(), 1);
      }
      jcc(Assembler::notZero, L_notNull);
      // Null argument: result is the null oop, skip packing of remaining fields.
      movptr(val_obj, 0);
      jmp(L_null);
      bind(L_notNull);
      continue;
    }
    if (sig->at(stream.sig_index())._vt_oop) {
      // buffer argument: use if non null
      if (fromReg->is_stack()) {
        int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        movptr(val_obj, Address(rsp, ld_off));
      } else {
        movptr(val_obj, fromReg->as_Register());
      }
      testptr(val_obj, val_obj);
      jcc(Assembler::notEqual, L_null);
      // otherwise get the buffer from the just allocated pool of buffers
      // (element 'vtarg_index' of the oop array in 'val_array').
      int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_OBJECT);
      load_heap_oop(val_obj, Address(val_array, index));
      continue;
    }

    assert(off > 0, "offset in object should be positive");
    // Oops and other non-primitive values occupy a full word.
    size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;

    // Pack the scalarized field into the value object.
    Address dst(val_obj, off);
    if (!fromReg->is_XMMRegister()) {
      Register src;
      if (fromReg->is_stack()) {
        // Stack source: load it into a temp register first.
        src = from_reg_tmp;
        int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
        load_sized_value(src, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
      } else {
        src = fromReg->as_Register();
      }
      assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
      if (is_reference_type(bt)) {
        // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep val_obj valid.
        mov(tmp3, val_obj);
        Address dst_with_tmp3(tmp3, off);
        store_heap_oop(dst_with_tmp3, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
      } else {
        store_sized_value(dst, src, size_in_bytes);
      }
    } else if (bt == T_DOUBLE) {
      movdbl(dst, fromReg->as_XMMRegister());
    } else {
      assert(bt == T_FLOAT, "must be float");
      movflt(dst, fromReg->as_XMMRegister());
    }
  }
  // val_obj now holds either the buffer oop or null (nullable argument case).
  bind(L_null);
  sig_index = stream.sig_index();
  from_index = stream.regs_index();

  assert(reg_state[to->value()] == reg_writable, "must have already been read");
  // Move the buffer oop into its final location (register or stack slot).
  bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);
  assert(success, "to register must be writeable");
  return true;
}
6588
6589 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
6590 return reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
6591 }
6592
6593 void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
6594 assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
6595 if (needs_stack_repair) {
6596 // The method has a scalarized entry point (where fields of value object arguments
6597 // are passed through registers and stack), and a non-scalarized entry point (where
6598 // value object arguments are given as oops). The non-scalarized entry point will
6599 // first load each field of value object arguments and store them in registers and on
6600 // the stack in a way compatible with the scalarized entry point. To do so, some extra
6601 // stack space might be reserved (if argument registers are not enough). On leaving the
6602 // method, this space must be freed.
6603 //
6604 // In case we used the non-scalarized entry point the stack looks like this:
6605 //
6606 // | Arguments from caller |
6607 // |---------------------------| <-- caller's SP
6608 // | Return address #1 |
6609 // | Saved RBP #1 |
6610 // |---------------------------|
6611 // | Extension space for |
6612 // | inline arg (un)packing |
6613 // |---------------------------| <-- start of this method's frame
6614 // | Return address #2 |
6615 // | Saved RBP #2 |
6616 // |---------------------------| <-- RBP (with -XX:+PreserveFramePointer)
6617 // | sp_inc |
6618 // | method locals |
6619 // |---------------------------| <-- SP
6620 //
6621 // Space for the return pc and saved rbp is reserved twice. But only the #1 copies
6622 // contain the real values of return pc and saved rbp. The #2 copies are not reliable
6623 // and should not be used. They are mostly needed to add space between the extension
6624 // space and the locals, as there would be between the real arguments and the locals
6625 // if we don't need to do unpacking (from the scalarized entry point).
6626 //
6627 // When leaving, one must load RBP #1 into RBP, and use the copy #1 of the return address,
6628 // while keeping in mind that from the scalarized entry point, there will be only one
6629 // copy. Indeed, in the case we used the scalarized calling convention, the stack looks like this:
6630 //
6631 // | Arguments from caller |
6632 // |---------------------------| <-- caller's SP
6633 // | Return address |
6634 // | Saved RBP |
6635 // |---------------------------| <-- FP (with -XX:+PreserveFramePointer)
6636 // | sp_inc |
6637 // | method locals |
6638 // |---------------------------| <-- SP
6639 //
6640 // The sp_inc stack slot holds the total size of the frame, including the extension
6641 // space and copies #2 of the return address and the saved RBP (but never the copies
6642 // #1 of the return address and saved RBP). That is how to find the copies #1 of the
6643 // return address and saved rbp. This size is expressed in bytes. Be careful when using
6644 // it from C++ in pointer arithmetic you might need to divide it by wordSize.
6645
6646 // The stack increment resides just below the saved rbp
6647 addq(rsp, Address(rsp, initial_framesize - wordSize));
6648 pop(rbp);
6649 } else {
6650 if (initial_framesize > 0) {
6651 addq(rsp, initial_framesize);
6652 }
6653 pop(rbp);
6654 }
6655 }
6656
6657 #if COMPILER2_OR_JVMCI
6658
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
// Fills every qword with 'val'; 'xtmp' is clobbered as the vector fill pattern
// and 'mask' is used for AVX-512 masked tail stores.
void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, KRegister mask) {
  // cnt - number of qwords (8-byte words).
  // base - start address, qword aligned.
  Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
  bool use64byteVector = (MaxVectorSize == 64) && (CopyAVX3Threshold == 0);
  // Broadcast 'val' into every qword lane of xtmp (512, 256 or 128 bits wide).
  if (use64byteVector) {
    evpbroadcastq(xtmp, val, AVX_512bit);
  } else if (MaxVectorSize >= 32) {
    movdq(xtmp, val);
    punpcklqdq(xtmp, xtmp);
    vinserti128_high(xtmp, xtmp);
  } else {
    movdq(xtmp, val);
    punpcklqdq(xtmp, xtmp);
  }
  jmp(L_zero_64_bytes);

  // Main loop: store 64 bytes (8 qwords) per iteration.
  BIND(L_loop);
  if (MaxVectorSize >= 32) {
    fill64(base, 0, xtmp, use64byteVector);
  } else {
    movdqu(Address(base, 0), xtmp);
    movdqu(Address(base, 16), xtmp);
    movdqu(Address(base, 32), xtmp);
    movdqu(Address(base, 48), xtmp);
  }
  addptr(base, 64);

  // Loop while at least 8 qwords remain (cnt is left biased by -8 on exit).
  BIND(L_zero_64_bytes);
  subptr(cnt, 8);
  jccb(Assembler::greaterEqual, L_loop);

  // Copy trailing 64 bytes
  if (use64byteVector) {
    // Restore the remaining count (0..7 qwords); a single masked 64-byte
    // store covers the entire tail.
    addptr(cnt, 8);
    jccb(Assembler::equal, L_end);
    fill64_masked(3, base, 0, xtmp, mask, cnt, val, true);
    jmp(L_end);
  } else {
    // cnt + 8 qwords remain; after this addptr, cnt >= 0 iff >= 4 remain.
    addptr(cnt, 4);
    jccb(Assembler::less, L_tail);
    // Store 32 bytes (4 qwords).
    if (MaxVectorSize >= 32) {
      vmovdqu(Address(base, 0), xtmp);
    } else {
      movdqu(Address(base, 0), xtmp);
      movdqu(Address(base, 16), xtmp);
    }
  }
  // NOTE: in the use64byteVector case these two instructions are emitted but
  // unreachable (the jmp above always goes straight to L_end).
  addptr(base, 32);
  subptr(cnt, 4);

  // Tail: after the addptr below, cnt holds the remaining qword count (0..3).
  BIND(L_tail);
  addptr(cnt, 4);
  jccb(Assembler::lessEqual, L_end);
  if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
    // Single masked 32-byte store for the last 1..3 qwords.
    fill32_masked(3, base, 0, xtmp, mask, cnt, val);
  } else {
    // Scalar loop: store the remaining qwords one at a time.
    decrement(cnt);

    BIND(L_sloop);
    movq(Address(base, 0), xtmp);
    addptr(base, 8);
    decrement(cnt);
    jccb(Assembler::greaterEqual, L_sloop);
  }
  BIND(L_end);
}
6727
6728 // Clearing constant sized memory using YMM/ZMM registers.
6729 void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
6730 assert(UseAVX > 2 && VM_Version::supports_avx512vl(), "");
6731 bool use64byteVector = (MaxVectorSize > 32) && (CopyAVX3Threshold == 0);
6732
6733 int vector64_count = (cnt & (~0x7)) >> 3;
6734 cnt = cnt & 0x7;
6735 const int fill64_per_loop = 4;
6797 break;
6798 case 7:
6799 if (use64byteVector) {
6800 movl(rtmp, 0x7F);
6801 kmovwl(mask, rtmp);
6802 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
6803 } else {
6804 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
6805 movl(rtmp, 0x7);
6806 kmovwl(mask, rtmp);
6807 evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
6808 }
6809 break;
6810 default:
6811 fatal("Unexpected length : %d\n",cnt);
6812 break;
6813 }
6814 }
6815 }
6816
// Clear (fill with 'val') 'cnt' qwords of memory starting at 'base'. Picks
// between an inline store loop (small counts), rep stosb/stosq, and
// XMM-register clearing depending on 'is_large' and VM flags.
void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp,
                               bool is_large, bool word_copy_only, KRegister mask) {
  // cnt - number of qwords (8-byte words).
  // base - start address, qword aligned.
  // is_large - if optimizers know cnt is larger than InitArrayShortSize
  // The rep-stos instructions have fixed register operands, hence the asserts
  // (the 32-bit names in the messages denote the same registers).
  assert(base==rdi, "base register must be edi for rep stos");
  assert(val==rax, "val register must be eax for rep stos");
  assert(cnt==rcx, "cnt register must be ecx for rep stos");
  assert(InitArrayShortSize % BytesPerLong == 0,
    "InitArrayShortSize should be the multiple of BytesPerLong");

  Label DONE;

  if (!is_large) {
    Label LOOP, LONG;
    // Counts above InitArrayShortSize qwords take the rep/XMM path below.
    cmpptr(cnt, InitArrayShortSize/BytesPerLong);
    jccb(Assembler::greater, LONG);

    decrement(cnt);
    jccb(Assembler::negative, DONE); // Zero length

    // Use individual pointer-sized stores for small counts:
    // (stores backwards, from index cnt-1 down to 0)
    BIND(LOOP);
    movptr(Address(base, cnt, Address::times_ptr), val);
    decrement(cnt);
    jccb(Assembler::greaterEqual, LOOP);
    jmpb(DONE);

    BIND(LONG);
  }

  // Use longer rep-prefixed ops for non-small counts:
  if (UseFastStosb && !word_copy_only) {
    // Byte-granular fill; not used when only word-sized stores are allowed.
    shlptr(cnt, 3); // convert to number of bytes
    rep_stosb();
  } else if (UseXMMForObjInit) {
    xmm_clear_mem(base, cnt, val, xtmp, mask);
  } else {
    // Qword-granular rep stos; cnt is already in qwords.
    rep_stos();
  }

  BIND(DONE);
}
6860
6861 #endif //COMPILER2_OR_JVMCI
6862
6863
6864 void MacroAssembler::generate_fill(BasicType t, bool aligned,
6865 Register to, Register value, Register count,
6866 Register rtmp, XMMRegister xtmp) {
6867 ShortBranchVerifier sbv(this);
6868 assert_different_registers(to, value, count, rtmp);
6869 Label L_exit;
6870 Label L_fill_2_bytes, L_fill_4_bytes;
6871
6872 #if defined(COMPILER2)
6873 if(MaxVectorSize >=32 &&
10752
10753 // Load top.
10754 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
10755
10756 // Check if the lock-stack is full.
10757 cmpl(top, LockStack::end_offset());
10758 jcc(Assembler::greaterEqual, slow);
10759
10760 // Check for recursion.
10761 cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
10762 jcc(Assembler::equal, push);
10763
10764 // Check header for monitor (0b10).
10765 testptr(reg_rax, markWord::monitor_value);
10766 jcc(Assembler::notZero, slow);
10767
10768 // Try to lock. Transition lock bits 0b01 => 0b00
10769 movptr(tmp, reg_rax);
10770 andptr(tmp, ~(int32_t)markWord::unlocked_value);
10771 orptr(reg_rax, markWord::unlocked_value);
10772 // Mask inline_type bit such that we go to the slow path if object is an inline type
10773 andptr(reg_rax, ~((int) markWord::inline_type_bit_in_place));
10774
10775 lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
10776 jcc(Assembler::notEqual, slow);
10777
10778 // Restore top, CAS clobbers register.
10779 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
10780
10781 bind(push);
10782 // After successful lock, push object on lock-stack.
10783 movptr(Address(thread, top), obj);
10784 incrementl(top, oopSize);
10785 movl(Address(thread, JavaThread::lock_stack_top_offset()), top);
10786 }
10787
10788 // Implements fast-unlocking.
10789 //
10790 // obj: the object to be unlocked
10791 // reg_rax: rax
10792 // thread: the thread
10793 // tmp: a temporary register
10794 void MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register tmp, Label& slow) {
|