985
986 void MacroAssembler::c2bool(Register x) {
987 // implements x == 0 ? 0 : 1
988 // note: we must only look at the least-significant byte of x,
989 // since C-style booleans are stored in a single byte only!
990 // (this was once a bug)
991 tst(x, 0xff);
992 cset(x, Assembler::NE);
993 }
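// For reference, the emitted sequence is equivalent to (register name
// illustrative only):
//   tst  x0, #0xff   // set flags from x0 & 0xff
//   cset x0, NE      // x0 = ((x0 & 0xff) != 0) ? 1 : 0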
994
995 address MacroAssembler::ic_call(address entry, jint method_index) {
996 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
997 // address const_ptr = long_constant((jlong)Universe::non_oop_word());
998 // uintptr_t offset;
999 // ldr_constant(rscratch2, const_ptr);
1000 movptr(rscratch2, (intptr_t)Universe::non_oop_word());
1001 return trampoline_call(Address(entry, rh));
1002 }
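// Note: movptr is used here (rather than the commented-out constant-pool load
// above) because it emits a fixed-length mov/movk sequence, giving the call
// site a predictable shape that inline-cache patching can rely on.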
1003
1004 int MacroAssembler::ic_check_size() {
1005 int extra_instructions = UseCompactObjectHeaders ? 1 : 0;
1006 if (target_needs_far_branch(CAST_FROM_FN_PTR(address, SharedRuntime::get_ic_miss_stub()))) {
1007 return NativeInstruction::instruction_size * (7 + extra_instructions);
1008 } else {
1009 return NativeInstruction::instruction_size * (5 + extra_instructions);
1010 }
1011 }
1012
1013 int MacroAssembler::ic_check(int end_alignment) {
1014 Register receiver = j_rarg0;
1015 Register data = rscratch2;
1016 Register tmp1 = rscratch1;
1017 Register tmp2 = r10;
1018
1019 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed
1020 // before the inline cache check, so we don't have to execute any nop instructions when dispatching
1021 // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align
1022 // before the inline cache check here, and not after.
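// Resulting layout, as a sketch:
//   [alignment nops]  <- emitted by align() below; skipped when entering at the UEP
//   UEP: inline cache check; far-jump to the ic-miss stub on mismatch
//   VEP: first instruction after the check, aligned to end_alignment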
1023 align(end_alignment, offset() + ic_check_size());
1024
1025 int uep_offset = offset();
1026
1027 if (UseCompactObjectHeaders) {
1028 load_nklass_compact(tmp1, receiver);
1029 ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
1030 cmpw(tmp1, tmp2);
1031 } else if (UseCompressedClassPointers) {
1032 ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
1033 ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
1034 cmpw(tmp1, tmp2);
1035 } else {
1036 ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
1037 ldr(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
1038 cmp(tmp1, tmp2);
1039 }
1040
1041 Label dont;
1042 br(Assembler::EQ, dont);
1043 far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1044 bind(dont);
1045 assert((offset() % end_alignment) == 0, "Misaligned verified entry point");
1046
1047 return uep_offset;
1048 }
1049
1050 // Implementation of call_VM versions
1051
4466 adrp(rscratch1, src2, offset);
4467 ldr(rscratch1, Address(rscratch1, offset));
4468 cmp(src1, rscratch1);
4469 }
4470
4471 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
4472 cmp(obj1, obj2);
4473 }
4474
4475 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4476 load_method_holder(rresult, rmethod);
4477 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4478 }
4479
4480 void MacroAssembler::load_method_holder(Register holder, Register method) {
4481 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
4482 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
4483 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
4484 }
4485
4486 // Loads the obj's narrow Klass (nklass) into dst.
4487 // Preserves all registers (including src, rscratch1 and rscratch2).
4488 // Input:
4489 // src - the oop we want to load the klass from.
4490 // dst - output narrow klass.
4491 void MacroAssembler::load_nklass_compact(Register dst, Register src) {
4492 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
4493 ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
4494 lsr(dst, dst, markWord::klass_shift);
4495 }
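// With compact object headers the narrow klass lives in the upper bits of the
// object's mark word, so the two instructions above amount to (a sketch; the
// exact bit layout is defined by markWord):
//   nklass = mark_word >> markWord::klass_shift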
4496
4497 void MacroAssembler::load_klass(Register dst, Register src) {
4498 if (UseCompactObjectHeaders) {
4499 load_nklass_compact(dst, src);
4500 decode_klass_not_null(dst);
4501 } else if (UseCompressedClassPointers) {
4502 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4503 decode_klass_not_null(dst);
4504 } else {
4505 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4506 }
4507 }
4508
4509 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
4510 if (RestoreMXCSROnJNICalls) {
4511 Label OK;
4512 get_fpcr(tmp1);
4513 mov(tmp2, tmp1);
4514 // Set FPCR to the state we need. We do want Round to Nearest. We
4515 // don't want non-IEEE rounding modes or floating-point traps.
4516 bfi(tmp1, zr, 22, 4); // Clear DN, FZ, and Rmode
4517 bfi(tmp1, zr, 8, 5); // Clear exception-control bits (8-12)
4518 bfi(tmp1, zr, 0, 2); // Clear AH:FIZ
4519 eor(tmp2, tmp1, tmp2);
4520 cbz(tmp2, OK); // Only reset FPCR if it's wrong
4521 set_fpcr(tmp1);
4537 // A null weak handle resolves to null.
4538 cbz(result, resolved);
4539
4540 // Only 64-bit platforms support GCs that require a tmp register.
4541 // WeakHandle::resolve is an indirection like jweak.
4542 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
4543 result, Address(result), tmp1, tmp2);
4544 bind(resolved);
4545 }
4546
4547 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
4548 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
4549 ldr(dst, Address(rmethod, Method::const_offset())); // note: reads the fixed rmethod register; the method parameter is unused here
4550 ldr(dst, Address(dst, ConstMethod::constants_offset()));
4551 ldr(dst, Address(dst, ConstantPool::pool_holder_offset()));
4552 ldr(dst, Address(dst, mirror_offset));
4553 resolve_oop_handle(dst, tmp1, tmp2);
4554 }
4555
4556 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
4557 assert_different_registers(oop, trial_klass, tmp);
4558 if (UseCompressedClassPointers) {
4559 if (UseCompactObjectHeaders) {
4560 load_nklass_compact(tmp, oop);
4561 } else {
4562 ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
4563 }
4564 if (CompressedKlassPointers::base() == nullptr) {
4565 cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift());
4566 return;
4567 } else if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
4568 && CompressedKlassPointers::shift() == 0) {
4569 // Only the bottom 32 bits matter
4570 cmpw(trial_klass, tmp);
4571 return;
4572 }
4573 decode_klass_not_null(tmp);
4574 } else {
4575 ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
4576 }
4577 cmp(trial_klass, tmp);
4578 }
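// Why the cmpw shortcut above is safe (a sketch): with shift == 0 the decoded
// Klass* is base + narrow, and when the low 32 bits of base are zero the two
// halves don't overlap, so the low 32 bits of any decoded Klass* are exactly
// the narrow value and a 32-bit compare is equivalent to decode-and-compare.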
4579
4580 void MacroAssembler::cmp_klass(Register src, Register dst, Register tmp1, Register tmp2) {
4581 if (UseCompactObjectHeaders) {
4582 load_nklass_compact(tmp1, src);
4583 load_nklass_compact(tmp2, dst);
4584 cmpw(tmp1, tmp2);
4585 } else if (UseCompressedClassPointers) {
4586 ldrw(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
4587 ldrw(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
4588 cmpw(tmp1, tmp2);
4589 } else {
4590 ldr(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
4591 ldr(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
4592 cmp(tmp1, tmp2);
4593 }
4594 }
4595
4596 void MacroAssembler::store_klass(Register dst, Register src) {
4597 // FIXME: Should this be a store release? Concurrent GCs assume the
4598 // klass length is valid if the klass field is not null.
4599 assert(!UseCompactObjectHeaders, "not with compact headers");
4600 if (UseCompressedClassPointers) {
4601 encode_klass_not_null(src);
4602 strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
4603 } else {
4604 str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
4605 }
4606 }
4607
4608 void MacroAssembler::store_klass_gap(Register dst, Register src) {
4609 assert(!UseCompactObjectHeaders, "not with compact headers");
4610 if (UseCompressedClassPointers) {
4611 // Store to klass gap in destination
4612 strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
4613 }
4614 }
4615
4616 // Algorithm must match CompressedOops::encode.
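// For reference, the encoding being matched is, conceptually:
//   narrow = (uint32_t)((oop - CompressedOops::base()) >> CompressedOops::shift())
// which degenerates to a plain shift (or a register move) when the base is
// null, as in the visible branch below; the base != null branch is elided here.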
4617 void MacroAssembler::encode_heap_oop(Register d, Register s) {
4618 #ifdef ASSERT
4619 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
4620 #endif
4621 verify_oop_msg(s, "broken oop in encode_heap_oop");
4622 if (CompressedOops::base() == nullptr) {
4623 if (CompressedOops::shift() != 0) {
4624 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4625 lsr(d, s, LogMinObjAlignmentInBytes);
4626 } else {
4627 mov(d, s);
4628 }
4629 } else {
4738 add(dst, zr, src, Assembler::LSL, LogMinObjAlignmentInBytes);
4739 }
4740 } else {
4741 assert (CompressedOops::base() == nullptr, "sanity");
4742 if (dst != src) {
4743 mov(dst, src);
4744 }
4745 }
4746 }
4747
4748 MacroAssembler::KlassDecodeMode MacroAssembler::_klass_decode_mode(KlassDecodeNone);
4749
4750 MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode() {
4751 assert(UseCompressedClassPointers, "not using compressed class pointers");
4752 assert(Metaspace::initialized(), "metaspace not initialized yet");
4753
4754 if (_klass_decode_mode != KlassDecodeNone) {
4755 return _klass_decode_mode;
4756 }
4757
4758 if (CompressedKlassPointers::base() == nullptr) {
4759 return (_klass_decode_mode = KlassDecodeZero);
4760 }
4761
4762 if (operand_valid_for_logical_immediate(
4763 /*is32*/false, (uint64_t)CompressedKlassPointers::base())) {
4764 const uint64_t range_mask =
4765 (1ULL << log2i(CompressedKlassPointers::range())) - 1;
4766 if (((uint64_t)CompressedKlassPointers::base() & range_mask) == 0) {
4767 return (_klass_decode_mode = KlassDecodeXor);
4768 }
4769 }
4770
4771 const uint64_t shifted_base =
4772 (uint64_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
4773 guarantee((shifted_base & 0xffff0000ffffffff) == 0,
4774 "compressed class base bad alignment");
4775
4776 return (_klass_decode_mode = KlassDecodeMovk);
4777 }
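// Summary of the decode modes chosen above, as a sketch, with
// b = CompressedKlassPointers::base() and s = CompressedKlassPointers::shift():
//   KlassDecodeZero: klass = narrow << s            (b == null)
//   KlassDecodeXor:  klass = (narrow << s) ^ b      (b is a valid logical
//                    immediate with no bits inside the shifted klass range,
//                    so the eor acts like an add)
//   KlassDecodeMovk: klass = insert bits 32..47 of (b >> s) via movk, then << s
//                    (the guarantee above ensures b >> s has no other bits set)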
4778
4779 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
4780 switch (klass_decode_mode()) {
4781 case KlassDecodeZero:
4782 if (CompressedKlassPointers::shift() != 0) {
4783 lsr(dst, src, CompressedKlassPointers::shift());
4784 } else {
4785 if (dst != src) mov(dst, src);
4786 }
4787 break;
4788
4789 case KlassDecodeXor:
4790 if (CompressedKlassPointers::shift() != 0) {
4791 eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4792 lsr(dst, dst, CompressedKlassPointers::shift());
4793 } else {
4794 eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4795 }
4796 break;
4797
4798 case KlassDecodeMovk:
4799 if (CompressedKlassPointers::shift() != 0) {
4800 ubfx(dst, src, CompressedKlassPointers::shift(), 32);
4801 } else {
4802 movw(dst, src);
4803 }
4804 break;
4805
4806 case KlassDecodeNone:
4807 ShouldNotReachHere();
4808 break;
4809 }
4810 }
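// Note on the KlassDecodeMovk encode above (a sketch): decode re-inserts the
// high half of the shifted base with movk, so encoding only needs to keep the
// low 32 bits of (klass >> shift) -- hence the ubfx/movw instead of a subtraction.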
4811
4812 void MacroAssembler::encode_klass_not_null(Register r) {
4813 encode_klass_not_null(r, r);
4814 }
4815
4816 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
4817 assert (UseCompressedClassPointers, "should only be used for compressed headers");
4818
4819 switch (klass_decode_mode()) {
4820 case KlassDecodeZero:
4821 if (CompressedKlassPointers::shift() != 0) {
4822 lsl(dst, src, CompressedKlassPointers::shift());
4823 } else {
4824 if (dst != src) mov(dst, src);
4825 }
4826 break;
4827
4828 case KlassDecodeXor:
4829 if (CompressedKlassPointers::shift() != 0) {
4830 lsl(dst, src, CompressedKlassPointers::shift());
4831 eor(dst, dst, (uint64_t)CompressedKlassPointers::base());
4832 } else {
4833 eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4834 }
4835 break;
4836
4837 case KlassDecodeMovk: {
4838 const uint64_t shifted_base =
4839 (uint64_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
4840
4841 if (dst != src) movw(dst, src);
4842 movk(dst, shifted_base >> 32, 32);
4843
4844 if (CompressedKlassPointers::shift() != 0) {
4845 lsl(dst, dst, CompressedKlassPointers::shift());
4846 }
4847
4848 break;
4849 }
4850
4851 case KlassDecodeNone:
4852 ShouldNotReachHere();
4853 break;
4854 }
4855 }
4856
4857 void MacroAssembler::decode_klass_not_null(Register r) {
4858 decode_klass_not_null(r, r);
4859 }
4860
4861 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
4862 #ifdef ASSERT
4863 {
4864 ThreadInVMfromUnknown tiv;
4865 assert (UseCompressedOops, "should only be used for compressed oops");
5225
5226 BIND(DONE);
5227 postcond(pc() != badAddress);
5228 return pc();
5229 }
5230
5231 // Clobbers: rscratch1, rscratch2, rflags
5232 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5233 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5234 Register tmp4, Register tmp5, Register result,
5235 Register cnt1, int elem_size) {
5236 Label DONE, SAME;
5237 Register tmp1 = rscratch1;
5238 Register tmp2 = rscratch2;
5239 Register cnt2 = tmp2; // cnt2 only used in array length compare
5240 int elem_per_word = wordSize/elem_size;
5241 int log_elem_size = exact_log2(elem_size);
5242 int length_offset = arrayOopDesc::length_offset_in_bytes();
5243 int base_offset
5244 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5245 // When the base is not aligned to 8 bytes, we start the compare
5246 // loop at the 8-byte-aligned length field instead, folding the
5247 // array length into the data comparison.
5248 bool is_8aligned = is_aligned(base_offset, BytesPerWord);
5249 assert(is_aligned(base_offset, BytesPerWord) || is_aligned(length_offset, BytesPerWord),
5250 "base_offset or length_offset must be 8-byte aligned");
5251 int start_offset = is_8aligned ? base_offset : length_offset;
5252 int extra_length = is_8aligned ? 0 : BytesPerInt / elem_size;
5253 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
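// Worked example for the unaligned case (a sketch; offsets assume a layout
// where the 4-byte length sits at offset 8 and a byte array's base at 12):
// start_offset = length_offset = 8 and extra_length = BytesPerInt / 1 = 4, so
// the loop starts at the 8-aligned length field and treats its 4 bytes as four
// extra elements, keeping every 8-byte load aligned.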
5254
5255 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5256 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5257
5258 #ifndef PRODUCT
5259 {
5260 const char kind = (elem_size == 2) ? 'U' : 'L';
5261 char comment[64];
5262 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5263 BLOCK_COMMENT(comment);
5264 }
5265 #endif
5266
5267 // if (a1 == a2)
5268 // return true;
5269 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5270 br(EQ, SAME);
5271
5272 if (UseSimpleArrayEquals) {
5273 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5274 // if (a1 == nullptr || a2 == nullptr)
5275 // return false;
5276 // a1 & a2 == 0 means that some pointer is null (or, very rarely,
5277 // that two non-null pointer values AND to zero),
5278 // so we can save one branch in most cases
5279 tst(a1, a2);
5280 mov(result, false);
5281 br(EQ, A_MIGHT_BE_NULL);
5282 // if (a1.length != a2.length)
5283 // return false;
5284 bind(A_IS_NOT_NULL);
5285 ldrw(cnt1, Address(a1, length_offset));
5286 ldrw(cnt2, Address(a2, length_offset));
5287 if (extra_length != 0) {
5288 // Increase loop counter by size of length field.
5289 addw(cnt1, cnt1, extra_length);
5290 addw(cnt2, cnt2, extra_length);
5291 }
5292 eorw(tmp5, cnt1, cnt2);
5293 cbnzw(tmp5, DONE);
5294 lea(a1, Address(a1, start_offset));
5295 lea(a2, Address(a2, start_offset));
5296 // Check for short strings, i.e. smaller than wordSize.
5297 subs(cnt1, cnt1, elem_per_word);
5298 br(Assembler::LT, SHORT);
5299 // Main 8 byte comparison loop.
5300 bind(NEXT_WORD); {
5301 ldr(tmp1, Address(post(a1, wordSize)));
5302 ldr(tmp2, Address(post(a2, wordSize)));
5303 subs(cnt1, cnt1, elem_per_word);
5304 eor(tmp5, tmp1, tmp2);
5305 cbnz(tmp5, DONE);
5306 } br(GT, NEXT_WORD);
5307 // Last longword. In the case where length == 4 we compare the
5308 // same longword twice, but that's still faster than another
5309 // conditional branch.
5310 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5311 // length == 4.
5312 if (log_elem_size > 0)
5313 lsl(cnt1, cnt1, log_elem_size);
5314 ldr(tmp3, Address(a1, cnt1));
5315 ldr(tmp4, Address(a2, cnt1));
5339 cbnzw(tmp5, DONE);
5340 }
5341 bind(TAIL01);
5342 if (elem_size == 1) { // Only needed when comparing byte arrays.
5343 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5344 {
5345 ldrb(tmp1, a1);
5346 ldrb(tmp2, a2);
5347 eorw(tmp5, tmp1, tmp2);
5348 cbnzw(tmp5, DONE);
5349 }
5350 }
5351 } else {
5352 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5353 CSET_EQ, LAST_CHECK;
5354 mov(result, false);
5355 cbz(a1, DONE);
5356 ldrw(cnt1, Address(a1, length_offset));
5357 cbz(a2, DONE);
5358 ldrw(cnt2, Address(a2, length_offset));
5359 if (extra_length != 0) {
5360 // Increase loop counter by size of length field.
5361 addw(cnt1, cnt1, extra_length);
5362 addw(cnt2, cnt2, extra_length);
5363 }
5364 // On most CPUs a2 is (surprisingly) still "locked" by the ldrw above, so
5365 // it's faster to perform another branch before comparing a1 and a2
5366 cmp(cnt1, (u1)elem_per_word);
5367 br(LE, SHORT); // short or same
5368 ldr(tmp3, Address(pre(a1, start_offset)));
5369 subs(zr, cnt1, stubBytesThreshold);
5370 br(GE, STUB);
5371 ldr(tmp4, Address(pre(a2, start_offset)));
5372 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5373 cmp(cnt2, cnt1);
5374 br(NE, DONE);
5375
5376 // Main 16 byte comparison loop with 2 exits
5377 bind(NEXT_DWORD); {
5378 ldr(tmp1, Address(pre(a1, wordSize)));
5379 ldr(tmp2, Address(pre(a2, wordSize)));
5380 subs(cnt1, cnt1, 2 * elem_per_word);
5381 br(LE, TAIL);
5382 eor(tmp4, tmp3, tmp4);
5383 cbnz(tmp4, DONE);
5384 ldr(tmp3, Address(pre(a1, wordSize)));
5385 ldr(tmp4, Address(pre(a2, wordSize)));
5386 cmp(cnt1, (u1)elem_per_word);
5387 br(LE, TAIL2);
5388 cmp(tmp1, tmp2);
5389 } br(EQ, NEXT_DWORD);
5390 b(DONE);
5391
5392 bind(TAIL);
5393 eor(tmp4, tmp3, tmp4);
5394 eor(tmp2, tmp1, tmp2);
5395 lslv(tmp2, tmp2, tmp5);
5396 orr(tmp5, tmp4, tmp2);
5397 cmp(tmp5, zr);
5398 b(CSET_EQ);
5399
5400 bind(TAIL2);
5401 eor(tmp2, tmp1, tmp2);
5402 cbnz(tmp2, DONE);
5403 b(LAST_CHECK);
5404
5405 bind(STUB);
5406 ldr(tmp4, Address(pre(a2, start_offset)));
5407 cmp(cnt2, cnt1);
5408 br(NE, DONE);
5409 if (elem_size == 2) { // convert to byte counter
5410 lsl(cnt1, cnt1, 1);
5411 }
5412 eor(tmp5, tmp3, tmp4);
5413 cbnz(tmp5, DONE);
5414 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5415 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5416 address tpc = trampoline_call(stub);
5417 if (tpc == nullptr) {
5418 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5419 postcond(pc() == badAddress);
5420 return nullptr;
5421 }
5422 b(DONE);
5423
5424 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5425 // so if a2 == null we return false (0); otherwise we can return a2 itself as "true"
5426 mov(result, a2);
5427 b(DONE);
5428 bind(SHORT);
5429 cmp(cnt2, cnt1);
5430 br(NE, DONE);
5431 cbz(cnt1, SAME);
5432 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5433 ldr(tmp3, Address(pre(a1, start_offset)));
5434 ldr(tmp4, Address(pre(a2, start_offset)));
5435 bind(LAST_CHECK);
5436 eor(tmp4, tmp3, tmp4);
5437 lslv(tmp5, tmp4, tmp5);
5438 cmp(tmp5, zr);
5439 bind(CSET_EQ);
5440 cset(result, EQ);
5441 b(DONE);
5442 }
5443
5444 bind(SAME);
5445 mov(result, true);
5446 // That's it.
5447 bind(DONE);
5448
5449 BLOCK_COMMENT("} array_equals");
5450 postcond(pc() != badAddress);
5451 return pc();
5452 }
5453
5454 // Compare Strings
6426 if (src.first()->is_stack()) {
6427 if (dst.first()->is_stack()) {
6428 ldr(tmp, Address(rfp, reg2offset_in(src.first())));
6429 str(tmp, Address(sp, reg2offset_out(dst.first())));
6430 } else {
6431 ldrd(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
6432 }
6433 } else if (src.first() != dst.first()) {
6434 if (src.is_single_phys_reg() && dst.is_single_phys_reg())
6435 fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
6436 else
6437 strd(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
6438 }
6439 }
6440
6441 // Implements lightweight-locking.
6442 //
6443 // - obj: the object to be locked
6444 // - t1, t2, t3: temporary registers, will be destroyed
6445 // - slow: branched to if locking fails; the absolute offset may be larger than 32KB (imm14 encoding).
6446 void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register t1, Register t2, Register t3, Label& slow) {
6447 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
6448 assert_different_registers(basic_lock, obj, t1, t2, t3, rscratch1);
6449
6450 Label push;
6451 const Register top = t1;
6452 const Register mark = t2;
6453 const Register t = t3;
6454
6455 // Preload the markWord. It is important that this is the first
6456 // instruction emitted as it is part of C1's null check semantics.
6457 ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
6458
// Clear the cached ObjectMonitor* in case fast locking succeeds.
6459 str(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize(BasicLock::object_monitor_cache_offset_in_bytes())));
6460
6461 // Check if the lock-stack is full.
6462 ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
6463 cmpw(top, (unsigned)LockStack::end_offset());
6464 br(Assembler::GE, slow);
6465
6466 // Check for recursion.
6467 subw(t, top, oopSize);
6468 ldr(t, Address(rthread, t));
6469 cmp(obj, t);
6470 br(Assembler::EQ, push);
6471
6472 // Check header for monitor (0b10).
6473 tst(mark, markWord::monitor_value);
6474 br(Assembler::NE, slow);
6475
6476 // Try to lock. Transition lock bits 0b01 => 0b00
6477 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
6478 orr(mark, mark, markWord::unlocked_value);
6479 eor(t, mark, markWord::unlocked_value);
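// At this point mark | unlocked_value (lock bits 0b01) is the expected
// unlocked header, and t is that header with the unlocked bit cleared
// (lock bits 0b00), i.e. the locked header we try to install below.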
6480 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::xword,