
src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp


 985 
 986 void MacroAssembler::c2bool(Register x) {
 987   // implements x == 0 ? 0 : 1
 988   // note: must only look at least-significant byte of x
 989   //       since C-style booleans are stored in one byte
 990   //       only! (was bug)
 991   tst(x, 0xff);
 992   cset(x, Assembler::NE);
 993 }
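// The pair of instructions above is the usual AArch64 idiom for normalizing a
// C-style boolean, conceptually:
//
//   x = ((x & 0xff) != 0) ? 1 : 0;
//
// tst sets the flags from x & 0xff and cset materializes the NE result.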
 994 
 995 address MacroAssembler::ic_call(address entry, jint method_index) {
 996   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
 997   // address const_ptr = long_constant((jlong)Universe::non_oop_word());
 998   // uintptr_t offset;
 999   // ldr_constant(rscratch2, const_ptr);
1000   movptr(rscratch2, (intptr_t)Universe::non_oop_word());
1001   return trampoline_call(Address(entry, rh));
1002 }
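// rscratch2 (the inline-cache data register, compare ic_check below) is seeded
// with Universe::non_oop_word() as the initial, not-yet-resolved cached value;
// the virtual_call relocation recorded above is what allows the runtime to
// patch this site once the inline cache is bound to a receiver klass.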
1003 
1004 int MacroAssembler::ic_check_size() {

1005   if (target_needs_far_branch(CAST_FROM_FN_PTR(address, SharedRuntime::get_ic_miss_stub()))) {
1006     return NativeInstruction::instruction_size * 7;
1007   } else {
1008     return NativeInstruction::instruction_size * 5;
1009   }
1010 }
1011 
1012 int MacroAssembler::ic_check(int end_alignment) {
1013   Register receiver = j_rarg0;
1014   Register data = rscratch2;
1015   Register tmp1 = rscratch1;
1016   Register tmp2 = r10;
1017 
1018   // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed
1019   // before the inline cache check, so we don't have to execute any nop instructions when dispatching
1020   // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align
1021   // before the inline cache check here, and not after
1022   align(end_alignment, offset() + ic_check_size());
1023 
1024   int uep_offset = offset();
1025 
1026   if (UseCompressedClassPointers) {




1027     ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
1028     ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
1029     cmpw(tmp1, tmp2);
1030   } else {
1031     ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
1032     ldr(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
1033     cmp(tmp1, tmp2);
1034   }
1035 
1036   Label dont;
1037   br(Assembler::EQ, dont);
1038   far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1039   bind(dont);
1040   assert((offset() % end_alignment) == 0, "Misaligned verified entry point");
1041 
1042   return uep_offset;
1043 }
1044 
1045 // Implementation of call_VM versions
1046 

4461   adrp(rscratch1, src2, offset);
4462   ldr(rscratch1, Address(rscratch1, offset));
4463   cmp(src1, rscratch1);
4464 }
4465 
4466 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
4467   cmp(obj1, obj2);
4468 }
4469 
4470 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4471   load_method_holder(rresult, rmethod);
4472   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4473 }
4474 
4475 void MacroAssembler::load_method_holder(Register holder, Register method) {
4476   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
4477   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
4478   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
4479 }
4480 











4481 void MacroAssembler::load_klass(Register dst, Register src) {
4482   if (UseCompressedClassPointers) {



4483     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4484     decode_klass_not_null(dst);
4485   } else {
4486     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4487   }
4488 }
4489 
4490 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
4491   if (RestoreMXCSROnJNICalls) {
4492     Label OK;
4493     get_fpcr(tmp1);
4494     mov(tmp2, tmp1);
4495     // Set FPCR to the state we need. We do want Round to Nearest. We
4496     // don't want non-IEEE rounding modes or floating-point traps.
4497     bfi(tmp1, zr, 22, 4); // Clear DN, FZ, and Rmode
4498     bfi(tmp1, zr, 8, 5);  // Clear exception-control bits (8-12)
4499     bfi(tmp1, zr, 0, 2);  // Clear AH:FIZ
4500     eor(tmp2, tmp1, tmp2);
4501     cbz(tmp2, OK);        // Only reset FPCR if it's wrong
4502     set_fpcr(tmp1);

4518   // A null weak handle resolves to null.
4519   cbz(result, resolved);
4520 
4521   // Only 64 bit platforms support GCs that require a tmp register
4522   // WeakHandle::resolve is an indirection like jweak.
4523   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
4524                  result, Address(result), tmp1, tmp2);
4525   bind(resolved);
4526 }
4527 
4528 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
4529   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
4530   ldr(dst, Address(rmethod, Method::const_offset()));
4531   ldr(dst, Address(dst, ConstMethod::constants_offset()));
4532   ldr(dst, Address(dst, ConstantPool::pool_holder_offset()));
4533   ldr(dst, Address(dst, mirror_offset));
4534   resolve_oop_handle(dst, tmp1, tmp2);
4535 }
4536 
4537 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {

4538   if (UseCompressedClassPointers) {
4539     ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));




4540     if (CompressedKlassPointers::base() == nullptr) {
4541       cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift());
4542       return;
4543     } else if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
4544                && CompressedKlassPointers::shift() == 0) {
4545       // Only the bottom 32 bits matter
4546       cmpw(trial_klass, tmp);
4547       return;
4548     }
4549     decode_klass_not_null(tmp);
4550   } else {
4551     ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
4552   }
4553   cmp(trial_klass, tmp);
4554 }
4555 
















4556 void MacroAssembler::store_klass(Register dst, Register src) {
4557   // FIXME: Should this be a store release?  Concurrent GCs assume the
4558   // klass length is valid if the klass field is not null.

4559   if (UseCompressedClassPointers) {
4560     encode_klass_not_null(src);
4561     strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
4562   } else {
4563     str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
4564   }
4565 }
4566 
4567 void MacroAssembler::store_klass_gap(Register dst, Register src) {

4568   if (UseCompressedClassPointers) {
4569     // Store to klass gap in destination
4570     strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
4571   }
4572 }
4573 
4574 // Algorithm must match CompressedOops::encode.
4575 void MacroAssembler::encode_heap_oop(Register d, Register s) {
4576 #ifdef ASSERT
4577   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
4578 #endif
4579   verify_oop_msg(s, "broken oop in encode_heap_oop");
4580   if (CompressedOops::base() == nullptr) {
4581     if (CompressedOops::shift() != 0) {
4582       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4583       lsr(d, s, LogMinObjAlignmentInBytes);
4584     } else {
4585       mov(d, s);
4586     }
4587   } else {

4696       add(dst, zr, src, Assembler::LSL, LogMinObjAlignmentInBytes);
4697     }
4698   } else {
4699     assert (CompressedOops::base() == nullptr, "sanity");
4700     if (dst != src) {
4701       mov(dst, src);
4702     }
4703   }
4704 }
4705 
4706 MacroAssembler::KlassDecodeMode MacroAssembler::_klass_decode_mode(KlassDecodeNone);
4707 
4708 MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode() {
4709   assert(UseCompressedClassPointers, "not using compressed class pointers");
4710   assert(Metaspace::initialized(), "metaspace not initialized yet");
4711 
4712   if (_klass_decode_mode != KlassDecodeNone) {
4713     return _klass_decode_mode;
4714   }
4715 
4716   assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift()
4717          || 0 == CompressedKlassPointers::shift(), "decode alg wrong");
4718 
4719   if (CompressedKlassPointers::base() == nullptr) {
4720     return (_klass_decode_mode = KlassDecodeZero);
4721   }
4722 
4723   if (operand_valid_for_logical_immediate(
4724         /*is32*/false, (uint64_t)CompressedKlassPointers::base())) {
4725     const uint64_t range_mask =
4726       (1ULL << log2i(CompressedKlassPointers::range())) - 1;
4727     if (((uint64_t)CompressedKlassPointers::base() & range_mask) == 0) {
4728       return (_klass_decode_mode = KlassDecodeXor);
4729     }
4730   }
4731 
4732   const uint64_t shifted_base =
4733     (uint64_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
4734   guarantee((shifted_base & 0xffff0000ffffffff) == 0,
4735             "compressed class base bad alignment");
4736 
4737   return (_klass_decode_mode = KlassDecodeMovk);
4738 }
4739 
4740 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
4741   switch (klass_decode_mode()) {
4742   case KlassDecodeZero:
4743     if (CompressedKlassPointers::shift() != 0) {
4744       lsr(dst, src, LogKlassAlignmentInBytes);
4745     } else {
4746       if (dst != src) mov(dst, src);
4747     }
4748     break;
4749 
4750   case KlassDecodeXor:
4751     if (CompressedKlassPointers::shift() != 0) {
4752       eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4753       lsr(dst, dst, LogKlassAlignmentInBytes);
4754     } else {
4755       eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4756     }
4757     break;
4758 
4759   case KlassDecodeMovk:
4760     if (CompressedKlassPointers::shift() != 0) {
4761       ubfx(dst, src, LogKlassAlignmentInBytes, 32);
4762     } else {
4763       movw(dst, src);
4764     }
4765     break;
4766 
4767   case KlassDecodeNone:
4768     ShouldNotReachHere();
4769     break;
4770   }
4771 }
4772 
4773 void MacroAssembler::encode_klass_not_null(Register r) {
4774   encode_klass_not_null(r, r);
4775 }
4776 
4777 void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
4778   assert (UseCompressedClassPointers, "should only be used for compressed headers");
4779 
4780   switch (klass_decode_mode()) {
4781   case KlassDecodeZero:
4782     if (CompressedKlassPointers::shift() != 0) {
4783       lsl(dst, src, LogKlassAlignmentInBytes);
4784     } else {
4785       if (dst != src) mov(dst, src);
4786     }
4787     break;
4788 
4789   case KlassDecodeXor:
4790     if (CompressedKlassPointers::shift() != 0) {
4791       lsl(dst, src, LogKlassAlignmentInBytes);
4792       eor(dst, dst, (uint64_t)CompressedKlassPointers::base());
4793     } else {
4794       eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4795     }
4796     break;
4797 
4798   case KlassDecodeMovk: {
4799     const uint64_t shifted_base =
4800       (uint64_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
4801 
4802     if (dst != src) movw(dst, src);
4803     movk(dst, shifted_base >> 32, 32);
4804 
4805     if (CompressedKlassPointers::shift() != 0) {
4806       lsl(dst, dst, LogKlassAlignmentInBytes);
4807     }
4808 
4809     break;
4810   }
4811 
4812   case KlassDecodeNone:
4813     ShouldNotReachHere();
4814     break;
4815   }
4816 }
4817 
4818 void  MacroAssembler::decode_klass_not_null(Register r) {
4819   decode_klass_not_null(r, r);
4820 }
4821 
4822 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
4823 #ifdef ASSERT
4824   {
4825     ThreadInVMfromUnknown tiv;
4826     assert (UseCompressedOops, "should only be used for compressed oops");

5186 
5187   BIND(DONE);
5188   postcond(pc() != badAddress);
5189   return pc();
5190 }
5191 
5192 // Clobbers: rscratch1, rscratch2, rflags
5193 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5194 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5195                                       Register tmp4, Register tmp5, Register result,
5196                                       Register cnt1, int elem_size) {
5197   Label DONE, SAME;
5198   Register tmp1 = rscratch1;
5199   Register tmp2 = rscratch2;
5200   Register cnt2 = tmp2;  // cnt2 only used in array length compare
5201   int elem_per_word = wordSize/elem_size;
5202   int log_elem_size = exact_log2(elem_size);
5203   int length_offset = arrayOopDesc::length_offset_in_bytes();
5204   int base_offset
5205     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);








5206   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5207 
5208   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5209   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5210 
5211 #ifndef PRODUCT
5212   {
5213     const char kind = (elem_size == 2) ? 'U' : 'L';
5214     char comment[64];
5215     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5216     BLOCK_COMMENT(comment);
5217   }
5218 #endif
5219 
5220   // if (a1 == a2)
5221   //     return true;
5222   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5223   br(EQ, SAME);
5224 
5225   if (UseSimpleArrayEquals) {
5226     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5227     // if (a1 == nullptr || a2 == nullptr)
5228     //     return false;
5229     // a1 & a2 == 0 means (some-pointer is null) or
5230     // (very-rare-or-even-probably-impossible-pointer-values)
5231     // so, we can save one branch in most cases
5232     tst(a1, a2);
5233     mov(result, false);
5234     br(EQ, A_MIGHT_BE_NULL);
5235     // if (a1.length != a2.length)
5236     //      return false;
5237     bind(A_IS_NOT_NULL);
5238     ldrw(cnt1, Address(a1, length_offset));
5239     ldrw(cnt2, Address(a2, length_offset));





5240     eorw(tmp5, cnt1, cnt2);
5241     cbnzw(tmp5, DONE);
5242     lea(a1, Address(a1, base_offset));
5243     lea(a2, Address(a2, base_offset));
5244     // Check for short strings, i.e. smaller than wordSize.
5245     subs(cnt1, cnt1, elem_per_word);
5246     br(Assembler::LT, SHORT);
5247     // Main 8 byte comparison loop.
5248     bind(NEXT_WORD); {
5249       ldr(tmp1, Address(post(a1, wordSize)));
5250       ldr(tmp2, Address(post(a2, wordSize)));
5251       subs(cnt1, cnt1, elem_per_word);
5252       eor(tmp5, tmp1, tmp2);
5253       cbnz(tmp5, DONE);
5254     } br(GT, NEXT_WORD);
5255     // Last longword.  In the case where length == 4 we compare the
5256     // same longword twice, but that's still faster than another
5257     // conditional branch.
5258     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5259     // length == 4.
5260     if (log_elem_size > 0)
5261       lsl(cnt1, cnt1, log_elem_size);
5262     ldr(tmp3, Address(a1, cnt1));
5263     ldr(tmp4, Address(a2, cnt1));

5287       cbnzw(tmp5, DONE);
5288     }
5289     bind(TAIL01);
5290     if (elem_size == 1) { // Only needed when comparing byte arrays.
5291       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5292       {
5293         ldrb(tmp1, a1);
5294         ldrb(tmp2, a2);
5295         eorw(tmp5, tmp1, tmp2);
5296         cbnzw(tmp5, DONE);
5297       }
5298     }
5299   } else {
5300     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5301         CSET_EQ, LAST_CHECK;
5302     mov(result, false);
5303     cbz(a1, DONE);
5304     ldrw(cnt1, Address(a1, length_offset));
5305     cbz(a2, DONE);
5306     ldrw(cnt2, Address(a2, length_offset));





5307     // on most CPUs a2 is still "locked" (surprisingly) in ldrw and it's
5308     // faster to perform another branch before comparing a1 and a2
5309     cmp(cnt1, (u1)elem_per_word);
5310     br(LE, SHORT); // short or same
5311     ldr(tmp3, Address(pre(a1, base_offset)));
5312     subs(zr, cnt1, stubBytesThreshold);
5313     br(GE, STUB);
5314     ldr(tmp4, Address(pre(a2, base_offset)));
5315     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5316     cmp(cnt2, cnt1);
5317     br(NE, DONE);
5318 
5319     // Main 16 byte comparison loop with 2 exits
5320     bind(NEXT_DWORD); {
5321       ldr(tmp1, Address(pre(a1, wordSize)));
5322       ldr(tmp2, Address(pre(a2, wordSize)));
5323       subs(cnt1, cnt1, 2 * elem_per_word);
5324       br(LE, TAIL);
5325       eor(tmp4, tmp3, tmp4);
5326       cbnz(tmp4, DONE);
5327       ldr(tmp3, Address(pre(a1, wordSize)));
5328       ldr(tmp4, Address(pre(a2, wordSize)));
5329       cmp(cnt1, (u1)elem_per_word);
5330       br(LE, TAIL2);
5331       cmp(tmp1, tmp2);
5332     } br(EQ, NEXT_DWORD);
5333     b(DONE);
5334 
5335     bind(TAIL);
5336     eor(tmp4, tmp3, tmp4);
5337     eor(tmp2, tmp1, tmp2);
5338     lslv(tmp2, tmp2, tmp5);
5339     orr(tmp5, tmp4, tmp2);
5340     cmp(tmp5, zr);
5341     b(CSET_EQ);
5342 
5343     bind(TAIL2);
5344     eor(tmp2, tmp1, tmp2);
5345     cbnz(tmp2, DONE);
5346     b(LAST_CHECK);
5347 
5348     bind(STUB);
5349     ldr(tmp4, Address(pre(a2, base_offset)));
5350     cmp(cnt2, cnt1);
5351     br(NE, DONE);
5352     if (elem_size == 2) { // convert to byte counter
5353       lsl(cnt1, cnt1, 1);
5354     }
5355     eor(tmp5, tmp3, tmp4);
5356     cbnz(tmp5, DONE);
5357     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5358     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5359     address tpc = trampoline_call(stub);
5360     if (tpc == nullptr) {
5361       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5362       postcond(pc() == badAddress);
5363       return nullptr;
5364     }
5365     b(DONE);
5366 
5367     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5368     // so, if a2 == null => return false(0), else return true, so we can return a2
5369     mov(result, a2);
5370     b(DONE);
5371     bind(SHORT);
5372     cmp(cnt2, cnt1);
5373     br(NE, DONE);
5374     cbz(cnt1, SAME);
5375     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5376     ldr(tmp3, Address(a1, base_offset));
5377     ldr(tmp4, Address(a2, base_offset));
5378     bind(LAST_CHECK);
5379     eor(tmp4, tmp3, tmp4);
5380     lslv(tmp5, tmp4, tmp5);
5381     cmp(tmp5, zr);
5382     bind(CSET_EQ);
5383     cset(result, EQ);
5384     b(DONE);
5385   }
5386 
5387   bind(SAME);
5388   mov(result, true);
5389   // That's it.
5390   bind(DONE);
5391 
5392   BLOCK_COMMENT("} array_equals");
5393   postcond(pc() != badAddress);
5394   return pc();
5395 }
5396 
5397 // Compare Strings

6369  if (src.first()->is_stack()) {
6370     if (dst.first()->is_stack()) {
6371       ldr(tmp, Address(rfp, reg2offset_in(src.first())));
6372       str(tmp, Address(sp, reg2offset_out(dst.first())));
6373     } else {
6374       ldrd(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
6375     }
6376   } else if (src.first() != dst.first()) {
6377     if (src.is_single_phys_reg() && dst.is_single_phys_reg())
6378       fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
6379     else
6380       strd(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
6381   }
6382 }
6383 
6384 // Implements lightweight-locking.
6385 //
6386 //  - obj: the object to be locked
6387 //  - t1, t2, t3: temporary registers, will be destroyed
6388 //  - slow: branched to if locking fails, absolute offset may be larger than 32KB (imm14 encoding).
6389 void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Register t3, Label& slow) {
6390   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
6391   assert_different_registers(obj, t1, t2, t3, rscratch1);
6392 
6393   Label push;
6394   const Register top = t1;
6395   const Register mark = t2;
6396   const Register t = t3;
6397 
6398   // Preload the markWord. It is important that this is the first
6399   // instruction emitted as it is part of C1's null check semantics.
6400   ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
6401 


6402   // Check if the lock-stack is full.
6403   ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
6404   cmpw(top, (unsigned)LockStack::end_offset());
6405   br(Assembler::GE, slow);
6406 
6407   // Check for recursion.
6408   subw(t, top, oopSize);
6409   ldr(t, Address(rthread, t));
6410   cmp(obj, t);
6411   br(Assembler::EQ, push);
6412 
6413   // Check header for monitor (0b10).
6414   tst(mark, markWord::monitor_value);
6415   br(Assembler::NE, slow);
6416 
6417   // Try to lock. Transition lock bits 0b01 => 0b00
6418   assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
6419   orr(mark, mark, markWord::unlocked_value);
6420   eor(t, mark, markWord::unlocked_value);
6421   cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::xword,

 985 
 986 void MacroAssembler::c2bool(Register x) {
 987   // implements x == 0 ? 0 : 1
 988   // note: must only look at least-significant byte of x
 989   //       since C-style booleans are stored in one byte
 990   //       only! (was bug)
 991   tst(x, 0xff);
 992   cset(x, Assembler::NE);
 993 }
 994 
 995 address MacroAssembler::ic_call(address entry, jint method_index) {
 996   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
 997   // address const_ptr = long_constant((jlong)Universe::non_oop_word());
 998   // uintptr_t offset;
 999   // ldr_constant(rscratch2, const_ptr);
1000   movptr(rscratch2, (intptr_t)Universe::non_oop_word());
1001   return trampoline_call(Address(entry, rh));
1002 }
1003 
1004 int MacroAssembler::ic_check_size() {
1005   int extra_instructions = UseCompactObjectHeaders ? 1 : 0;
1006   if (target_needs_far_branch(CAST_FROM_FN_PTR(address, SharedRuntime::get_ic_miss_stub()))) {
1007     return NativeInstruction::instruction_size * (7 + extra_instructions);
1008   } else {
1009     return NativeInstruction::instruction_size * (5 + extra_instructions);
1010   }
1011 }
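// Rough accounting for the two sizes above, matching what ic_check emits:
// the klass load, data load and compare take 3 instructions (4 with
// +UseCompactObjectHeaders, since load_nklass_compact is ldr + lsr), the
// conditional branch takes 1, and the jump to the miss stub takes 1
// instruction near or 3 when a far branch is required, hence 5 or 7 plus the
// compact-headers extra. Alignment nops emitted by ic_check's align() call
// are not included here.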
1012 
1013 int MacroAssembler::ic_check(int end_alignment) {
1014   Register receiver = j_rarg0;
1015   Register data = rscratch2;
1016   Register tmp1 = rscratch1;
1017   Register tmp2 = r10;
1018 
1019   // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed
1020   // before the inline cache check, so we don't have to execute any nop instructions when dispatching
1021   // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align
1022   // before the inline cache check here, and not after
1023   align(end_alignment, offset() + ic_check_size());
1024 
1025   int uep_offset = offset();
1026 
1027   if (UseCompactObjectHeaders) {
1028     load_nklass_compact(tmp1, receiver);
1029     ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
1030     cmpw(tmp1, tmp2);
1031   } else if (UseCompressedClassPointers) {
1032     ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
1033     ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
1034     cmpw(tmp1, tmp2);
1035   } else {
1036     ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
1037     ldr(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
1038     cmp(tmp1, tmp2);
1039   }
1040 
1041   Label dont;
1042   br(Assembler::EQ, dont);
1043   far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1044   bind(dont);
1045   assert((offset() % end_alignment) == 0, "Misaligned verified entry point");
1046 
1047   return uep_offset;
1048 }
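// In pseudocode, the unverified entry point (UEP) emitted above is:
//
//   if (receiver->klass() != ic_data->speculated_klass())  // narrow compare
//     goto SharedRuntime::ic_miss_stub;                     // when compressed
//   // fall through to the verified entry point (VEP)
//
// uep_offset marks where the check begins; the VEP immediately follows and is
// end_alignment-aligned thanks to the align() call before the check.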
1049 
1050 // Implementation of call_VM versions
1051 

4466   adrp(rscratch1, src2, offset);
4467   ldr(rscratch1, Address(rscratch1, offset));
4468   cmp(src1, rscratch1);
4469 }
4470 
4471 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
4472   cmp(obj1, obj2);
4473 }
4474 
4475 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4476   load_method_holder(rresult, rmethod);
4477   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4478 }
4479 
4480 void MacroAssembler::load_method_holder(Register holder, Register method) {
4481   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
4482   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
4483   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
4484 }
4485 
4486 // Loads the obj's Klass* into dst.
4487 // Preserves all registers (incl src, rscratch1 and rscratch2).
4488 // Input:
4489 // src - the oop we want to load the klass from.
4490 // dst - output nklass.
4491 void MacroAssembler::load_nklass_compact(Register dst, Register src) {
4492   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
4493   ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
4494   lsr(dst, dst, markWord::klass_shift);
4495 }
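// With compact object headers the narrow Klass* lives in the upper bits of
// the 64-bit mark word, so the two instructions above are conceptually:
//
//   dst = obj->mark().value() >> markWord::klass_shift;
//
// No masking is needed because only the narrow-klass bits remain after the shift.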
4496 
4497 void MacroAssembler::load_klass(Register dst, Register src) {
4498   if (UseCompactObjectHeaders) {
4499     load_nklass_compact(dst, src);
4500     decode_klass_not_null(dst);
4501   } else if (UseCompressedClassPointers) {
4502     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4503     decode_klass_not_null(dst);
4504   } else {
4505     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4506   }
4507 }
4508 
4509 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
4510   if (RestoreMXCSROnJNICalls) {
4511     Label OK;
4512     get_fpcr(tmp1);
4513     mov(tmp2, tmp1);
4514     // Set FPCR to the state we need. We do want Round to Nearest. We
4515     // don't want non-IEEE rounding modes or floating-point traps.
4516     bfi(tmp1, zr, 22, 4); // Clear DN, FZ, and Rmode
4517     bfi(tmp1, zr, 8, 5);  // Clear exception-control bits (8-12)
4518     bfi(tmp1, zr, 0, 2);  // Clear AH:FIZ
4519     eor(tmp2, tmp1, tmp2);
4520     cbz(tmp2, OK);        // Only reset FPCR if it's wrong
4521     set_fpcr(tmp1);

4537   // A null weak handle resolves to null.
4538   cbz(result, resolved);
4539 
4540   // Only 64 bit platforms support GCs that require a tmp register
4541   // WeakHandle::resolve is an indirection like jweak.
4542   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
4543                  result, Address(result), tmp1, tmp2);
4544   bind(resolved);
4545 }
4546 
4547 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
4548   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
4549   ldr(dst, Address(rmethod, Method::const_offset()));
4550   ldr(dst, Address(dst, ConstMethod::constants_offset()));
4551   ldr(dst, Address(dst, ConstantPool::pool_holder_offset()));
4552   ldr(dst, Address(dst, mirror_offset));
4553   resolve_oop_handle(dst, tmp1, tmp2);
4554 }
4555 
4556 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
4557   assert_different_registers(oop, trial_klass, tmp);
4558   if (UseCompressedClassPointers) {
4559     if (UseCompactObjectHeaders) {
4560       load_nklass_compact(tmp, oop);
4561     } else {
4562       ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
4563     }
4564     if (CompressedKlassPointers::base() == nullptr) {
4565       cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift());
4566       return;
4567     } else if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
4568                && CompressedKlassPointers::shift() == 0) {
4569       // Only the bottom 32 bits matter
4570       cmpw(trial_klass, tmp);
4571       return;
4572     }
4573     decode_klass_not_null(tmp);
4574   } else {
4575     ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
4576   }
4577   cmp(trial_klass, tmp);
4578 }
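// The two early returns above avoid a full decode of the narrow klass:
// with a null encoding base the Klass* is just narrow << shift, so comparing
// trial_klass against (tmp LSL shift) is equivalent; and when the base has
// zero low 32 bits and the shift is zero, the narrow value equals the low
// 32 bits of the Klass*, so the 32-bit cmpw suffices. Otherwise tmp is
// decoded and compared as a full pointer.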
4579 
4580 void MacroAssembler::cmp_klass(Register src, Register dst, Register tmp1, Register tmp2) {
4581   if (UseCompactObjectHeaders) {
4582     load_nklass_compact(tmp1, src);
4583     load_nklass_compact(tmp2, dst);
4584     cmpw(tmp1, tmp2);
4585   } else if (UseCompressedClassPointers) {
4586     ldrw(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
4587     ldrw(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
4588     cmpw(tmp1, tmp2);
4589   } else {
4590     ldr(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
4591     ldr(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
4592     cmp(tmp1, tmp2);
4593   }
4594 }
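// Note: under compressed class pointers (with or without compact headers) the
// two narrow klass values are compared directly; the encoding is injective,
// so equality of the narrow values is equivalent to equality of the Klass*.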
4595 
4596 void MacroAssembler::store_klass(Register dst, Register src) {
4597   // FIXME: Should this be a store release?  Concurrent GCs assume the
4598   // klass length is valid if the klass field is not null.
4599   assert(!UseCompactObjectHeaders, "not with compact headers");
4600   if (UseCompressedClassPointers) {
4601     encode_klass_not_null(src);
4602     strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
4603   } else {
4604     str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
4605   }
4606 }
4607 
4608 void MacroAssembler::store_klass_gap(Register dst, Register src) {
4609   assert(!UseCompactObjectHeaders, "not with compact headers");
4610   if (UseCompressedClassPointers) {
4611     // Store to klass gap in destination
4612     strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
4613   }
4614 }
4615 
4616 // Algorithm must match CompressedOops::encode.
4617 void MacroAssembler::encode_heap_oop(Register d, Register s) {
4618 #ifdef ASSERT
4619   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
4620 #endif
4621   verify_oop_msg(s, "broken oop in encode_heap_oop");
4622   if (CompressedOops::base() == nullptr) {
4623     if (CompressedOops::shift() != 0) {
4624       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4625       lsr(d, s, LogMinObjAlignmentInBytes);
4626     } else {
4627       mov(d, s);
4628     }
4629   } else {

4738       add(dst, zr, src, Assembler::LSL, LogMinObjAlignmentInBytes);
4739     }
4740   } else {
4741     assert (CompressedOops::base() == nullptr, "sanity");
4742     if (dst != src) {
4743       mov(dst, src);
4744     }
4745   }
4746 }
4747 
4748 MacroAssembler::KlassDecodeMode MacroAssembler::_klass_decode_mode(KlassDecodeNone);
4749 
4750 MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode() {
4751   assert(UseCompressedClassPointers, "not using compressed class pointers");
4752   assert(Metaspace::initialized(), "metaspace not initialized yet");
4753 
4754   if (_klass_decode_mode != KlassDecodeNone) {
4755     return _klass_decode_mode;
4756   }
4757 



4758   if (CompressedKlassPointers::base() == nullptr) {
4759     return (_klass_decode_mode = KlassDecodeZero);
4760   }
4761 
4762   if (operand_valid_for_logical_immediate(
4763         /*is32*/false, (uint64_t)CompressedKlassPointers::base())) {
4764     const uint64_t range_mask =
4765       (1ULL << log2i(CompressedKlassPointers::range())) - 1;
4766     if (((uint64_t)CompressedKlassPointers::base() & range_mask) == 0) {
4767       return (_klass_decode_mode = KlassDecodeXor);
4768     }
4769   }
4770 
4771   const uint64_t shifted_base =
4772     (uint64_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
4773   guarantee((shifted_base & 0xffff0000ffffffff) == 0,
4774             "compressed class base bad alignment");
4775 
4776   return (_klass_decode_mode = KlassDecodeMovk);
4777 }
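// Decode formulas for the three modes selected above (sketch):
//
//   KlassDecodeZero: Klass* = narrow << shift                      (base == nullptr)
//   KlassDecodeXor : Klass* = (narrow << shift) ^ base             (base is a logical
//                    immediate with no bits inside the klass range, so xor acts as add)
//   KlassDecodeMovk: Klass* = ((base >> shift) | narrow) << shift  (base >> shift has
//                    bits only in [47:32], so a single movk can merge it in)
//
// encode_klass_not_null and decode_klass_not_null below are the matching pairs.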
4778 
4779 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
4780   switch (klass_decode_mode()) {
4781   case KlassDecodeZero:
4782     if (CompressedKlassPointers::shift() != 0) {
4783       lsr(dst, src, CompressedKlassPointers::shift());
4784     } else {
4785       if (dst != src) mov(dst, src);
4786     }
4787     break;
4788 
4789   case KlassDecodeXor:
4790     if (CompressedKlassPointers::shift() != 0) {
4791       eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4792       lsr(dst, dst, CompressedKlassPointers::shift());
4793     } else {
4794       eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4795     }
4796     break;
4797 
4798   case KlassDecodeMovk:
4799     if (CompressedKlassPointers::shift() != 0) {
4800       ubfx(dst, src, CompressedKlassPointers::shift(), 32);
4801     } else {
4802       movw(dst, src);
4803     }
4804     break;
4805 
4806   case KlassDecodeNone:
4807     ShouldNotReachHere();
4808     break;
4809   }
4810 }
4811 
4812 void MacroAssembler::encode_klass_not_null(Register r) {
4813   encode_klass_not_null(r, r);
4814 }
4815 
4816 void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
4817   assert (UseCompressedClassPointers, "should only be used for compressed headers");
4818 
4819   switch (klass_decode_mode()) {
4820   case KlassDecodeZero:
4821     if (CompressedKlassPointers::shift() != 0) {
4822       lsl(dst, src, CompressedKlassPointers::shift());
4823     } else {
4824       if (dst != src) mov(dst, src);
4825     }
4826     break;
4827 
4828   case KlassDecodeXor:
4829     if (CompressedKlassPointers::shift() != 0) {
4830       lsl(dst, src, CompressedKlassPointers::shift());
4831       eor(dst, dst, (uint64_t)CompressedKlassPointers::base());
4832     } else {
4833       eor(dst, src, (uint64_t)CompressedKlassPointers::base());
4834     }
4835     break;
4836 
4837   case KlassDecodeMovk: {
4838     const uint64_t shifted_base =
4839       (uint64_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
4840 
4841     if (dst != src) movw(dst, src);
4842     movk(dst, shifted_base >> 32, 32);
4843 
4844     if (CompressedKlassPointers::shift() != 0) {
4845       lsl(dst, dst, CompressedKlassPointers::shift());
4846     }
4847 
4848     break;
4849   }
4850 
4851   case KlassDecodeNone:
4852     ShouldNotReachHere();
4853     break;
4854   }
4855 }
4856 
4857 void  MacroAssembler::decode_klass_not_null(Register r) {
4858   decode_klass_not_null(r, r);
4859 }
4860 
4861 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
4862 #ifdef ASSERT
4863   {
4864     ThreadInVMfromUnknown tiv;
4865     assert (UseCompressedOops, "should only be used for compressed oops");

5225 
5226   BIND(DONE);
5227   postcond(pc() != badAddress);
5228   return pc();
5229 }
5230 
5231 // Clobbers: rscratch1, rscratch2, rflags
5232 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5233 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5234                                       Register tmp4, Register tmp5, Register result,
5235                                       Register cnt1, int elem_size) {
5236   Label DONE, SAME;
5237   Register tmp1 = rscratch1;
5238   Register tmp2 = rscratch2;
5239   Register cnt2 = tmp2;  // cnt2 only used in array length compare
5240   int elem_per_word = wordSize/elem_size;
5241   int log_elem_size = exact_log2(elem_size);
5242   int length_offset = arrayOopDesc::length_offset_in_bytes();
5243   int base_offset
5244     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5245   // When the base is not aligned to 8 bytes, then we let
5246   // the compare loop include the array length, and skip
5247   // the explicit comparison of length.
5248   bool is_8aligned = is_aligned(base_offset, BytesPerWord);
5249   assert(is_aligned(base_offset, BytesPerWord) || is_aligned(length_offset, BytesPerWord),
5250          "base_offset or length_offset must be 8-byte aligned");
5251   int start_offset = is_8aligned ? base_offset : length_offset;
5252   int extra_length = is_8aligned ? 0 : BytesPerInt / elem_size;
5253   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5254 
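  // Worked example for the offsets above, assuming a 12-byte array header as
  // with +UseCompactObjectHeaders: length_offset = 8, base_offset = 12, so
  // is_8aligned is false, start_offset = 8, and extra_length is 4 for byte
  // arrays (2 for char arrays). The 8-byte loads then stay word aligned; the
  // first word read covers the 4-byte length plus the first elements, and the
  // counters are bumped by extra_length so that word is accounted for.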
5255   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5256   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5257 
5258 #ifndef PRODUCT
5259   {
5260     const char kind = (elem_size == 2) ? 'U' : 'L';
5261     char comment[64];
5262     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5263     BLOCK_COMMENT(comment);
5264   }
5265 #endif
5266 
5267   // if (a1 == a2)
5268   //     return true;
5269   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5270   br(EQ, SAME);
5271 
5272   if (UseSimpleArrayEquals) {
5273     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5274     // if (a1 == nullptr || a2 == nullptr)
5275     //     return false;
5276     // a1 & a2 == 0 means (some-pointer is null) or
5277     // (very-rare-or-even-probably-impossible-pointer-values)
5278     // so, we can save one branch in most cases
5279     tst(a1, a2);
5280     mov(result, false);
5281     br(EQ, A_MIGHT_BE_NULL);
5282     // if (a1.length != a2.length)
5283     //      return false;
5284     bind(A_IS_NOT_NULL);
5285     ldrw(cnt1, Address(a1, length_offset));
5286     ldrw(cnt2, Address(a2, length_offset));
5287     if (extra_length != 0) {
5288       // Increase loop counter by size of length field.
5289       addw(cnt1, cnt1, extra_length);
5290       addw(cnt2, cnt2, extra_length);
5291     }
5292     eorw(tmp5, cnt1, cnt2);
5293     cbnzw(tmp5, DONE);
5294     lea(a1, Address(a1, start_offset));
5295     lea(a2, Address(a2, start_offset));
5296     // Check for short strings, i.e. smaller than wordSize.
5297     subs(cnt1, cnt1, elem_per_word);
5298     br(Assembler::LT, SHORT);
5299     // Main 8 byte comparison loop.
5300     bind(NEXT_WORD); {
5301       ldr(tmp1, Address(post(a1, wordSize)));
5302       ldr(tmp2, Address(post(a2, wordSize)));
5303       subs(cnt1, cnt1, elem_per_word);
5304       eor(tmp5, tmp1, tmp2);
5305       cbnz(tmp5, DONE);
5306     } br(GT, NEXT_WORD);
5307     // Last longword.  In the case where length == 4 we compare the
5308     // same longword twice, but that's still faster than another
5309     // conditional branch.
5310     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5311     // length == 4.
5312     if (log_elem_size > 0)
5313       lsl(cnt1, cnt1, log_elem_size);
5314     ldr(tmp3, Address(a1, cnt1));
5315     ldr(tmp4, Address(a2, cnt1));

5339       cbnzw(tmp5, DONE);
5340     }
5341     bind(TAIL01);
5342     if (elem_size == 1) { // Only needed when comparing byte arrays.
5343       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5344       {
5345         ldrb(tmp1, a1);
5346         ldrb(tmp2, a2);
5347         eorw(tmp5, tmp1, tmp2);
5348         cbnzw(tmp5, DONE);
5349       }
5350     }
5351   } else {
5352     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5353         CSET_EQ, LAST_CHECK;
5354     mov(result, false);
5355     cbz(a1, DONE);
5356     ldrw(cnt1, Address(a1, length_offset));
5357     cbz(a2, DONE);
5358     ldrw(cnt2, Address(a2, length_offset));
5359     if (extra_length != 0) {
5360       // Increase loop counter by size of length field.
5361       addw(cnt1, cnt1, extra_length);
5362       addw(cnt2, cnt2, extra_length);
5363     }
5364     // on most CPUs a2 is still "locked" (surprisingly) in ldrw and it's
5365     // faster to perform another branch before comparing a1 and a2
5366     cmp(cnt1, (u1)elem_per_word);
5367     br(LE, SHORT); // short or same
5368     ldr(tmp3, Address(pre(a1, start_offset)));
5369     subs(zr, cnt1, stubBytesThreshold);
5370     br(GE, STUB);
5371     ldr(tmp4, Address(pre(a2, start_offset)));
5372     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5373     cmp(cnt2, cnt1);
5374     br(NE, DONE);
5375 
5376     // Main 16 byte comparison loop with 2 exits
5377     bind(NEXT_DWORD); {
5378       ldr(tmp1, Address(pre(a1, wordSize)));
5379       ldr(tmp2, Address(pre(a2, wordSize)));
5380       subs(cnt1, cnt1, 2 * elem_per_word);
5381       br(LE, TAIL);
5382       eor(tmp4, tmp3, tmp4);
5383       cbnz(tmp4, DONE);
5384       ldr(tmp3, Address(pre(a1, wordSize)));
5385       ldr(tmp4, Address(pre(a2, wordSize)));
5386       cmp(cnt1, (u1)elem_per_word);
5387       br(LE, TAIL2);
5388       cmp(tmp1, tmp2);
5389     } br(EQ, NEXT_DWORD);
5390     b(DONE);
5391 
5392     bind(TAIL);
5393     eor(tmp4, tmp3, tmp4);
5394     eor(tmp2, tmp1, tmp2);
5395     lslv(tmp2, tmp2, tmp5);
5396     orr(tmp5, tmp4, tmp2);
5397     cmp(tmp5, zr);
5398     b(CSET_EQ);
5399 
5400     bind(TAIL2);
5401     eor(tmp2, tmp1, tmp2);
5402     cbnz(tmp2, DONE);
5403     b(LAST_CHECK);
5404 
5405     bind(STUB);
5406     ldr(tmp4, Address(pre(a2, start_offset)));
5407     cmp(cnt2, cnt1);
5408     br(NE, DONE);
5409     if (elem_size == 2) { // convert to byte counter
5410       lsl(cnt1, cnt1, 1);
5411     }
5412     eor(tmp5, tmp3, tmp4);
5413     cbnz(tmp5, DONE);
5414     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5415     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5416     address tpc = trampoline_call(stub);
5417     if (tpc == nullptr) {
5418       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5419       postcond(pc() == badAddress);
5420       return nullptr;
5421     }
5422     b(DONE);
5423 
5424     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5425     // so, if a2 == null => return false(0), else return true, so we can return a2
5426     mov(result, a2);
5427     b(DONE);
5428     bind(SHORT);
5429     cmp(cnt2, cnt1);
5430     br(NE, DONE);
5431     cbz(cnt1, SAME);
5432     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5433     ldr(tmp3, Address(pre(a1, start_offset)));
5434     ldr(tmp4, Address(pre(a2, start_offset)));
5435     bind(LAST_CHECK);
5436     eor(tmp4, tmp3, tmp4);
5437     lslv(tmp5, tmp4, tmp5);
5438     cmp(tmp5, zr);
5439     bind(CSET_EQ);
5440     cset(result, EQ);
5441     b(DONE);
5442   }
5443 
5444   bind(SAME);
5445   mov(result, true);
5446   // That's it.
5447   bind(DONE);
5448 
5449   BLOCK_COMMENT("} array_equals");
5450   postcond(pc() != badAddress);
5451   return pc();
5452 }
5453 
5454 // Compare Strings

6426  if (src.first()->is_stack()) {
6427     if (dst.first()->is_stack()) {
6428       ldr(tmp, Address(rfp, reg2offset_in(src.first())));
6429       str(tmp, Address(sp, reg2offset_out(dst.first())));
6430     } else {
6431       ldrd(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
6432     }
6433   } else if (src.first() != dst.first()) {
6434     if (src.is_single_phys_reg() && dst.is_single_phys_reg())
6435       fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
6436     else
6437       strd(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
6438   }
6439 }
6440 
6441 // Implements lightweight-locking.
6442 //
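//  - basic_lock: the frame's BasicObjectLock for this monitor; its cached
//                ObjectMonitor* field is cleared as part of the fast path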
6443 //  - obj: the object to be locked
6444 //  - t1, t2, t3: temporary registers, will be destroyed
6445 //  - slow: branched to if locking fails, absolute offset may be larger than 32KB (imm14 encoding).
6446 void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register t1, Register t2, Register t3, Label& slow) {
6447   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
6448   assert_different_registers(basic_lock, obj, t1, t2, t3, rscratch1);
6449 
6450   Label push;
6451   const Register top = t1;
6452   const Register mark = t2;
6453   const Register t = t3;
6454 
6455   // Preload the markWord. It is important that this is the first
6456   // instruction emitted as it is part of C1's null check semantics.
6457   ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
6458 
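  // Clear the cached ObjectMonitor* in the BasicLock so that a stale value
  // left in this stack slot cannot later be read back as a valid cache entry.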
6459   str(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))));
6460 
6461   // Check if the lock-stack is full.
6462   ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
6463   cmpw(top, (unsigned)LockStack::end_offset());
6464   br(Assembler::GE, slow);
6465 
6466   // Check for recursion.
6467   subw(t, top, oopSize);
6468   ldr(t, Address(rthread, t));
6469   cmp(obj, t);
6470   br(Assembler::EQ, push);
6471 
6472   // Check header for monitor (0b10).
6473   tst(mark, markWord::monitor_value);
6474   br(Assembler::NE, slow);
6475 
6476   // Try to lock. Transition lock bits 0b01 => 0b00
6477   assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
6478   orr(mark, mark, markWord::unlocked_value);
6479   eor(t, mark, markWord::unlocked_value);
6480   cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::xword,