src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
movptr(rscratch2, (intptr_t)Universe::non_oop_word());
return trampoline_call(Address(entry, rh));
}
int MacroAssembler::ic_check_size() {
if (target_needs_far_branch(CAST_FROM_FN_PTR(address, SharedRuntime::get_ic_miss_stub()))) {
! return NativeInstruction::instruction_size * 7;
} else {
! return NativeInstruction::instruction_size * 5;
}
}
int MacroAssembler::ic_check(int end_alignment) {
Register receiver = j_rarg0;
movptr(rscratch2, (intptr_t)Universe::non_oop_word());
return trampoline_call(Address(entry, rh));
}
int MacroAssembler::ic_check_size() {
+ int extra_instructions = UseCompactObjectHeaders ? 1 : 0;
if (target_needs_far_branch(CAST_FROM_FN_PTR(address, SharedRuntime::get_ic_miss_stub()))) {
! return NativeInstruction::instruction_size * (7 + extra_instructions);
} else {
! return NativeInstruction::instruction_size * (5 + extra_instructions);
}
}
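+ // Compact headers need one extra instruction here: the nKlass is read from
+ // the mark word with an ldr followed by an lsr (see load_nklass_compact),
+ // where the non-compact case uses a single ldrw of the klass field.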
int MacroAssembler::ic_check(int end_alignment) {
Register receiver = j_rarg0;
// before the inline cache check here, and not after
align(end_alignment, offset() + ic_check_size());
int uep_offset = offset();
! if (UseCompressedClassPointers) {
ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
cmpw(tmp1, tmp2);
} else {
ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
// before the inline cache check here, and not after
align(end_alignment, offset() + ic_check_size());
int uep_offset = offset();
! if (UseCompactObjectHeaders) {
+ load_nklass_compact(tmp1, receiver);
+ ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
+ cmpw(tmp1, tmp2);
+ } else if (UseCompressedClassPointers) {
ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
cmpw(tmp1, tmp2);
} else {
ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
}
void MacroAssembler::load_klass(Register dst, Register src) {
! if (UseCompressedClassPointers) {
ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
decode_klass_not_null(dst);
} else {
ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
}
+ // Loads the obj's narrow Klass (nKlass) into dst.
+ // Preserves all registers (incl. src, rscratch1 and rscratch2).
+ // Input:
+ //   src - the oop to load the klass from.
+ // Output:
+ //   dst - the narrow Klass.
+ void MacroAssembler::load_nklass_compact(Register dst, Register src) {
+ assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
+ ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
+ lsr(dst, dst, markWord::klass_shift);
+ }
+
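+ // Sketch of the assumed mark-word layout with compact headers (the exact
+ // split of the low bits is illustrative):
+ //   [ narrow Klass (upper bits) | hash / age / lock bits ]
+ // so the lsr above leaves the nKlass in the low bits of dst.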
void MacroAssembler::load_klass(Register dst, Register src) {
! if (UseCompactObjectHeaders) {
+ load_nklass_compact(dst, src);
+ decode_klass_not_null(dst);
+ } else if (UseCompressedClassPointers) {
ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
decode_klass_not_null(dst);
} else {
ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
ldr(dst, Address(dst, mirror_offset));
resolve_oop_handle(dst, tmp1, tmp2);
}
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
if (UseCompressedClassPointers) {
! ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
if (CompressedKlassPointers::base() == nullptr) {
cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift());
return;
} else if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
&& CompressedKlassPointers::shift() == 0) {
ldr(dst, Address(dst, mirror_offset));
resolve_oop_handle(dst, tmp1, tmp2);
}
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
+ assert_different_registers(oop, trial_klass, tmp);
if (UseCompressedClassPointers) {
! if (UseCompactObjectHeaders) {
+ load_nklass_compact(tmp, oop);
+ } else {
+ ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+ }
if (CompressedKlassPointers::base() == nullptr) {
cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift());
return;
} else if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
&& CompressedKlassPointers::shift() == 0) {
ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
}
cmp(trial_klass, tmp);
}
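// Note: in the zero-base case above, cmp(trial_klass, tmp, LSL, shift())
// folds the decode into the compare by shifting the narrow value back up,
// so no separate decode_klass_not_null is needed.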
+ void MacroAssembler::cmp_klass(Register src, Register dst, Register tmp1, Register tmp2) {
+ if (UseCompactObjectHeaders) {
+ load_nklass_compact(tmp1, src);
+ load_nklass_compact(tmp2, dst);
+ cmpw(tmp1, tmp2);
+ } else if (UseCompressedClassPointers) {
+ ldrw(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
+ ldrw(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
+ cmpw(tmp1, tmp2);
+ } else {
+ ldr(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
+ ldr(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
+ cmp(tmp1, tmp2);
+ }
+ }
+
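+ // Comparing the undecoded (narrow) values is sufficient here: the klass
+ // encoding is injective, so two oops share a Klass* iff their narrow
+ // klass bits are equal.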
void MacroAssembler::store_klass(Register dst, Register src) {
// FIXME: Should this be a store release? Concurrent GCs assume the
// length is valid if the klass field is not null.
+ assert(!UseCompactObjectHeaders, "not with compact headers");
if (UseCompressedClassPointers) {
encode_klass_not_null(src);
strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
} else {
str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
}
}
void MacroAssembler::store_klass_gap(Register dst, Register src) {
+ assert(!UseCompactObjectHeaders, "not with compact headers");
if (UseCompressedClassPointers) {
// Store to klass gap in destination
strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
}
}
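// With compact headers the klass bits live in the mark word (see
// load_nklass_compact above), so there is no separate klass field or klass
// gap to store into; the asserts above keep these paths unreachable.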
if (_klass_decode_mode != KlassDecodeNone) {
return _klass_decode_mode;
}
- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift()
- || 0 == CompressedKlassPointers::shift(), "decode alg wrong");
-
if (CompressedKlassPointers::base() == nullptr) {
return (_klass_decode_mode = KlassDecodeZero);
}
if (operand_valid_for_logical_immediate(
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
switch (klass_decode_mode()) {
case KlassDecodeZero:
if (CompressedKlassPointers::shift() != 0) {
! lsr(dst, src, LogKlassAlignmentInBytes);
} else {
if (dst != src) mov(dst, src);
}
break;
case KlassDecodeXor:
if (CompressedKlassPointers::shift() != 0) {
eor(dst, src, (uint64_t)CompressedKlassPointers::base());
! lsr(dst, dst, LogKlassAlignmentInBytes);
} else {
eor(dst, src, (uint64_t)CompressedKlassPointers::base());
}
break;
case KlassDecodeMovk:
if (CompressedKlassPointers::shift() != 0) {
! ubfx(dst, src, LogKlassAlignmentInBytes, 32);
} else {
movw(dst, src);
}
break;
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
switch (klass_decode_mode()) {
case KlassDecodeZero:
if (CompressedKlassPointers::shift() != 0) {
! lsr(dst, src, CompressedKlassPointers::shift());
} else {
if (dst != src) mov(dst, src);
}
break;
case KlassDecodeXor:
if (CompressedKlassPointers::shift() != 0) {
eor(dst, src, (uint64_t)CompressedKlassPointers::base());
! lsr(dst, dst, CompressedKlassPointers::shift());
} else {
eor(dst, src, (uint64_t)CompressedKlassPointers::base());
}
break;
case KlassDecodeMovk:
if (CompressedKlassPointers::shift() != 0) {
! ubfx(dst, src, CompressedKlassPointers::shift(), 32);
} else {
movw(dst, src);
}
break;
assert (UseCompressedClassPointers, "should only be used for compressed headers");
switch (klass_decode_mode()) {
case KlassDecodeZero:
if (CompressedKlassPointers::shift() != 0) {
! lsl(dst, src, LogKlassAlignmentInBytes);
} else {
if (dst != src) mov(dst, src);
}
break;
case KlassDecodeXor:
if (CompressedKlassPointers::shift() != 0) {
! lsl(dst, src, LogKlassAlignmentInBytes);
eor(dst, dst, (uint64_t)CompressedKlassPointers::base());
} else {
eor(dst, src, (uint64_t)CompressedKlassPointers::base());
}
break;
assert (UseCompressedClassPointers, "should only be used for compressed headers");
switch (klass_decode_mode()) {
case KlassDecodeZero:
if (CompressedKlassPointers::shift() != 0) {
! lsl(dst, src, CompressedKlassPointers::shift());
} else {
if (dst != src) mov(dst, src);
}
break;
case KlassDecodeXor:
if (CompressedKlassPointers::shift() != 0) {
! lsl(dst, src, CompressedKlassPointers::shift());
eor(dst, dst, (uint64_t)CompressedKlassPointers::base());
} else {
eor(dst, src, (uint64_t)CompressedKlassPointers::base());
}
break;
if (dst != src) movw(dst, src);
movk(dst, shifted_base >> 32, 32);
if (CompressedKlassPointers::shift() != 0) {
! lsl(dst, dst, LogKlassAlignmentInBytes);
}
break;
}
if (dst != src) movw(dst, src);
movk(dst, shifted_base >> 32, 32);
if (CompressedKlassPointers::shift() != 0) {
! lsl(dst, dst, CompressedKlassPointers::shift());
}
break;
}
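// Round-trip sketch of the zero-base (KlassDecodeZero) case, showing why the
// fixed LogKlassAlignmentInBytes is replaced by the runtime shift (an
// illustration, not code that is emitted anywhere):
//   narrowKlass nk = (narrowKlass)((uint64_t)k >> CompressedKlassPointers::shift()); // encode
//   Klass*      k2 = (Klass*)((uint64_t)nk << CompressedKlassPointers::shift());     // decode
// k2 == k whenever k is aligned to (1 << shift()); once compact headers are
// in play the shift need no longer equal LogKlassAlignmentInBytes, which is
// why the old assert above was removed.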
int elem_per_word = wordSize/elem_size;
int log_elem_size = exact_log2(elem_size);
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset
= arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+ // When the base is not aligned to 8 bytes, we let the compare loop
+ // start at the array length instead, folding the length word into
+ // the wide comparison.
+ bool is_8aligned = is_aligned(base_offset, BytesPerWord);
+ assert(is_aligned(base_offset, BytesPerWord) || is_aligned(length_offset, BytesPerWord),
+ "base_offset or length_offset must be 8-byte aligned");
+ int start_offset = is_8aligned ? base_offset : length_offset;
+ int extra_length = is_8aligned ? 0 : BytesPerInt / elem_size;
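+ // Worked example, assuming a layout where the 4-byte length immediately
+ // follows an 8-byte mark word: for a byte[] base_offset would be 12 and
+ // length_offset 8, so start_offset = 8 and extra_length = BytesPerInt / 1
+ // = 4 pseudo-elements; for a char[] extra_length = 2. Both counters below
+ // are bumped by extra_length so the length word is compared by the loop.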
int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
assert(elem_size == 1 || elem_size == 2, "must be char or byte");
assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
// if (a1.length != a2.length)
// return false;
bind(A_IS_NOT_NULL);
ldrw(cnt1, Address(a1, length_offset));
ldrw(cnt2, Address(a2, length_offset));
eorw(tmp5, cnt1, cnt2);
cbnzw(tmp5, DONE);
! lea(a1, Address(a1, base_offset));
! lea(a2, Address(a2, base_offset));
// Check for short strings, i.e. smaller than wordSize.
subs(cnt1, cnt1, elem_per_word);
br(Assembler::LT, SHORT);
// Main 8 byte comparison loop.
bind(NEXT_WORD); {
// if (a1.length != a2.length)
// return false;
bind(A_IS_NOT_NULL);
ldrw(cnt1, Address(a1, length_offset));
ldrw(cnt2, Address(a2, length_offset));
+ if (extra_length != 0) {
+ // Increase loop counter by size of length field.
+ addw(cnt1, cnt1, extra_length);
+ addw(cnt2, cnt2, extra_length);
+ }
eorw(tmp5, cnt1, cnt2);
cbnzw(tmp5, DONE);
! lea(a1, Address(a1, start_offset));
! lea(a2, Address(a2, start_offset));
// Check for short strings, i.e. smaller than wordSize.
subs(cnt1, cnt1, elem_per_word);
br(Assembler::LT, SHORT);
// Main 8 byte comparison loop.
bind(NEXT_WORD); {
mov(result, false);
cbz(a1, DONE);
ldrw(cnt1, Address(a1, length_offset));
cbz(a2, DONE);
ldrw(cnt2, Address(a2, length_offset));
// on most CPUs a2 is still "locked" (surprisingly) in ldrw and it's
// faster to perform another branch before comparing a1 and a2
cmp(cnt1, (u1)elem_per_word);
br(LE, SHORT); // short or same
! ldr(tmp3, Address(pre(a1, base_offset)));
subs(zr, cnt1, stubBytesThreshold);
br(GE, STUB);
! ldr(tmp4, Address(pre(a2, base_offset)));
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
cmp(cnt2, cnt1);
br(NE, DONE);
// Main 16 byte comparison loop with 2 exits
mov(result, false);
cbz(a1, DONE);
ldrw(cnt1, Address(a1, length_offset));
cbz(a2, DONE);
ldrw(cnt2, Address(a2, length_offset));
+ if (extra_length != 0) {
+ // Increase loop counter by size of length field.
+ addw(cnt1, cnt1, extra_length);
+ addw(cnt2, cnt2, extra_length);
+ }
// on most CPUs a2 is still "locked" (surprisingly) in ldrw and it's
// faster to perform another branch before comparing a1 and a2
cmp(cnt1, (u1)elem_per_word);
br(LE, SHORT); // short or same
! ldr(tmp3, Address(pre(a1, start_offset)));
subs(zr, cnt1, stubBytesThreshold);
br(GE, STUB);
! ldr(tmp4, Address(pre(a2, start_offset)));
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
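// tmp5 = -(cnt1 << (3 + log_elem_size)), i.e. minus the number of valid
// bits; LAST_CHECK uses it with lslv to shift out the bytes past the array
// end (only the low 6 bits of the shift amount matter on aarch64).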
cmp(cnt2, cnt1);
br(NE, DONE);
// Main 16 byte comparison loop with 2 exits
eor(tmp2, tmp1, tmp2);
cbnz(tmp2, DONE);
b(LAST_CHECK);
bind(STUB);
! ldr(tmp4, Address(pre(a2, base_offset)));
cmp(cnt2, cnt1);
br(NE, DONE);
if (elem_size == 2) { // convert to byte counter
lsl(cnt1, cnt1, 1);
}
eor(tmp2, tmp1, tmp2);
cbnz(tmp2, DONE);
b(LAST_CHECK);
bind(STUB);
! ldr(tmp4, Address(pre(a2, start_offset)));
cmp(cnt2, cnt1);
br(NE, DONE);
if (elem_size == 2) { // convert to byte counter
lsl(cnt1, cnt1, 1);
}
bind(SHORT);
cmp(cnt2, cnt1);
br(NE, DONE);
cbz(cnt1, SAME);
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
! ldr(tmp3, Address(a1, base_offset));
! ldr(tmp4, Address(a2, base_offset));
bind(LAST_CHECK);
eor(tmp4, tmp3, tmp4);
lslv(tmp5, tmp4, tmp5);
cmp(tmp5, zr);
bind(CSET_EQ);
bind(SHORT);
cmp(cnt2, cnt1);
br(NE, DONE);
cbz(cnt1, SAME);
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
! ldr(tmp3, Address(pre(a1, start_offset)));
! ldr(tmp4, Address(pre(a2, start_offset)));
bind(LAST_CHECK);
eor(tmp4, tmp3, tmp4);
lslv(tmp5, tmp4, tmp5);
cmp(tmp5, zr);
bind(CSET_EQ);
// Implements lightweight-locking.
//
// - obj: the object to be locked
// - t1, t2, t3: temporary registers, will be destroyed
// - slow: branched to if locking fails; the absolute offset may be larger than 32KB (imm14 encoding).
! void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Register t3, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
! assert_different_registers(obj, t1, t2, t3, rscratch1);
Label push;
const Register top = t1;
const Register mark = t2;
const Register t = t3;
// Preload the markWord. It is important that this is the first
// instruction emitted as it is part of C1's null check semantics.
ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
// Check if the lock-stack is full.
ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
cmpw(top, (unsigned)LockStack::end_offset());
br(Assembler::GE, slow);
// Implements lightweight-locking.
//
// - obj: the object to be locked
// - t1, t2, t3: temporary registers, will be destroyed
// - slow: branched to if locking fails; the absolute offset may be larger than 32KB (imm14 encoding).
! void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register t1, Register t2, Register t3, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
! assert_different_registers(basic_lock, obj, t1, t2, t3, rscratch1);
Label push;
const Register top = t1;
const Register mark = t2;
const Register t = t3;
// Preload the markWord. It is important that this is the first
// instruction emitted as it is part of C1's null check semantics.
ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
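+ // Eagerly clear the BasicLock's object-monitor cache slot so later code
+ // never observes a stale cached ObjectMonitor* for this lock.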
+ str(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))));
+
// Check if the lock-stack is full.
ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
cmpw(top, (unsigned)LockStack::end_offset());
br(Assembler::GE, slow);
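// top is the byte offset of the next free lock-stack slot; once it reaches
// LockStack::end_offset() the stack is full and we take the slow path.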