src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

*** 1000,14 ***
    movptr(rscratch2, (intptr_t)Universe::non_oop_word());
    return trampoline_call(Address(entry, rh));
  }
  
  int MacroAssembler::ic_check_size() {
    if (target_needs_far_branch(CAST_FROM_FN_PTR(address, SharedRuntime::get_ic_miss_stub()))) {
!     return NativeInstruction::instruction_size * 7;
    } else {
!     return NativeInstruction::instruction_size * 5;
    }
  }
  
  int MacroAssembler::ic_check(int end_alignment) {
    Register receiver = j_rarg0;
--- 1000,15 ---
    movptr(rscratch2, (intptr_t)Universe::non_oop_word());
    return trampoline_call(Address(entry, rh));
  }
  
  int MacroAssembler::ic_check_size() {
+   int extra_instructions = UseCompactObjectHeaders ? 1 : 0;
    if (target_needs_far_branch(CAST_FROM_FN_PTR(address, SharedRuntime::get_ic_miss_stub()))) {
!     return NativeInstruction::instruction_size * (7 + extra_instructions);
    } else {
!     return NativeInstruction::instruction_size * (5 + extra_instructions);
    }
  }
  
  int MacroAssembler::ic_check(int end_alignment) {
    Register receiver = j_rarg0;
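
(Note: the size bookkeeping in ic_check_size() above can be modeled in a few lines of standalone C++. This is only a sketch of the arithmetic, assuming AArch64's fixed 4-byte instruction encoding; the function below is illustrative, not HotSpot code.)

    #include <cstdio>

    constexpr int kInsnSize = 4; // AArch64: NativeInstruction::instruction_size

    // Mirrors ic_check_size(): compact headers need one extra instruction,
    // because loading the narrow klass from the mark word takes two
    // instructions (ldr + lsr) instead of a single ldrw.
    int ic_check_size(bool far_branch, bool compact_headers) {
      int extra = compact_headers ? 1 : 0;
      return kInsnSize * ((far_branch ? 7 : 5) + extra);
    }

    int main() {
      printf("%d\n", ic_check_size(false, false)); // 20 bytes
      printf("%d\n", ic_check_size(false, true));  // 24 bytes
      printf("%d\n", ic_check_size(true,  true));  // 32 bytes
    }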

*** 1021,11 ***
    // before the inline cache check here, and not after
    align(end_alignment, offset() + ic_check_size());
  
    int uep_offset = offset();
  
!   if (UseCompressedClassPointers) {
      ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
      ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
      cmpw(tmp1, tmp2);
    } else {
      ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
--- 1022,15 ---
    // before the inline cache check here, and not after
    align(end_alignment, offset() + ic_check_size());
  
    int uep_offset = offset();
  
!   if (UseCompactObjectHeaders) {
+     load_nklass_compact(tmp1, receiver);
+     ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
+     cmpw(tmp1, tmp2);
+   } else if (UseCompressedClassPointers) {
      ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
      ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
      cmpw(tmp1, tmp2);
    } else {
      ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));

*** 4476,12 ***
    ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
    ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
    ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
  }
  
  void MacroAssembler::load_klass(Register dst, Register src) {
!   if (UseCompressedClassPointers) {
      ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
      decode_klass_not_null(dst);
    } else {
      ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    }
--- 4481,26 ---
    ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
    ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
    ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
  }
  
+ // Loads the obj's narrow klass (nKlass) into dst.
+ // Preserves all registers (including src, rscratch1 and rscratch2).
+ // Input:
+ //   src - the oop we want to load the klass from.
+ // Output: dst - the narrow klass (nKlass).
+ void MacroAssembler::load_nklass_compact(Register dst, Register src) {
+   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
+   ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
+   lsr(dst, dst, markWord::klass_shift);
+ }
+ 
  void MacroAssembler::load_klass(Register dst, Register src) {
!   if (UseCompactObjectHeaders) {
+     load_nklass_compact(dst, src);
+     decode_klass_not_null(dst);
+   } else if (UseCompressedClassPointers) {
      ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
      decode_klass_not_null(dst);
    } else {
      ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    }

*** 4533,12 ***
    ldr(dst, Address(dst, mirror_offset));
    resolve_oop_handle(dst, tmp1, tmp2);
  }
  
  void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
    if (UseCompressedClassPointers) {
!     ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
      if (CompressedKlassPointers::base() == nullptr) {
        cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift());
        return;
      } else if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
                 && CompressedKlassPointers::shift() == 0) {
--- 4552,17 ---
    ldr(dst, Address(dst, mirror_offset));
    resolve_oop_handle(dst, tmp1, tmp2);
  }
  
  void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
+   assert_different_registers(oop, trial_klass, tmp);
    if (UseCompressedClassPointers) {
!     if (UseCompactObjectHeaders) {
+       load_nklass_compact(tmp, oop);
+     } else {
+       ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+     }
      if (CompressedKlassPointers::base() == nullptr) {
        cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift());
        return;
      } else if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
                 && CompressedKlassPointers::shift() == 0) {
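
(Note: the base() == nullptr fast path works because with a null encoding base a narrow klass decodes to nothing more than a left shift, so the full trial_klass can be compared against tmp << shift directly, as cmp(trial_klass, tmp, LSL, shift) does above. A sketch of that identity; the shift value is an illustrative stand-in for CompressedKlassPointers::shift().)

    #include <cassert>
    #include <cstdint>

    constexpr int kShift = 3; // illustrative encoding shift

    int main() {
      uint64_t klass  = 0x89abcd0;                              // 8-byte-aligned Klass* value
      uint32_t nklass = static_cast<uint32_t>(klass >> kShift); // encode with base == nullptr
      // The compare folds the decode into its shifted operand:
      assert(klass == (static_cast<uint64_t>(nklass) << kShift));
      return 0;
    }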

*** 4551,22 ***
--- 4575,40 ---
      ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
    }
    cmp(trial_klass, tmp);
  }
  
+ void MacroAssembler::cmp_klass(Register src, Register dst, Register tmp1, Register tmp2) {
+   if (UseCompactObjectHeaders) {
+     load_nklass_compact(tmp1, src);
+     load_nklass_compact(tmp2, dst);
+     cmpw(tmp1, tmp2);
+   } else if (UseCompressedClassPointers) {
+     ldrw(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
+     ldrw(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
+     cmpw(tmp1, tmp2);
+   } else {
+     ldr(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
+     ldr(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
+     cmp(tmp1, tmp2);
+   }
+ }
+ 
  void MacroAssembler::store_klass(Register dst, Register src) {
    // FIXME: Should this be a store release?  concurrent gcs assumes
    // klass length is valid if klass field is not null.
+   assert(!UseCompactObjectHeaders, "not with compact headers");
    if (UseCompressedClassPointers) {
      encode_klass_not_null(src);
      strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
    } else {
      str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
    }
  }
  
  void MacroAssembler::store_klass_gap(Register dst, Register src) {
+   assert(!UseCompactObjectHeaders, "not with compact headers");
    if (UseCompressedClassPointers) {
      // Store to klass gap in destination
      strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
    }
  }
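
(Note: the two-oop cmp_klass() variant never decodes. Since klass encoding -- subtract base, shift right -- is injective for properly aligned Klass* values, equal narrow klasses imply equal Klass*, so a 32-bit compare suffices. A small sketch of that property; base and shift values are assumptions for illustration.)

    #include <cassert>
    #include <cstdint>

    constexpr uint64_t kBase  = 0x800000000ull; // assumed encoding base
    constexpr int      kShift = 3;              // assumed encoding shift

    uint32_t encode(uint64_t klass) {
      return static_cast<uint32_t>((klass - kBase) >> kShift);
    }

    int main() {
      uint64_t k1 = kBase + 0x1000, k2 = kBase + 0x1008;
      assert(encode(k1) == encode(k1)); // same klass, same narrow value
      assert(encode(k1) != encode(k2)); // distinct aligned klasses stay distinct
    }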

*** 4711,13 ***
  
    if (_klass_decode_mode != KlassDecodeNone) {
      return _klass_decode_mode;
    }
  
-   assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift()
-          || 0 == CompressedKlassPointers::shift(), "decode alg wrong");
- 
    if (CompressedKlassPointers::base() == nullptr) {
      return (_klass_decode_mode = KlassDecodeZero);
    }
  
    if (operand_valid_for_logical_immediate(
--- 4753,10 ---

*** 4739,28 ***
  
  void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
    switch (klass_decode_mode()) {
    case KlassDecodeZero:
      if (CompressedKlassPointers::shift() != 0) {
!       lsr(dst, src, LogKlassAlignmentInBytes);
      } else {
        if (dst != src) mov(dst, src);
      }
      break;
  
    case KlassDecodeXor:
      if (CompressedKlassPointers::shift() != 0) {
        eor(dst, src, (uint64_t)CompressedKlassPointers::base());
!       lsr(dst, dst, LogKlassAlignmentInBytes);
      } else {
        eor(dst, src, (uint64_t)CompressedKlassPointers::base());
      }
      break;
  
    case KlassDecodeMovk:
      if (CompressedKlassPointers::shift() != 0) {
!       ubfx(dst, src, LogKlassAlignmentInBytes, 32);
      } else {
        movw(dst, src);
      }
      break;
  
--- 4778,28 ---
  
  void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
    switch (klass_decode_mode()) {
    case KlassDecodeZero:
      if (CompressedKlassPointers::shift() != 0) {
!       lsr(dst, src, CompressedKlassPointers::shift());
      } else {
        if (dst != src) mov(dst, src);
      }
      break;
  
    case KlassDecodeXor:
      if (CompressedKlassPointers::shift() != 0) {
        eor(dst, src, (uint64_t)CompressedKlassPointers::base());
!       lsr(dst, dst, CompressedKlassPointers::shift());
      } else {
        eor(dst, src, (uint64_t)CompressedKlassPointers::base());
      }
      break;
  
    case KlassDecodeMovk:
      if (CompressedKlassPointers::shift() != 0) {
!       ubfx(dst, src, CompressedKlassPointers::shift(), 32);
      } else {
        movw(dst, src);
      }
      break;
  

*** 4778,19 ***
    assert (UseCompressedClassPointers, "should only be used for compressed headers");
  
    switch (klass_decode_mode()) {
    case KlassDecodeZero:
      if (CompressedKlassPointers::shift() != 0) {
!       lsl(dst, src, LogKlassAlignmentInBytes);
      } else {
        if (dst != src) mov(dst, src);
      }
      break;
  
    case KlassDecodeXor:
      if (CompressedKlassPointers::shift() != 0) {
!       lsl(dst, src, LogKlassAlignmentInBytes);
        eor(dst, dst, (uint64_t)CompressedKlassPointers::base());
      } else {
        eor(dst, src, (uint64_t)CompressedKlassPointers::base());
      }
      break;
--- 4817,19 ---
    assert (UseCompressedClassPointers, "should only be used for compressed headers");
  
    switch (klass_decode_mode()) {
    case KlassDecodeZero:
      if (CompressedKlassPointers::shift() != 0) {
!       lsl(dst, src, CompressedKlassPointers::shift());
      } else {
        if (dst != src) mov(dst, src);
      }
      break;
  
    case KlassDecodeXor:
      if (CompressedKlassPointers::shift() != 0) {
!       lsl(dst, src, CompressedKlassPointers::shift());
        eor(dst, dst, (uint64_t)CompressedKlassPointers::base());
      } else {
        eor(dst, src, (uint64_t)CompressedKlassPointers::base());
      }
      break;

*** 4801,11 ***
  
      if (dst != src) movw(dst, src);
      movk(dst, shifted_base >> 32, 32);
  
      if (CompressedKlassPointers::shift() != 0) {
!       lsl(dst, dst, LogKlassAlignmentInBytes);
      }
  
      break;
    }
  
--- 4840,11 ---
  
      if (dst != src) movw(dst, src);
      movk(dst, shifted_base >> 32, 32);
  
      if (CompressedKlassPointers::shift() != 0) {
!       lsl(dst, dst, CompressedKlassPointers::shift());
      }
  
      break;
    }
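
(Note: the encode_klass_not_null/decode_klass_not_null hunks above replace the fixed LogKlassAlignmentInBytes with CompressedKlassPointers::shift(): with compact headers the encoding shift is chosen at runtime and need no longer match the klass alignment constant. A round-trip sketch of the simplest mode, KlassDecodeZero (null base), with the shift as a runtime value; all values are illustrative.)

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // KlassDecodeZero with a runtime shift: encode is lsr, decode is lsl.
    struct Encoding {
      int shift; // stand-in for CompressedKlassPointers::shift()

      uint32_t encode(uint64_t klass) const {
        return static_cast<uint32_t>(klass >> shift);
      }
      uint64_t decode(uint32_t nklass) const {
        return static_cast<uint64_t>(nklass) << shift;
      }
    };

    int main() {
      for (int shift : {0, 3, 10}) {          // 10 models a larger compact-headers shift
        Encoding enc{shift};
        uint64_t klass = 0x12345ull << shift; // aligned to 1 << shift
        assert(enc.decode(enc.encode(klass)) == klass);
      }
    }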
  

*** 5201,10 ***
--- 5240,18 ---
    int elem_per_word = wordSize/elem_size;
    int log_elem_size = exact_log2(elem_size);
    int length_offset = arrayOopDesc::length_offset_in_bytes();
    int base_offset
      = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+   // When the base is not aligned to 8 bytes, we start the
+   // compare loop at the array length field instead, counting
+   // the length as extra elements of the comparison.
+   bool is_8aligned = is_aligned(base_offset, BytesPerWord);
+   assert(is_8aligned || is_aligned(length_offset, BytesPerWord),
+          "base_offset or length_offset must be 8-byte aligned");
+   int start_offset = is_8aligned ? base_offset : length_offset;
+   int extra_length = is_8aligned ? 0 : BytesPerInt / elem_size;
    int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
  
    assert(elem_size == 1 || elem_size == 2, "must be char or byte");
    assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
  

*** 5235,14 ***
      // if (a1.length != a2.length)
      //      return false;
      bind(A_IS_NOT_NULL);
      ldrw(cnt1, Address(a1, length_offset));
      ldrw(cnt2, Address(a2, length_offset));
      eorw(tmp5, cnt1, cnt2);
      cbnzw(tmp5, DONE);
!     lea(a1, Address(a1, base_offset));
!     lea(a2, Address(a2, base_offset));
      // Check for short strings, i.e. smaller than wordSize.
      subs(cnt1, cnt1, elem_per_word);
      br(Assembler::LT, SHORT);
      // Main 8 byte comparison loop.
      bind(NEXT_WORD); {
--- 5282,19 ---
      // if (a1.length != a2.length)
      //      return false;
      bind(A_IS_NOT_NULL);
      ldrw(cnt1, Address(a1, length_offset));
      ldrw(cnt2, Address(a2, length_offset));
+     if (extra_length != 0) {
+       // Increase loop counter by size of length field.
+       addw(cnt1, cnt1, extra_length);
+       addw(cnt2, cnt2, extra_length);
+     }
      eorw(tmp5, cnt1, cnt2);
      cbnzw(tmp5, DONE);
!     lea(a1, Address(a1, start_offset));
!     lea(a2, Address(a2, start_offset));
      // Check for short strings, i.e. smaller than wordSize.
      subs(cnt1, cnt1, elem_per_word);
      br(Assembler::LT, SHORT);
      // Main 8 byte comparison loop.
      bind(NEXT_WORD); {

*** 5302,18 ***
      mov(result, false);
      cbz(a1, DONE);
      ldrw(cnt1, Address(a1, length_offset));
      cbz(a2, DONE);
      ldrw(cnt2, Address(a2, length_offset));
    // on most CPUs a2 is still "locked" (surprisingly) in ldrw and it's
      // faster to perform another branch before comparing a1 and a2
      cmp(cnt1, (u1)elem_per_word);
      br(LE, SHORT); // short or same
!     ldr(tmp3, Address(pre(a1, base_offset)));
      subs(zr, cnt1, stubBytesThreshold);
      br(GE, STUB);
!     ldr(tmp4, Address(pre(a2, base_offset)));
      sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
      cmp(cnt2, cnt1);
      br(NE, DONE);
  
      // Main 16 byte comparison loop with 2 exits
--- 5354,23 ---
      mov(result, false);
      cbz(a1, DONE);
      ldrw(cnt1, Address(a1, length_offset));
      cbz(a2, DONE);
      ldrw(cnt2, Address(a2, length_offset));
+     if (extra_length != 0) {
+       // Increase loop counter by size of length field.
+       addw(cnt1, cnt1, extra_length);
+       addw(cnt2, cnt2, extra_length);
+     }
    // on most CPUs a2 is still "locked" (surprisingly) in ldrw and it's
      // faster to perform another branch before comparing a1 and a2
      cmp(cnt1, (u1)elem_per_word);
      br(LE, SHORT); // short or same
!     ldr(tmp3, Address(pre(a1, start_offset)));
      subs(zr, cnt1, stubBytesThreshold);
      br(GE, STUB);
!     ldr(tmp4, Address(pre(a2, start_offset)));
      sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
      cmp(cnt2, cnt1);
      br(NE, DONE);
  
      // Main 16 byte comparison loop with 2 exits

*** 5344,11 ***
      eor(tmp2, tmp1, tmp2);
      cbnz(tmp2, DONE);
      b(LAST_CHECK);
  
      bind(STUB);
!     ldr(tmp4, Address(pre(a2, base_offset)));
      cmp(cnt2, cnt1);
      br(NE, DONE);
      if (elem_size == 2) { // convert to byte counter
        lsl(cnt1, cnt1, 1);
      }
--- 5401,11 ---
      eor(tmp2, tmp1, tmp2);
      cbnz(tmp2, DONE);
      b(LAST_CHECK);
  
      bind(STUB);
!     ldr(tmp4, Address(pre(a2, start_offset)));
      cmp(cnt2, cnt1);
      br(NE, DONE);
      if (elem_size == 2) { // convert to byte counter
        lsl(cnt1, cnt1, 1);
      }

*** 5371,12 ***
      bind(SHORT);
      cmp(cnt2, cnt1);
      br(NE, DONE);
      cbz(cnt1, SAME);
      sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
!     ldr(tmp3, Address(a1, base_offset));
!     ldr(tmp4, Address(a2, base_offset));
      bind(LAST_CHECK);
      eor(tmp4, tmp3, tmp4);
      lslv(tmp5, tmp4, tmp5);
      cmp(tmp5, zr);
      bind(CSET_EQ);
--- 5428,12 ---
      bind(SHORT);
      cmp(cnt2, cnt1);
      br(NE, DONE);
      cbz(cnt1, SAME);
      sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
!     ldr(tmp3, Address(pre(a1, start_offset)));
!     ldr(tmp4, Address(pre(a2, start_offset)));
      bind(LAST_CHECK);
      eor(tmp4, tmp3, tmp4);
      lslv(tmp5, tmp4, tmp5);
      cmp(tmp5, zr);
      bind(CSET_EQ);

*** 6384,23 ***
  // Implements lightweight-locking.
  //
  //  - obj: the object to be locked
  //  - t1, t2, t3: temporary registers, will be destroyed
//  - slow: branched to if locking fails, absolute offset may be larger than 32KB (imm14 encoding).
! void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Register t3, Label& slow) {
    assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
!   assert_different_registers(obj, t1, t2, t3, rscratch1);
  
    Label push;
    const Register top = t1;
    const Register mark = t2;
    const Register t = t3;
  
    // Preload the markWord. It is important that this is the first
    // instruction emitted as it is part of C1's null check semantics.
    ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
  
    // Check if the lock-stack is full.
    ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
    cmpw(top, (unsigned)LockStack::end_offset());
    br(Assembler::GE, slow);
  
--- 6441,25 ---
  // Implements lightweight-locking.
  //
  //  - obj: the object to be locked
  //  - t1, t2, t3: temporary registers, will be destroyed
//  - slow: branched to if locking fails, absolute offset may be larger than 32KB (imm14 encoding).
! void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register t1, Register t2, Register t3, Label& slow) {
    assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
!   assert_different_registers(basic_lock, obj, t1, t2, t3, rscratch1);
  
    Label push;
    const Register top = t1;
    const Register mark = t2;
    const Register t = t3;
  
    // Preload the markWord. It is important that this is the first
    // instruction emitted as it is part of C1's null check semantics.
    ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
  
+   // Clear the BasicLock's cached ObjectMonitor* before the lock-stack push.
+   str(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize(BasicLock::object_monitor_cache_offset_in_bytes())));
+ 
    // Check if the lock-stack is full.
    ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
    cmpw(top, (unsigned)LockStack::end_offset());
    br(Assembler::GE, slow);
  