
src/hotspot/cpu/x86/macroAssembler_x86.cpp

*** 32,13 ***
--- 32,15 ---
  #include "gc/shared/barrierSetAssembler.hpp"
  #include "gc/shared/collectedHeap.inline.hpp"
  #include "gc/shared/tlab_globals.hpp"
  #include "interpreter/bytecodeHistogram.hpp"
  #include "interpreter/interpreter.hpp"
+ #include "logging/log.hpp"
  #include "memory/resourceArea.hpp"
  #include "memory/universe.hpp"
  #include "oops/accessDecorators.hpp"
+ #include "oops/compressedKlass.inline.hpp"
  #include "oops/compressedOops.inline.hpp"
  #include "oops/klass.inline.hpp"
  #include "prims/methodHandles.hpp"
  #include "runtime/flags/flagSetting.hpp"
  #include "runtime/interfaceSupport.inline.hpp"

*** 48,10 ***
--- 50,11 ---
  #include "runtime/safepoint.hpp"
  #include "runtime/safepointMechanism.hpp"
  #include "runtime/sharedRuntime.hpp"
  #include "runtime/stubRoutines.hpp"
  #include "runtime/thread.hpp"
+ #include "utilities/align.hpp"
  #include "utilities/macros.hpp"
  #include "crc32c.h"
  
  #ifdef PRODUCT
  #define BLOCK_COMMENT(str) /* nothing */

*** 3774,16 ***
  }
  
  // Preserves the contents of address; destroys the contents of length_in_bytes and temp.
  void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
    assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different");
!   assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord");
    Label done;
  
    testptr(length_in_bytes, length_in_bytes);
    jcc(Assembler::zero, done);
  
    // initialize topmost word, divide index by 2, check if odd and test if zero
    // note: for the remaining code to work, index must be a multiple of BytesPerWord
  #ifdef ASSERT
    {
      Label L;
--- 3777,29 ---
  }
  
  // Preserves the contents of address; destroys the contents of length_in_bytes and temp.
  void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
    assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different");
!   assert((offset_in_bytes & (BytesPerInt - 1)) == 0, "offset must be a multiple of BytesPerInt");
    Label done;
  
    testptr(length_in_bytes, length_in_bytes);
    jcc(Assembler::zero, done);
  
+   // Emit a single 32-bit store to clear leading bytes, if necessary.
+   xorptr(temp, temp);    // use _zero reg to clear memory (shorter code)
+ #ifdef _LP64
+   if (!is_aligned(offset_in_bytes, BytesPerWord)) {
+     movl(Address(address, offset_in_bytes), temp);
+     offset_in_bytes += BytesPerInt;
+     decrement(length_in_bytes, BytesPerInt);
+   }
+   assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord");
+   testptr(length_in_bytes, length_in_bytes);
+   jcc(Assembler::zero, done);
+ #endif
+ 
    // initialize topmost word, divide index by 2, check if odd and test if zero
    // note: for the remaining code to work, index must be a multiple of BytesPerWord
  #ifdef ASSERT
    {
      Label L;

*** 3792,11 ***
      stop("length must be a multiple of BytesPerWord");
      bind(L);
    }
  #endif
    Register index = length_in_bytes;
-   xorptr(temp, temp);    // use _zero reg to clear memory (shorter code)
    if (UseIncDec) {
      shrptr(index, 3);  // divide by 8/16 and set carry flag if bit 2 was set
    } else {
      shrptr(index, 2);  // use 2 instructions to avoid partial flag stall
      shrptr(index, 1);
--- 3808,10 ---

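To make the new prologue in the zero_memory() hunk above concrete: a minimal stand-alone C++ model of the 64-bit pre-step, assuming only 4-byte alignment of the incoming offset. Names and the memset stand-in are illustrative, not HotSpot's.

    #include <cassert>
    #include <cstddef>
    #include <cstring>

    // Model of zero_memory()'s new prologue: when the offset is 4-byte- but not
    // 8-byte-aligned, clear one 32-bit slot first so the word loop that follows
    // always operates on 8-byte-aligned memory.
    static void zero_memory_model(char* base, size_t length_in_bytes, size_t offset_in_bytes) {
      assert(offset_in_bytes % 4 == 0 && length_in_bytes % 4 == 0);
      if (length_in_bytes == 0) return;
      if (offset_in_bytes % 8 != 0) {          // leading 32-bit store
        memset(base + offset_in_bytes, 0, 4);
        offset_in_bytes += 4;
        length_in_bytes -= 4;
      }
      assert(offset_in_bytes % 8 == 0);
      memset(base + offset_in_bytes, 0, length_in_bytes);  // stand-in for the word loop
    }
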
*** 4726,34 ***
    movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
    movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
    movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
  }
  
- void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
-   assert_different_registers(src, tmp);
-   assert_different_registers(dst, tmp);
  #ifdef _LP64
!   if (UseCompressedClassPointers) {
!     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
!     decode_klass_not_null(dst, tmp);
!   } else
! #endif
!     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  }
  
! void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
    assert_different_registers(src, tmp);
    assert_different_registers(dst, tmp);
  #ifdef _LP64
!   if (UseCompressedClassPointers) {
!     encode_klass_not_null(src, tmp);
!     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
!   } else
  #endif
-     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  }
  
  void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
                                      Register tmp1, Register thread_tmp) {
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    decorators = AccessInternal::decorator_fixup(decorators);
    bool as_raw = (decorators & AS_RAW) != 0;
--- 4741,75 ---
    movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
    movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
    movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
  }
  
  #ifdef _LP64
! void MacroAssembler::load_nklass(Register dst, Register src) {
!   assert_different_registers(src, dst);
!   assert(UseCompressedClassPointers, "expect compressed class pointers");
! 
!   Label slow, done;
!   movq(dst, Address(src, oopDesc::mark_offset_in_bytes()));
+   // NOTE: While it would seem nice to use xorb instead (for which we don't have an encoding in our assembler),
+   // the encoding for xorq uses the signed version (0x81/6) of xor, which encodes as compactly as xorb would,
+   // and doesn't make a difference performance-wise.
+   xorq(dst, markWord::unlocked_value);
+   testb(dst, markWord::lock_mask_in_place);
+   jccb(Assembler::notZero, slow);
+ 
+   shrq(dst, markWord::klass_shift);
+   jmp(done);
+   bind(slow);
+ 
+   if (dst != rax) {
+     push(rax);
+   }
+   if (src != rax) {
+     mov(rax, src);
+   }
+   call(RuntimeAddress(StubRoutines::load_nklass()));
+   if (dst != rax) {
+     mov(dst, rax);
+     pop(rax);
+   }
+ 
+   bind(done);
  }
+ #endif
  
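A hedged C++ model of the fast path in load_nklass() above. The constants are stand-ins for markWord fields; their values here are assumptions for illustration, the real ones live in markWord.

    #include <cstdint>

    static const uint64_t kUnlockedValue   = 0x1;  // markWord::unlocked_value (assumed)
    static const uint64_t kLockMaskInPlace = 0x3;  // markWord::lock_mask_in_place (assumed)
    static const int      kKlassShift      = 32;   // markWord::klass_shift (illustrative)

    // Fast path: xor-ing with unlocked_value yields zero lock bits exactly when
    // the header is in the unlocked state; anything else (locked, inflated, or
    // displaced header) is routed to the slow-path stub.
    uint32_t load_nklass_model(uint64_t mark, uint32_t (*slow_path)(uint64_t)) {
      uint64_t m = mark ^ kUnlockedValue;
      if ((m & kLockMaskInPlace) != 0) {
        return slow_path(mark);
      }
      return (uint32_t)(m >> kKlassShift);  // narrow klass sits in the upper bits
    }
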
! void MacroAssembler::load_klass(Register dst, Register src, Register tmp, bool null_check_src) {
    assert_different_registers(src, tmp);
    assert_different_registers(dst, tmp);
  #ifdef _LP64
!   assert(UseCompressedClassPointers, "expect compressed class pointers");
!   Register d = dst;
!   if (src == dst) {
!     d = tmp;
+   }
+   if (null_check_src) {
+     null_check(src, oopDesc::mark_offset_in_bytes());
+   }
+   load_nklass(d, src);
+   if (src == dst) {
+     mov(dst, d);
+   }
+   decode_klass_not_null(dst, tmp);
+ #else
+   if (null_check_src) {
+     null_check(src, oopDesc::klass_offset_in_bytes());
+   }
+   movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  #endif
  }
  
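Putting load_nklass() and decode_klass_not_null() together, the 64-bit load_klass() fast path computes, in C++ terms, roughly the following. The shift and base are the compressed-class encoding parameters; kKlassShift is again an assumed stand-in, and only the unlocked-header fast path is modeled.

    #include <cstdint>

    // End-to-end model of the 64-bit load_klass() fast path: extract the narrow
    // klass from the mark word, then decode it against the encoding base/shift.
    uint64_t load_klass_model(uint64_t mark, uint64_t base, int shift) {
      const int kKlassShift = 32;                         // markWord::klass_shift (assumed)
      uint32_t narrow = (uint32_t)(mark >> kKlassShift);  // unlocked-header fast path only
      return base + ((uint64_t)narrow << shift);          // decode, add mode
    }
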
+ #ifndef _LP64
+ void MacroAssembler::store_klass(Register dst, Register src) {
+   movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
+ }
+ #endif
+ 
  void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
                                      Register tmp1, Register thread_tmp) {
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    decorators = AccessInternal::decorator_fixup(decorators);
    bool as_raw = (decorators & AS_RAW) != 0;

*** 4796,17 ***
  void MacroAssembler::store_heap_oop_null(Address dst) {
    access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
  }
  
  #ifdef _LP64
- void MacroAssembler::store_klass_gap(Register dst, Register src) {
-   if (UseCompressedClassPointers) {
-     // Store to klass gap in destination
-     movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
-   }
- }
- 
  #ifdef ASSERT
  void MacroAssembler::verify_heapbase(const char* msg) {
    assert (UseCompressedOops, "should be compressed");
    assert (Universe::heap() != NULL, "java heap should be initialized");
    if (CheckCompressedOops) {
--- 4852,10 ---

*** 4955,79 ***
        movq(dst, src);
      }
    }
  }
  
  void MacroAssembler::encode_klass_not_null(Register r, Register tmp) {
    assert_different_registers(r, tmp);
!   if (CompressedKlassPointers::base() != NULL) {
      mov64(tmp, (int64_t)CompressedKlassPointers::base());
      subq(r, tmp);
    }
!   if (CompressedKlassPointers::shift() != 0) {
!     assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
-     shrq(r, LogKlassAlignmentInBytes);
    }
  }
  
  void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) {
    assert_different_registers(src, dst);
!   if (CompressedKlassPointers::base() != NULL) {
      mov64(dst, -(int64_t)CompressedKlassPointers::base());
      addq(dst, src);
!   } else {
!     movptr(dst, src);
    }
!   if (CompressedKlassPointers::shift() != 0) {
!     assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
-     shrq(dst, LogKlassAlignmentInBytes);
    }
  }
  
  void  MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
    assert_different_registers(r, tmp);
!   // Note: it will change flags
!   assert(UseCompressedClassPointers, "should only be used for compressed headers");
!   // Cannot assert, unverified entry point counts instructions (see .ad file)
!   // vtableStubs also counts instructions in pd_code_size_limit.
!   // Also do not verify_oop as this is called by verify_oop.
-   if (CompressedKlassPointers::shift() != 0) {
-     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
-     shlq(r, LogKlassAlignmentInBytes);
    }
!   if (CompressedKlassPointers::base() != NULL) {
!     mov64(tmp, (int64_t)CompressedKlassPointers::base());
      addq(r, tmp);
    }
  }
  
  void  MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) {
    assert_different_registers(src, dst);
!   // Note: it will change flags
-   assert (UseCompressedClassPointers, "should only be used for compressed headers");
-   // Cannot assert, unverified entry point counts instructions (see .ad file)
    // vtableStubs also counts instructions in pd_code_size_limit.
    // Also do not verify_oop as this is called by verify_oop.
  
!   if (CompressedKlassPointers::base() == NULL &&
!       CompressedKlassPointers::shift() == 0) {
!     // The best case scenario is that there is no base or shift. Then it is already
!     // a pointer that needs nothing but a register rename.
!     movl(dst, src);
!   } else {
!     if (CompressedKlassPointers::base() != NULL) {
!       mov64(dst, (int64_t)CompressedKlassPointers::base());
!     } else {
!       xorq(dst, dst);
!     }
!     if (CompressedKlassPointers::shift() != 0) {
!       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
!       assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
!       leaq(dst, Address(dst, src, Address::times_8, 0));
!     } else {
!       addq(dst, src);
!     }
    }
  }
  
  void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
    assert (UseCompressedOops, "should only be used for compressed headers");
--- 5004,182 ---
        movq(dst, src);
      }
    }
  }
  
+ MacroAssembler::KlassDecodeMode MacroAssembler::_klass_decode_mode = KlassDecodeNone;
+ 
+ // Returns a static string
+ const char* MacroAssembler::describe_klass_decode_mode(MacroAssembler::KlassDecodeMode mode) {
+   switch (mode) {
+   case KlassDecodeNone: return "none";
+   case KlassDecodeZero: return "zero";
+   case KlassDecodeXor:  return "xor";
+   case KlassDecodeAdd:  return "add";
+   default:
+     ShouldNotReachHere();
+   }
+   return NULL;
+ }
+ 
+ // Return the current narrow Klass pointer decode mode.
+ MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode() {
+   if (_klass_decode_mode == KlassDecodeNone) {
+     // First time initialization
+     assert(UseCompressedClassPointers, "not using compressed class pointers");
+     assert(Metaspace::initialized(), "metaspace not initialized yet");
+ 
+     _klass_decode_mode = klass_decode_mode_for_base(CompressedKlassPointers::base());
+     guarantee(_klass_decode_mode != KlassDecodeNone,
+               PTR_FORMAT " is not a valid encoding base on x64",
+               p2i(CompressedKlassPointers::base()));
+     log_info(metaspace)("klass decode mode initialized: %s", describe_klass_decode_mode(_klass_decode_mode));
+   }
+   return _klass_decode_mode;
+ }
+ 
+ // Given an arbitrary base address, return the KlassDecodeMode that would be used. Return KlassDecodeNone
+ // if the base address is not valid for encoding.
+ MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode_for_base(address base) {
+   assert(CompressedKlassPointers::shift() != 0, "not lilliput?");
+ 
+   const uint64_t base_u64 = (uint64_t) base;
+ 
+   if (base_u64 == 0) {
+     return KlassDecodeZero;
+   }
+ 
+   if ((base_u64 & (KlassEncodingMetaspaceMax - 1)) == 0) {
+     return KlassDecodeXor;
+   }
+ 
+   // Note that there is no point in optimizing for shift=3 since Lilliput
+   // will use larger shifts.
+ 
+   // The add+shift mode for decode_and_move_klass_not_null() requires the base to be
+   // shiftable without losing bits. This is therefore the minimum restriction on x64 for
+   // a valid encoding base. It does not matter much in practice, since the shift values
+   // we use for Lilliput, while large, won't exceed a page size, and the encoding base
+   // will quite likely be page-aligned since it usually falls at the beginning of
+   // either CDS or CCS.
+   if ((base_u64 & (KlassAlignmentInBytes - 1)) == 0) {
+     return KlassDecodeAdd;
+   }
+ 
+   return KlassDecodeNone;
+ }
+ 
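klass_decode_mode_for_base() above reduces to a three-way classification of the base address. A stand-alone sketch, with assumed values standing in for the two VM constants:

    #include <cstdint>

    enum KlassDecodeMode { KlassDecodeNone, KlassDecodeZero, KlassDecodeXor, KlassDecodeAdd };

    // Assumed stand-ins: KlassEncodingMetaspaceMax (range reachable by a shifted
    // narrow klass) and KlassAlignmentInBytes (1 << shift). Values illustrative.
    static const uint64_t kEncodingRangeMax = UINT64_C(1) << 32;
    static const uint64_t kKlassAlignment   = UINT64_C(1) << 10;

    KlassDecodeMode mode_for_base(uint64_t base) {
      if (base == 0)                             return KlassDecodeZero; // decode is a bare shift
      if ((base & (kEncodingRangeMax - 1)) == 0) return KlassDecodeXor;  // base and payload share no bits
      if ((base & (kKlassAlignment - 1)) == 0)   return KlassDecodeAdd;  // base survives the shift round-trip
      return KlassDecodeNone;                                            // not usable as an encoding base
    }
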
  void MacroAssembler::encode_klass_not_null(Register r, Register tmp) {
    assert_different_registers(r, tmp);
!   switch (klass_decode_mode()) {
+   case KlassDecodeZero: {
+     shrq(r, CompressedKlassPointers::shift());
+     break;
+   }
+   case KlassDecodeXor: {
+     mov64(tmp, (int64_t)CompressedKlassPointers::base());
+     xorq(r, tmp);
+     shrq(r, CompressedKlassPointers::shift());
+     break;
+   }
+   case KlassDecodeAdd: {
      mov64(tmp, (int64_t)CompressedKlassPointers::base());
      subq(r, tmp);
+     shrq(r, CompressedKlassPointers::shift());
+     break;
    }
!   default:
!     ShouldNotReachHere();
    }
  }
  
  void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) {
    assert_different_registers(src, dst);
!   switch (klass_decode_mode()) {
+   case KlassDecodeZero: {
+     movptr(dst, src);
+     shrq(dst, CompressedKlassPointers::shift());
+     break;
+   }
+   case KlassDecodeXor: {
+     mov64(dst, (int64_t)CompressedKlassPointers::base());
+     xorq(dst, src);
+     shrq(dst, CompressedKlassPointers::shift());
+     break;
+   }
+   case KlassDecodeAdd: {
      mov64(dst, -(int64_t)CompressedKlassPointers::base());
      addq(dst, src);
!     shrq(dst, CompressedKlassPointers::shift());
!     break;
    }
!   default:
!     ShouldNotReachHere();
    }
  }
  
  void  MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
    assert_different_registers(r, tmp);
!   const uint64_t base_u64 = (uint64_t)CompressedKlassPointers::base();
!   switch (klass_decode_mode()) {
!   case KlassDecodeZero: {
!     shlq(r, CompressedKlassPointers::shift());
!     break;
    }
!   case KlassDecodeXor: {
!     assert((base_u64 & (KlassEncodingMetaspaceMax - 1)) == 0,
+            "base " UINT64_FORMAT_X " invalid for xor mode", base_u64); // should have been handled at VM init.
+     shlq(r, CompressedKlassPointers::shift());
+     mov64(tmp, base_u64);
+     xorq(r, tmp);
+     break;
+   }
+   case KlassDecodeAdd: {
+     shlq(r, CompressedKlassPointers::shift());
+     mov64(tmp, base_u64);
      addq(r, tmp);
+     break;
+   }
+   default:
+     ShouldNotReachHere();
    }
  }
  
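The xor variant above is correct because an xor-mode base has no bits set inside the encoding range, so the xor acts as both the subtraction in encode and the addition in decode. A small worked round-trip check under assumed constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t base  = UINT64_C(0x8) << 32;  // assumed: aligned to the encoding range
      const int      shift = 10;                   // assumed Lilliput-style shift
      const uint64_t klass = base + (UINT64_C(0x12345) << shift);  // shift-aligned Klass*

      uint64_t narrow  = (klass ^ base) >> shift;   // encode_klass_not_null, xor mode
      uint64_t decoded = (narrow << shift) ^ base;  // decode_klass_not_null, xor mode
      assert(decoded == klass);
      return 0;
    }
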
  void  MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) {
    assert_different_registers(src, dst);
!   // Note: Cannot assert, unverified entry point counts instructions (see .ad file)
    // vtableStubs also counts instructions in pd_code_size_limit.
    // Also do not verify_oop as this is called by verify_oop.
  
!   const uint64_t base_u64 = (uint64_t)CompressedKlassPointers::base();
! 
!   switch (klass_decode_mode()) {
!   case KlassDecodeZero: {
!     movq(dst, src);
!     shlq(dst, CompressedKlassPointers::shift());
!     break;
!   }
!   case KlassDecodeXor: {
!     assert((base_u64 & (KlassEncodingMetaspaceMax - 1)) == 0,
!            "base " UINT64_FORMAT_X " invalid for xor mode", base_u64); // should have been handled at VM init.
!     const uint64_t base_right_shifted = base_u64 >> CompressedKlassPointers::shift();
!     mov64(dst, base_right_shifted);
!     xorq(dst, src);
!     shlq(dst, CompressedKlassPointers::shift());
!     break;
!   }
!   case KlassDecodeAdd: {
+     assert((base_u64 & (KlassAlignmentInBytes - 1)) == 0,
+            "base " UINT64_FORMAT_X " invalid for add mode", base_u64); // should have been handled at VM init.
+     const uint64_t base_right_shifted = base_u64 >> CompressedKlassPointers::shift();
+     mov64(dst, base_right_shifted);
+     addq(dst, src);
+     shlq(dst, CompressedKlassPointers::shift());
+     break;
+   }
+   default:
+     ShouldNotReachHere();
    }
  }
  
  void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
    assert (UseCompressedOops, "should only be used for compressed headers");
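decode_and_move_klass_not_null() above folds the base into the narrow domain by shifting it right once at code-emission time, which is why the base must be shiftable without loss. The equivalence with the in-place decode, checked under the same assumed constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t base   = UINT64_C(0x8) << 32;  // assumed xor-mode base
      const int      shift  = 10;                   // assumed shift
      const uint64_t narrow = 0x12345;              // some narrow klass value

      uint64_t in_place = (narrow << shift) ^ base;             // decode_klass_not_null
      uint64_t moved    = ((base >> shift) ^ narrow) << shift;  // decode_and_move variant
      assert(in_place == moved);
      return 0;
    }
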