src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

@@ -24,17 +24,23 @@
  
  #include "precompiled.hpp"
  #include "asm/assembler.hpp"
  #include "asm/assembler.inline.hpp"
  #include "oops/methodData.hpp"
+ #include "opto/c2_CodeStubs.hpp"
  #include "opto/c2_MacroAssembler.hpp"
  #include "opto/intrinsicnode.hpp"
  #include "opto/opcodes.hpp"
+ #include "opto/output.hpp"
  #include "opto/subnode.hpp"
  #include "runtime/biasedLocking.hpp"
+ #include "runtime/globals.hpp"
  #include "runtime/objectMonitor.hpp"
  #include "runtime/stubRoutines.hpp"
+ #include "utilities/globalDefinitions.hpp"
+ #include "utilities/powerOfTwo.hpp"
+ #include "utilities/sizes.hpp"
  
  inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
    switch (vlen_in_bytes) {
      case  4: // fall-through
      case  8: // fall-through

@@ -444,16 +450,17 @@
  // obj: object to lock
  // box: on-stack box address (displaced header location) - KILLED
  // rax,: tmp -- KILLED
  // scr: tmp -- KILLED
  void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
-                                  Register scrReg, Register cx1Reg, Register cx2Reg,
+                                  Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
                                   BiasedLockingCounters* counters,
                                   RTMLockingCounters* rtm_counters,
                                   RTMLockingCounters* stack_rtm_counters,
                                   Metadata* method_data,
                                   bool use_rtm, bool profile_rtm) {
+   assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
    // Ensure the register assignments are disjoint
    assert(tmpReg == rax, "");
  
    if (use_rtm) {
      assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);

@@ -511,33 +518,39 @@
    }
  #endif // INCLUDE_RTM_OPT
  
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
    testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
-   jccb(Assembler::notZero, IsInflated);
+   jcc(Assembler::notZero, IsInflated);
  
-   // Attempt stack-locking ...
-   orptr (tmpReg, markWord::unlocked_value);
-   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
-   lock();
-   cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
-   if (counters != NULL) {
-     cond_inc32(Assembler::equal,
-                ExternalAddress((address)counters->fast_path_entry_count_addr()));
-   }
-   jcc(Assembler::equal, DONE_LABEL);           // Success
- 
-   // Recursive locking.
-   // The object is stack-locked: markword contains stack pointer to BasicLock.
-   // Locked by current thread if difference with current SP is less than one page.
-   subptr(tmpReg, rsp);
-   // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
-   andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
-   movptr(Address(boxReg, 0), tmpReg);
-   if (counters != NULL) {
-     cond_inc32(Assembler::equal,
-                ExternalAddress((address)counters->fast_path_entry_count_addr()));
+   if (LockingMode == LM_MONITOR) {
+     // Clear ZF so that we take the slow path at the DONE label. objReg is known to be non-null.
+     testptr(objReg, objReg);
+   } else {
+     assert(LockingMode == LM_LEGACY, "must be");
+     // Attempt stack-locking ...
+     orptr (tmpReg, markWord::unlocked_value);
+     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
+     lock();
+     cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
+     if (counters != NULL) {
+       cond_inc32(Assembler::equal,
+                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
+     }
+     jcc(Assembler::equal, DONE_LABEL);           // Success
+ 
+     // Recursive locking.
+     // The object is stack-locked: markword contains stack pointer to BasicLock.
+     // Locked by current thread if difference with current SP is less than one page.
+     subptr(tmpReg, rsp);
+     // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
+     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+     movptr(Address(boxReg, 0), tmpReg);
+     if (counters != NULL) {
+       cond_inc32(Assembler::equal,
+                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
+     }
    }
    jmp(DONE_LABEL);
  
    bind(IsInflated);
    // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
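As an aside for readers less fluent in x86 assembly: the stack-locking attempt above maps onto a short piece of plain C++. The sketch below is a simplified model, not HotSpot code; mark_t stands in for markWord, the bit constants mirror markWord::unlocked_value and markWord::monitor_value, and the recursive same-page test is summarized in a comment.

    #include <atomic>
    #include <cstdint>

    using mark_t = uintptr_t;
    constexpr mark_t unlocked_value = 0b01;  // low lock bits: neutral/unlocked
    constexpr mark_t monitor_value  = 0b10;  // low lock bits: inflated monitor

    // Legacy (LM_LEGACY) stack-lock attempt: CAS the header from "neutral"
    // (low bits 01) to a pointer to the on-stack BasicLock 'box' (which is
    // word-aligned, so its low bits are 00). The displaced header is stored
    // into *box first, exactly as the assembly anticipates a successful CAS.
    bool try_stack_lock(std::atomic<mark_t>* header, mark_t* box) {
      mark_t mark = header->load(std::memory_order_relaxed);
      if (mark & monitor_value) {
        return false;                   // inflated: handled separately
      }
      mark_t displaced = mark | unlocked_value;
      *box = displaced;                 // anticipate a successful CAS
      if (header->compare_exchange_strong(displaced, reinterpret_cast<mark_t>(box))) {
        return true;                    // this thread now owns the stack lock
      }
      // On CAS failure the assembly additionally tests whether the observed
      // mark is a stack address within one page of rsp (a recursive lock by
      // the current thread) and stores 0 into *box in that case.
      return false;
    }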

@@ -657,10 +670,11 @@
  // In the interest of performance we elide m->Owner==Self check in unlock.
  // A perfectly viable alternative is to elide the owner check except when
  // Xcheck:jni is enabled.
  
  void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
+   assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
    assert(boxReg == rax, "");
    assert_different_registers(objReg, boxReg, tmpReg);
  
    Label DONE_LABEL, Stacked, CheckSucc;
  

@@ -682,26 +696,31 @@
      jmp(DONE_LABEL);                                                  // ... and we're done
      bind(L_regular_unlock);
    }
  #endif
  
-   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
-   jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
+   if (LockingMode == LM_LEGACY) {
+     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
+     jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
+   }
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
-   testptr(tmpReg, markWord::monitor_value);                         // Inflated?
-   jccb  (Assembler::zero, Stacked);
+   if (LockingMode != LM_MONITOR) {
+     testptr(tmpReg, markWord::monitor_value);                         // Inflated?
+     jcc(Assembler::zero, Stacked);
+   }
  
    // It's inflated.
+ 
  #if INCLUDE_RTM_OPT
    if (use_rtm) {
      Label L_regular_inflated_unlock;
      int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
      movptr(boxReg, Address(tmpReg, owner_offset));
      testptr(boxReg, boxReg);
      jccb(Assembler::notZero, L_regular_inflated_unlock);
      xend();
-     jmpb(DONE_LABEL);
+     jmp(DONE_LABEL);
      bind(L_regular_inflated_unlock);
    }
  #endif
  
    // Despite our balanced locking property we still check that m->_owner == Self
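Continuing the C++ model from above, the entry checks of the legacy unlock path look roughly like this (again a sketch, not HotSpot code): a zero displaced header in the box denotes a recursive stack-lock and completes without touching the object, and the monitor bit routes inflated objects to the monitor-exit code.

    #include <atomic>
    #include <cstdint>

    using mark_t = uintptr_t;
    constexpr mark_t monitor_value = 0b10;   // markWord::monitor_value

    // Legacy (LM_LEGACY) unlock skeleton. Returns true when unlocked on the
    // fast path, false when the monitor-exit or slow path must run.
    bool legacy_fast_unlock(std::atomic<mark_t>* header, mark_t* box) {
      mark_t displaced = *box;
      if (displaced == 0) {
        return true;                    // recursive stack-lock: nothing to undo
      }
      mark_t mark = header->load(std::memory_order_relaxed);
      if (mark & monitor_value) {
        return false;                   // inflated: monitor exit path instead
      }
      // Stack-locked by us: the header points at our box, so CAS the
      // displaced header back in (this is what the Stacked block below does).
      mark_t expected = reinterpret_cast<mark_t>(box);
      return header->compare_exchange_strong(expected, displaced);
    }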

@@ -732,23 +751,14 @@
    xorptr(boxReg, boxReg);
    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    jccb  (Assembler::notZero, DONE_LABEL);
    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-   jccb  (Assembler::notZero, CheckSucc);
+   jccb  (Assembler::notZero, DONE_LABEL);
    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
    jmpb  (DONE_LABEL);
  
-   bind (Stacked);
-   // It's not inflated and it's not recursively stack-locked and it's not biased.
-   // It must be stack-locked.
-   // Try to reset the header to displaced header.
-   // The "box" value on the stack is stable, so we can reload
-   // and be assured we observe the same value as above.
-   movptr(tmpReg, Address(boxReg, 0));
-   lock();
-   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
    // Intention fall-thru into DONE_LABEL
  
    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the

@@ -828,19 +838,259 @@
  
    bind  (LSuccess);
    testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
    jmpb  (DONE_LABEL);
  
-   bind  (Stacked);
-   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
-   lock();
-   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
- 
  #endif
+   if (LockingMode == LM_LEGACY) {
+     bind  (Stacked);
+     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
+     lock();
+     cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
+     // Intentional fall-thru into DONE_LABEL
+   }
+ 
    bind(DONE_LABEL);
  }
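The inflated branch that fast_unlock keeps is, in essence, the classic monitor-exit fast path: no recursions, no queued threads, then release the owner field. A minimal C++ model, with a hand-rolled Monitor struct standing in for ObjectMonitor:

    #include <atomic>
    #include <cstdint>

    // Illustrative stand-in for the ObjectMonitor fields the fast path reads.
    struct Monitor {
      std::atomic<void*> owner;
      intptr_t           recursions;
      void*              cxq;        // recently arrived waiting threads
      void*              EntryList;  // threads eligible to contend next
    };

    // Returns true if the lock was released on the fast path. Returning
    // false corresponds to ZF == 0 in the assembly: the caller enters the
    // slow path, which may hand the monitor to a successor.
    bool inflated_exit_fast_path(Monitor* m) {
      if (m->recursions != 0) {
        return false;                  // recursive exit: not handled here
      }
      if (m->cxq != nullptr || m->EntryList != nullptr) {
        return false;                  // waiters exist: let the slow path decide
      }
      m->owner.store(nullptr, std::memory_order_release);  // release the lock
      return true;
    }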
  
+ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register rax_reg,
+                                               Register t, Register thread) {
+   assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+   assert(rax_reg == rax, "Used for CAS");
+   assert_different_registers(obj, box, rax_reg, t, thread);
+ 
+   // Handle inflated monitor.
+   Label inflated;
+   // Finish fast lock successfully. ZF value is irrelevant.
+   Label locked;
+   // Finish fast lock unsuccessfully. MUST jump with ZF == 0
+   Label slow_path;
+ 
+   if (DiagnoseSyncOnValueBasedClasses != 0) {
+     load_klass(rax_reg, obj, t);
+     movl(rax_reg, Address(rax_reg, Klass::access_flags_offset()));
+     testl(rax_reg, JVM_ACC_IS_VALUE_BASED_CLASS);
+     jcc(Assembler::notZero, slow_path);
+   }
+ 
+   const Register mark = t;
+ 
+   { // Lightweight Lock
+ 
+     Label push;
+ 
+     const Register top = box;
+ 
+     // Load the mark.
+     movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+ 
+     // Prefetch top.
+     movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
+ 
+     // Check for monitor (0b10).
+     testptr(mark, markWord::monitor_value);
+     jcc(Assembler::notZero, inflated);
+ 
+     // Check if lock-stack is full.
+     cmpl(top, LockStack::end_offset() - 1);
+     jcc(Assembler::greater, slow_path);
+ 
+     // Check if recursive.
+     cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
+     jccb(Assembler::equal, push);
+ 
+     // Try to lock. Transition lock bits 0b01 => 0b00
+     movptr(rax_reg, mark);
+     orptr(rax_reg, markWord::unlocked_value);
+     andptr(mark, ~(int32_t)markWord::unlocked_value);
+     lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+     jcc(Assembler::notEqual, slow_path);
+ 
+     bind(push);
+     // After successful lock, push object on lock-stack.
+     movptr(Address(thread, top), obj);
+     addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
+     jmpb(locked);
+   }
+ 
+   { // Handle inflated monitor.
+     bind(inflated);
+ 
+     const Register tagged_monitor = mark;
+ 
+     // CAS owner (null => current thread).
+     xorptr(rax_reg, rax_reg);
+     lock(); cmpxchgptr(thread, Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+     jccb(Assembler::equal, locked);
+ 
+     // Check if recursive.
+     cmpptr(thread, rax_reg);
+     jccb(Assembler::notEqual, slow_path);
+ 
+     // Recursive.
+     increment(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+   }
+ 
+   bind(locked);
+   // Set ZF = 1
+   xorl(rax_reg, rax_reg);
+ 
+ #ifdef ASSERT
+   // Check that locked label is reached with ZF set.
+   Label zf_correct;
+   jccb(Assembler::zero, zf_correct);
+   stop("Fast Lock ZF != 1");
+ #endif
+ 
+   bind(slow_path);
+ #ifdef ASSERT
+   // Check that slow_path label is reached with ZF not set.
+   jccb(Assembler::notZero, zf_correct);
+   stop("Fast Lock ZF != 0");
+   bind(zf_correct);
+ #endif
+   // C2 uses the value of ZF to determine the continuation.
+ }
+ 
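Both new routines report their outcome through ZF (1 = success, 0 = slow path), which is exactly what the ASSERT blocks verify. The lock side, modeled in plain C++ under stated assumptions: the lock-stack capacity of 8 is made up (the real limit comes from LockStack::end_offset()), and the inflated branch is reduced to a comment.

    #include <atomic>
    #include <cstdint>

    constexpr uintptr_t kUnlocked = 0b01;   // markWord::unlocked_value
    constexpr uintptr_t kMonitor  = 0b10;   // markWord::monitor_value

    // Per-thread lock-stack; the capacity here is purely illustrative.
    struct LockStackModel {
      static constexpr int kCapacity = 8;
      void* elems[kCapacity];
      int   top = 0;                        // index of the next free slot
    };

    // Model of fast_lock_lightweight's stack-lock path: true on success
    // (ZF == 1 in the real code), false when the slow path must run.
    bool lightweight_lock(std::atomic<uintptr_t>* header, void* obj,
                          LockStackModel* ls) {
      uintptr_t mark = header->load(std::memory_order_relaxed);
      if (mark & kMonitor) {
        return false;                       // inflated: CAS the owner instead
      }
      if (ls->top == LockStackModel::kCapacity) {
        return false;                       // lock-stack full: slow path
      }
      if (ls->top > 0 && ls->elems[ls->top - 1] == obj) {
        ls->elems[ls->top++] = obj;         // recursive: push obj again
        return true;
      }
      // Try to lock: transition the low lock bits 0b01 -> 0b00.
      uintptr_t expected = mark | kUnlocked;
      uintptr_t desired  = mark & ~kUnlocked;
      if (!header->compare_exchange_strong(expected, desired)) {
        return false;                       // contended or not unlocked
      }
      ls->elems[ls->top++] = obj;           // push only after a successful CAS
      return true;
    }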
+ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread) {
+   assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+   assert(reg_rax == rax, "Used for CAS");
+   assert_different_registers(obj, reg_rax, t);
+ 
+   // Handle inflated monitor.
+   Label inflated, inflated_check_lock_stack;
+   // Finish fast unlock successfully.  MUST jump with ZF == 1
+   Label unlocked;
+ 
+   const Register mark = t;
+   const Register top = reg_rax;
+ 
+   Label dummy;
+   C2FastUnlockLightweightStub* stub = nullptr;
+ 
+   if (!Compile::current()->output()->in_scratch_emit_size()) {
+     stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, mark, reg_rax, thread);
+     Compile::current()->output()->add_stub(stub);
+   }
+ 
+   Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
+   Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
+ 
+   { // Lightweight Unlock
+ 
+     // Load top.
+     movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
+ 
+     // Prefetch mark.
+     movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+ 
+     // Check if obj is top of lock-stack.
+     cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
+     // Top of lock stack was not obj. Must be monitor.
+     jcc(Assembler::notEqual, inflated_check_lock_stack);
+ 
+     // Pop lock-stack.
+     DEBUG_ONLY(movptr(Address(thread, top, Address::times_1, -oopSize), 0);)
+     subl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
+ 
+     // Check if recursive.
+     cmpptr(obj, Address(thread, top, Address::times_1, -2 * oopSize));
+     jcc(Assembler::equal, unlocked);
+ 
+     // We elide the monitor check, let the CAS fail instead.
+ 
+     // Try to unlock. Transition lock bits 0b00 => 0b01
+     movptr(reg_rax, mark);
+     andptr(reg_rax, ~(int32_t)markWord::lock_mask);
+     orptr(mark, markWord::unlocked_value);
+     lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+     jcc(Assembler::notEqual, push_and_slow_path);
+     jmp(unlocked);
+   }
+ 
+   { // Handle inflated monitor.
+     bind(inflated_check_lock_stack);
+ #ifdef ASSERT
+     Label check_done;
+     subl(top, oopSize);
+     cmpl(top, in_bytes(JavaThread::lock_stack_base_offset()));
+     jcc(Assembler::below, check_done);
+     cmpptr(obj, Address(thread, top));
+     jccb(Assembler::notEqual, inflated_check_lock_stack);
+     stop("Fast Unlock lock on stack");
+     bind(check_done);
+     testptr(mark, markWord::monitor_value);
+     jccb(Assembler::notZero, inflated);
+     stop("Fast Unlock not monitor");
+ #endif
+ 
+     bind(inflated);
+ 
+     // mark contains the tagged ObjectMonitor*.
+     const Register monitor = mark;
+ 
+ #ifndef _LP64
+     // Check if recursive.
+     xorptr(reg_rax, reg_rax);
+     orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+     jcc(Assembler::notZero, check_successor);
+ 
+     // Check if the entry lists are empty.
+     movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+     orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+     jcc(Assembler::notZero, check_successor);
+ 
+     // Release lock.
+     movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+ #else // _LP64
+     Label recursive;
+ 
+     // Check if recursive.
+     cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
+     jccb(Assembler::notEqual, recursive);
+ 
+     // Check if the entry lists are empty.
+     movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+     orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+     jcc(Assembler::notZero, check_successor);
+ 
+     // Release lock.
+     movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+     jmpb(unlocked);
+ 
+     // Recursive unlock.
+     bind(recursive);
+     decrement(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+     xorl(t, t); // Set ZF = 1 to indicate success.
+ #endif
+   }
+ 
+   bind(unlocked);
+   if (stub != nullptr) {
+     bind(stub->unlocked_continuation());
+   }
+ 
+ #ifdef ASSERT
+   // Check that unlocked label is reached with ZF set.
+   Label zf_correct;
+   jccb(Assembler::zero, zf_correct);
+   stop("Fast Unlock ZF != 1");
+ #endif
+ 
+   if (stub != nullptr) {
+     bind(stub->slow_path_continuation());
+   }
+ #ifdef ASSERT
+   // Check that the slow_path_continuation() label is reached with ZF not set.
+   jccb(Assembler::notZero, zf_correct);
+   stop("Fast Unlock ZF != 0");
+   bind(zf_correct);
+ #endif
+   // C2 uses the value of ZF to determine the continuation.
+ }
+ 
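The unlock side mirrors this: pop, test the new top for recursion, then CAS the lock bits 0b00 -> 0b01; if the CAS fails (for example because the mark was inflated concurrently, since the monitor test is elided), the entry is pushed back and the stub's push_and_slow_path runs. The same model, continued (repeated here so the sketch stays self-contained):

    #include <atomic>
    #include <cstdint>

    constexpr uintptr_t kUnlocked = 0b01;   // markWord::unlocked_value
    constexpr uintptr_t kLockMask = 0b11;   // markWord::lock_mask

    struct LockStackModel {
      static constexpr int kCapacity = 8;   // illustrative capacity
      void* elems[kCapacity];
      int   top = 0;
    };

    // Model of fast_unlock_lightweight's non-inflated path: true means
    // unlocked (ZF == 1), false means the slow path runs (ZF == 0).
    bool lightweight_unlock(std::atomic<uintptr_t>* header, void* obj,
                            LockStackModel* ls) {
      if (ls->top == 0 || ls->elems[ls->top - 1] != obj) {
        return false;                       // not on top: must be a monitor
      }
      ls->top--;                            // pop the lock-stack
      if (ls->top > 0 && ls->elems[ls->top - 1] == obj) {
        return true;                        // recursive: still locked by us
      }
      // Try to unlock: transition the low lock bits 0b00 -> 0b01. The
      // monitor check is elided; an inflated mark simply fails the CAS.
      uintptr_t mark = header->load(std::memory_order_relaxed);
      uintptr_t expected = mark & ~kLockMask;
      uintptr_t desired  = expected | kUnlocked;
      if (!header->compare_exchange_strong(expected, desired)) {
        ls->elems[ls->top++] = obj;         // push_and_slow_path: restore entry
        return false;
      }
      return true;
    }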
  //-------------------------------------------------------------------------------------------
  // Generic instructions support for use in .ad files C2 code generation
  
  void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
    if (dst != src) {

@@ -3972,5 +4222,27 @@
    vpsllq(xtmp2, xtmp2, 0x1, vlen_enc);
    evpcmpb(ktmp, ktmp, shuffle,  xtmp2, Assembler::lt, true, vlen_enc);
    evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc);
    evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
  }
+ 
+ #ifdef _LP64
+ void C2_MacroAssembler::load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp) {
+   C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
+   Compile::current()->output()->add_stub(stub);
+ 
+   // Note: Don't clobber obj anywhere in this method!
+ 
+   // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract
+   // obj-start, so that we can load from the object's mark-word instead. Usually the address
+   // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
+   // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and
+   // then passes that register as obj and 0 in disp. The following code extracts the base
+   // and offset to load the mark-word.
+   int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
+   movq(dst, Address(obj, index, scale, offset));
+   testb(dst, markWord::monitor_value);
+   jcc(Assembler::notZero, stub->entry());
+   bind(stub->continuation());
+   shrq(dst, markWord::klass_shift);
+ }
+ #endif
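The offset arithmetic deserves a worked example. With the mark word at offset 0 and an assumed (illustrative, not the real layout) klass_offset_in_bytes() of 8, both addressing shapes described in the comment resolve to the object's mark word:

    #include <cstdio>

    // Illustrative offsets only; the real values come from oopDesc.
    constexpr int mark_offset  = 0;   // oopDesc::mark_offset_in_bytes()
    constexpr int klass_offset = 8;   // assumed value for this example

    // Same rebasing as load_nklass_compact_c2: rebase an address expressed
    // against obj-start + klass_offset so that it hits the mark word.
    int effective_disp(int disp) {
      return mark_offset + disp - klass_offset;
    }

    int main() {
      // Case 1: obj register holds obj-start, disp == klass_offset:
      // effective displacement 0, i.e. load obj+0 (the mark word).
      printf("disp=%d -> %d\n", klass_offset, effective_disp(klass_offset));
      // Case 2: obj register holds obj-start + klass_offset, disp == 0:
      // effective displacement -8, again the mark word of the object.
      printf("disp=%d -> %d\n", 0, effective_disp(0));
      return 0;
    }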