< prev index next >

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Print this page
@@ -409,11 +409,10 @@
    assert(tmpReg == rax, "");
    assert(scrReg == rdx, "");
    Label L_rtm_retry, L_decrement_retry, L_on_abort;
    int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
  
-   movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
    movptr(boxReg, tmpReg); // Save ObjectMonitor address
  
    if (RTMRetryCount > 0) {
      movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
      movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort

@@ -555,11 +554,11 @@
  // obj: object to lock
  // box: on-stack box address (displaced header location) - KILLED
  // rax,: tmp -- KILLED
  // scr: tmp -- KILLED
  void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
-                                  Register scrReg, Register cx1Reg, Register cx2Reg,
+                                  Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
                                   RTMLockingCounters* rtm_counters,
                                   RTMLockingCounters* stack_rtm_counters,
                                   Metadata* method_data,
                                   bool use_rtm, bool profile_rtm) {
    // Ensure the register assignments are disjoint

@@ -567,11 +566,11 @@
  
    if (use_rtm) {
      assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
    } else {
      assert(cx2Reg == noreg, "");
-     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
+     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg);
    }
  
    // Possible cases that we'll encounter in fast_lock
    // ------------------------------------------------
    // * Inflated

@@ -585,11 +584,11 @@
    //       = sp-proximity test hits
    //       = sp-proximity test generates false-negative
    //    -- by other
    //
  
-   Label IsInflated, DONE_LABEL, NO_COUNT, COUNT;
+   Label IsInflated, DONE_LABEL, slow_path, NO_COUNT, COUNT;
  
    if (DiagnoseSyncOnValueBasedClasses != 0) {
      load_klass(tmpReg, objReg, cx1Reg);
      movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
      testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);

@@ -608,28 +607,17 @@
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
    testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
    jccb(Assembler::notZero, IsInflated);
  
    if (!UseHeavyMonitors) {
-     // Attempt stack-locking ...
-     orptr (tmpReg, markWord::unlocked_value);
-     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
-     lock();
-     cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
-     jcc(Assembler::equal, COUNT);           // Success
- 
-     // Recursive locking.
-     // The object is stack-locked: markword contains stack pointer to BasicLock.
-     // Locked by current thread if difference with current SP is less than one page.
-     subptr(tmpReg, rsp);
-     // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
-     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
-     movptr(Address(boxReg, 0), tmpReg);
-   } else {
-     // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
-     testptr(objReg, objReg);
+     fast_lock_impl(objReg, tmpReg, thread, scrReg, cx1Reg, slow_path);
+     xorptr(rax, rax); // Set ZF = 1 (success)
+     jmp(COUNT);
    }
+   bind(slow_path);
+   // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
+   testptr(objReg, objReg);
    jmp(DONE_LABEL);
  
    bind(IsInflated);
    // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
  

@@ -665,18 +653,14 @@
    // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
    // we later store "Self" into m->Owner.  Transiently storing a stack address
    // (rsp or the address of the box) into  m->owner is harmless.
    // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
    lock();
-   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
-   // If we weren't able to swing _owner from NULL to the BasicLock
+   cmpxchgptr(thread, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+   // If we weren't able to swing _owner from NULL to the thread
    // then take the slow path.
    jccb  (Assembler::notZero, NO_COUNT);
-   // update _owner from BasicLock to thread
-   get_thread (scrReg);                    // beware: clobbers ICCs
-   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
    xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
  
    // If the CAS fails we can either retry or pass control to the slow path.
    // We use the latter tactic.
    // Pass the CAS result in the icc.ZFlag into DONE_LABEL

@@ -687,18 +671,15 @@
  #else // _LP64
    // It's inflated and we use scrReg for ObjectMonitor* in this section.
    movq(scrReg, tmpReg);
    xorq(tmpReg, tmpReg);
    lock();
-   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-   // Unconditionally set box->_displaced_header = markWord::unused_mark().
-   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
-   movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
+   cmpxchgptr(thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    // Propagate ICC.ZF from CAS above into DONE_LABEL.
    jccb(Assembler::equal, COUNT);          // CAS above succeeded; propagate ZF = 1 (success)
  
-   cmpptr(r15_thread, rax);                // Check if we are already the owner (recursive lock)
+   cmpptr(thread, rax);                     // Check if we are already the owner (recursive lock)
    jccb(Assembler::notEqual, NO_COUNT);    // If not recursive, ZF = 0 at this point (fail)
    incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success
  #endif // _LP64
  #if INCLUDE_RTM_OPT

@@ -784,30 +765,34 @@
      jmp(DONE_LABEL);                                                  // ... and we're done
      bind(L_regular_unlock);
    }
  #endif
  
+   movptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
    if (!UseHeavyMonitors) {
-     cmpptr(Address(boxReg, 0), NULL_WORD);                            // Examine the displaced header
-     jcc   (Assembler::zero, COUNT);                                   // 0 indicates recursive stack-lock
-   }
-   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // Examine the object's markword
-   if (!UseHeavyMonitors) {
-     testptr(tmpReg, markWord::monitor_value);                         // Inflated?
-     jccb   (Assembler::zero, Stacked);
+     testptr(boxReg, markWord::monitor_value);
+     jcc(Assembler::zero, Stacked);
+ 
+     // If the owner is ANONYMOUS, we need to fix it - in the slow-path.
+     Label L;
+     cmpptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) (intptr_t) ANONYMOUS_OWNER);
+     jccb(Assembler::notEqual, L);
+     testptr(objReg, objReg); // Clear ZF to indicate failure at DONE_LABEL.
+     jmp(DONE_LABEL);
+     bind(L);
    }
  
    // It's inflated.
  #if INCLUDE_RTM_OPT
    if (use_rtm) {
      Label L_regular_inflated_unlock;
      int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
-     movptr(boxReg, Address(tmpReg, owner_offset));
-     testptr(boxReg, boxReg);
+     movptr(tmpReg, Address(boxReg, owner_offset));
+     testptr(tmpReg, tmpReg);
      jccb(Assembler::notZero, L_regular_inflated_unlock);
      xend();
-     jmpb(DONE_LABEL);
+     jmp(DONE_LABEL);
      bind(L_regular_inflated_unlock);
    }
  #endif
  
    // Despite our balanced locking property we still check that m->_owner == Self

@@ -827,87 +812,69 @@
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
    // each other and there's no need for an explicit barrier (fence).
    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
  #ifndef _LP64
-   get_thread (boxReg);
- 
    // Note that we could employ various encoding schemes to reduce
    // the number of loads below (currently 4) to just 2 or 3.
    // Refer to the comments in synchronizer.cpp.
    // In practice the chain of fetches doesn't seem to impact performance, however.
-   xorptr(boxReg, boxReg);
-   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+   xorptr(tmpReg, tmpReg);
+   orptr(tmpReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    jccb  (Assembler::notZero, DONE_LABEL);
-   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-   jccb  (Assembler::notZero, CheckSucc);
-   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+   movptr(tmpReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+   orptr(tmpReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+   jccb  (Assembler::notZero, DONE_LABEL);
+   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
    jmpb  (DONE_LABEL);
- 
-   bind (Stacked);
-   // It's not inflated and it's not recursively stack-locked.
-   // It must be stack-locked.
-   // Try to reset the header to displaced header.
-   // The "box" value on the stack is stable, so we can reload
-   // and be assured we observe the same value as above.
-   movptr(tmpReg, Address(boxReg, 0));
-   lock();
-   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
-   // Intention fall-thru into DONE_LABEL
- 
-   // DONE_LABEL is a hot target - we'd really like to place it at the
-   // start of cache line by padding with NOPs.
-   // See the AMD and Intel software optimization manuals for the
-   // most efficient "long" NOP encodings.
-   // Unfortunately none of our alignment mechanisms suffice.
-   bind (CheckSucc);
  #else // _LP64
    // It's inflated
    Label LNotRecursive, LSuccess, LGoSlowPath;
  
-   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
+   cmpptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
    jccb(Assembler::equal, LNotRecursive);
  
    // Recursive inflated unlock
-   decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+   decq(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    jmpb(LSuccess);
  
    bind(LNotRecursive);
-   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+   movptr(tmpReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+   orptr(tmpReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    jccb  (Assembler::notZero, CheckSucc);
    // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
-   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
    jmpb  (DONE_LABEL);
  
    // Try to avoid passing control into the slow_path ...
    bind  (CheckSucc);
  
    // The following optional optimization can be elided if necessary
    // Effectively: if (succ == null) goto slow path
    // The code reduces the window for a race, however,
    // and thus benefits performance.
-   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
+   cmpptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
    jccb  (Assembler::zero, LGoSlowPath);
  
-   xorptr(boxReg, boxReg);
    // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
-   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  
    // Memory barrier/fence
    // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
    // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
    // This is faster on Nehalem and AMD Shanghai/Barcelona.
    // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
    // We might also restructure (ST Owner=0;barrier;LD _Succ) to
    // (mov box,0; xchgq box, &m->Owner; LD _succ) .
    lock(); addl(Address(rsp, 0), 0);
  
-   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
+   cmpptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
    jccb  (Assembler::notZero, LSuccess);
  
+   mov(tmpReg, boxReg);
+   xorptr(boxReg, boxReg);
+ 
    // Rare inopportune interleaving - race.
    // The successor vanished in the small window above.
    // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
    // We need to ensure progress and succession.
    // Try to reacquire the lock.

@@ -934,17 +901,17 @@
  
    bind  (LSuccess);
    testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
    jmpb  (DONE_LABEL);
  
+ #endif
    if (!UseHeavyMonitors) {
-     bind  (Stacked);
-     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
-     lock();
-     cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
+     bind(Stacked);
+     // Mark-word must be 00 now, try to swing it back to 01 (unlocked)
+     fast_unlock_impl(objReg, boxReg, tmpReg, DONE_LABEL);
+     xorptr(rax, rax); // Set ZF = 1 (success)
    }
- #endif
    bind(DONE_LABEL);
  
    // ZFlag == 1 count in fast path
    // ZFlag == 0 count in slow path
    jccb(Assembler::notZero, NO_COUNT);
< prev index next >