
src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp

@@ -222,38 +222,41 @@
    decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
  
    bind(no_count);
  }
  
- void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register t1,
+ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register t1,
                                                Register t2, Register t3) {
    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-   assert_different_registers(obj, t1, t2, t3);
+   assert_different_registers(obj, box, t1, t2, t3);
  
    // Handle inflated monitor.
    Label inflated;
    // Finish fast lock successfully. MUST branch to with flag == EQ
    Label locked;
    // Finish fast lock unsuccessfully. MUST branch to with flag == NE
    Label slow_path;
  
+   // Clear box. TODO[OMWorld]: Is this necessary? We could also defer this to avoid writing twice.
+   str(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+ 
    if (DiagnoseSyncOnValueBasedClasses != 0) {
      load_klass(t1, obj);
      ldrw(t1, Address(t1, Klass::access_flags_offset()));
      tstw(t1, JVM_ACC_IS_VALUE_BASED_CLASS);
      br(Assembler::NE, slow_path);
    }
  
    const Register t1_mark = t1;
+   const Register t3_t = t3;
  
    { // Lightweight locking
  
      // Push lock to the lock stack and finish successfully. MUST branch to with flag == EQ
      Label push;
  
      const Register t2_top = t2;
-     const Register t3_t = t3;
  
      // Check if lock-stack is full.
      ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
      cmpw(t2_top, (unsigned)LockStack::end_offset() - 1);
      br(Assembler::GT, slow_path);

@@ -287,30 +290,100 @@
    }
  
    { // Handle inflated monitor.
      bind(inflated);
  
-     // mark contains the tagged ObjectMonitor*.
-     const Register t1_tagged_monitor = t1_mark;
-     const uintptr_t monitor_tag = markWord::monitor_value;
-     const Register t2_owner_addr = t2;
-     const Register t3_owner = t3;
+     if (!OMUseC2Cache) {
+       // Set flags == NE (obj is non-null).
+       cmp(zr, obj);
+       b(slow_path);
+     } else {
+ 
+       if (OMCacheHitRate) increment(Address(rthread, JavaThread::lock_lookup_offset()));
  
-     // Compute owner address.
-     lea(t2_owner_addr, Address(t1_tagged_monitor, (in_bytes(ObjectMonitor::owner_offset()) - monitor_tag)));
+       Label monitor_found;
  
-     // CAS owner (null => current thread).
-     cmpxchg(t2_owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
-             /*release*/ false, /*weak*/ false, t3_owner);
-     br(Assembler::EQ, locked);
+       // Load cache address
+       lea(t3_t, Address(rthread, JavaThread::om_cache_oops_offset()));
  
-     // Check if recursive.
-     cmp(t3_owner, rthread);
-     br(Assembler::NE, slow_path);
+       const int num_unrolled = MIN2(OMC2UnrollCacheEntries, OMCacheSize);
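+       // Probe the first num_unrolled cache entries with straight-line code.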
+       for (int i = 0; i < num_unrolled; i++) {
+         ldr(t1, Address(t3_t));
+         cmp(obj, t1);
+         br(Assembler::EQ, monitor_found);
+         if (i + 1 != num_unrolled) {
+           increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));
+         }
+       }
+ 
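+       // Fall back to a search loop when the unrolled probes did not cover the whole cache.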
+       if (num_unrolled == 0 || (OMC2UnrollCacheLookupLoopTail && num_unrolled != OMCacheSize)) {
+         if (num_unrolled != 0) {
+           // Loop after unrolling, advance iterator.
+           increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));
+         }
+ 
+         Label loop;
+ 
+         // Search for obj in cache.
+         bind(loop);
+ 
+         // Check for match.
+         ldr(t1, Address(t3_t));
+         cmp(obj, t1);
+         br(Assembler::EQ, monitor_found);
+ 
+         // Search until null encountered, guaranteed _null_sentinel at end.
+         increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));
+         cbnz(t1, loop);
+         // Cache miss. NE is still set from the cmp above; cbnz does not set flags.
+         b(slow_path);
+       } else {
+         b(slow_path);
+       }
+ 
+       bind(monitor_found);
+       ldr(t1, Address(t3_t, OMCache::oop_to_monitor_difference()));
+       if (OMCacheHitRate) increment(Address(rthread, JavaThread::lock_hit_offset()));
+ 
+       // ObjectMonitor* is in t1
+       const Register t1_monitor = t1;
+       const Register t2_owner_addr = t2;
+       const Register t3_owner = t3;
+ 
+       Label recursive;
+       Label monitor_locked;
+ 
+       // Compute owner address.
+       lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset()));
+ 
+       if (OMRecursiveFastPath) {
+         ldr(t3_owner, Address(t2_owner_addr));
+         cmp(t3_owner, rthread);
+         br(Assembler::EQ, recursive);
+       }
+ 
+       // CAS owner (null => current thread).
+       cmpxchg(t2_owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
+               /*release*/ false, /*weak*/ false, t3_owner);
+       br(Assembler::EQ, monitor_locked);
+ 
+       if (OMRecursiveFastPath) {
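+         // The recursion check already ran above; a failed CAS means another thread owns the monitor.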
+         b(slow_path);
+       } else {
+         // Check if recursive.
+         cmp(t3_owner, rthread);
+         br(Assembler::NE, slow_path);
+       }
+ 
+       // Recursive.
+       bind(recursive);
+       increment(Address(t1_monitor, ObjectMonitor::recursions_offset()), 1);
+ 
+       bind(monitor_locked);
+       str(t1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+     }
  
-     // Recursive.
-     increment(Address(t1_tagged_monitor, in_bytes(ObjectMonitor::recursions_offset()) - monitor_tag), 1);
    }
  
    bind(locked);
    increment(Address(rthread, JavaThread::held_monitor_count_offset()));
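
  In rough terms, the cache probe emitted above behaves like the C++ sketch
  below. The entry struct and helper name are illustrative, not the actual
  OMCache layout, and the generated code additionally unrolls the first
  OMC2UnrollCacheEntries probes.

    struct OMCacheEntry { oop _oop; ObjectMonitor* _monitor; }; // illustrative

    // Scan the per-thread cache; the trailing _null_sentinel guarantees
    // that the loop terminates.
    static ObjectMonitor* lookup_in_om_cache(OMCacheEntry* e, oop obj) {
      for (;; e++) {
        if (e->_oop == obj)     return e->_monitor; // hit  -> monitor_found (EQ)
        if (e->_oop == nullptr) return nullptr;     // miss -> slow_path (NE)
      }
    }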
  

@@ -329,14 +402,14 @@
    bind(flag_correct);
  #endif
    // C2 uses the value of Flags (NE vs EQ) to determine the continuation.
  }
  
- void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Register t2,
-                                                 Register t3) {
+ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Register t1,
+                                                 Register t2, Register t3) {
    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-   assert_different_registers(obj, t1, t2, t3);
+   assert_different_registers(obj, box, t1, t2, t3);
  
    // Handle inflated monitor.
    Label inflated, inflated_load_monitor;
    // Finish fast unlock successfully. MUST branch to with flag == EQ
    Label unlocked;

@@ -347,10 +420,12 @@
    const Register t2_top = t2;
    const Register t3_t = t3;
  
    { // Lightweight unlock
  
+     Label push_and_slow_path;
+ 
      // Check if obj is top of lock-stack.
      ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
      subw(t2_top, t2_top, oopSize);
      ldr(t3_t, Address(rthread, t2_top));
      cmp(obj, t3_t);

@@ -370,19 +445,23 @@
      // Not recursive.
      // Load Mark.
      ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
  
      // Check header for monitor (0b10).
-     tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);
+     // Because we got here by popping (i.e. we pushed when we locked),
+     // there will be no monitor in the box. Push obj back onto the
+     // lock-stack so that the runtime can fix any potential anonymous owner.
+     tbnz(t1_mark, exact_log2(markWord::monitor_value), push_and_slow_path);
  
      // Try to unlock. Transition lock bits 0b00 => 0b01
      assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
      orr(t3_t, t1_mark, markWord::unlocked_value);
      cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword,
              /*acquire*/ false, /*release*/ true, /*weak*/ false, noreg);
      br(Assembler::EQ, unlocked);
  
+     bind(push_and_slow_path);
      // Compare and exchange failed.
      // Restore lock-stack and handle the unlock in runtime.
      DEBUG_ONLY(str(obj, Address(rthread, t2_top));)
      addw(t2_top, t2_top, oopSize);
      str(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));

@@ -410,56 +489,63 @@
      br(Assembler::NE, inflated);
      stop("Fast Unlock lock on stack");
      bind(check_done);
  #endif
  
-     // mark contains the tagged ObjectMonitor*.
-     const Register t1_monitor = t1_mark;
-     const uintptr_t monitor_tag = markWord::monitor_value;
- 
-     // Untag the monitor.
-     sub(t1_monitor, t1_mark, monitor_tag);
- 
-     const Register t2_recursions = t2;
-     Label not_recursive;
- 
-     // Check if recursive.
-     ldr(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset()));
-     cbz(t2_recursions, not_recursive);
- 
-     // Recursive unlock.
-     sub(t2_recursions, t2_recursions, 1u);
-     str(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset()));
-     // Set flag == EQ
-     cmp(t2_recursions, t2_recursions);
-     b(unlocked);
- 
-     bind(not_recursive);
- 
-     Label release;
-     const Register t2_owner_addr = t2;
- 
-     // Compute owner address.
-     lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset()));
- 
-     // Check if the entry lists are empty.
-     ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset()));
-     ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset()));
-     orr(rscratch1, rscratch1, t3_t);
-     cmp(rscratch1, zr);
-     br(Assembler::EQ, release);
- 
-     // The owner may be anonymous and we removed the last obj entry in
-     // the lock-stack. This loses the information about the owner.
-     // Write the thread to the owner field so the runtime knows the owner.
-     str(rthread, Address(t2_owner_addr));
-     b(slow_path);
- 
-     bind(release);
-     // Set owner to null.
-     // Release to satisfy the JMM
-     stlr(zr, t2_owner_addr);
+     if (!OMUseC2Cache) {
+       b(slow_path);
+     } else {
+       const Register t1_monitor = t1;
+ 
+       if (OMCacheHitRate) increment(Address(rthread, JavaThread::unlock_lookup_offset()));
+       ldr(t1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+       // TODO: Clean up these constants (with an enum and asserts)
+       cmp(t1_monitor, (uint8_t)2);
+       // Not symmetric: take the slow path when monitor == 0 or 1. Both are
+       // < 2, so the compare sets LO as well as NE (which slow_path requires).
+       br(Assembler::LO, slow_path);
+       if (OMCacheHitRate) increment(Address(rthread, JavaThread::unlock_hit_offset()));
+ 
+       const Register t2_recursions = t2;
+       Label not_recursive;
+ 
+       // Check if recursive.
+       ldr(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset()));
+       cbz(t2_recursions, not_recursive);
+ 
+       // Recursive unlock.
+       sub(t2_recursions, t2_recursions, 1u);
+       str(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset()));
+       // Set flag == EQ
+       cmp(t2_recursions, t2_recursions);
+       b(unlocked);
+ 
+       bind(not_recursive);
+ 
+       Label release;
+       const Register t2_owner_addr = t2;
+ 
+       // Compute owner address.
+       lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset()));
+ 
+       // Check if the entry lists are empty.
+       ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset()));
+       ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset()));
+       orr(rscratch1, rscratch1, t3_t);
+       cmp(rscratch1, zr);
+       br(Assembler::EQ, release);
+ 
+       // The owner may be anonymous and we removed the last obj entry in
+       // the lock-stack. This loses the information about the owner.
+       // Write the thread to the owner field so the runtime knows the owner.
+       str(rthread, Address(t2_owner_addr));
+       b(slow_path);
+ 
+       bind(release);
+       // Set owner to null.
+       // Release to satisfy the JMM
+       stlr(zr, t2_owner_addr);
+     }
    }
  
    bind(unlocked);
    decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
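
  Condensed, the inflated unlock path above makes the following decisions.
  This is a sketch with hypothetical accessor names; the real code compares
  the raw cache word against the 0/1 sentinels and uses a releasing store
  (stlr) for the owner field.

    // Sketch of the inflated unlock path (hypothetical accessors).
    static bool try_fast_unlock(ObjectMonitor* monitor, JavaThread* current) {
      if ((uintptr_t)monitor < 2) {        // 0/1: no usable cached monitor
        return false;                      // -> slow path
      }
      if (monitor->recursions() != 0) {
        monitor->dec_recursions();         // recursive unlock, stay locked
        return true;
      }
      if (monitor->has_entries()) {        // EntryList or cxq non-empty
        monitor->set_owner(current);       // make a possibly anonymous owner explicit
        return false;                      // -> slow path to wake a successor
      }
      monitor->release_clear_owner();      // releasing store of null (JMM)
      return true;
    }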
  

@@ -2492,5 +2578,24 @@
        return true;
      }
    }
    return MacroAssembler::in_scratch_emit_size();
  }
+ 
+ void C2_MacroAssembler::load_nklass_compact(Register dst, Register obj, Register index, int scale, int disp) {
+   // Note: Don't clobber obj anywhere in this method!
+ 
+   // The incoming address points at obj-start + klass_offset_in_bytes. We need to recover
+   // obj-start so that we can load from the object's mark-word instead. Usually the address
+   // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
+   // emits code that pre-computes obj-start + klass_offset_in_bytes into a register and
+   // then passes that register as obj and 0 in disp. The following code extracts the base
+   // and offset to load the mark-word.
+   int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
+   if (index == noreg) {
+     ldr(dst, Address(obj, offset));
+   } else {
+     lea(dst, Address(obj, index, Address::lsl(scale)));
+     ldr(dst, Address(dst, offset));
+   }
+   lsr(dst, dst, markWord::klass_shift);
+ }
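
  Since oopDesc::mark_offset_in_bytes() is 0 (the assert in
  fast_unlock_lightweight above relies on this), the rebasing arithmetic
  reduces to the following worked cases:

    // offset = mark_offset_in_bytes + disp - klass_offset_in_bytes
    //        = disp - klass_offset_in_bytes            (mark offset is 0)
    //
    // Case 1: obj holds obj-start, disp == klass_offset_in_bytes
    //         -> offset == 0, so the ldr reads the mark-word at obj-start.
    // Case 2: obj holds obj-start + klass_offset_in_bytes, disp == 0
    //         -> offset == -klass_offset_in_bytes, stepping back to the mark-word.
    // The final lsr then extracts the narrow klass bits via markWord::klass_shift.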