Udiff src/hotspot/cpu/aarch64/interp_masm

src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp

@@ -1,7 +1,7 @@
  /*
-  * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
+  * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as

@@ -719,25 +719,27 @@
  // Args:
  //      c_rarg1: BasicObjectLock to be used for locking
  //
  // Kills:
  //      r0
- //      c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
+ //      c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, .. (param regs)
  //      rscratch1, rscratch2 (scratch regs)
  void InterpreterMacroAssembler::lock_object(Register lock_reg)
  {
    assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
-   if (UseHeavyMonitors) {
+   if (LockingMode == LM_MONITOR) {
      call_VM(noreg,
              CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
              lock_reg);
    } else {
      Label done;
  
      const Register swap_reg = r0;
      const Register tmp = c_rarg2;
      const Register obj_reg = c_rarg3; // Will contain the oop
+     const Register tmp2 = c_rarg4;
+     const Register tmp3 = c_rarg5;
  
      const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
      const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
      const int mark_offset = lock_offset +
                              BasicLock::displaced_header_offset_in_bytes();

@@ -752,86 +754,96 @@
        ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
        tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
        br(Assembler::NE, slow_case);
      }
  
-     if (UseBiasedLocking) {
-       biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
-     }
- 
-     // Load (object->mark() | 1) into swap_reg
-     ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
-     orr(swap_reg, rscratch1, 1);
- 
-     // Save (object->mark() | 1) into BasicLock's displaced header
-     str(swap_reg, Address(lock_reg, mark_offset));
- 
-     assert(lock_offset == 0,
-            "displached header must be first word in BasicObjectLock");
- 
-     Label fail;
-     if (PrintBiasedLockingStatistics) {
-       Label fast;
-       cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
-       bind(fast);
-       atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
-                   rscratch2, rscratch1, tmp);
+     if (LockingMode == LM_LIGHTWEIGHT) {
+       lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case);
        b(done);
-       bind(fail);
      } else {
-       cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
-     }
+       if (UseBiasedLocking) {
+         biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
+       }
  
-     // Fast check for recursive lock.
-     //
-     // Can apply the optimization only if this is a stack lock
-     // allocated in this thread. For efficiency, we can focus on
-     // recently allocated stack locks (instead of reading the stack
-     // base and checking whether 'mark' points inside the current
-     // thread stack):
-     //  1) (mark & 7) == 0, and
-     //  2) sp <= mark < mark + os::pagesize()
-     //
-     // Warning: sp + os::pagesize can overflow the stack base. We must
-     // neither apply the optimization for an inflated lock allocated
-     // just above the thread stack (this is why condition 1 matters)
-     // nor apply the optimization if the stack lock is inside the stack
-     // of another thread. The latter is avoided even in case of overflow
-     // because we have guard pages at the end of all stacks. Hence, if
-     // we go over the stack base and hit the stack of another thread,
-     // this should not be in a writeable area that could contain a
-     // stack lock allocated by that thread. As a consequence, a stack
-     // lock less than page size away from sp is guaranteed to be
-     // owned by the current thread.
-     //
-     // These 3 tests can be done by evaluating the following
-     // expression: ((mark - sp) & (7 - os::vm_page_size())),
-     // assuming both stack pointer and pagesize have their
-     // least significant 3 bits clear.
-     // NOTE: the mark is in swap_reg %r0 as the result of cmpxchg
-     // NOTE2: aarch64 does not like to subtract sp from rn so take a
-     // copy
-     mov(rscratch1, sp);
-     sub(swap_reg, swap_reg, rscratch1);
-     ands(swap_reg, swap_reg, (uint64_t)(7 - os::vm_page_size()));
- 
-     // Save the test result, for recursive case, the result is zero
-     str(swap_reg, Address(lock_reg, mark_offset));
- 
-     if (PrintBiasedLockingStatistics) {
-       br(Assembler::NE, slow_case);
-       atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
-                   rscratch2, rscratch1, tmp);
-     }
-     br(Assembler::EQ, done);
+       // Load (object->mark() | 1) into swap_reg
+       ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+       orr(swap_reg, rscratch1, 1);
+ 
+       // Save (object->mark() | 1) into BasicLock's displaced header
+       str(swap_reg, Address(lock_reg, mark_offset));
+ 
+       assert(lock_offset == 0,
+              "displached header must be first word in BasicObjectLock");
+ 
+       Label fail;
+       if (PrintBiasedLockingStatistics) {
+         Label fast;
+         cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
+         bind(fast);
+         atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                     rscratch2, rscratch1, tmp);
+         b(done);
+         bind(fail);
+       } else {
+         cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
+       }
  
+       // Fast check for recursive lock.
+       //
+       // Can apply the optimization only if this is a stack lock
+       // allocated in this thread. For efficiency, we can focus on
+       // recently allocated stack locks (instead of reading the stack
+       // base and checking whether 'mark' points inside the current
+       // thread stack):
+       //  1) (mark & 7) == 0, and
+       //  2) sp <= mark < sp + os::pagesize()
+       //
+       // Warning: sp + os::pagesize can overflow the stack base. We must
+       // neither apply the optimization for an inflated lock allocated
+       // just above the thread stack (this is why condition 1 matters)
+       // nor apply the optimization if the stack lock is inside the stack
+       // of another thread. The latter is avoided even in case of overflow
+       // because we have guard pages at the end of all stacks. Hence, if
+       // we go over the stack base and hit the stack of another thread,
+       // this should not be in a writeable area that could contain a
+       // stack lock allocated by that thread. As a consequence, a stack
+       // lock less than page size away from sp is guaranteed to be
+       // owned by the current thread.
+       //
+       // These 3 tests can be done by evaluating the following
+       // expression: ((mark - sp) & (7 - os::vm_page_size())),
+       // assuming both stack pointer and pagesize have their
+       // least significant 3 bits clear.
+       // NOTE: the mark is in swap_reg %r0 as the result of cmpxchg
+       // NOTE2: aarch64 does not like to subtract sp from rn so take a
+       // copy
+       mov(rscratch1, sp);
+       sub(swap_reg, swap_reg, rscratch1);
+       ands(swap_reg, swap_reg, (uint64_t)(7 - os::vm_page_size()));
+ 
+       // Save the test result, for recursive case, the result is zero
+       str(swap_reg, Address(lock_reg, mark_offset));
+ 
+       if (PrintBiasedLockingStatistics) {
+         br(Assembler::NE, slow_case);
+         atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                     rscratch2, rscratch1, tmp);
+       }
+       br(Assembler::EQ, done);
+     }
      bind(slow_case);
  
      // Call the runtime routine for slow case
-     call_VM(noreg,
-             CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
-             lock_reg);
+     if (LockingMode == LM_LIGHTWEIGHT) {
+       call_VM(noreg,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj),
+               obj_reg);
+     } else {
+       call_VM(noreg,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+               lock_reg);
+     }
  
      bind(done);
    }
  }

@@ -849,45 +861,54 @@
  //      rscratch1, rscratch2 (scratch regs)
  void InterpreterMacroAssembler::unlock_object(Register lock_reg)
  {
    assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1");
  
-   if (UseHeavyMonitors) {
+   if (LockingMode == LM_MONITOR) {
      call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
    } else {
      Label done;
  
      const Register swap_reg   = r0;
      const Register header_reg = c_rarg2;  // Will contain the old oopMark
      const Register obj_reg    = c_rarg3;  // Will contain the oop
+     const Register tmp_reg    = c_rarg4;  // Temporary used by lightweight_unlock
  
      save_bcp(); // Save in case of exception
  
-     // Convert from BasicObjectLock structure to object and BasicLock
-     // structure Store the BasicLock address into %r0
-     lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+     if (LockingMode != LM_LIGHTWEIGHT) {
+       // Convert from BasicObjectLock structure to object and BasicLock
+       // structure. Store the BasicLock address into %r0
+       lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+     }
  
      // Load oop into obj_reg(%c_rarg3)
      ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
  
      // Free entry
      str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
  
-     if (UseBiasedLocking) {
-       biased_locking_exit(obj_reg, header_reg, done);
-     }
- 
-     // Load the old header from BasicLock structure
-     ldr(header_reg, Address(swap_reg,
-                             BasicLock::displaced_header_offset_in_bytes()));
+     if (LockingMode == LM_LIGHTWEIGHT) {
+       Label slow_case;
+       lightweight_unlock(obj_reg, header_reg, swap_reg, tmp_reg, slow_case);
+       b(done);
+       bind(slow_case);
+     } else {
+       if (UseBiasedLocking) {
+         biased_locking_exit(obj_reg, header_reg, done);
+       }
  
-     // Test for recursion
-     cbz(header_reg, done);
+       // Load the old header from BasicLock structure
+       ldr(header_reg, Address(swap_reg,
+                               BasicLock::displaced_header_offset_in_bytes()));
  
-     // Atomic swap back the old header
-     cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
+       // Test for recursion
+       cbz(header_reg, done);
  
+       // Atomic swap back the old header
+       cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
+     }
      // Call the runtime routine for slow case.
      str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
      call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
  
      bind(done);

< prev index next >