
src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp

*** 1,7 ***
  /*
!  * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
--- 1,7 ---
  /*
!  * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as

*** 719,25 ***
  // Args:
  //      c_rarg1: BasicObjectLock to be used for locking
  //
  // Kills:
  //      r0
! //      c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
  //      rscratch1, rscratch2 (scratch regs)
  void InterpreterMacroAssembler::lock_object(Register lock_reg)
  {
    assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
!   if (UseHeavyMonitors) {
      call_VM(noreg,
              CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
              lock_reg);
    } else {
      Label done;
  
      const Register swap_reg = r0;
      const Register tmp = c_rarg2;
      const Register obj_reg = c_rarg3; // Will contain the oop
  
      const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
      const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
      const int mark_offset = lock_offset +
                              BasicLock::displaced_header_offset_in_bytes();
--- 719,27 ---
  // Args:
  //      c_rarg1: BasicObjectLock to be used for locking
  //
  // Kills:
  //      r0
! //      c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, .. (param regs)
  //      rscratch1, rscratch2 (scratch regs)
  void InterpreterMacroAssembler::lock_object(Register lock_reg)
  {
    assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
!   if (LockingMode == LM_MONITOR) {
      call_VM(noreg,
              CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
              lock_reg);
    } else {
      Label done;
  
      const Register swap_reg = r0;
      const Register tmp = c_rarg2;
      const Register obj_reg = c_rarg3; // Will contain the oop
+     const Register tmp2 = c_rarg4;
+     const Register tmp3 = c_rarg5;
  
      const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
      const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
      const int mark_offset = lock_offset +
                              BasicLock::displaced_header_offset_in_bytes();
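Note on the hunk above: the boolean UseHeavyMonitors test is replaced by the multi-valued LockingMode flag, and two additional parameter registers (c_rarg4, c_rarg5) are reserved as temporaries for the lightweight-locking fast path introduced in the next hunk. As a reading aid, here is a minimal, self-contained C++ sketch of that three-way dispatch; the emit_* helpers and their bodies are hypothetical stand-ins for the code the assembler emits, not HotSpot code, while the LM_* values mirror HotSpot's LockingMode constants.

  // Sketch only: the dispatch shape LockingMode gives lock_object().
  #include <cstdio>

  enum LockingModes { LM_MONITOR = 0, LM_LEGACY = 1, LM_LIGHTWEIGHT = 2 };

  static void emit_runtime_monitorenter()        { puts("call_VM(InterpreterRuntime::monitorenter)"); }
  static void emit_lightweight_lock_fast_path()  { puts("lightweight_lock(...); slow case -> runtime"); }
  static void emit_legacy_stack_lock_fast_path() { puts("displaced-header CAS; slow case -> runtime"); }

  static void emit_lock_object(int locking_mode) {
    if (locking_mode == LM_MONITOR) {
      emit_runtime_monitorenter();         // always go straight to the runtime
    } else if (locking_mode == LM_LIGHTWEIGHT) {
      emit_lightweight_lock_fast_path();   // new lock-stack fast path
    } else {
      emit_legacy_stack_lock_fast_path();  // legacy stack-lock fast path
    }
  }

  int main() { emit_lock_object(LM_LIGHTWEIGHT); return 0; }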

*** 752,86 ***
        ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
        tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
        br(Assembler::NE, slow_case);
      }
  
!     if (UseBiasedLocking) {
!       biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
-     }
- 
-     // Load (object->mark() | 1) into swap_reg
-     ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
-     orr(swap_reg, rscratch1, 1);
- 
-     // Save (object->mark() | 1) into BasicLock's displaced header
-     str(swap_reg, Address(lock_reg, mark_offset));
- 
-     assert(lock_offset == 0,
-            "displached header must be first word in BasicObjectLock");
- 
-     Label fail;
-     if (PrintBiasedLockingStatistics) {
-       Label fast;
-       cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
-       bind(fast);
-       atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
-                   rscratch2, rscratch1, tmp);
        b(done);
-       bind(fail);
      } else {
!       cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
!     }
  
!     // Fast check for recursive lock.
!     //
!     // Can apply the optimization only if this is a stack lock
!     // allocated in this thread. For efficiency, we can focus on
!     // recently allocated stack locks (instead of reading the stack
!     // base and checking whether 'mark' points inside the current
!     // thread stack):
!     //  1) (mark & 7) == 0, and
!     //  2) sp <= mark < mark + os::pagesize()
!     //
!     // Warning: sp + os::pagesize can overflow the stack base. We must
!     // neither apply the optimization for an inflated lock allocated
!     // just above the thread stack (this is why condition 1 matters)
!     // nor apply the optimization if the stack lock is inside the stack
!     // of another thread. The latter is avoided even in case of overflow
!     // because we have guard pages at the end of all stacks. Hence, if
!     // we go over the stack base and hit the stack of another thread,
!     // this should not be in a writeable area that could contain a
!     // stack lock allocated by that thread. As a consequence, a stack
!     // lock less than page size away from sp is guaranteed to be
!     // owned by the current thread.
!     //
-     // These 3 tests can be done by evaluating the following
-     // expression: ((mark - sp) & (7 - os::vm_page_size())),
-     // assuming both stack pointer and pagesize have their
-     // least significant 3 bits clear.
-     // NOTE: the mark is in swap_reg %r0 as the result of cmpxchg
-     // NOTE2: aarch64 does not like to subtract sp from rn so take a
-     // copy
-     mov(rscratch1, sp);
-     sub(swap_reg, swap_reg, rscratch1);
-     ands(swap_reg, swap_reg, (uint64_t)(7 - os::vm_page_size()));
- 
-     // Save the test result, for recursive case, the result is zero
-     str(swap_reg, Address(lock_reg, mark_offset));
- 
-     if (PrintBiasedLockingStatistics) {
-       br(Assembler::NE, slow_case);
-       atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
-                   rscratch2, rscratch1, tmp);
-     }
-     br(Assembler::EQ, done);
  
      bind(slow_case);
  
      // Call the runtime routine for slow case
!     call_VM(noreg,
!             CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
!             lock_reg);
  
      bind(done);
    }
  }
  
--- 754,96 ---
        ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
        tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
        br(Assembler::NE, slow_case);
      }
  
!     if (LockingMode == LM_LIGHTWEIGHT) {
!       lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case);
        b(done);
      } else {
!       if (UseBiasedLocking) {
!         biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
+       }
  
!       // Load (object->mark() | 1) into swap_reg
!       ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
!       orr(swap_reg, rscratch1, 1);
! 
!       // Save (object->mark() | 1) into BasicLock's displaced header
!       str(swap_reg, Address(lock_reg, mark_offset));
! 
!       assert(lock_offset == 0,
!              "displached header must be first word in BasicObjectLock");
! 
!       Label fail;
!       if (PrintBiasedLockingStatistics) {
!         Label fast;
!         cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
!         bind(fast);
!         atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
!                     rscratch2, rscratch1, tmp);
!         b(done);
!         bind(fail);
!       } else {
!         cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
!       }
  
+       // Fast check for recursive lock.
+       //
+       // Can apply the optimization only if this is a stack lock
+       // allocated in this thread. For efficiency, we can focus on
+       // recently allocated stack locks (instead of reading the stack
+       // base and checking whether 'mark' points inside the current
+       // thread stack):
+       //  1) (mark & 7) == 0, and
+       //  2) sp <= mark < mark + os::pagesize()
+       //
+       // Warning: sp + os::pagesize can overflow the stack base. We must
+       // neither apply the optimization for an inflated lock allocated
+       // just above the thread stack (this is why condition 1 matters)
+       // nor apply the optimization if the stack lock is inside the stack
+       // of another thread. The latter is avoided even in case of overflow
+       // because we have guard pages at the end of all stacks. Hence, if
+       // we go over the stack base and hit the stack of another thread,
+       // this should not be in a writeable area that could contain a
+       // stack lock allocated by that thread. As a consequence, a stack
+       // lock less than page size away from sp is guaranteed to be
+       // owned by the current thread.
+       //
+       // These 3 tests can be done by evaluating the following
+       // expression: ((mark - sp) & (7 - os::vm_page_size())),
+       // assuming both stack pointer and pagesize have their
+       // least significant 3 bits clear.
+       // NOTE: the mark is in swap_reg %r0 as the result of cmpxchg
+       // NOTE2: aarch64 does not like to subtract sp from rn so take a
+       // copy
+       mov(rscratch1, sp);
+       sub(swap_reg, swap_reg, rscratch1);
+       ands(swap_reg, swap_reg, (uint64_t)(7 - os::vm_page_size()));
+ 
+       // Save the test result, for recursive case, the result is zero
+       str(swap_reg, Address(lock_reg, mark_offset));
+ 
+       if (PrintBiasedLockingStatistics) {
+         br(Assembler::NE, slow_case);
+         atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                     rscratch2, rscratch1, tmp);
+       }
+       br(Assembler::EQ, done);
+     }
      bind(slow_case);
  
      // Call the runtime routine for slow case
!     if (LockingMode == LM_LIGHTWEIGHT) {
!       call_VM(noreg,
!               CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj),
+               obj_reg);
+     } else {
+       call_VM(noreg,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+               lock_reg);
+     }
  
      bind(done);
    }
  }
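The recursion fast-path comment in the hunk above is easiest to check with concrete numbers. The sketch below (plain C++ with made-up addresses, not HotSpot code) evaluates the same expression, ((mark - sp) & (7 - os::vm_page_size())), and shows it is zero exactly when the mark has its three tag bits clear and lies within one page above sp.

  #include <cstdint>
  #include <cstdio>

  // With page_size a power of two, (7 - page_size) as a uint64_t keeps the
  // low 3 tag bits plus every bit at or above the page bit, so the AND is
  // zero iff (mark & 7) == 0 and sp <= mark < sp + page_size.
  static bool recursive_stack_lock(uint64_t mark, uint64_t sp, uint64_t page_size) {
    return ((mark - sp) & (7 - page_size)) == 0;
  }

  int main() {
    const uint64_t page = 4096;
    const uint64_t sp   = 0x00007fffc0000000;                   // made-up stack pointer
    printf("%d\n", recursive_stack_lock(sp + 0x40, sp, page));  // 1: stack lock just above sp
    printf("%d\n", recursive_stack_lock(sp + page, sp, page));  // 0: a full page away -> slow path
    printf("%d\n", recursive_stack_lock(sp + 0x42, sp, page));  // 0: tag bits set (inflated monitor)
    printf("%d\n", recursive_stack_lock(sp - 0x40, sp, page));  // 0: below sp -> not this frame
    return 0;
  }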
  

*** 849,45 ***
  //      rscratch1, rscratch2 (scratch regs)
  void InterpreterMacroAssembler::unlock_object(Register lock_reg)
  {
    assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1");
  
!   if (UseHeavyMonitors) {
      call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
    } else {
      Label done;
  
      const Register swap_reg   = r0;
      const Register header_reg = c_rarg2;  // Will contain the old oopMark
      const Register obj_reg    = c_rarg3;  // Will contain the oop
  
      save_bcp(); // Save in case of exception
  
!     // Convert from BasicObjectLock structure to object and BasicLock
!     // structure Store the BasicLock address into %r0
!     lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
  
      // Load oop into obj_reg(%c_rarg3)
      ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
  
      // Free entry
      str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
  
!     if (UseBiasedLocking) {
!       biased_locking_exit(obj_reg, header_reg, done);
!     }
! 
!     // Load the old header from BasicLock structure
!     ldr(header_reg, Address(swap_reg,
!                             BasicLock::displaced_header_offset_in_bytes()));
  
!     // Test for recursion
!     cbz(header_reg, done);
  
!     // Atomic swap back the old header
!     cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
  
      // Call the runtime routine for slow case.
      str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
      call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
  
      bind(done);
--- 861,54 ---
  //      rscratch1, rscratch2 (scratch regs)
  void InterpreterMacroAssembler::unlock_object(Register lock_reg)
  {
    assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1");
  
!   if (LockingMode == LM_MONITOR) {
      call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
    } else {
      Label done;
  
      const Register swap_reg   = r0;
      const Register header_reg = c_rarg2;  // Will contain the old oopMark
      const Register obj_reg    = c_rarg3;  // Will contain the oop
+     const Register tmp_reg    = c_rarg4;  // Temporary used by lightweight_unlock
  
      save_bcp(); // Save in case of exception
  
!     if (LockingMode != LM_LIGHTWEIGHT) {
!       // Convert from BasicObjectLock structure to object and BasicLock
!       // structure Store the BasicLock address into %r0
+       lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+     }
  
      // Load oop into obj_reg(%c_rarg3)
      ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
  
      // Free entry
      str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
  
!     if (LockingMode == LM_LIGHTWEIGHT) {
!       Label slow_case;
!       lightweight_unlock(obj_reg, header_reg, swap_reg, tmp_reg, slow_case);
!       b(done);
!       bind(slow_case);
!     } else {
!       if (UseBiasedLocking) {
+         biased_locking_exit(obj_reg, header_reg, done);
+       }
  
!       // Load the old header from BasicLock structure
!       ldr(header_reg, Address(swap_reg,
+                               BasicLock::displaced_header_offset_in_bytes()));
  
!       // Test for recursion
!       cbz(header_reg, done);
  
+       // Atomic swap back the old header
+       cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
+     }
      // Call the runtime routine for slow case.
      str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
      call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
  
      bind(done);
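In the hunk above, the LM_LIGHTWEIGHT branch hands the fast path to lightweight_unlock and branches to done, while the remaining inline path keeps the legacy displaced-header protocol: a zero displaced header marks a recursive enter (nothing to restore), otherwise the saved header is CASed back into the object, and a failed CAS falls through to InterpreterRuntime::monitorexit. A minimal sketch of that legacy logic follows, written against a simplified object model; the Object and BasicLock structs are assumptions, not HotSpot's types.

  #include <atomic>
  #include <cstdint>

  struct BasicLock { uintptr_t displaced_header; };           // simplified stand-in
  struct Object    { std::atomic<uintptr_t> mark; };          // simplified mark word

  // Legacy (stack-lock) unlock fast path: a zero displaced header means a
  // recursive enter, so there is nothing to restore; otherwise CAS the saved
  // header back, expecting the mark to still point at this BasicLock.
  static bool fast_unlock(Object* obj, BasicLock* lock) {
    uintptr_t dhw = lock->displaced_header;
    if (dhw == 0) {
      return true;                                            // recursive case: done
    }
    uintptr_t expected = reinterpret_cast<uintptr_t>(lock);   // mark should point at our lock
    return obj->mark.compare_exchange_strong(expected, dhw);  // false -> runtime monitorexit
  }

  int main() {
    Object o;
    BasicLock l;
    l.displaced_header = 0x5;                                 // made-up unlocked mark value
    o.mark.store(reinterpret_cast<uintptr_t>(&l));            // object is stack-locked by &l
    return fast_unlock(&o, &l) ? 0 : 1;                       // restores header, exits 0
  }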