
src/hotspot/cpu/x86/interp_masm_x86.cpp

@@ -1,7 +1,7 @@
  /*
-  * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
+  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.

@@ -65,11 +65,11 @@
    testptr(mdo_addr, TypeEntries::null_seen);
    jccb(Assembler::notZero, next); // null already seen. Nothing to do anymore.
    // atomic update to prevent overwriting Klass* with 0
    lock();
    orptr(mdo_addr, TypeEntries::null_seen);
-   jmpb(next);
+   jmp(next);
  
    bind(update);
    Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
    load_klass(obj, obj, tmp_load_klass);
  #ifdef _LP64

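The locked OR above sets TypeEntries::null_seen without risking that a Klass* stored concurrently into the same profile cell is overwritten with zero. A minimal standalone sketch of the same idea, assuming a hypothetical profile cell and a placeholder bit value (not HotSpot's actual TypeEntries encoding):

    #include <atomic>
    #include <cstdint>

    // Placeholder bit value; the real TypeEntries::null_seen constant is not asserted here.
    static constexpr uintptr_t kNullSeenBit = 0x1;

    // Set the null_seen bit without losing a pointer that another thread may
    // store into the same cell concurrently: a plain load/or/store could write
    // back a stale zero, while an atomic OR only flips the requested bit.
    inline void mark_null_seen(std::atomic<uintptr_t>& profile_cell) {
      profile_cell.fetch_or(kNullSeenBit, std::memory_order_relaxed);
    }
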
@@ -1215,11 +1215,11 @@
  //      rax, rbx
  void InterpreterMacroAssembler::lock_object(Register lock_reg) {
    assert(lock_reg == LP64_ONLY(c_rarg1) NOT_LP64(rdx),
           "The argument is only for looks. It must be c_rarg1");
  
-   if (UseHeavyMonitors) {
+   if (LockingMode == LM_MONITOR) {
      call_VM(noreg,
              CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
              lock_reg);
    } else {
      Label done;

@@ -1249,78 +1249,93 @@
  
      if (UseBiasedLocking) {
        biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp_reg, rklass_decode_tmp, false, done, &slow_case);
      }
  
-     // Load immediate 1 into swap_reg %rax
-     movl(swap_reg, (int32_t)1);
+     if (LockingMode == LM_LIGHTWEIGHT) {
+ #ifdef _LP64
+       const Register thread = r15_thread;
+ #else
+       const Register thread = lock_reg;
+       get_thread(thread);
+ #endif
+       lightweight_lock(obj_reg, swap_reg, thread, tmp_reg, slow_case);
+       jmp(done);
+     } else {
+       // Load immediate 1 into swap_reg %rax
+       movl(swap_reg, (int32_t)1);
  
-     // Load (object->mark() | 1) into swap_reg %rax
-     orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+       // Load (object->mark() | 1) into swap_reg %rax
+       orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  
-     // Save (object->mark() | 1) into BasicLock's displaced header
-     movptr(Address(lock_reg, mark_offset), swap_reg);
+       // Save (object->mark() | 1) into BasicLock's displaced header
+       movptr(Address(lock_reg, mark_offset), swap_reg);
  
-     assert(lock_offset == 0,
-            "displaced header must be first word in BasicObjectLock");
+       assert(lock_offset == 0,
+              "displaced header must be first word in BasicObjectLock");
  
-     lock();
-     cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
-     if (PrintBiasedLockingStatistics) {
-       cond_inc32(Assembler::zero,
-                  ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr()));
-     }
-     jcc(Assembler::zero, done);
- 
-     const int zero_bits = LP64_ONLY(7) NOT_LP64(3);
- 
-     // Fast check for recursive lock.
-     //
-     // Can apply the optimization only if this is a stack lock
-     // allocated in this thread. For efficiency, we can focus on
-     // recently allocated stack locks (instead of reading the stack
-     // base and checking whether 'mark' points inside the current
-     // thread stack):
-     //  1) (mark & zero_bits) == 0, and
-     //  2) rsp <= mark < rsp + os::pagesize()
-     //
-     // Warning: rsp + os::pagesize can overflow the stack base. We must
-     // neither apply the optimization for an inflated lock allocated
-     // just above the thread stack (this is why condition 1 matters)
-     // nor apply the optimization if the stack lock is inside the stack
-     // of another thread. The latter is avoided even in case of overflow
-     // because we have guard pages at the end of all stacks. Hence, if
-     // we go over the stack base and hit the stack of another thread,
-     // this should not be in a writeable area that could contain a
-     // stack lock allocated by that thread. As a consequence, a stack
-     // lock less than page size away from rsp is guaranteed to be
-     // owned by the current thread.
-     //
-     // These 3 tests can be done by evaluating the following
-     // expression: ((mark - rsp) & (zero_bits - os::vm_page_size())),
-     // assuming both stack pointer and pagesize have their
-     // least significant bits clear.
-     // NOTE: the mark is in swap_reg %rax as the result of cmpxchg
-     subptr(swap_reg, rsp);
-     andptr(swap_reg, zero_bits - os::vm_page_size());
- 
-     // Save the test result, for recursive case, the result is zero
-     movptr(Address(lock_reg, mark_offset), swap_reg);
- 
-     if (PrintBiasedLockingStatistics) {
-       cond_inc32(Assembler::zero,
-                  ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr()));
+       lock();
+       cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+       if (PrintBiasedLockingStatistics) {
+         cond_inc32(Assembler::zero,
+                    ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr()));
+       }
+       jcc(Assembler::zero, done);
+ 
+       const int zero_bits = LP64_ONLY(7) NOT_LP64(3);
+ 
+       // Fast check for recursive lock.
+       //
+       // Can apply the optimization only if this is a stack lock
+       // allocated in this thread. For efficiency, we can focus on
+       // recently allocated stack locks (instead of reading the stack
+       // base and checking whether 'mark' points inside the current
+       // thread stack):
+       //  1) (mark & zero_bits) == 0, and
+       //  2) rsp <= mark < rsp + os::pagesize()
+       //
+       // Warning: rsp + os::pagesize can overflow the stack base. We must
+       // neither apply the optimization for an inflated lock allocated
+       // just above the thread stack (this is why condition 1 matters)
+       // nor apply the optimization if the stack lock is inside the stack
+       // of another thread. The latter is avoided even in case of overflow
+       // because we have guard pages at the end of all stacks. Hence, if
+       // we go over the stack base and hit the stack of another thread,
+       // this should not be in a writeable area that could contain a
+       // stack lock allocated by that thread. As a consequence, a stack
+       // lock less than page size away from rsp is guaranteed to be
+       // owned by the current thread.
+       //
+       // These 3 tests can be done by evaluating the following
+       // expression: ((mark - rsp) & (zero_bits - os::vm_page_size())),
+       // assuming both stack pointer and pagesize have their
+       // least significant bits clear.
+       // NOTE: the mark is in swap_reg %rax as the result of cmpxchg
+       subptr(swap_reg, rsp);
+       andptr(swap_reg, zero_bits - os::vm_page_size());
+ 
+       // Save the test result, for recursive case, the result is zero
+       movptr(Address(lock_reg, mark_offset), swap_reg);
+ 
+       if (PrintBiasedLockingStatistics) {
+         cond_inc32(Assembler::zero,
+                    ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr()));
+       }
+       jcc(Assembler::zero, done);
      }
-     jcc(Assembler::zero, done);
- 
      bind(slow_case);
  
      // Call the runtime routine for slow case
-     call_VM(noreg,
-             CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
-             lock_reg);
- 
+     if (LockingMode == LM_LIGHTWEIGHT) {
+       call_VM(noreg,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj),
+               obj_reg);
+     } else {
+       call_VM(noreg,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+               lock_reg);
+     }
      bind(done);
    }
  }
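
The stack-lock branch kept above is guarded by the recursion test spelled out in the comments. The same arithmetic, written as an illustrative C++ sketch (names and the 4 KiB page size are stand-ins; zero_bits is 7 on LP64 and 3 on 32-bit, as in the code):

    #include <cstdint>
    #include <cstdio>

    // ((mark - rsp) & (zero_bits - page_size)) == 0 holds exactly when
    //   1) the low tag bits of (mark - rsp) are clear, and
    //   2) 0 <= mark - rsp < page_size (unsigned),
    // i.e. the mark points at a recently pushed stack lock of this thread.
    static bool looks_like_recursive_stack_lock(uintptr_t mark, uintptr_t rsp,
                                                uintptr_t page_size, uintptr_t zero_bits) {
      return ((mark - rsp) & (zero_bits - page_size)) == 0;
    }

    int main() {
      const uintptr_t page = 4096, zero_bits = 7;   // LP64 values from the code above
      const uintptr_t rsp  = 0x7fff1000;            // made-up stack pointer
      printf("%d\n", looks_like_recursive_stack_lock(rsp + 0x40, rsp, page, zero_bits)); // 1: aligned, within a page
      printf("%d\n", looks_like_recursive_stack_lock(rsp + 0x41, rsp, page, zero_bits)); // 0: tag bits set in the mark
      printf("%d\n", looks_like_recursive_stack_lock(rsp + page, rsp, page, zero_bits)); // 0: too far above rsp
      return 0;
    }

A zero result is stored back into the displaced-header slot, which is exactly the value the recursive case in unlock_object below tests for.
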
  
  

@@ -1338,53 +1353,67 @@
  // rax, rbx, rcx, rdx
  void InterpreterMacroAssembler::unlock_object(Register lock_reg) {
    assert(lock_reg == LP64_ONLY(c_rarg1) NOT_LP64(rdx),
           "The argument is only for looks. It must be c_rarg1");
  
-   if (UseHeavyMonitors) {
+   if (LockingMode == LM_MONITOR) {
      call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
    } else {
-     Label done;
+     Label done, slow_case;
  
      const Register swap_reg   = rax;  // Must use rax for cmpxchg instruction
      const Register header_reg = LP64_ONLY(c_rarg2) NOT_LP64(rbx);  // Will contain the old oopMark
      const Register obj_reg    = LP64_ONLY(c_rarg3) NOT_LP64(rcx);  // Will contain the oop
  
      save_bcp(); // Save in case of exception
  
-     // Convert from BasicObjectLock structure to object and BasicLock
-     // structure Store the BasicLock address into %rax
-     lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+     if (LockingMode != LM_LIGHTWEIGHT) {
+       // Convert from BasicObjectLock structure to object and BasicLock
+       // structure Store the BasicLock address into %rax
+       lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+     }
  
      // Load oop into obj_reg(%c_rarg3)
      movptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
  
      // Free entry
      movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD);
  
-     if (UseBiasedLocking) {
-       biased_locking_exit(obj_reg, header_reg, done);
-     }
- 
-     // Load the old header from BasicLock structure
-     movptr(header_reg, Address(swap_reg,
-                                BasicLock::displaced_header_offset_in_bytes()));
+     if (LockingMode == LM_LIGHTWEIGHT) {
+ #ifdef _LP64
+       lightweight_unlock(obj_reg, swap_reg, r15_thread, header_reg, slow_case);
+ #else
+       // This relies on the implementation of lightweight_unlock being able to handle
+       // that the reg_rax and thread Register parameters may alias each other.
+       get_thread(swap_reg);
+       lightweight_unlock(obj_reg, swap_reg, swap_reg, header_reg, slow_case);
+ #endif
+       jmp(done);
+     } else {
+       if (UseBiasedLocking) {
+         biased_locking_exit(obj_reg, header_reg, done);
+       }
  
-     // Test for recursion
-     testptr(header_reg, header_reg);
+       // Load the old header from BasicLock structure
+       movptr(header_reg, Address(swap_reg,
+                                  BasicLock::displaced_header_offset_in_bytes()));
  
-     // zero for recursive case
-     jcc(Assembler::zero, done);
+       // Test for recursion
+       testptr(header_reg, header_reg);
  
-     // Atomic swap back the old header
-     lock();
-     cmpxchgptr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+       // zero for recursive case
+       jcc(Assembler::zero, done);
  
-     // zero for simple unlock of a stack-lock case
-     jcc(Assembler::zero, done);
+       // Atomic swap back the old header
+       lock();
+       cmpxchgptr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  
+       // zero for simple unlock of a stack-lock case
+       jcc(Assembler::zero, done);
+     }
  
+     bind(slow_case);
      // Call the runtime routine for slow case.
      movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), obj_reg); // restore obj
      call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
  
      bind(done);
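
The non-lightweight unlock fast path above reduces to: a zero displaced header means a recursive stack lock and nothing to restore; otherwise the saved header is compare-and-swapped back into the object's mark word, falling back to the runtime if the mark has changed (for example because the lock was inflated). A rough C++ sketch of that logic, with stand-in types for the mark word and the BasicObjectLock slot:

    #include <atomic>
    #include <cstdint>

    struct ObjSketch { std::atomic<uintptr_t> mark; };   // stand-in for the object's mark word

    enum class Unlock { kDone, kSlowPath };

    // displaced_header is what lock_object saved into the BasicLock slot;
    // basic_lock is the address of that slot on the interpreter stack.
    static Unlock fast_unlock(ObjSketch* obj, uintptr_t basic_lock, uintptr_t displaced_header) {
      if (displaced_header == 0) {
        return Unlock::kDone;                  // recursive stack lock: nothing to restore
      }
      uintptr_t expected = basic_lock;         // a stack-locked mark points at our BasicLock
      if (obj->mark.compare_exchange_strong(expected, displaced_header)) {
        return Unlock::kDone;                  // simple stack-lock unlock succeeded
      }
      return Unlock::kSlowPath;                // mark changed: call InterpreterRuntime::monitorexit
    }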