src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

  28 #include "gc/shared/barrierSet.hpp"
  29 #include "gc/shared/barrierSetAssembler.hpp"
  30 #include "oops/methodData.hpp"
  31 #include "opto/c2_MacroAssembler.hpp"
  32 #include "opto/intrinsicnode.hpp"
  33 #include "opto/output.hpp"
  34 #include "opto/opcodes.hpp"
  35 #include "opto/subnode.hpp"
  36 #include "runtime/objectMonitor.hpp"
  37 #include "runtime/stubRoutines.hpp"
  38 
  39 #ifdef PRODUCT
  40 #define BLOCK_COMMENT(str) /* nothing */
  41 #define STOP(error) stop(error)
  42 #else
  43 #define BLOCK_COMMENT(str) block_comment(str)
  44 #define STOP(error) block_comment(error); stop(error)
  45 #endif
  46 
  47 // C2 compiled method's prolog code.
  48 void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
  49 
  50   // WARNING: Initial instruction MUST be 5 bytes or longer so that
  51   // NativeJump::patch_verified_entry will be able to patch out the entry
  52   // code safely. The push to verify stack depth is ok at 5 bytes,
  53   // the frame allocation can be either 3 or 6 bytes. So if we don't do
  54   // stack bang then we must use the 6 byte frame allocation even if
  55   // we have no frame. :-(
  56   assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
  57 
  58   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  59   // Remove word for return addr
  60   framesize -= wordSize;
  61   stack_bang_size -= wordSize;
  62 
  63   // Calls to C2R adapters often do not accept exceptional returns.
  64   // We require that their callers must bang for them.  But be careful, because
  65   // some VM calls (such as call site linkage) can use several kilobytes of
  66   // stack.  But the stack safety zone should account for that.
  67   // See bugs 4446381, 4468289, 4497237.
  68   if (stack_bang_size > 0) {

 110   }
 111   if (UseSSE >= 2 && VerifyFPU) {
 112     verify_FPU(0, "FPU stack must be clean on entry");
 113   }
 114 #endif
 115 
 116 #ifdef ASSERT
 117   if (VerifyStackAtCalls) {
 118     Label L;
 119     push(rax);
 120     mov(rax, rsp);
 121     andptr(rax, StackAlignmentInBytes-1);
 122     cmpptr(rax, StackAlignmentInBytes-wordSize);
 123     pop(rax);
 124     jcc(Assembler::equal, L);
 125     STOP("Stack is not properly aligned!");
 126     bind(L);
 127   }
 128 #endif
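
The check verifies that rsp, after the probe push, sits exactly one word below an
alignment boundary, i.e. the frame itself was aligned before the push. A minimal
C++ model of the predicate, assuming the 64-bit values StackAlignmentInBytes == 16
and wordSize == 8 (illustrative only):

#include <cstdint>

// Models the VerifyStackAtCalls test above: andptr(rax, align-1) followed by
// cmpptr(rax, align-wordSize) expects equality at a properly aligned entry.
static bool entry_sp_aligned(uintptr_t rsp_after_push) {
  const uintptr_t kAlign = 16;     // assumed StackAlignmentInBytes (64-bit)
  const uintptr_t kWordSize = 8;   // assumed wordSize (64-bit)
  return (rsp_after_push & (kAlign - 1)) == kAlign - kWordSize;
}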
 129 
 130   if (!is_stub) {
 131     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 132 #ifdef _LP64
 133     if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
 134       // We put the non-hot code of the nmethod entry barrier out-of-line in a stub.
 135       Label dummy_slow_path;
 136       Label dummy_continuation;
 137       Label* slow_path = &dummy_slow_path;
 138       Label* continuation = &dummy_continuation;
 139       if (!Compile::current()->output()->in_scratch_emit_size()) {
 140         // Use real labels from actual stub when not emitting code for the purpose of measuring its size
 141         C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
 142         Compile::current()->output()->add_stub(stub);
 143         slow_path = &stub->entry();
 144         continuation = &stub->continuation();
 145       }
 146       bs->nmethod_entry_barrier(this, slow_path, continuation);
 147     }
 148 #else
 149     // Don't bother with out-of-line nmethod entry barrier stub for x86_32.

 531 //    But beware of excessive branch density on AMD Opterons.
 532 //
 533 // *  Both fast_lock and fast_unlock set the ICC.ZF to indicate success
 534 //    or failure of the fast path.  If the fast path fails then we pass
 535 //    control to the slow path, typically in C.  In fast_lock and
 536 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
 537 //    will emit a conditional branch immediately after the node.
 538 //    So we have branches to branches and lots of ICC.ZF games.
 539 //    Instead, it might be better to have C2 pass a "FailureLabel"
 540 //    into fast_lock and fast_unlock.  In the case of success, control
 541 //    will drop through the node.  ICC.ZF is undefined at exit.
 542 //    In the case of failure, the node will branch directly to the
 543 //    FailureLabel
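
The ZF contract reads, in plain C++ terms (hypothetical names; the real consumers
are C2's FastLock/FastUnlock nodes and the conditional branch C2 emits right after
them):

bool fast_lock_sets_zf();        // hypothetical: true <=> fast path set ZF = 1
void slow_path_monitorenter();   // hypothetical runtime entry

void monitorenter_model() {
  if (!fast_lock_sets_zf()) {    // the branch C2 emits after the FastLock node
    slow_path_monitorenter();    // slow path, typically in C
  }
  // ZF == 1: control simply drops through on success
}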
 544 
 545 
 546 // obj: object to lock
 547 // box: on-stack box address (displaced header location) - KILLED
 548 // rax: tmp -- KILLED
 549 // scr: tmp -- KILLED
 550 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
 551                                  Register scrReg, Register cx1Reg, Register cx2Reg,
 552                                  RTMLockingCounters* rtm_counters,
 553                                  RTMLockingCounters* stack_rtm_counters,
 554                                  Metadata* method_data,
 555                                  bool use_rtm, bool profile_rtm) {
 556   // Ensure the register assignments are disjoint
 557   assert(tmpReg == rax, "");
 558 
 559   if (use_rtm) {
 560     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
 561   } else {
 562     assert(cx1Reg == noreg, "");
 563     assert(cx2Reg == noreg, "");
 564     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
 565   }
 566 
 567   // Possible cases that we'll encounter in fast_lock
 568   // ------------------------------------------------
 569   // * Inflated
 570   //    -- unlocked
 571   //    -- Locked

 585     load_klass(tmpReg, objReg, scrReg);
 586     movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
 587     testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
 588     jcc(Assembler::notZero, DONE_LABEL);
 589   }
 590 
 591 #if INCLUDE_RTM_OPT
 592   if (UseRTMForStackLocks && use_rtm) {
 593     assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive");
 594     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
 595                       stack_rtm_counters, method_data, profile_rtm,
 596                       DONE_LABEL, IsInflated);
 597   }
 598 #endif // INCLUDE_RTM_OPT
 599 
 600   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
 601   testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
 602   jccb(Assembler::notZero, IsInflated);
 603 
 604   if (!UseHeavyMonitors) {
 605     // Attempt stack-locking ...
 606     orptr (tmpReg, markWord::unlocked_value);
 607     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
 608     lock();
 609     cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
 610     jcc(Assembler::equal, COUNT);           // Success
 611 
 612     // Recursive locking.
 613     // The object is stack-locked: markword contains stack pointer to BasicLock.
 614     // Locked by current thread if difference with current SP is less than one page.
 615     subptr(tmpReg, rsp);
 616     // Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
 617     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
 618     movptr(Address(boxReg, 0), tmpReg);
 619   } else {
 620     // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
 621     testptr(objReg, objReg);
 622   }
 623   jmp(DONE_LABEL);
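
The recursive-lock test above is pure pointer arithmetic. A portable model,
assuming a 4 KiB page (HotSpot queries os::vm_page_size()) and the 64-bit mask:

#include <cstdint>

// After the failed CAS, tmpReg holds the current markword. It denotes a
// recursive stack-lock iff it is a stack-locked pointer (low bits clear)
// lying within one page above rsp; the 0 then stored into the box marks
// the recursive entry.
static bool recursive_stack_lock(uintptr_t markword, uintptr_t rsp) {
  const intptr_t kPageSize = 4096;        // assumed os::vm_page_size()
  intptr_t diff = (intptr_t)(markword - rsp);
  return (diff & (7 - kPageSize)) == 0;   // the andptr above; ZF==1 on success
}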
 624 
 625   bind(IsInflated);
 626   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
 627 
 628 #if INCLUDE_RTM_OPT
 629   // Use the same RTM locking code in 32- and 64-bit VM.
 630   if (use_rtm) {
 631     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
 632                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
 633   } else {
 634 #endif // INCLUDE_RTM_OPT
 635 
 636 #ifndef _LP64
 637   // The object is inflated.
 638 

 642   // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
 643   // additional latency as we have another ST in the store buffer that must drain.
 644 
 645   // avoid ST-before-CAS
 646   // register juggle because we need tmpReg for cmpxchgptr below
 647   movptr(scrReg, boxReg);
 648   movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
 649 
 650   // Optimistic form: consider XORL tmpReg,tmpReg
 651   movptr(tmpReg, NULL_WORD);
 652 
 653   // Appears unlocked - try to swing _owner from null to non-null.
 654   // Ideally, I'd manifest "Self" with get_thread and then attempt
 655   // to CAS the register containing Self into m->Owner.
 656   // But we don't have enough registers, so instead we can either try to CAS
 657   // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
 658   // we later store "Self" into m->Owner.  Transiently storing a stack address
 659   // (rsp or the address of the box) into  m->owner is harmless.
 660   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
 661   lock();
 662   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 663   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
 664   // If we weren't able to swing _owner from NULL to the BasicLock
 665   // then take the slow path.
 666   jccb  (Assembler::notZero, NO_COUNT);
 667   // update _owner from BasicLock to thread
 668   get_thread (scrReg);                    // beware: clobbers ICCs
 669   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
 670   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
 671 
 672   // If the CAS fails we can either retry or pass control to the slow path.
 673   // We use the latter tactic.
 674   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
 675   // If the CAS was successful ...
 676   //   Self has acquired the lock
 677   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
 678   // Intentional fall-through into DONE_LABEL ...
 679 #else // _LP64
 680   // It's inflated and we use scrReg for ObjectMonitor* in this section.
 681   movq(scrReg, tmpReg);
 682   xorq(tmpReg, tmpReg);
 683   lock();
 684   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 685   // Unconditionally set box->_displaced_header = markWord::unused_mark().
 686   // Without a cast to int32_t this style of movptr will destroy r10, which is typically obj.
 687   movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
 688   // Propagate ICC.ZF from CAS above into DONE_LABEL.
 689   jccb(Assembler::equal, COUNT);          // CAS above succeeded; propagate ZF = 1 (success)
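
Both the 32- and 64-bit branches implement the same "swing _owner from null"
protocol. A std::atomic sketch of the idea (not the ObjectMonitor API):

#include <atomic>

struct MonitorModel { std::atomic<void*> owner{nullptr}; };

// CAS m->_owner from null to a non-null value. On 64-bit that value is the
// thread itself; on 32-bit a stack address is installed first and fixed up
// to the real thread afterwards.
static bool try_enter(MonitorModel* m, void* self) {
  void* expected = nullptr;  // rax/tmpReg == 0, the implicit cmpxchg comparand
  return m->owner.compare_exchange_strong(expected, self);
  // success: ZF==1, lock acquired; failure: fall through to the slow path
}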
 690 

 756 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
 757   assert(boxReg == rax, "");
 758   assert_different_registers(objReg, boxReg, tmpReg);
 759 
 760   Label DONE_LABEL, Stacked, COUNT, NO_COUNT;
 761 
 762 #if INCLUDE_RTM_OPT
 763   if (UseRTMForStackLocks && use_rtm) {
 764     assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive");
 765     Label L_regular_unlock;
 766     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
 767     andptr(tmpReg, markWord::lock_mask_in_place);                     // look at 2 lock bits
 768     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 01 unlocked
 769     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
 770     xend();                                                           // otherwise end...
 771     jmp(DONE_LABEL);                                                  // ... and we're done
 772     bind(L_regular_unlock);
 773   }
 774 #endif
 775 
 776   if (!UseHeavyMonitors) {
 777     cmpptr(Address(boxReg, 0), NULL_WORD);                            // Examine the displaced header
 778     jcc   (Assembler::zero, COUNT);                                   // 0 indicates recursive stack-lock
 779   }
 780   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // Examine the object's markword
 781   if (!UseHeavyMonitors) {
 782     testptr(tmpReg, markWord::monitor_value);                         // Inflated?
 783     jccb   (Assembler::zero, Stacked);

 784   }
 785 
 786   // It's inflated.
 787 #if INCLUDE_RTM_OPT
 788   if (use_rtm) {
 789     Label L_regular_inflated_unlock;
 790     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 791     movptr(boxReg, Address(tmpReg, owner_offset));
 792     testptr(boxReg, boxReg);
 793     jccb(Assembler::notZero, L_regular_inflated_unlock);
 794     xend();
 795     jmpb(DONE_LABEL);
 796     bind(L_regular_inflated_unlock);
 797   }
 798 #endif
 799 
 800   // Despite our balanced locking property we still check that m->_owner == Self
 801   // as java routines or native JNI code called by this thread might
 802   // have released the lock.
 803   // Refer to the comments in synchronizer.cpp for how we might encode extra
 804   // state in _succ so we can avoid fetching EntryList|cxq.
 805   //
 806   // If there's no contention try a 1-0 exit.  That is, exit without
 807   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
 808   // we detect and recover from the race that the 1-0 exit admits.
 809   //
 810   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
 811   // before it STs null into _owner, releasing the lock.  Updates
 812   // to data protected by the critical section must be visible before
 813   // we drop the lock (and thus before any other thread could acquire
 814   // the lock and observe the fields protected by the lock).
 815   // IA32's memory model is TSO, so STs are ordered with respect to

 889   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
 890   lock();
 891   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 892   // There's no successor so we tried to regrab the lock.
 893   // If that didn't work, then another thread grabbed the
 894   // lock so we're done (and exit was a success).
 895   jccb  (Assembler::notEqual, LSuccess);
 896   // Intentional fall-through into slow path
 897 
 898   bind  (LGoSlowPath);
 899   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
 900   jmpb  (DONE_LABEL);
 901 
 902   bind  (LSuccess);
 903   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
 904   jmpb  (DONE_LABEL);
 905 
 906 #endif
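
The regrab-on-exit race handling above, modeled with std::atomic (a sketch; the
succ/EntryList/cxq checks that precede it are elided):

#include <atomic>

// 1-0 exit tail: after the release store of null into _owner, if a waiter
// might be stranded, try to CAS ourselves back in. A failed CAS means some
// other thread already acquired the lock, so the unlock stands as a success.
static bool exit_needs_slow_path(std::atomic<void*>& owner, void* self) {
  owner.store(nullptr, std::memory_order_release);  // drop the lock (1-0 exit)
  void* expected = nullptr;
  if (!owner.compare_exchange_strong(expected, self))
    return false;  // someone else got the lock: nothing more to do
  return true;     // we re-own the lock and must wake a successor (slow path)
}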
 907   if (!UseHeavyMonitors) {
 908     bind  (Stacked);
 909     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
 910     lock();
 911     cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 912     // Intentional fall-thru into DONE_LABEL
 913   }
 914   bind(DONE_LABEL);
 915 
 916   // ZFlag == 1 count in fast path
 917   // ZFlag == 0 count in slow path
 918   jccb(Assembler::notZero, NO_COUNT);
 919 
 920   bind(COUNT);
 921   // Count monitors in fast path
 922 #ifndef _LP64
 923   get_thread(tmpReg);
 924   decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
 925 #else // _LP64
 926   decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
 927 #endif
 928 
 929   xorl(tmpReg, tmpReg); // Set ZF == 1
 930 
 931   bind(NO_COUNT);

  28 #include "gc/shared/barrierSet.hpp"
  29 #include "gc/shared/barrierSetAssembler.hpp"
  30 #include "oops/methodData.hpp"
  31 #include "opto/c2_MacroAssembler.hpp"
  32 #include "opto/intrinsicnode.hpp"
  33 #include "opto/output.hpp"
  34 #include "opto/opcodes.hpp"
  35 #include "opto/subnode.hpp"
  36 #include "runtime/objectMonitor.hpp"
  37 #include "runtime/stubRoutines.hpp"
  38 
  39 #ifdef PRODUCT
  40 #define BLOCK_COMMENT(str) /* nothing */
  41 #define STOP(error) stop(error)
  42 #else
  43 #define BLOCK_COMMENT(str) block_comment(str)
  44 #define STOP(error) block_comment(error); stop(error)
  45 #endif
  46 
  47 // C2 compiled method's prolog code.
  48 void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub, int max_monitors) {
  49 
  50   // WARNING: Initial instruction MUST be 5 bytes or longer so that
  51   // NativeJump::patch_verified_entry will be able to patch out the entry
  52   // code safely. The push to verify stack depth is ok at 5 bytes,
  53   // the frame allocation can be either 3 or 6 bytes. So if we don't do
  54   // stack bang then we must use the 6 byte frame allocation even if
  55   // we have no frame. :-(
  56   assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
  57 
  58   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  59   // Remove word for return addr
  60   framesize -= wordSize;
  61   stack_bang_size -= wordSize;
  62 
  63   // Calls to C2R adapters often do not accept exceptional returns.
  64   // We require that their callers must bang for them.  But be careful, because
  65   // some VM calls (such as call site linkage) can use several kilobytes of
  66   // stack.  But the stack safety zone should account for that.
  67   // See bugs 4446381, 4468289, 4497237.
  68   if (stack_bang_size > 0) {

 110   }
 111   if (UseSSE >= 2 && VerifyFPU) {
 112     verify_FPU(0, "FPU stack must be clean on entry");
 113   }
 114 #endif
 115 
 116 #ifdef ASSERT
 117   if (VerifyStackAtCalls) {
 118     Label L;
 119     push(rax);
 120     mov(rax, rsp);
 121     andptr(rax, StackAlignmentInBytes-1);
 122     cmpptr(rax, StackAlignmentInBytes-wordSize);
 123     pop(rax);
 124     jcc(Assembler::equal, L);
 125     STOP("Stack is not properly aligned!");
 126     bind(L);
 127   }
 128 #endif
 129 
 130 #ifdef _LP64
 131   if (UseFastLocking && max_monitors > 0) {
 132     C2CheckLockStackStub* stub = new (Compile::current()->comp_arena()) C2CheckLockStackStub();
 133     Compile::current()->output()->add_stub(stub);
 134     assert(!is_stub, "only methods have monitors");
 135     Register thread = r15_thread;
 136     movptr(rax, Address(thread, JavaThread::lock_stack_current_offset()));
 137     addptr(rax, max_monitors * oopSize);
 138     cmpptr(rax, Address(thread, JavaThread::lock_stack_limit_offset()));
 139     jcc(Assembler::greaterEqual, stub->entry());
 140     bind(stub->continuation());
 141   }
 142 #endif
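
A portable model of the new lock-stack pre-check (field names follow the patch;
the layout and types are illustrative): if pushing this method's max_monitors
entries could run past the limit, branch to the C2CheckLockStackStub.

#include <cstdint>

struct LockStackModel {
  uintptr_t current;   // JavaThread::lock_stack_current_offset()
  uintptr_t limit;     // JavaThread::lock_stack_limit_offset()
};

// Mirrors the movptr/addptr/cmpptr/jcc(greaterEqual) sequence above,
// assuming 64-bit oopSize == 8.
static bool lock_stack_check_fails(const LockStackModel& ls, int max_monitors) {
  const uintptr_t kOopSize = 8;
  return ls.current + (uintptr_t)max_monitors * kOopSize >= ls.limit;
}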
 143 
 144   if (!is_stub) {
 145     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 146 #ifdef _LP64
 147     if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
 148       // We put the non-hot code of the nmethod entry barrier out-of-line in a stub.
 149       Label dummy_slow_path;
 150       Label dummy_continuation;
 151       Label* slow_path = &dummy_slow_path;
 152       Label* continuation = &dummy_continuation;
 153       if (!Compile::current()->output()->in_scratch_emit_size()) {
 154         // Use real labels from actual stub when not emitting code for the purpose of measuring its size
 155         C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
 156         Compile::current()->output()->add_stub(stub);
 157         slow_path = &stub->entry();
 158         continuation = &stub->continuation();
 159       }
 160       bs->nmethod_entry_barrier(this, slow_path, continuation);
 161     }
 162 #else
 163     // Don't bother with out-of-line nmethod entry barrier stub for x86_32.

 545 //    But beware of excessive branch density on AMD Opterons.
 546 //
 547 // *  Both fast_lock and fast_unlock set the ICC.ZF to indicate success
 548 //    or failure of the fast path.  If the fast path fails then we pass
 549 //    control to the slow path, typically in C.  In fast_lock and
 550 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
 551 //    will emit a conditional branch immediately after the node.
 552 //    So we have branches to branches and lots of ICC.ZF games.
 553 //    Instead, it might be better to have C2 pass a "FailureLabel"
 554 //    into fast_lock and fast_unlock.  In the case of success, control
 555 //    will drop through the node.  ICC.ZF is undefined at exit.
 556 //    In the case of failure, the node will branch directly to the
 557 //    FailureLabel
 558 
 559 
 560 // obj: object to lock
 561 // box: on-stack box address (displaced header location) - KILLED
 562 // rax: tmp -- KILLED
 563 // scr: tmp -- KILLED
 564 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
 565                                  Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
 566                                  RTMLockingCounters* rtm_counters,
 567                                  RTMLockingCounters* stack_rtm_counters,
 568                                  Metadata* method_data,
 569                                  bool use_rtm, bool profile_rtm) {
 570   // Ensure the register assignments are disjoint
 571   assert(tmpReg == rax, "");
 572 
 573   if (use_rtm) {
 574     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
 575   } else {
 576     assert(cx1Reg == noreg, "");
 577     assert(cx2Reg == noreg, "");
 578     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
 579   }
 580 
 581   // Possible cases that we'll encounter in fast_lock
 582   // ------------------------------------------------
 583   // * Inflated
 584   //    -- unlocked
 585   //    -- Locked

 599     load_klass(tmpReg, objReg, scrReg);
 600     movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
 601     testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
 602     jcc(Assembler::notZero, DONE_LABEL);
 603   }
 604 
 605 #if INCLUDE_RTM_OPT
 606   if (UseRTMForStackLocks && use_rtm) {
 607     assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive");
 608     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
 609                       stack_rtm_counters, method_data, profile_rtm,
 610                       DONE_LABEL, IsInflated);
 611   }
 612 #endif // INCLUDE_RTM_OPT
 613 
 614   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
 615   testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
 616   jccb(Assembler::notZero, IsInflated);
 617 
 618   if (!UseHeavyMonitors) {
 619     if (UseFastLocking) {
 620 #ifdef _LP64
 621       fast_lock_impl(objReg, tmpReg, thread, scrReg, NO_COUNT, false);
 622       jmp(COUNT);
 623 #else
 624       // We cannot emit the lock-stack check in verified_entry() because we don't have enough
 625       // registers (for the thread ptr). Therefore we have to emit the lock-stack check in
 626       // fast_lock_impl(). However, that check can take a slow path with ZF=1, so
 627       // we need to handle it specially and force ZF=0 before taking the actual slow path.
 628       Label slow;
 629       fast_lock_impl(objReg, tmpReg, thread, scrReg, slow);
 630       jmp(COUNT);
 631       bind(slow);
 632       testptr(objReg, objReg); // ZF=0 to indicate failure
 633       jmp(NO_COUNT);
 634 #endif
 635     } else {
 636       // Attempt stack-locking ...
 637       orptr (tmpReg, markWord::unlocked_value);
 638       movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
 639       lock();
 640       cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
 641       jcc(Assembler::equal, COUNT);           // Success
 642 
 643       // Recursive locking.
 644       // The object is stack-locked: markword contains stack pointer to BasicLock.
 645       // Locked by current thread if difference with current SP is less than one page.
 646       subptr(tmpReg, rsp);
 647       // Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
 648       andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
 649       movptr(Address(boxReg, 0), tmpReg);
 650     }
 651   } else {
 652     // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
 653     testptr(objReg, objReg);
 654   }
 655   jmp(DONE_LABEL);
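
A std::atomic sketch of what fast_lock_impl is assumed to do (the fast-locking
protocol from the patch, not the helper's real signature): CAS the markword from
unlocked (01) to locked (00), then record ownership by pushing the oop onto the
thread's lock stack.

#include <atomic>
#include <cstdint>

static bool fast_lock_model(std::atomic<uintptr_t>* mark,
                            uintptr_t** lock_stack_current, uintptr_t obj) {
  uintptr_t m = mark->load(std::memory_order_relaxed);
  if ((m & 0x3) != 0x1) return false;   // not in the neutral/unlocked state
  if (!mark->compare_exchange_strong(m, m & ~(uintptr_t)0x3))
    return false;                       // contended or inflated: slow path
  *(*lock_stack_current)++ = obj;       // push; bounds were checked at entry
  return true;
}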
 656 
 657   bind(IsInflated);
 658   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
 659 
 660 #if INCLUDE_RTM_OPT
 661   // Use the same RTM locking code in 32- and 64-bit VM.
 662   if (use_rtm) {
 663     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
 664                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
 665   } else {
 666 #endif // INCLUDE_RTM_OPT
 667 
 668 #ifndef _LP64
 669   // The object is inflated.
 670 

 674   // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
 675   // additional latency as we have another ST in the store buffer that must drain.
 676 
 677   // avoid ST-before-CAS
 678   // register juggle because we need tmpReg for cmpxchgptr below
 679   movptr(scrReg, boxReg);
 680   movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
 681 
 682   // Optimistic form: consider XORL tmpReg,tmpReg
 683   movptr(tmpReg, NULL_WORD);
 684 
 685   // Appears unlocked - try to swing _owner from null to non-null.
 686   // Ideally, I'd manifest "Self" with get_thread and then attempt
 687   // to CAS the register containing Self into m->Owner.
 688   // But we don't have enough registers, so instead we can either try to CAS
 689   // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
 690   // we later store "Self" into m->Owner.  Transiently storing a stack address
 691   // (rsp or the address of the box) into  m->owner is harmless.
 692   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
 693   lock();
 694   cmpxchgptr(thread, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 695   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
 696 
 697   // If the CAS fails we can either retry or pass control to the slow path.
 698   // We use the latter tactic.
 699   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
 700   // If the CAS was successful ...
 701   //   Self has acquired the lock
 702   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
 703   // Intentional fall-through into DONE_LABEL ...
 704 #else // _LP64
 705   // It's inflated and we use scrReg for ObjectMonitor* in this section.
 706   movq(scrReg, tmpReg);
 707   xorq(tmpReg, tmpReg);
 708   lock();
 709   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 710   // Unconditionally set box->_displaced_header = markWord::unused_mark().
 711   // Without a cast to int32_t this style of movptr will destroy r10, which is typically obj.
 712   movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
 713   // Propagate ICC.ZF from CAS above into DONE_LABEL.
 714   jccb(Assembler::equal, COUNT);          // CAS above succeeded; propagate ZF = 1 (success)
 715 

 781 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
 782   assert(boxReg == rax, "");
 783   assert_different_registers(objReg, boxReg, tmpReg);
 784 
 785   Label DONE_LABEL, Stacked, COUNT, NO_COUNT;
 786 
 787 #if INCLUDE_RTM_OPT
 788   if (UseRTMForStackLocks && use_rtm) {
 789     assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive");
 790     Label L_regular_unlock;
 791     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
 792     andptr(tmpReg, markWord::lock_mask_in_place);                     // look at 2 lock bits
 793     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 01 unlocked
 794     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
 795     xend();                                                           // otherwise end...
 796     jmp(DONE_LABEL);                                                  // ... and we're done
 797     bind(L_regular_unlock);
 798   }
 799 #endif
 800 
 801   if (!UseHeavyMonitors && !UseFastLocking) {
 802     cmpptr(Address(boxReg, 0), NULL_WORD);                            // Examine the displaced header
 803     jcc   (Assembler::zero, COUNT);                                   // 0 indicates recursive stack-lock
 804   }
 805   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // Examine the object's markword
 806   if (!UseHeavyMonitors) {
 807     testptr(tmpReg, markWord::monitor_value);                         // Inflated?
 808 #if INCLUDE_RTM_OPT
 809     if (UseFastLocking && use_rtm) {
 810       jcc(Assembler::zero, Stacked);
 811     } else
 812 #endif
 813     jccb(Assembler::zero, Stacked);
 814     if (UseFastLocking) {
 815       // If the owner is ANONYMOUS, we need to fix it.
 816       testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) (intptr_t) ANONYMOUS_OWNER);
 817 #ifdef _LP64
 818       C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg);
 819       Compile::current()->output()->add_stub(stub);
 820       jcc(Assembler::notEqual, stub->entry());
 821       bind(stub->continuation());
 822 #else
 823       // We can't easily implement this optimization on 32 bit because we don't have a thread register.
 824       // Call the slow-path instead.
 825       jcc(Assembler::notEqual, NO_COUNT);
 826 #endif
 827     }
 828   }
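
A sketch of the anonymous-owner case handled above (ANONYMOUS_OWNER is the
patch's marker for a monitor inflated from a fast-locked object whose owning
thread was not recorded at inflation time):

#include <atomic>

// If inflation happened while the object was fast-locked, _owner still holds
// the anonymous marker; the unlocking thread claims the monitor before the
// normal unlock sequence runs (the 64-bit stub does this out of line).
static void fix_anonymous_owner(std::atomic<void*>& owner, void* self,
                                void* anonymous_marker) {
  if (owner.load(std::memory_order_relaxed) == anonymous_marker) {
    owner.store(self, std::memory_order_relaxed);  // we must be the real owner
  }
}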
 829 
 830   // It's inflated.
 831 #if INCLUDE_RTM_OPT
 832   if (use_rtm) {
 833     Label L_regular_inflated_unlock;
 834     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 835     movptr(boxReg, Address(tmpReg, owner_offset));
 836     testptr(boxReg, boxReg);
 837     jccb(Assembler::notZero, L_regular_inflated_unlock);
 838     xend();
 839     jmp(DONE_LABEL);
 840     bind(L_regular_inflated_unlock);
 841   }
 842 #endif
 843 
 844   // Despite our balanced locking property we still check that m->_owner == Self
 845   // as java routines or native JNI code called by this thread might
 846   // have released the lock.
 847   // Refer to the comments in synchronizer.cpp for how we might encode extra
 848   // state in _succ so we can avoid fetching EntryList|cxq.
 849   //
 850   // If there's no contention try a 1-0 exit.  That is, exit without
 851   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
 852   // we detect and recover from the race that the 1-0 exit admits.
 853   //
 854   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
 855   // before it STs null into _owner, releasing the lock.  Updates
 856   // to data protected by the critical section must be visible before
 857   // we drop the lock (and thus before any other thread could acquire
 858   // the lock and observe the fields protected by the lock).
 859   // IA32's memory model is TSO, so STs are ordered with respect to

 933   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
 934   lock();
 935   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 936   // There's no successor so we tried to regrab the lock.
 937   // If that didn't work, then another thread grabbed the
 938   // lock so we're done (and exit was a success).
 939   jccb  (Assembler::notEqual, LSuccess);
 940   // Intentional fall-through into slow path
 941 
 942   bind  (LGoSlowPath);
 943   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
 944   jmpb  (DONE_LABEL);
 945 
 946   bind  (LSuccess);
 947   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
 948   jmpb  (DONE_LABEL);
 949 
 950 #endif
 951   if (!UseHeavyMonitors) {
 952     bind  (Stacked);
 953     if (UseFastLocking) {
 954       mov(boxReg, tmpReg);
 955       fast_unlock_impl(objReg, boxReg, tmpReg, NO_COUNT);
 956       jmp(COUNT);
 957     } else {
 958       movptr(tmpReg, Address (boxReg, 0));      // re-fetch
 959       lock();
 960       cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 961     }
 962     // Intentional fall-thru into DONE_LABEL
 963   }
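
The assumed counterpart of the locking sketch: fast_unlock_impl (signature
illustrative) pops the thread's lock-stack entry and CASes the markword back
from locked (00) to unlocked (01); failure means the monitor was inflated
concurrently and the slow path must run.

#include <atomic>
#include <cstdint>

static bool fast_unlock_model(std::atomic<uintptr_t>* mark,
                              uintptr_t** lock_stack_current) {
  uintptr_t m = mark->load(std::memory_order_relaxed);
  if ((m & 0x3) != 0x0) return false;   // no longer fast-locked
  if (!mark->compare_exchange_strong(m, m | 0x1))
    return false;                       // inflated under us: slow path
  --(*lock_stack_current);              // pop our entry
  return true;
}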
 964   bind(DONE_LABEL);
 965 
 966   // ZFlag == 1 count in fast path
 967   // ZFlag == 0 count in slow path
 968   jccb(Assembler::notZero, NO_COUNT);
 969 
 970   bind(COUNT);
 971   // Count monitors in fast path
 972 #ifndef _LP64
 973   get_thread(tmpReg);
 974   decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
 975 #else // _LP64
 976   decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
 977 #endif
 978 
 979   xorl(tmpReg, tmpReg); // Set ZF == 1
 980 
 981   bind(NO_COUNT);
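
The COUNT/NO_COUNT tail maintains the per-thread held monitor count; a plain C++
model of the bookkeeping (JavaThreadModel is illustrative, the real field sits
behind JavaThread::held_monitor_count_offset()):

#include <cstdint>

struct JavaThreadModel { intptr_t held_monitor_count; };

// Only a fast-path unlock (ZF==1 at DONE_LABEL) decrements here; the slow
// path does its own counting. ZF is then re-established as 1 so callers of
// fast_unlock observe success.
static void unlock_count_tail(JavaThreadModel* thread, bool fast_path_ok) {
  if (fast_path_ok) {
    --thread->held_monitor_count;   // decrementq(Address(r15_thread, ...))
  }
}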