
src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp (old version)


   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "oops/methodData.hpp"
  29 #include "opto/c2_MacroAssembler.hpp"
  30 #include "opto/intrinsicnode.hpp"
  31 #include "opto/opcodes.hpp"
  32 #include "opto/subnode.hpp"
  33 #include "runtime/biasedLocking.hpp"
  34 #include "runtime/objectMonitor.hpp"
  35 #include "runtime/stubRoutines.hpp"
  36 
  37 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
  38   switch (vlen_in_bytes) {
  39     case  4: // fall-through
  40     case  8: // fall-through
  41     case 16: return Assembler::AVX_128bit;
  42     case 32: return Assembler::AVX_256bit;
  43     case 64: return Assembler::AVX_512bit;
  44 
  45     default: {
  46       ShouldNotReachHere();
  47       return Assembler::AVX_NoVec;
  48     }
  49   }
  50 }
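
A quick illustration of the mapping above: 4-, 8- and 16-byte vectors all share the 128-bit encoding because VEX/EVEX has no narrower vector-length prefix. A minimal stand-alone sketch (the enum and helper here are hypothetical stand-ins for the Assembler types):

    #include <cassert>

    enum AvxVectorLen { AVX_128bit, AVX_256bit, AVX_512bit, AVX_NoVec };

    // Mirrors vector_length_encoding() above: sub-16-byte vectors collapse
    // to the 128-bit encoding; 32 and 64 bytes map one-to-one.
    static AvxVectorLen encoding_for(int vlen_in_bytes) {
      if (vlen_in_bytes <= 16) return AVX_128bit;
      if (vlen_in_bytes == 32) return AVX_256bit;
      if (vlen_in_bytes == 64) return AVX_512bit;
      return AVX_NoVec;  // the real code calls ShouldNotReachHere() here
    }

    int main() {
      assert(encoding_for(8)  == AVX_128bit);
      assert(encoding_for(32) == AVX_256bit);
      assert(encoding_for(64) == AVX_512bit);
      return 0;
    }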
  51 

 429 //    But beware of excessive branch density on AMD Opterons.
 430 //
 431 // *  Both fast_lock and fast_unlock set the ICC.ZF to indicate success
 432 //    or failure of the fast path.  If the fast path fails then we pass
 433 //    control to the slow path, typically in C.  In fast_lock and
 434 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
 435 //    will emit a conditional branch immediately after the node.
 436 //    So we have branches to branches and lots of ICC.ZF games.
 437 //    Instead, it might be better to have C2 pass a "FailureLabel"
 438 //    into fast_lock and fast_unlock.  In the case of success, control
 439 //    will drop through the node.  ICC.ZF is undefined at exit.
 440 //    In the case of failure, the node will branch directly to the
 441 //    FailureLabel.
 442 
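
A hedged sketch of how a caller could consume the ZF protocol described above (lock_node_sketch and the slow label are hypothetical; the real consumer is the .ad instruct expansion):

    // fast_lock leaves ZF=1 on success, so the code C2 emits right after
    // the node is essentially a single conditional branch on ZF.
    void C2_MacroAssembler::lock_node_sketch(Register obj, Register box,
                                             Register scr, Label& slow) {
      fast_lock(obj, box, rax, scr, noreg, noreg,
                NULL, NULL, NULL, NULL, false, false);  // sets ICC.ZF
      jcc(Assembler::notZero, slow);  // ZF==0 -> fast path failed
      // fall through: lock acquired on the fast path
    }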
 443 
 444 // obj: object to lock
 445 // box: on-stack box address (displaced header location) - KILLED
  446 // rax: tmp -- KILLED
 447 // scr: tmp -- KILLED
 448 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
 449                                  Register scrReg, Register cx1Reg, Register cx2Reg,
 450                                  BiasedLockingCounters* counters,
 451                                  RTMLockingCounters* rtm_counters,
 452                                  RTMLockingCounters* stack_rtm_counters,
 453                                  Metadata* method_data,
 454                                  bool use_rtm, bool profile_rtm) {
 455   // Ensure the register assignments are disjoint
 456   assert(tmpReg == rax, "");
 457 
 458   if (use_rtm) {
 459     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
 460   } else {
 461     assert(cx2Reg == noreg, "");
 462     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
 463   }
 464 
 465   if (counters != NULL) {
 466     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
 467   }
 468 
 469   // Possible cases that we'll encounter in fast_lock

 498   // order to reduce the number of conditional branches in the most common cases.
 499   // Beware -- there's a subtle invariant that the fetch of the markword
 500   // at [FETCH], below, will never observe a biased encoding (*101b).
 501   // If this invariant does not hold, we risk an exclusion (safety) failure.
 502   if (UseBiasedLocking && !UseOptoBiasInlining) {
 503     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
 504   }
 505 
 506 #if INCLUDE_RTM_OPT
 507   if (UseRTMForStackLocks && use_rtm) {
 508     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
 509                       stack_rtm_counters, method_data, profile_rtm,
 510                       DONE_LABEL, IsInflated);
 511   }
 512 #endif // INCLUDE_RTM_OPT
 513 
 514   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
 515   testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
 516   jccb(Assembler::notZero, IsInflated);
 517 
 518   // Attempt stack-locking ...
 519   orptr (tmpReg, markWord::unlocked_value);
 520   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
 521   lock();
 522   cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
 523   if (counters != NULL) {
 524     cond_inc32(Assembler::equal,
 525                ExternalAddress((address)counters->fast_path_entry_count_addr()));
 526   }
 527   jcc(Assembler::equal, DONE_LABEL);           // Success
 528 
 529   // Recursive locking.
 530   // The object is stack-locked: markword contains stack pointer to BasicLock.
 531   // Locked by current thread if difference with current SP is less than one page.
 532   subptr(tmpReg, rsp);
 533   // The next instruction sets ZFlag == 1 (success) if the difference is less than one page (see the sketch below).
 534   andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
 535   movptr(Address(boxReg, 0), tmpReg);
 536   if (counters != NULL) {
 537     cond_inc32(Assembler::equal,
 538                ExternalAddress((address)counters->fast_path_entry_count_addr()));
 539   }
 540   jmp(DONE_LABEL);
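
A worked sketch of the page-distance mask used in the recursive-lock test above, assuming a 4 KiB page (the addresses are hypothetical):

    #include <cstdint>

    // LP64: 7 - 4096 == -4089 == 0xFFFFFFFFFFFFF007 as a mask. ANDing
    // (mark - rsp) with it yields zero (ZF=1) exactly when the displaced
    // header is within one page of the current SP with the low bits clear,
    // i.e. a BasicLock in one of our own frames -> recursive stack-lock.
    int main() {
      intptr_t mark = 0x7ffd0f80;  // stack-locked markword (BasicLock*)
      intptr_t rsp  = 0x7ffd0f00;  // current stack pointer
      intptr_t mask = 7 - 4096;    // the LP64_ONLY constant above
      bool recursive = ((mark - rsp) & mask) == 0;  // true here
      return recursive ? 0 : 1;
    }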
 541 
 542   bind(IsInflated);
 543   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
 544 
 545 #if INCLUDE_RTM_OPT
 546   // Use the same RTM locking code in 32- and 64-bit VM.
 547   if (use_rtm) {
 548     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
 549                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
 550   } else {
 551 #endif // INCLUDE_RTM_OPT
 552 
 553 #ifndef _LP64
 554   // The object is inflated.
 555 
 556   // boxReg refers to the on-stack BasicLock in the current frame.
 557   // We'd like to write:
 558   //   set box->_displaced_header = markWord::unused_mark().  Any non-0 value suffices.
 559   // This is convenient but results in an ST-before-CAS penalty.  The following CAS suffers
 560   // additional latency as we have another ST in the store buffer that must drain.
 561 
 562   // avoid ST-before-CAS
 563   // register juggle because we need tmpReg for cmpxchgptr below
 564   movptr(scrReg, boxReg);
 565   movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
 566 
 567   // Optimistic form: consider XORL tmpReg,tmpReg
 568   movptr(tmpReg, NULL_WORD);
 569 
 570   // Appears unlocked - try to swing _owner from null to non-null.
 571   // Ideally, I'd manifest "Self" with get_thread and then attempt
 572   // to CAS the register containing Self into m->Owner.
 573   // But we don't have enough registers, so instead we can either try to CAS
 574   // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
 575   // we later store "Self" into m->Owner.  Transiently storing a stack address
 576   // (rsp or the address of the box) into  m->owner is harmless.
 577   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
 578   lock();
 579   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 580   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
 581   // If we weren't able to swing _owner from NULL to the BasicLock
 582   // then take the slow path.
 583   jccb  (Assembler::notZero, DONE_LABEL);
 584   // update _owner from BasicLock to thread
 585   get_thread (scrReg);                    // beware: clobbers ICCs
 586   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
 587   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
 588 
 589   // If the CAS fails we can either retry or pass control to the slow path.
 590   // We use the latter tactic.
 591   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
 592   // If the CAS was successful ...
 593   //   Self has acquired the lock
 594   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
 595   // Intentional fall-through into DONE_LABEL ...
 596 #else // _LP64
 597   // It's inflated and we use scrReg for ObjectMonitor* in this section.
 598   movq(scrReg, tmpReg);
 599   xorq(tmpReg, tmpReg);
 600   lock();
 601   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 602   // Unconditionally set box->_displaced_header = markWord::unused_mark().
 603   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
 604   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
 605   // Propagate ICC.ZF from CAS above into DONE_LABEL.
 606   jcc(Assembler::equal, DONE_LABEL);           // CAS above succeeded; propagate ZF = 1 (success)
 607 
 608   cmpptr(r15_thread, rax);                     // Check if we are already the owner (recursive lock)
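
In C-like terms, the 64-bit inflated fast path above amounts to the following sketch (std::atomic stands in for LOCK CMPXCHG; the recursion bump itself lives in code elided from this hunk):

    #include <atomic>

    struct Monitor { std::atomic<void*> owner{nullptr}; long recursions{0}; };

    // CAS the owner from NULL to the current thread; on failure the old
    // owner is left in 'expected', just as CMPXCHG leaves it in RAX.
    bool fast_inflated_lock(Monitor* m, void* self) {
      void* expected = nullptr;
      if (m->owner.compare_exchange_strong(expected, self))
        return true;               // ZF=1: lock acquired
      if (expected == self) {      // cmpptr(r15_thread, rax) above
        m->recursions++;           // recursive re-entry (elided below)
        return true;
      }
      return false;                // contended: slow path
    }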

 667   // Critically, the biased locking test must have precedence over
 668   // and appear before the (box->dhw == 0) recursive stack-lock test.
 669   if (UseBiasedLocking && !UseOptoBiasInlining) {
 670     biased_locking_exit(objReg, tmpReg, DONE_LABEL);
 671   }
 672 
 673 #if INCLUDE_RTM_OPT
 674   if (UseRTMForStackLocks && use_rtm) {
 675     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
 676     Label L_regular_unlock;
 677     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
 678     andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
 679     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
 680     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
 681     xend();                                                           // otherwise end...
 682     jmp(DONE_LABEL);                                                  // ... and we're done
 683     bind(L_regular_unlock);
 684   }
 685 #endif
 686 
 687   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
 688   jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
 689   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
 690   testptr(tmpReg, markWord::monitor_value);                         // Inflated?
 691   jccb  (Assembler::zero, Stacked);
 692 
 693   // It's inflated.
 694 #if INCLUDE_RTM_OPT
 695   if (use_rtm) {
 696     Label L_regular_inflated_unlock;
 697     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 698     movptr(boxReg, Address(tmpReg, owner_offset));
 699     testptr(boxReg, boxReg);
 700     jccb(Assembler::notZero, L_regular_inflated_unlock);
 701     xend();
 702     jmpb(DONE_LABEL);
 703     bind(L_regular_inflated_unlock);
 704   }
 705 #endif
 706 
 707   // Despite our balanced locking property we still check that m->_owner == Self
 708   // as java routines or native JNI code called by this thread might
 709   // have released the lock.
 710   // Refer to the comments in synchronizer.cpp for how we might encode extra
 711   // state in _succ so we can avoid fetching EntryList|cxq.

 722   // IA32's memory-model is SPO, so STs are ordered with respect to
 723   // each other and there's no need for an explicit barrier (fence).
 724   // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
 725 #ifndef _LP64
 726   get_thread (boxReg);
 727 
 728   // Note that we could employ various encoding schemes to reduce
 729   // the number of loads below (currently 4) to just 2 or 3.
 730   // Refer to the comments in synchronizer.cpp.
 731   // In practice the chain of fetches doesn't seem to impact performance, however.
 732   xorptr(boxReg, boxReg);
 733   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
 734   jccb  (Assembler::notZero, DONE_LABEL);
 735   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
 736   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
 737   jccb  (Assembler::notZero, CheckSucc);
 738   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
 739   jmpb  (DONE_LABEL);
 740 
 741   bind (Stacked);
 742   // It's not inflated and it's not recursively stack-locked and it's not biased.
 743   // It must be stack-locked.
 744   // Try to reset the header to displaced header.
 745   // The "box" value on the stack is stable, so we can reload
 746   // and be assured we observe the same value as above.
 747   movptr(tmpReg, Address(boxReg, 0));
 748   lock();
 749   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 750   // Intentional fall-through into DONE_LABEL
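
A compilable sketch of the stack-unlock CAS above, assuming dhw is the displaced markword saved at lock time (names hypothetical):

    #include <atomic>
    #include <cstdint>

    // Swing the object's markword from "points at our BasicLock" (the RAX
    // comparand is the box address) back to the saved displaced header.
    bool stack_unlock(std::atomic<intptr_t>* mark_addr,
                      intptr_t box_addr, intptr_t dhw) {
      intptr_t expected = box_addr;
      return mark_addr->compare_exchange_strong(expected, dhw);
    }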
 751 
 752   // DONE_LABEL is a hot target - we'd really like to place it at the
 753   // start of cache line by padding with NOPs.
 754   // See the AMD and Intel software optimization manuals for the
 755   // most efficient "long" NOP encodings.
 756   // Unfortunately none of our alignment mechanisms suffice.
 757   bind (CheckSucc);
 758 #else // _LP64
 759   // It's inflated
 760   Label LNotRecursive, LSuccess, LGoSlowPath;
 761 
 762   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
 763   jccb(Assembler::equal, LNotRecursive);
 764 
 765   // Recursive inflated unlock
 766   decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
 767   jmpb(LSuccess);
 768 
 769   bind(LNotRecursive);

 814 
 815   // box is really RAX -- the following CMPXCHG depends on that binding
 816   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
 817   lock();
 818   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 819   // There's no successor so we tried to regrab the lock.
 820   // If that didn't work, then another thread grabbed the
 821   // lock so we're done (and exit was a success).
 822   jccb  (Assembler::notEqual, LSuccess);
 823   // Intentional fall-through into slow path
 824 
 825   bind  (LGoSlowPath);
 826   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
 827   jmpb  (DONE_LABEL);
 828 
 829   bind  (LSuccess);
 830   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
 831   jmpb  (DONE_LABEL);
 832 
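The no-successor regrab above can be summarized as a sketch (std::atomic stands in for the LOCK CMPXCHG; semantics inferred from the comments):

    #include <atomic>

    // Having released the lock with no visible successor, try to take it
    // back so the slow path can wake a waiter. Losing this race is fine:
    // whoever grabbed the lock inherits the wake-up responsibility.
    bool exit_needs_slow_path(std::atomic<void*>* owner, void* self) {
      void* expected = nullptr;  // boxReg/RAX holds 0 as the comparand
      return owner->compare_exchange_strong(expected, self);  // true -> slow
    }
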
 833   bind  (Stacked);
 834   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
 835   lock();
 836   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 837 
 838 #endif
 839   bind(DONE_LABEL);
 840 }
 841 
 842 //-------------------------------------------------------------------------------------------
 843 // Generic instructions support for use in .ad files C2 code generation
 844 
 845 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
 846   if (dst != src) {
 847     movdqu(dst, src);
 848   }
 849   if (opcode == Op_AbsVD) {
 850     andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
 851   } else {
 852     assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
 853     xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
 854   }
 855 }
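
A scalar view of the bit trick vabsnegd relies on, assuming the usual constants (a per-lane sign mask of 0x7FFF... and a sign flip of 0x8000...):

    #include <cstdint>
    #include <cstring>

    // AbsVD: AND clears the IEEE-754 sign bit -> fabs.
    // NegVD: XOR flips the sign bit -> negation.
    double abs_via_mask(double x) {
      uint64_t bits; std::memcpy(&bits, &x, sizeof bits);
      bits &= 0x7FFFFFFFFFFFFFFFull;
      std::memcpy(&x, &bits, sizeof x);
      return x;
    }
    double neg_via_flip(double x) {
      uint64_t bits; std::memcpy(&bits, &x, sizeof bits);
      bits ^= 0x8000000000000000ull;
      std::memcpy(&x, &bits, sizeof x);
      return x;
    }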
 856 

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp (new version)
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "oops/methodData.hpp"
  29 #include "opto/c2_CodeStubs.hpp"
  30 #include "opto/c2_MacroAssembler.hpp"
  31 #include "opto/intrinsicnode.hpp"
  32 #include "opto/opcodes.hpp"
  33 #include "opto/output.hpp"
  34 #include "opto/subnode.hpp"
  35 #include "runtime/biasedLocking.hpp"
  36 #include "runtime/objectMonitor.hpp"
  37 #include "runtime/stubRoutines.hpp"
  38 
  39 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
  40   switch (vlen_in_bytes) {
  41     case  4: // fall-through
  42     case  8: // fall-through
  43     case 16: return Assembler::AVX_128bit;
  44     case 32: return Assembler::AVX_256bit;
  45     case 64: return Assembler::AVX_512bit;
  46 
  47     default: {
  48       ShouldNotReachHere();
  49       return Assembler::AVX_NoVec;
  50     }
  51   }
  52 }
  53 

 431 //    But beware of excessive branch density on AMD Opterons.
 432 //
 433 // *  Both fast_lock and fast_unlock set the ICC.ZF to indicate success
 434 //    or failure of the fast path.  If the fast path fails then we pass
 435 //    control to the slow path, typically in C.  In fast_lock and
 436 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
 437 //    will emit a conditional branch immediately after the node.
 438 //    So we have branches to branches and lots of ICC.ZF games.
 439 //    Instead, it might be better to have C2 pass a "FailureLabel"
 440 //    into fast_lock and fast_unlock.  In the case of success, control
 441 //    will drop through the node.  ICC.ZF is undefined at exit.
 442 //    In the case of failure, the node will branch directly to the
 443 //    FailureLabel.
 444 
 445 
 446 // obj: object to lock
 447 // box: on-stack box address (displaced header location) - KILLED
  448 // rax: tmp -- KILLED
 449 // scr: tmp -- KILLED
 450 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
 451                                  Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
 452                                  BiasedLockingCounters* counters,
 453                                  RTMLockingCounters* rtm_counters,
 454                                  RTMLockingCounters* stack_rtm_counters,
 455                                  Metadata* method_data,
 456                                  bool use_rtm, bool profile_rtm) {
 457   // Ensure the register assignments are disjoint
 458   assert(tmpReg == rax, "");
 459 
 460   if (use_rtm) {
 461     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
 462   } else {
 463     assert(cx2Reg == noreg, "");
 464     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
 465   }
 466 
 467   if (counters != NULL) {
 468     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
 469   }
 470 
 471   // Possible cases that we'll encounter in fast_lock

 500   // order to reduce the number of conditional branches in the most common cases.
 501   // Beware -- there's a subtle invariant that the fetch of the markword
 502   // at [FETCH], below, will never observe a biased encoding (*101b).
 503   // If this invariant does not hold, we risk an exclusion (safety) failure.
 504   if (UseBiasedLocking && !UseOptoBiasInlining) {
 505     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
 506   }
 507 
 508 #if INCLUDE_RTM_OPT
 509   if (UseRTMForStackLocks && use_rtm) {
 510     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
 511                       stack_rtm_counters, method_data, profile_rtm,
 512                       DONE_LABEL, IsInflated);
 513   }
 514 #endif // INCLUDE_RTM_OPT
 515 
 516   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
 517   testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
 518   jccb(Assembler::notZero, IsInflated);
 519 
 520   if (UseFastLocking) {
 521 #ifdef _LP64
 522     fast_lock_impl(objReg, tmpReg, thread, scrReg, DONE_LABEL, false);
 523     xorl(tmpReg, tmpReg); // Set ZF=1 to indicate success
 524 #else
 525     // We cannot emit the lock-stack check in verified_entry() because we don't have enough
 526     // registers (for the thread ptr). Therefore we have to emit the lock-stack check in
 527     // fast_lock_impl(). However, that check can take a slow path with ZF=1, so
 528     // we need to handle it specially and force ZF=0 before taking the actual slow path.
 529     Label slow;
 530     fast_lock_impl(objReg, tmpReg, thread, scrReg, slow);
 531     xorl(tmpReg, tmpReg);
 532     jmp(DONE_LABEL);
 533     bind(slow);
 534     testptr(objReg, objReg); // ZF=0 to indicate failure
 535 #endif
 536   } else {
 537     // Attempt stack-locking ...
 538     orptr (tmpReg, markWord::unlocked_value);
 539     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
 540     lock();
 541     cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
 542     if (counters != NULL) {
 543       cond_inc32(Assembler::equal,
 544                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
 545     }
 546     jcc(Assembler::equal, DONE_LABEL);           // Success
 547 
 548     // Recursive locking.
 549     // The object is stack-locked: markword contains stack pointer to BasicLock.
 550     // Locked by current thread if difference with current SP is less than one page.
 551     subptr(tmpReg, rsp);
 552     // The next instruction sets ZFlag == 1 (success) if the difference is less than one page.
 553     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
 554     movptr(Address(boxReg, 0), tmpReg);
 555     if (counters != NULL) {
 556       cond_inc32(Assembler::equal,
 557                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
 558     }
 559   }
 560   jmp(DONE_LABEL);
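
A heavily hedged sketch of what the UseFastLocking branch above does conceptually (all names and mark-bit values are assumptions modeled on the lock-stack design; the real contract is fast_lock_impl's): instead of writing a displaced header into the box, the mark is CASed into a locked state and the object is pushed on a per-thread lock stack.

    #include <atomic>
    #include <cstdint>
    #include <vector>

    // Conceptual model only, not the VM's code. Assumed mark encoding:
    // low two bits 01 = unlocked/neutral, 00 = locked.
    struct Obj { std::atomic<uintptr_t> mark{0x5}; };

    bool fast_lock_sketch(Obj* o, std::vector<Obj*>& lock_stack) {
      uintptr_t m = o->mark.load();
      if ((m & 0x3) != 0x1) return false;  // not neutral: inflated/slow path
      if (!o->mark.compare_exchange_strong(m, m & ~uintptr_t(0x3)))
        return false;                      // lost the race: slow path
      lock_stack.push_back(o);             // no displaced header needed
      return true;                         // caller then sets ZF=1
    }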
 561 
 562   bind(IsInflated);
 563   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
 564 
 565 #if INCLUDE_RTM_OPT
 566   // Use the same RTM locking code in 32- and 64-bit VM.
 567   if (use_rtm) {
 568     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
 569                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
 570   } else {
 571 #endif // INCLUDE_RTM_OPT
 572 
 573 #ifndef _LP64
 574   // The object is inflated.
 575 
 576   // boxReg refers to the on-stack BasicLock in the current frame.
 577   // We'd like to write:
 578   //   set box->_displaced_header = markWord::unused_mark().  Any non-0 value suffices.
 579   // This is convenient but results in an ST-before-CAS penalty.  The following CAS suffers
 580   // additional latency as we have another ST in the store buffer that must drain.
 581 
 582   // avoid ST-before-CAS
 583   // register juggle because we need tmpReg for cmpxchgptr below
 584   movptr(scrReg, boxReg);
 585   movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
 586 
 587   // Optimistic form: consider XORL tmpReg,tmpReg
 588   movptr(tmpReg, NULL_WORD);
 589 
 590   // Appears unlocked - try to swing _owner from null to non-null.
 591   // Ideally, I'd manifest "Self" with get_thread and then attempt
 592   // to CAS the register containing Self into m->Owner.
 593   // But we don't have enough registers, so instead we can either try to CAS
 594   // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
 595   // we later store "Self" into m->Owner.  Transiently storing a stack address
 596   // (rsp or the address of the box) into  m->owner is harmless.
 597   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
 598   lock();
 599   cmpxchgptr(thread, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 600   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
 601   // If the CAS fails we can either retry or pass control to the slow path.
 602   // We use the latter tactic.
 603   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
 604   // If the CAS was successful ...
 605   //   Self has acquired the lock
 606   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
 607   // Intentional fall-through into DONE_LABEL ...
 608 #else // _LP64
 609   // It's inflated and we use scrReg for ObjectMonitor* in this section.
 610   movq(scrReg, tmpReg);
 611   xorq(tmpReg, tmpReg);
 612   lock();
 613   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 614   // Unconditionally set box->_displaced_header = markWord::unused_mark().
 615   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
 616   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
 617   // Propagate ICC.ZF from CAS above into DONE_LABEL.
 618   jcc(Assembler::equal, DONE_LABEL);           // CAS above succeeded; propagate ZF = 1 (success)
 619 
 620   cmpptr(r15_thread, rax);                     // Check if we are already the owner (recursive lock)

 679   // Critically, the biased locking test must have precedence over
 680   // and appear before the (box->dhw == 0) recursive stack-lock test.
 681   if (UseBiasedLocking && !UseOptoBiasInlining) {
 682     biased_locking_exit(objReg, tmpReg, DONE_LABEL);
 683   }
 684 
 685 #if INCLUDE_RTM_OPT
 686   if (UseRTMForStackLocks && use_rtm) {
 687     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
 688     Label L_regular_unlock;
 689     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
 690     andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
 691     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
 692     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
 693     xend();                                                           // otherwise end...
 694     jmp(DONE_LABEL);                                                  // ... and we're done
 695     bind(L_regular_unlock);
 696   }
 697 #endif
 698 
 699   if (!UseFastLocking) {
 700     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
 701     jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
 702   }
 703   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
 704   testptr(tmpReg, markWord::monitor_value);                         // Inflated?
 705   jcc(Assembler::zero, Stacked);
 706 
 707   if (UseFastLocking) {
 708     // If the owner is ANONYMOUS, we need to fix it.
 709     testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int) (intptr_t) ANONYMOUS_OWNER);
 710 #ifdef _LP64
 711     C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg);
 712     Compile::current()->output()->add_stub(stub);
 713     jcc(Assembler::notEqual, stub->entry());
 714     bind(stub->continuation());
 715 #else
 716     // We can't easily implement this optimization on 32-bit because we don't have a thread register.
 717     // Call the slow-path instead.
 718     jcc(Assembler::notEqual, DONE_LABEL);
 719 #endif
 720   }
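
What the stub conceptually does when the owner is still anonymous (semantics inferred from the surrounding code; the real work is in C2HandleAnonOMOwnerStub, and the field names here are assumptions):

    #include <cstdint>

    struct Monitor { uintptr_t owner; };
    static const uintptr_t ANON = 1;  // placeholder owner value (assumed)

    // The monitor was inflated while the object sat on the lock stack, so
    // the owner field still holds a placeholder. Claim it for the current
    // thread; the stub also pops the object from the lock stack (elided).
    void fix_anon_owner(Monitor* m, uintptr_t self) {
      if (m->owner == ANON) {
        m->owner = self;  // now a regularly owned monitor
      }
    }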
 721 
 722   // It's inflated.
 723 #if INCLUDE_RTM_OPT
 724   if (use_rtm) {
 725     Label L_regular_inflated_unlock;
 726     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 727     movptr(boxReg, Address(tmpReg, owner_offset));
 728     testptr(boxReg, boxReg);
 729     jccb(Assembler::notZero, L_regular_inflated_unlock);
 730     xend();
 731     jmpb(DONE_LABEL);
 732     bind(L_regular_inflated_unlock);
 733   }
 734 #endif
 735 
 736   // Despite our balanced locking property we still check that m->_owner == Self
 737   // as java routines or native JNI code called by this thread might
 738   // have released the lock.
 739   // Refer to the comments in synchronizer.cpp for how we might encode extra
 740   // state in _succ so we can avoid fetching EntryList|cxq.

 751   // IA32's memory-model is SPO, so STs are ordered with respect to
 752   // each other and there's no need for an explicit barrier (fence).
 753   // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
 754 #ifndef _LP64
 755   get_thread (boxReg);
 756 
 757   // Note that we could employ various encoding schemes to reduce
 758   // the number of loads below (currently 4) to just 2 or 3.
 759   // Refer to the comments in synchronizer.cpp.
 760   // In practice the chain of fetches doesn't seem to impact performance, however.
 761   xorptr(boxReg, boxReg);
 762   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
 763   jccb  (Assembler::notZero, DONE_LABEL);
 764   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
 765   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
 766   jccb  (Assembler::notZero, CheckSucc);
 767   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
 768   jmpb  (DONE_LABEL);
 769 
 770   bind (Stacked);
 771   if (UseFastLocking) {
 772     mov(boxReg, tmpReg);
 773     fast_unlock_impl(objReg, boxReg, tmpReg, DONE_LABEL);
 774     xorl(tmpReg, tmpReg);
 775   } else {
 776     // It's not inflated and it's not recursively stack-locked and it's not biased.
 777     // It must be stack-locked.
 778     // Try to reset the header to displaced header.
 779     // The "box" value on the stack is stable, so we can reload
 780     // and be assured we observe the same value as above.
 781     movptr(tmpReg, Address(boxReg, 0));
 782     lock();
 783     cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 784   }
 785   // Intentional fall-through into DONE_LABEL
 786 
 787   // DONE_LABEL is a hot target - we'd really like to place it at the
 788   // start of cache line by padding with NOPs.
 789   // See the AMD and Intel software optimization manuals for the
 790   // most efficient "long" NOP encodings.
 791   // Unfortunately none of our alignment mechanisms suffice.
 792   bind (CheckSucc);
 793 #else // _LP64
 794   // It's inflated
 795   Label LNotRecursive, LSuccess, LGoSlowPath;
 796 
 797   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
 798   jccb(Assembler::equal, LNotRecursive);
 799 
 800   // Recursive inflated unlock
 801   decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
 802   jmpb(LSuccess);
 803 
 804   bind(LNotRecursive);

 849 
 850   // box is really RAX -- the following CMPXCHG depends on that binding
 851   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
 852   lock();
 853   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 854   // There's no successor so we tried to regrab the lock.
 855   // If that didn't work, then another thread grabbed the
 856   // lock so we're done (and exit was a success).
 857   jccb  (Assembler::notEqual, LSuccess);
 858   // Intentional fall-through into slow path
 859 
 860   bind  (LGoSlowPath);
 861   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
 862   jmpb  (DONE_LABEL);
 863 
 864   bind  (LSuccess);
 865   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
 866   jmpb  (DONE_LABEL);
 867 
 868   bind  (Stacked);
 869 
 870   if (UseFastLocking) {
 871     mov(boxReg, tmpReg);
 872     fast_unlock_impl(objReg, boxReg, tmpReg, DONE_LABEL);
 873     xorl(tmpReg, tmpReg);
 874   } else {
 875     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
 876     lock();
 877     cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 878   }
 879 
 880 #endif
 881   bind(DONE_LABEL);
 882 }
 883 
 884 //-------------------------------------------------------------------------------------------
 885 // Generic instructions support for use in .ad files C2 code generation
 886 
 887 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
 888   if (dst != src) {
 889     movdqu(dst, src);
 890   }
 891   if (opcode == Op_AbsVD) {
 892     andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
 893   } else {
 894     assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
 895     xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
 896   }
 897 }
 898 