< prev index next >

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Print this page

   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "oops/methodData.hpp"

  29 #include "opto/c2_MacroAssembler.hpp"
  30 #include "opto/intrinsicnode.hpp"
  31 #include "opto/opcodes.hpp"

  32 #include "opto/subnode.hpp"
  33 #include "runtime/biasedLocking.hpp"
  34 #include "runtime/objectMonitor.hpp"
  35 #include "runtime/stubRoutines.hpp"
  36 
  37 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
  38   switch (vlen_in_bytes) {
  39     case  4: // fall-through
  40     case  8: // fall-through
  41     case 16: return Assembler::AVX_128bit;
  42     case 32: return Assembler::AVX_256bit;
  43     case 64: return Assembler::AVX_512bit;
  44 
  45     default: {
  46       ShouldNotReachHere();
  47       return Assembler::AVX_NoVec;
  48     }
  49   }
  50 }
  51 

 429 //    But beware of excessive branch density on AMD Opterons.
 430 //
 431 // *  Both fast_lock and fast_unlock set the ICC.ZF to indicate success
 432 //    or failure of the fast path.  If the fast path fails then we pass
 433 //    control to the slow path, typically in C.  In fast_lock and
 434 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
 435 //    will emit a conditional branch immediately after the node.
 436 //    So we have branches to branches and lots of ICC.ZF games.
 437 //    Instead, it might be better to have C2 pass a "FailureLabel"
 438 //    into fast_lock and fast_unlock.  In the case of success, control
 439 //    will drop through the node.  ICC.ZF is undefined at exit.
 440 //    In the case of failure, the node will branch directly to the
 441 //    FailureLabel
 442 
 443 
 444 // obj: object to lock
 445 // box: on-stack box address (displaced header location) - KILLED
 446 // rax,: tmp -- KILLED
 447 // scr: tmp -- KILLED
 448 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
 449                                  Register scrReg, Register cx1Reg, Register cx2Reg,
 450                                  BiasedLockingCounters* counters,
 451                                  RTMLockingCounters* rtm_counters,
 452                                  RTMLockingCounters* stack_rtm_counters,
 453                                  Metadata* method_data,
 454                                  bool use_rtm, bool profile_rtm) {
 455   // Ensure the register assignments are disjoint
 456   assert(tmpReg == rax, "");
 457 
 458   if (use_rtm) {
 459     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
 460   } else {
 461     assert(cx2Reg == noreg, "");
 462     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
 463   }
 464 
 465   if (counters != NULL) {
 466     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
 467   }
 468 
 469   // Possible cases that we'll encounter in fast_lock

 496   // it's stack-locked, biased or neutral
 497   // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
 498   // order to reduce the number of conditional branches in the most common cases.
 499   // Beware -- there's a subtle invariant that fetch of the markword
 500   // at [FETCH], below, will never observe a biased encoding (*101b).
 501   // If this invariant is not held we risk exclusion (safety) failure.
 502   if (UseBiasedLocking && !UseOptoBiasInlining) {
 503     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
 504   }
 505 
 506 #if INCLUDE_RTM_OPT
 507   if (UseRTMForStackLocks && use_rtm) {
 508     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
 509                       stack_rtm_counters, method_data, profile_rtm,
 510                       DONE_LABEL, IsInflated);
 511   }
 512 #endif // INCLUDE_RTM_OPT
 513 
 514   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
 515   testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
 516   jccb(Assembler::notZero, IsInflated);
 517 
 518   // Attempt stack-locking ...
 519   orptr (tmpReg, markWord::unlocked_value);
 520   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
 521   lock();
 522   cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
 523   if (counters != NULL) {
 524     cond_inc32(Assembler::equal,
 525                ExternalAddress((address)counters->fast_path_entry_count_addr()));
 526   }
 527   jcc(Assembler::equal, DONE_LABEL);           // Success
 528 
 529   // Recursive locking.
 530   // The object is stack-locked: markword contains stack pointer to BasicLock.
 531   // Locked by current thread if difference with current SP is less than one page.
 532   subptr(tmpReg, rsp);
 533   // Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
 534   andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
 535   movptr(Address(boxReg, 0), tmpReg);
 536   if (counters != NULL) {
 537     cond_inc32(Assembler::equal,
 538                ExternalAddress((address)counters->fast_path_entry_count_addr()));









 539   }
 540   jmp(DONE_LABEL);
 541 
 542   bind(IsInflated);
 543   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
 544 
 545 #if INCLUDE_RTM_OPT
 546   // Use the same RTM locking code in 32- and 64-bit VM.
 547   if (use_rtm) {
 548     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
 549                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
 550   } else {
 551 #endif // INCLUDE_RTM_OPT
 552 
 553 #ifndef _LP64
 554   // The object is inflated.
 555 
 556   // boxReg refers to the on-stack BasicLock in the current frame.
 557   // We'd like to write:
 558   //   set box->_displaced_header = markWord::unused_mark().  Any non-0 value suffices.

 667   // Critically, the biased locking test must have precedence over
 668   // and appear before the (box->dhw == 0) recursive stack-lock test.
 669   if (UseBiasedLocking && !UseOptoBiasInlining) {
 670     biased_locking_exit(objReg, tmpReg, DONE_LABEL);
 671   }
 672 
 673 #if INCLUDE_RTM_OPT
 674   if (UseRTMForStackLocks && use_rtm) {
 675     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
 676     Label L_regular_unlock;
 677     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
 678     andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
 679     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
 680     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
 681     xend();                                                           // otherwise end...
 682     jmp(DONE_LABEL);                                                  // ... and we're done
 683     bind(L_regular_unlock);
 684   }
 685 #endif
 686 
 687   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
 688   jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock


 689   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
 690   testptr(tmpReg, markWord::monitor_value);                         // Inflated?
 691   jccb  (Assembler::zero, Stacked);


 692 
 693   // It's inflated.


















 694 #if INCLUDE_RTM_OPT
 695   if (use_rtm) {
 696     Label L_regular_inflated_unlock;
 697     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 698     movptr(boxReg, Address(tmpReg, owner_offset));
 699     testptr(boxReg, boxReg);
 700     jccb(Assembler::notZero, L_regular_inflated_unlock);
 701     xend();
 702     jmpb(DONE_LABEL);
 703     bind(L_regular_inflated_unlock);
 704   }
 705 #endif
 706 
 707   // Despite our balanced locking property we still check that m->_owner == Self
 708   // as java routines or native JNI code called by this thread might
 709   // have released the lock.
 710   // Refer to the comments in synchronizer.cpp for how we might encode extra
 711   // state in _succ so we can avoid fetching EntryList|cxq.
 712   //
 713   // If there's no contention try a 1-0 exit.  That is, exit without
 714   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
 715   // we detect and recover from the race that the 1-0 exit admits.
 716   //
 717   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
 718   // before it STs null into _owner, releasing the lock.  Updates
 719   // to data protected by the critical section must be visible before
 720   // we drop the lock (and thus before any other thread could acquire
 721   // the lock and observe the fields protected by the lock).
 722   // IA32's memory-model is SPO, so STs are ordered with respect to
 723   // each other and there's no need for an explicit barrier (fence).
 724   // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
 725 #ifndef _LP64
 726   get_thread (boxReg);
 727 
 728   // Note that we could employ various encoding schemes to reduce
 729   // the number of loads below (currently 4) to just 2 or 3.
 730   // Refer to the comments in synchronizer.cpp.
 731   // In practice the chain of fetches doesn't seem to impact performance, however.
 732   xorptr(boxReg, boxReg);
 733   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
 734   jccb  (Assembler::notZero, DONE_LABEL);
 735   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
 736   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
 737   jccb  (Assembler::notZero, CheckSucc);
 738   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
 739   jmpb  (DONE_LABEL);
 740 
 741   bind (Stacked);
 742   // It's not inflated and it's not recursively stack-locked and it's not biased.
 743   // It must be stack-locked.
 744   // Try to reset the header to displaced header.
 745   // The "box" value on the stack is stable, so we can reload
 746   // and be assured we observe the same value as above.
 747   movptr(tmpReg, Address(boxReg, 0));
 748   lock();
 749   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 750   // Intention fall-thru into DONE_LABEL
 751 
 752   // DONE_LABEL is a hot target - we'd really like to place it at the
 753   // start of cache line by padding with NOPs.
 754   // See the AMD and Intel software optimization manuals for the
 755   // most efficient "long" NOP encodings.
 756   // Unfortunately none of our alignment mechanisms suffice.
 757   bind (CheckSucc);
 758 #else // _LP64
 759   // It's inflated
 760   Label LNotRecursive, LSuccess, LGoSlowPath;
 761 
 762   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
 763   jccb(Assembler::equal, LNotRecursive);
 764 
 765   // Recursive inflated unlock
 766   decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
 767   jmpb(LSuccess);
 768 
 769   bind(LNotRecursive);

 813   // length while by virtue of passing control into the slow path.
 814 
 815   // box is really RAX -- the following CMPXCHG depends on that binding
 816   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
 817   lock();
 818   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 819   // There's no successor so we tried to regrab the lock.
 820   // If that didn't work, then another thread grabbed the
 821   // lock so we're done (and exit was a success).
 822   jccb  (Assembler::notEqual, LSuccess);
 823   // Intentional fall-through into slow path
 824 
 825   bind  (LGoSlowPath);
 826   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
 827   jmpb  (DONE_LABEL);
 828 
 829   bind  (LSuccess);
 830   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
 831   jmpb  (DONE_LABEL);
 832 
 833   bind  (Stacked);
 834   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
 835   lock();
 836   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 837 
 838 #endif












 839   bind(DONE_LABEL);
 840 }
 841 
 842 //-------------------------------------------------------------------------------------------
 843 // Generic instructions support for use in .ad files C2 code generation
 844 
 845 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
 846   if (dst != src) {
 847     movdqu(dst, src);
 848   }
 849   if (opcode == Op_AbsVD) {
 850     andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
 851   } else {
 852     assert((opcode == Op_NegVD),"opcode should be Op_NegD");
 853     xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
 854   }
 855 }
 856 
 857 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
 858   if (opcode == Op_AbsVD) {

3957   evpcmpb(ktmp, ktmp, shuffle, xtmp2, Assembler::lt, true, vlen_enc);
3958   evshufi64x2(xtmp3, src, src, 0x55, vlen_enc);
3959   evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
3960 
3961   // Perform above steps with lane comparison expression as INDEX >= 32 && INDEX < 48
3962   // and broadcasting third 128 bit lane.
3963   evpcmpb(ktmp, k0, shuffle,  xtmp2, Assembler::nlt, true, vlen_enc);
3964   vpaddb(xtmp1, xtmp1, xtmp2, vlen_enc);
3965   evpcmpb(ktmp, ktmp, shuffle,  xtmp1, Assembler::lt, true, vlen_enc);
3966   evshufi64x2(xtmp3, src, src, 0xAA, vlen_enc);
3967   evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
3968 
 3969   // Perform above steps with lane comparison expression as INDEX >= 48 && INDEX < 64
 3970   // and broadcasting fourth 128 bit lane.
3971   evpcmpb(ktmp, k0, shuffle,  xtmp1, Assembler::nlt, true, vlen_enc);
3972   vpsllq(xtmp2, xtmp2, 0x1, vlen_enc);
3973   evpcmpb(ktmp, ktmp, shuffle,  xtmp2, Assembler::lt, true, vlen_enc);
3974   evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc);
3975   evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
3976 }























   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "oops/methodData.hpp"
  29 #include "opto/c2_CodeStubs.hpp"
  30 #include "opto/c2_MacroAssembler.hpp"
  31 #include "opto/intrinsicnode.hpp"
  32 #include "opto/opcodes.hpp"
  33 #include "opto/output.hpp"
  34 #include "opto/subnode.hpp"
  35 #include "runtime/biasedLocking.hpp"
  36 #include "runtime/objectMonitor.hpp"
  37 #include "runtime/stubRoutines.hpp"
  38 
// Translate a vector length in bytes into the AVX vector-length encoding
// used by the assembler. 4- and 8-byte vectors are still emitted with
// 128-bit (XMM) instruction encodings.
inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
  switch (vlen_in_bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      // C2 never generates any other vector width on x86.
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}
  53 

 431 //    But beware of excessive branch density on AMD Opterons.
 432 //
 433 // *  Both fast_lock and fast_unlock set the ICC.ZF to indicate success
 434 //    or failure of the fast path.  If the fast path fails then we pass
 435 //    control to the slow path, typically in C.  In fast_lock and
 436 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
 437 //    will emit a conditional branch immediately after the node.
 438 //    So we have branches to branches and lots of ICC.ZF games.
 439 //    Instead, it might be better to have C2 pass a "FailureLabel"
 440 //    into fast_lock and fast_unlock.  In the case of success, control
 441 //    will drop through the node.  ICC.ZF is undefined at exit.
 442 //    In the case of failure, the node will branch directly to the
 443 //    FailureLabel
 444 
 445 
 446 // obj: object to lock
 447 // box: on-stack box address (displaced header location) - KILLED
 448 // rax,: tmp -- KILLED
 449 // scr: tmp -- KILLED
 450 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
 451                                  Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
 452                                  BiasedLockingCounters* counters,
 453                                  RTMLockingCounters* rtm_counters,
 454                                  RTMLockingCounters* stack_rtm_counters,
 455                                  Metadata* method_data,
 456                                  bool use_rtm, bool profile_rtm) {
 457   // Ensure the register assignments are disjoint
 458   assert(tmpReg == rax, "");
 459 
 460   if (use_rtm) {
 461     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
 462   } else {
 463     assert(cx2Reg == noreg, "");
 464     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
 465   }
 466 
 467   if (counters != NULL) {
 468     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
 469   }
 470 
 471   // Possible cases that we'll encounter in fast_lock

 498   // it's stack-locked, biased or neutral
 499   // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
 500   // order to reduce the number of conditional branches in the most common cases.
 501   // Beware -- there's a subtle invariant that fetch of the markword
 502   // at [FETCH], below, will never observe a biased encoding (*101b).
 503   // If this invariant is not held we risk exclusion (safety) failure.
 504   if (UseBiasedLocking && !UseOptoBiasInlining) {
 505     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
 506   }
 507 
 508 #if INCLUDE_RTM_OPT
 509   if (UseRTMForStackLocks && use_rtm) {
 510     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
 511                       stack_rtm_counters, method_data, profile_rtm,
 512                       DONE_LABEL, IsInflated);
 513   }
 514 #endif // INCLUDE_RTM_OPT
 515 
 516   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
 517   testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
 518   jcc(Assembler::notZero, IsInflated);
 519 
 520   if (LockingMode == LM_MONITOR) {
 521     // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
 522     testptr(objReg, objReg);
 523   } else if (LockingMode == LM_LEGACY) {
 524     // Attempt stack-locking ...
 525     orptr (tmpReg, markWord::unlocked_value);
 526     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
 527     lock();
 528     cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
 529     if (counters != NULL) {
 530       cond_inc32(Assembler::equal,
 531                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
 532     }
 533     jcc(Assembler::equal, DONE_LABEL);           // Success
 534 
 535     // Recursive locking.
 536     // The object is stack-locked: markword contains stack pointer to BasicLock.
 537     // Locked by current thread if difference with current SP is less than one page.
 538     subptr(tmpReg, rsp);
 539     // Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
 540     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
 541     movptr(Address(boxReg, 0), tmpReg);
 542     if (counters != NULL) {
 543       cond_inc32(Assembler::equal,
 544                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
 545     }
 546   } else {
 547     assert(LockingMode == LM_LIGHTWEIGHT, "");
 548     lightweight_lock(objReg, tmpReg, thread, scrReg, DONE_LABEL);
 549     xorl(tmpReg, tmpReg); // Set ZF=1 to indicate success
 550   }
 551   jmp(DONE_LABEL);
 552 
 553   bind(IsInflated);
 554   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
 555 
 556 #if INCLUDE_RTM_OPT
 557   // Use the same RTM locking code in 32- and 64-bit VM.
 558   if (use_rtm) {
 559     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
 560                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
 561   } else {
 562 #endif // INCLUDE_RTM_OPT
 563 
 564 #ifndef _LP64
 565   // The object is inflated.
 566 
 567   // boxReg refers to the on-stack BasicLock in the current frame.
 568   // We'd like to write:
 569   //   set box->_displaced_header = markWord::unused_mark().  Any non-0 value suffices.

 678   // Critically, the biased locking test must have precedence over
 679   // and appear before the (box->dhw == 0) recursive stack-lock test.
 680   if (UseBiasedLocking && !UseOptoBiasInlining) {
 681     biased_locking_exit(objReg, tmpReg, DONE_LABEL);
 682   }
 683 
 684 #if INCLUDE_RTM_OPT
 685   if (UseRTMForStackLocks && use_rtm) {
 686     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
 687     Label L_regular_unlock;
 688     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
 689     andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
 690     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
 691     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
 692     xend();                                                           // otherwise end...
 693     jmp(DONE_LABEL);                                                  // ... and we're done
 694     bind(L_regular_unlock);
 695   }
 696 #endif
 697 
 698   if (LockingMode == LM_LEGACY) {
 699     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
 700     jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
 701   }
 702   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
 703   if (LockingMode != LM_MONITOR) {
 704     testptr(tmpReg, markWord::monitor_value);                         // Inflated?
 705     jcc(Assembler::zero, Stacked);
 706   }
 707 
 708   // It's inflated.
 709   if (LockingMode == LM_LIGHTWEIGHT) {
 710     // If the owner is ANONYMOUS, we need to fix it.
 711     testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) ObjectMonitor::ANONYMOUS_OWNER);
 712 #ifdef _LP64
 713     if (!Compile::current()->output()->in_scratch_emit_size()) {
 714       C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg, boxReg);
 715       Compile::current()->output()->add_stub(stub);
 716       jcc(Assembler::notEqual, stub->entry());
 717       bind(stub->continuation());
 718     } else
 719 #endif
 720     {
 721       // We can't easily implement this optimization on 32 bit because we don't have a thread register.
 722       // Call the slow-path instead.
 723       jcc(Assembler::notEqual, DONE_LABEL);
 724     }
 725   }
 726 
 727 #if INCLUDE_RTM_OPT
 728   if (use_rtm) {
 729     Label L_regular_inflated_unlock;
 730     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 731     movptr(boxReg, Address(tmpReg, owner_offset));
 732     testptr(boxReg, boxReg);
 733     jccb(Assembler::notZero, L_regular_inflated_unlock);
 734     xend();
 735     jmp(DONE_LABEL);
 736     bind(L_regular_inflated_unlock);
 737   }
 738 #endif
 739 
 740   // Despite our balanced locking property we still check that m->_owner == Self
 741   // as java routines or native JNI code called by this thread might
 742   // have released the lock.
 743   // Refer to the comments in synchronizer.cpp for how we might encode extra
 744   // state in _succ so we can avoid fetching EntryList|cxq.
 745   //
 746   // If there's no contention try a 1-0 exit.  That is, exit without
 747   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
 748   // we detect and recover from the race that the 1-0 exit admits.
 749   //
 750   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
 751   // before it STs null into _owner, releasing the lock.  Updates
 752   // to data protected by the critical section must be visible before
 753   // we drop the lock (and thus before any other thread could acquire
 754   // the lock and observe the fields protected by the lock).
 755   // IA32's memory-model is SPO, so STs are ordered with respect to
 756   // each other and there's no need for an explicit barrier (fence).
 757   // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
 758 #ifndef _LP64
 759   get_thread (boxReg);
 760 
 761   // Note that we could employ various encoding schemes to reduce
 762   // the number of loads below (currently 4) to just 2 or 3.
 763   // Refer to the comments in synchronizer.cpp.
 764   // In practice the chain of fetches doesn't seem to impact performance, however.
 765   xorptr(boxReg, boxReg);
 766   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
 767   jccb  (Assembler::notZero, DONE_LABEL);
 768   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
 769   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
 770   jccb  (Assembler::notZero, DONE_LABEL);
 771   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
 772   jmpb  (DONE_LABEL);
 773 









 774   // Intention fall-thru into DONE_LABEL
 775 
 776   // DONE_LABEL is a hot target - we'd really like to place it at the
 777   // start of cache line by padding with NOPs.
 778   // See the AMD and Intel software optimization manuals for the
 779   // most efficient "long" NOP encodings.
 780   // Unfortunately none of our alignment mechanisms suffice.
 781   bind (CheckSucc);
 782 #else // _LP64
 783   // It's inflated
 784   Label LNotRecursive, LSuccess, LGoSlowPath;
 785 
 786   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
 787   jccb(Assembler::equal, LNotRecursive);
 788 
 789   // Recursive inflated unlock
 790   decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
 791   jmpb(LSuccess);
 792 
 793   bind(LNotRecursive);

 837   // length while by virtue of passing control into the slow path.
 838 
 839   // box is really RAX -- the following CMPXCHG depends on that binding
 840   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
 841   lock();
 842   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 843   // There's no successor so we tried to regrab the lock.
 844   // If that didn't work, then another thread grabbed the
 845   // lock so we're done (and exit was a success).
 846   jccb  (Assembler::notEqual, LSuccess);
 847   // Intentional fall-through into slow path
 848 
 849   bind  (LGoSlowPath);
 850   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
 851   jmpb  (DONE_LABEL);
 852 
 853   bind  (LSuccess);
 854   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
 855   jmpb  (DONE_LABEL);
 856 





 857 #endif
 858   if (LockingMode != LM_MONITOR) {
 859     bind  (Stacked);
 860     if (LockingMode == LM_LIGHTWEIGHT) {
 861       mov(boxReg, tmpReg);
 862       lightweight_unlock(objReg, boxReg, tmpReg, DONE_LABEL);
 863       xorl(tmpReg, tmpReg);
 864     } else if (LockingMode == LM_LEGACY) {
 865       movptr(tmpReg, Address (boxReg, 0));      // re-fetch
 866       lock();
 867       cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 868     }
 869   }
 870   bind(DONE_LABEL);
 871 }
 872 
 873 //-------------------------------------------------------------------------------------------
 874 // Generic instructions support for use in .ad files C2 code generation
 875 
 876 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
 877   if (dst != src) {
 878     movdqu(dst, src);
 879   }
 880   if (opcode == Op_AbsVD) {
 881     andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
 882   } else {
 883     assert((opcode == Op_NegVD),"opcode should be Op_NegD");
 884     xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
 885   }
 886 }
 887 
 888 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
 889   if (opcode == Op_AbsVD) {

3988   evpcmpb(ktmp, ktmp, shuffle, xtmp2, Assembler::lt, true, vlen_enc);
3989   evshufi64x2(xtmp3, src, src, 0x55, vlen_enc);
3990   evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
3991 
3992   // Perform above steps with lane comparison expression as INDEX >= 32 && INDEX < 48
3993   // and broadcasting third 128 bit lane.
3994   evpcmpb(ktmp, k0, shuffle,  xtmp2, Assembler::nlt, true, vlen_enc);
3995   vpaddb(xtmp1, xtmp1, xtmp2, vlen_enc);
3996   evpcmpb(ktmp, ktmp, shuffle,  xtmp1, Assembler::lt, true, vlen_enc);
3997   evshufi64x2(xtmp3, src, src, 0xAA, vlen_enc);
3998   evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
3999 
 4000   // Perform above steps with lane comparison expression as INDEX >= 48 && INDEX < 64
 4001   // and broadcasting fourth 128 bit lane.
4002   evpcmpb(ktmp, k0, shuffle,  xtmp1, Assembler::nlt, true, vlen_enc);
4003   vpsllq(xtmp2, xtmp2, 0x1, vlen_enc);
4004   evpcmpb(ktmp, ktmp, shuffle,  xtmp2, Assembler::lt, true, vlen_enc);
4005   evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc);
4006   evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
4007 }
4008 
4009 #ifdef _LP64
// Load the narrow klass of obj into dst when compact object headers are in
// use, i.e. extract it from the object's mark-word. If the mark-word is an
// inflated-monitor pointer (monitor_value bit set), branch to a slow-path
// stub that fetches the displaced header instead.
//   dst:              receives the narrow klass (also used as scratch)
//   obj/index/scale/disp: address components as produced by C2 matching
void C2_MacroAssembler::load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp) {
  // Stub is allocated in the compile arena and emitted out-of-line; the
  // fast path below falls through to its continuation label.
  C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
  Compile::current()->output()->add_stub(stub);

  // Note: Don't clobber obj anywhere in that method!

  // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract
  // obj-start, so that we can load from the object's mark-word instead. Usually the address
  // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
  // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and
  // then passes that register as obj and 0 in disp. The following code extracts the base
  // and offset to load the mark-word.
  int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
  movq(dst, Address(obj, index, scale, offset));
  // Inflated monitor? Then the mark-word is displaced; take the stub path.
  testb(dst, markWord::monitor_value);
  jcc(Assembler::notZero, stub->entry());
  bind(stub->continuation());
  // Shift the (possibly stub-repaired) mark-word down to the klass bits.
  shrq(dst, markWord::klass_shift);
}
4029 #endif
< prev index next >