9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "oops/methodData.hpp"
29 #include "opto/c2_MacroAssembler.hpp"
30 #include "opto/intrinsicnode.hpp"
31 #include "opto/opcodes.hpp"
32 #include "opto/subnode.hpp"
33 #include "runtime/biasedLocking.hpp"
34 #include "runtime/objectMonitor.hpp"
35 #include "runtime/stubRoutines.hpp"
36
37 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
38 switch (vlen_in_bytes) {
39 case 4: // fall-through
40 case 8: // fall-through
41 case 16: return Assembler::AVX_128bit;
42 case 32: return Assembler::AVX_256bit;
43 case 64: return Assembler::AVX_512bit;
44
45 default: {
46 ShouldNotReachHere();
47 return Assembler::AVX_NoVec;
48 }
49 }
50 }
51
429 // But beware of excessive branch density on AMD Opterons.
430 //
431 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
432 // or failure of the fast path. If the fast path fails then we pass
433 // control to the slow path, typically in C. In fast_lock and
434 // fast_unlock we often branch to DONE_LABEL, just to find that C2
435 // will emit a conditional branch immediately after the node.
436 // So we have branches to branches and lots of ICC.ZF games.
437 // Instead, it might be better to have C2 pass a "FailureLabel"
438 // into fast_lock and fast_unlock. In the case of success, control
439 // will drop through the node. ICC.ZF is undefined at exit.
440 // In the case of failure, the node will branch directly to the
441 // FailureLabel
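//     As a rough sketch of the current contract (illustrative, not the emitted code):
//
//       fast_lock(obj, box, ...)     // leaves ZF = 1 on success, ZF = 0 on failure
//       jne   <slow-path stub>       // branch emitted by C2 right after the node
//       ...                          // critical section
//
//     With a FailureLabel the extra jne would disappear: success falls through,
//     failure branches straight out of fast_lock.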
442
443
444 // obj: object to lock
445 // box: on-stack box address (displaced header location) - KILLED
446 // rax,: tmp -- KILLED
447 // scr: tmp -- KILLED
448 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
449 Register scrReg, Register cx1Reg, Register cx2Reg,
450 BiasedLockingCounters* counters,
451 RTMLockingCounters* rtm_counters,
452 RTMLockingCounters* stack_rtm_counters,
453 Metadata* method_data,
454 bool use_rtm, bool profile_rtm) {
455 // Ensure the register assignments are disjoint
456 assert(tmpReg == rax, "");
457
458 if (use_rtm) {
459 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
460 } else {
461 assert(cx2Reg == noreg, "");
462 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
463 }
464
465 if (counters != NULL) {
466 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
467 }
468
469 // Possible cases that we'll encounter in fast_lock
496 // it's stack-locked, biased or neutral
497 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
498 // order to reduce the number of conditional branches in the most common cases.
499 // Beware -- there's a subtle invariant that fetch of the markword
500 // at [FETCH], below, will never observe a biased encoding (*101b).
501 // If this invariant is not held we risk exclusion (safety) failure.
502 if (UseBiasedLocking && !UseOptoBiasInlining) {
503 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
504 }
505
506 #if INCLUDE_RTM_OPT
507 if (UseRTMForStackLocks && use_rtm) {
508 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
509 stack_rtm_counters, method_data, profile_rtm,
510 DONE_LABEL, IsInflated);
511 }
512 #endif // INCLUDE_RTM_OPT
513
514 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
515 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
516 jccb(Assembler::notZero, IsInflated);
517
518 // Attempt stack-locking ...
519 orptr (tmpReg, markWord::unlocked_value);
520 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
521 lock();
522 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
523 if (counters != NULL) {
524 cond_inc32(Assembler::equal,
525 ExternalAddress((address)counters->fast_path_entry_count_addr()));
526 }
527 jcc(Assembler::equal, DONE_LABEL); // Success
528
529 // Recursive locking.
530 // The object is stack-locked: markword contains stack pointer to BasicLock.
531 // Locked by current thread if difference with current SP is less than one page.
532 subptr(tmpReg, rsp);
533   // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
534 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
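  // A worked example of the test above, assuming a 4 KiB page: for a stack-lock held
  // by this thread the fetched mark is a BasicLock address in the current frame, so
  // tmpReg = mark - rsp is a small, pointer-aligned positive offset (say 0x58).
  // The mask keeps the bits at or above the page size plus the low alignment bits, so
  // the AND yields 0: ZF = 1, and the 0 written into the box below marks the recursive
  // case (the unlock path treats box->dhw == 0 as a recursive stack-lock).
  // Any other mark value leaves a non-zero result, ZF = 0, and we take the slow path.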
535 movptr(Address(boxReg, 0), tmpReg);
536 if (counters != NULL) {
537 cond_inc32(Assembler::equal,
538 ExternalAddress((address)counters->fast_path_entry_count_addr()));
539 }
540 jmp(DONE_LABEL);
541
542 bind(IsInflated);
543 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
544
545 #if INCLUDE_RTM_OPT
546 // Use the same RTM locking code in 32- and 64-bit VM.
547 if (use_rtm) {
548 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
549 rtm_counters, method_data, profile_rtm, DONE_LABEL);
550 } else {
551 #endif // INCLUDE_RTM_OPT
552
553 #ifndef _LP64
554 // The object is inflated.
555
556 // boxReg refers to the on-stack BasicLock in the current frame.
557 // We'd like to write:
558 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
667 // Critically, the biased locking test must have precedence over
668 // and appear before the (box->dhw == 0) recursive stack-lock test.
669 if (UseBiasedLocking && !UseOptoBiasInlining) {
670 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
671 }
672
673 #if INCLUDE_RTM_OPT
674 if (UseRTMForStackLocks && use_rtm) {
675 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
676 Label L_regular_unlock;
677 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
678 andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits
679 cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked
680 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
681 xend(); // otherwise end...
682 jmp(DONE_LABEL); // ... and we're done
683 bind(L_regular_unlock);
684 }
685 #endif
686
687 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
688 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
689 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
690 testptr(tmpReg, markWord::monitor_value); // Inflated?
691 jccb (Assembler::zero, Stacked);
692
693 // It's inflated.
694 #if INCLUDE_RTM_OPT
695 if (use_rtm) {
696 Label L_regular_inflated_unlock;
697 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
698 movptr(boxReg, Address(tmpReg, owner_offset));
699 testptr(boxReg, boxReg);
700 jccb(Assembler::notZero, L_regular_inflated_unlock);
701 xend();
702 jmpb(DONE_LABEL);
703 bind(L_regular_inflated_unlock);
704 }
705 #endif
706
707 // Despite our balanced locking property we still check that m->_owner == Self
708 // as java routines or native JNI code called by this thread might
709 // have released the lock.
710 // Refer to the comments in synchronizer.cpp for how we might encode extra
711 // state in _succ so we can avoid fetching EntryList|cxq.
712 //
713 // If there's no contention try a 1-0 exit. That is, exit without
714 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
715 // we detect and recover from the race that the 1-0 exit admits.
716 //
717 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
718 // before it STs null into _owner, releasing the lock. Updates
719 // to data protected by the critical section must be visible before
720 // we drop the lock (and thus before any other thread could acquire
721 // the lock and observe the fields protected by the lock).
722 // IA32's memory-model is SPO, so STs are ordered with respect to
723 // each other and there's no need for an explicit barrier (fence).
724 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
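  // As a C-like sketch of the intent (not the exact emitted sequence):
  //
  //   if (m->_recursions != 0) { ... }                    // 64-bit exits recursively in-line;
  //                                                       // 32-bit defers to the slow path
  //   else if (m->_EntryList == NULL && m->_cxq == NULL) {
  //     m->_owner = NULL;                                 // 1-0 exit: a plain store suffices on x86
  //     /* success */
  //   } else {
  //     /* contended: consult _succ, possibly re-acquire via CAS, else slow path */
  //   }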
725 #ifndef _LP64
726 get_thread (boxReg);
727
728 // Note that we could employ various encoding schemes to reduce
729 // the number of loads below (currently 4) to just 2 or 3.
730 // Refer to the comments in synchronizer.cpp.
731 // In practice the chain of fetches doesn't seem to impact performance, however.
732 xorptr(boxReg, boxReg);
733 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
734 jccb (Assembler::notZero, DONE_LABEL);
735 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
736 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
737 jccb (Assembler::notZero, CheckSucc);
738 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
739 jmpb (DONE_LABEL);
740
741 bind (Stacked);
742 // It's not inflated and it's not recursively stack-locked and it's not biased.
743 // It must be stack-locked.
744 // Try to reset the header to displaced header.
745 // The "box" value on the stack is stable, so we can reload
746 // and be assured we observe the same value as above.
747 movptr(tmpReg, Address(boxReg, 0));
748 lock();
749 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
750   // Intentional fall-thru into DONE_LABEL
751
752 // DONE_LABEL is a hot target - we'd really like to place it at the
753 // start of cache line by padding with NOPs.
754 // See the AMD and Intel software optimization manuals for the
755 // most efficient "long" NOP encodings.
756 // Unfortunately none of our alignment mechanisms suffice.
757 bind (CheckSucc);
758 #else // _LP64
759 // It's inflated
760 Label LNotRecursive, LSuccess, LGoSlowPath;
761
762 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
763 jccb(Assembler::equal, LNotRecursive);
764
765 // Recursive inflated unlock
766 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
767 jmpb(LSuccess);
768
769 bind(LNotRecursive);
813 // length while by virtue of passing control into the slow path.
814
815 // box is really RAX -- the following CMPXCHG depends on that binding
816 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
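  // In other words (a sketch): attempt CAS(&m->_owner, rax /* box: expected no-owner value */, Self).
  // If the CAS fails (ZF = 0) some other thread has already taken the monitor, so our exit has
  // effectively succeeded and we branch to LSuccess; if it succeeds we own the monitor again and
  // fall through to the slow path to release it properly.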
817 lock();
818 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
819 // There's no successor so we tried to regrab the lock.
820 // If that didn't work, then another thread grabbed the
821 // lock so we're done (and exit was a success).
822 jccb (Assembler::notEqual, LSuccess);
823 // Intentional fall-through into slow path
824
825 bind (LGoSlowPath);
826 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
827 jmpb (DONE_LABEL);
828
829 bind (LSuccess);
830 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
831 jmpb (DONE_LABEL);
832
833 bind (Stacked);
834 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
835 lock();
836 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
837
838 #endif
839 bind(DONE_LABEL);
840 }
841
842 //-------------------------------------------------------------------------------------------
843 // Generic instruction support for use in .ad files for C2 code generation
844
845 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
846 if (dst != src) {
847 movdqu(dst, src);
848 }
849 if (opcode == Op_AbsVD) {
850 andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
851 } else {
852     assert((opcode == Op_NegVD),"opcode should be Op_NegVD");
853 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
854 }
855 }
856
857 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
858 if (opcode == Op_AbsVD) {
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "oops/methodData.hpp"
29 #include "opto/c2_CodeStubs.hpp"
30 #include "opto/c2_MacroAssembler.hpp"
31 #include "opto/intrinsicnode.hpp"
32 #include "opto/opcodes.hpp"
33 #include "opto/output.hpp"
34 #include "opto/subnode.hpp"
35 #include "runtime/biasedLocking.hpp"
36 #include "runtime/objectMonitor.hpp"
37 #include "runtime/stubRoutines.hpp"
38
39 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
40 switch (vlen_in_bytes) {
41 case 4: // fall-through
42 case 8: // fall-through
43 case 16: return Assembler::AVX_128bit;
44 case 32: return Assembler::AVX_256bit;
45 case 64: return Assembler::AVX_512bit;
46
47 default: {
48 ShouldNotReachHere();
49 return Assembler::AVX_NoVec;
50 }
51 }
52 }
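// Illustrative use (hypothetical call site): a node operating on 32-byte vectors would do
//   int vlen_enc = vector_length_encoding(32);   // == Assembler::AVX_256bit
//   vpxor(dst, src1, src2, vlen_enc);
// so the same match rule can serve 128-, 256- and 512-bit operands.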
53
431 // But beware of excessive branch density on AMD Opterons.
432 //
433 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
434 // or failure of the fast path. If the fast path fails then we pass
435 // control to the slow path, typically in C. In fast_lock and
436 // fast_unlock we often branch to DONE_LABEL, just to find that C2
437 // will emit a conditional branch immediately after the node.
438 // So we have branches to branches and lots of ICC.ZF games.
439 // Instead, it might be better to have C2 pass a "FailureLabel"
440 // into fast_lock and fast_unlock. In the case of success, control
441 // will drop through the node. ICC.ZF is undefined at exit.
442 // In the case of failure, the node will branch directly to the
443 // FailureLabel
444
445
446 // obj: object to lock
447 // box: on-stack box address (displaced header location) - KILLED
448 // rax,: tmp -- KILLED
449 // scr: tmp -- KILLED
450 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
451 Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
452 BiasedLockingCounters* counters,
453 RTMLockingCounters* rtm_counters,
454 RTMLockingCounters* stack_rtm_counters,
455 Metadata* method_data,
456 bool use_rtm, bool profile_rtm) {
457 // Ensure the register assignments are disjoint
458 assert(tmpReg == rax, "");
459
460 if (use_rtm) {
461 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
462 } else {
463 assert(cx2Reg == noreg, "");
464 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
465 }
466
467 if (counters != NULL) {
468 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
469 }
470
471 // Possible cases that we'll encounter in fast_lock
498 // it's stack-locked, biased or neutral
499 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
500 // order to reduce the number of conditional branches in the most common cases.
501 // Beware -- there's a subtle invariant that fetch of the markword
502 // at [FETCH], below, will never observe a biased encoding (*101b).
503 // If this invariant is not held we risk exclusion (safety) failure.
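  // For reference, the markword low-bit patterns this triage relies on are (see markWord.hpp):
  //   [ptr            | 00]  stack-locked (displaced header on the owner's stack)
  //   [header         | 01]  unlocked / neutral
  //   [ObjectMonitor* | 10]  inflated (monitor_value, tested at [FETCH] below)
  //   [ptr            | 11]  marked, used by the GC
  //   [JavaThread*   | 101]  biased toward the given thread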
504 if (UseBiasedLocking && !UseOptoBiasInlining) {
505 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
506 }
507
508 #if INCLUDE_RTM_OPT
509 if (UseRTMForStackLocks && use_rtm) {
510 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
511 stack_rtm_counters, method_data, profile_rtm,
512 DONE_LABEL, IsInflated);
513 }
514 #endif // INCLUDE_RTM_OPT
515
516 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
517 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
518 jcc(Assembler::notZero, IsInflated);
519
520 if (LockingMode == LM_MONITOR) {
521 // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
522 testptr(objReg, objReg);
523 } else if (LockingMode == LM_LEGACY) {
524 // Attempt stack-locking ...
525 orptr (tmpReg, markWord::unlocked_value);
526 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
527 lock();
528 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
529 if (counters != NULL) {
530 cond_inc32(Assembler::equal,
531 ExternalAddress((address)counters->fast_path_entry_count_addr()));
532 }
533 jcc(Assembler::equal, DONE_LABEL); // Success
534
535 // Recursive locking.
536 // The object is stack-locked: markword contains stack pointer to BasicLock.
537 // Locked by current thread if difference with current SP is less than one page.
538 subptr(tmpReg, rsp);
539     // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
540 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
541 movptr(Address(boxReg, 0), tmpReg);
542 if (counters != NULL) {
543 cond_inc32(Assembler::equal,
544 ExternalAddress((address)counters->fast_path_entry_count_addr()));
545 }
546 } else {
547 assert(LockingMode == LM_LIGHTWEIGHT, "");
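    // A note on the call below (our reading of the lightweight-locking fast path): fast_lock_impl
    // is expected to CAS the markword from unlocked to locked and push objReg onto the current
    // thread's lock-stack, branching to DONE_LABEL so the slow path is taken if either step fails.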
548 fast_lock_impl(objReg, tmpReg, thread, scrReg, DONE_LABEL);
549 xorl(tmpReg, tmpReg); // Set ZF=1 to indicate success
550 }
551 jmp(DONE_LABEL);
552
553 bind(IsInflated);
554 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
555
556 #if INCLUDE_RTM_OPT
557 // Use the same RTM locking code in 32- and 64-bit VM.
558 if (use_rtm) {
559 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
560 rtm_counters, method_data, profile_rtm, DONE_LABEL);
561 } else {
562 #endif // INCLUDE_RTM_OPT
563
564 #ifndef _LP64
565 // The object is inflated.
566
567 // boxReg refers to the on-stack BasicLock in the current frame.
568 // We'd like to write:
569 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
678 // Critically, the biased locking test must have precedence over
679 // and appear before the (box->dhw == 0) recursive stack-lock test.
680 if (UseBiasedLocking && !UseOptoBiasInlining) {
681 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
682 }
683
684 #if INCLUDE_RTM_OPT
685 if (UseRTMForStackLocks && use_rtm) {
686 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
687 Label L_regular_unlock;
688 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
689 andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits
690 cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked
691 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
692 xend(); // otherwise end...
693 jmp(DONE_LABEL); // ... and we're done
694 bind(L_regular_unlock);
695 }
696 #endif
697
698 if (LockingMode == LM_LEGACY) {
699 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
700 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
701 }
702 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
703 if (LockingMode != LM_MONITOR) {
704 testptr(tmpReg, markWord::monitor_value); // Inflated?
705 jcc(Assembler::zero, Stacked);
706 }
707
708 // It's inflated.
709 if (LockingMode == LM_LIGHTWEIGHT) {
710 // If the owner is ANONYMOUS, we need to fix it.
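    // (Background, briefly: with lightweight locking a monitor may be inflated while the lock is
    // still held via the owner's lock-stack; until the true owner is recorded the monitor carries
    // the ANONYMOUS_OWNER sentinel, which the stub below is expected to replace with this thread
    // before the normal unlock sequence continues.)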
711 testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) ObjectMonitor::ANONYMOUS_OWNER);
712 #ifdef _LP64
713 if (!Compile::current()->output()->in_scratch_emit_size()) {
714 C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg, boxReg);
715 Compile::current()->output()->add_stub(stub);
716 jcc(Assembler::notEqual, stub->entry());
717 bind(stub->continuation());
718 } else
719 #endif
720 {
721 // We can't easily implement this optimization on 32 bit because we don't have a thread register.
722 // Call the slow-path instead.
723 jcc(Assembler::notEqual, DONE_LABEL);
724 }
725 }
726
727 #if INCLUDE_RTM_OPT
728 if (use_rtm) {
729 Label L_regular_inflated_unlock;
730 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
731 movptr(boxReg, Address(tmpReg, owner_offset));
732 testptr(boxReg, boxReg);
733 jccb(Assembler::notZero, L_regular_inflated_unlock);
734 xend();
735 jmp(DONE_LABEL);
736 bind(L_regular_inflated_unlock);
737 }
738 #endif
739
740 // Despite our balanced locking property we still check that m->_owner == Self
741 // as java routines or native JNI code called by this thread might
742 // have released the lock.
743 // Refer to the comments in synchronizer.cpp for how we might encode extra
744 // state in _succ so we can avoid fetching EntryList|cxq.
745 //
746 // If there's no contention try a 1-0 exit. That is, exit without
747 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
748 // we detect and recover from the race that the 1-0 exit admits.
749 //
750 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
751 // before it STs null into _owner, releasing the lock. Updates
752 // to data protected by the critical section must be visible before
753 // we drop the lock (and thus before any other thread could acquire
754 // the lock and observe the fields protected by the lock).
755 // IA32's memory-model is SPO, so STs are ordered with respect to
756 // each other and there's no need for an explicit barrier (fence).
757 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
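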
758 #ifndef _LP64
759 get_thread (boxReg);
760
761 // Note that we could employ various encoding schemes to reduce
762 // the number of loads below (currently 4) to just 2 or 3.
763 // Refer to the comments in synchronizer.cpp.
764 // In practice the chain of fetches doesn't seem to impact performance, however.
765 xorptr(boxReg, boxReg);
766 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
767 jccb (Assembler::notZero, DONE_LABEL);
768 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
769 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
770 jccb (Assembler::notZero, DONE_LABEL);
771 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
772 jmpb (DONE_LABEL);
773
774   // Intentional fall-thru into DONE_LABEL
775
776 // DONE_LABEL is a hot target - we'd really like to place it at the
777 // start of cache line by padding with NOPs.
778 // See the AMD and Intel software optimization manuals for the
779 // most efficient "long" NOP encodings.
780 // Unfortunately none of our alignment mechanisms suffice.
781 bind (CheckSucc);
782 #else // _LP64
783 // It's inflated
784 Label LNotRecursive, LSuccess, LGoSlowPath;
785
786 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
787 jccb(Assembler::equal, LNotRecursive);
788
789 // Recursive inflated unlock
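  // i.e. (sketch): m->_recursions--; goto LSuccess;   // _owner is untouched; ZF = 1 is set at LSuccess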
790 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
791 jmpb(LSuccess);
792
793 bind(LNotRecursive);
837 // length while by virtue of passing control into the slow path.
838
839 // box is really RAX -- the following CMPXCHG depends on that binding
840 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
841 lock();
842 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
843 // There's no successor so we tried to regrab the lock.
844 // If that didn't work, then another thread grabbed the
845 // lock so we're done (and exit was a success).
846 jccb (Assembler::notEqual, LSuccess);
847 // Intentional fall-through into slow path
848
849 bind (LGoSlowPath);
850 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
851 jmpb (DONE_LABEL);
852
853 bind (LSuccess);
854 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
855 jmpb (DONE_LABEL);
856
857 #endif
858 if (LockingMode != LM_MONITOR) {
859 bind (Stacked);
860 if (LockingMode == LM_LIGHTWEIGHT) {
861 mov(boxReg, tmpReg);
862 fast_unlock_impl(objReg, boxReg, tmpReg, DONE_LABEL);
863 xorl(tmpReg, tmpReg);
864 } else if (LockingMode == LM_LEGACY) {
865 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
866 lock();
867 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
868 }
869 }
870 bind(DONE_LABEL);
871 }
872
873 //-------------------------------------------------------------------------------------------
874 // Generic instruction support for use in .ad files for C2 code generation
875
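// The vabsnegd helpers below use the usual IEEE-754 sign-bit tricks; as a sketch of the
// constants involved (see the stub definitions for the authoritative tables):
//   abs(x): bits(x) & 0x7FFFFFFFFFFFFFFF   // vector_double_sign_mask clears the sign bit
//   -x    : bits(x) ^ 0x8000000000000000   // vector_double_sign_flip toggles the sign bit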
876 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
877 if (dst != src) {
878 movdqu(dst, src);
879 }
880 if (opcode == Op_AbsVD) {
881 andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
882 } else {
883     assert((opcode == Op_NegVD),"opcode should be Op_NegVD");
884 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
885 }
886 }
887
888 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
889 if (opcode == Op_AbsVD) {