9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "oops/methodData.hpp"
29 #include "opto/c2_MacroAssembler.hpp"
30 #include "opto/intrinsicnode.hpp"
31 #include "opto/opcodes.hpp"
32 #include "opto/subnode.hpp"
33 #include "runtime/biasedLocking.hpp"
34 #include "runtime/objectMonitor.hpp"
35 #include "runtime/stubRoutines.hpp"
36
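// Maps a vector length in bytes to the AVX vector-length encoding used when emitting
// vector instructions; 4- and 8-byte vectors are emitted with the 128-bit encoding.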
37 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
38 switch (vlen_in_bytes) {
39 case 4: // fall-through
40 case 8: // fall-through
41 case 16: return Assembler::AVX_128bit;
42 case 32: return Assembler::AVX_256bit;
43 case 64: return Assembler::AVX_512bit;
44
45 default: {
46 ShouldNotReachHere();
47 return Assembler::AVX_NoVec;
48 }
49 }
50 }
51
429 // But beware of excessive branch density on AMD Opterons.
430 //
431 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
432 // or failure of the fast path. If the fast path fails then we pass
433 // control to the slow path, typically in C. In fast_lock and
434 // fast_unlock we often branch to DONE_LABEL, just to find that C2
435 // will emit a conditional branch immediately after the node.
436 // So we have branches to branches and lots of ICC.ZF games.
437 // Instead, it might be better to have C2 pass a "FailureLabel"
438 // into fast_lock and fast_unlock. In the case of success, control
439 // will drop through the node. ICC.ZF is undefined at exit.
440 // In the case of failure, the node will branch directly to the
441 // FailureLabel
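// (Illustrative sketch only, not the exact code we emit: with the current scheme the
//  sequence around the node is roughly
//      fast_lock(...)      // leaves ZF = success/failure
//      jne   slow_path     // conditional branch emitted by C2 on that same ZF
//  so a jmp to DONE_LABEL inside fast_lock lands on yet another branch.)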
442
443
444 // obj: object to lock
445 // box: on-stack box address (displaced header location) - KILLED
446 // rax: tmp -- KILLED
447 // scr: tmp -- KILLED
448 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
449 Register scrReg, Register cx1Reg, Register cx2Reg,
450 BiasedLockingCounters* counters,
451 RTMLockingCounters* rtm_counters,
452 RTMLockingCounters* stack_rtm_counters,
453 Metadata* method_data,
454 bool use_rtm, bool profile_rtm) {
455 // Ensure the register assignments are disjoint
456 assert(tmpReg == rax, "");
457
458 if (use_rtm) {
459 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
460 } else {
461 assert(cx2Reg == noreg, "");
462 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
463 }
464
465 if (counters != NULL) {
466 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
467 }
468
469 // Possible cases that we'll encounter in fast_lock
498 // order to reduce the number of conditional branches in the most common cases.
499 // Beware -- there's a subtle invariant that fetch of the markword
500 // at [FETCH], below, will never observe a biased encoding (*101b).
501 // If this invariant is not held we risk exclusion (safety) failure.
502 if (UseBiasedLocking && !UseOptoBiasInlining) {
503 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
504 }
505
506 #if INCLUDE_RTM_OPT
507 if (UseRTMForStackLocks && use_rtm) {
508 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
509 stack_rtm_counters, method_data, profile_rtm,
510 DONE_LABEL, IsInflated);
511 }
512 #endif // INCLUDE_RTM_OPT
513
514 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
515 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
516 jccb(Assembler::notZero, IsInflated);
517
518 // Attempt stack-locking ...
519 orptr (tmpReg, markWord::unlocked_value);
520 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
521 lock();
522 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
523 if (counters != NULL) {
524 cond_inc32(Assembler::equal,
525 ExternalAddress((address)counters->fast_path_entry_count_addr()));
526 }
527 jcc(Assembler::equal, DONE_LABEL); // Success
528
529 // Recursive locking.
530 // The object is stack-locked: markword contains stack pointer to BasicLock.
531 // Locked by current thread if difference with current SP is less than one page.
532 subptr(tmpReg, rsp);
533 // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
534 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
535 movptr(Address(boxReg, 0), tmpReg);
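// On success (ZF == 1) tmpReg is zero, so a 0 displaced header is stored into the box;
// fast_unlock recognizes a 0 displaced header as a recursive stack-lock.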
536 if (counters != NULL) {
537 cond_inc32(Assembler::equal,
538 ExternalAddress((address)counters->fast_path_entry_count_addr()));
539 }
540 jmp(DONE_LABEL);
541
542 bind(IsInflated);
543 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
544
545 #if INCLUDE_RTM_OPT
546 // Use the same RTM locking code in 32- and 64-bit VM.
547 if (use_rtm) {
548 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
549 rtm_counters, method_data, profile_rtm, DONE_LABEL);
550 } else {
551 #endif // INCLUDE_RTM_OPT
552
553 #ifndef _LP64
554 // The object is inflated.
555
556 // boxReg refers to the on-stack BasicLock in the current frame.
557 // We'd like to write:
558 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
559 // This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
560 // additional latency as we have another ST in the store buffer that must drain.
561
562 // avoid ST-before-CAS
563 // register juggle because we need tmpReg for cmpxchgptr below
564 movptr(scrReg, boxReg);
565 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
566
567 // Optimistic form: consider XORL tmpReg,tmpReg
568 movptr(tmpReg, NULL_WORD);
569
570 // Appears unlocked - try to swing _owner from null to non-null.
571 // Ideally, I'd manifest "Self" with get_thread and then attempt
572 // to CAS the register containing Self into m->Owner.
573 // But we don't have enough registers, so instead we can either try to CAS
574 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
575 // we later store "Self" into m->Owner. Transiently storing a stack address
576 // (rsp or the address of the box) into m->owner is harmless.
577 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
578 lock();
579 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
580 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
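// movptr does not modify the flags, so the ZF produced by the CAS above is still
// valid for the branch below.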
581 // If we weren't able to swing _owner from NULL to the BasicLock
582 // then take the slow path.
583 jccb (Assembler::notZero, DONE_LABEL);
584 // update _owner from BasicLock to thread
585 get_thread (scrReg); // beware: clobbers ICCs
586 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
587 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
588
589 // If the CAS fails we can either retry or pass control to the slow path.
590 // We use the latter tactic.
591 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
592 // If the CAS was successful ...
593 // Self has acquired the lock
594 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
595 // Intentional fall-through into DONE_LABEL ...
596 #else // _LP64
597 // It's inflated and we use scrReg for ObjectMonitor* in this section.
598 movq(scrReg, tmpReg);
599 xorq(tmpReg, tmpReg);
600 lock();
601 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
602 // Unconditionally set box->_displaced_header = markWord::unused_mark().
603 // Without the cast to int32_t this style of movptr will destroy r10, which typically holds obj.
604 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
605 // Propagate ICC.ZF from CAS above into DONE_LABEL.
606 jcc(Assembler::equal, DONE_LABEL); // CAS above succeeded; propagate ZF = 1 (success)
607
608 cmpptr(r15_thread, rax); // Check if we are already the owner (recursive lock)
667 // Critically, the biased locking test must have precedence over
668 // and appear before the (box->dhw == 0) recursive stack-lock test.
669 if (UseBiasedLocking && !UseOptoBiasInlining) {
670 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
671 }
672
673 #if INCLUDE_RTM_OPT
674 if (UseRTMForStackLocks && use_rtm) {
675 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
676 Label L_regular_unlock;
677 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
678 andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits
679 cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked
680 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
681 xend(); // otherwise end...
682 jmp(DONE_LABEL); // ... and we're done
683 bind(L_regular_unlock);
684 }
685 #endif
686
687 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
688 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
689 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
690 testptr(tmpReg, markWord::monitor_value); // Inflated?
691 jccb (Assembler::zero, Stacked);
692
693 // It's inflated.
694 #if INCLUDE_RTM_OPT
695 if (use_rtm) {
696 Label L_regular_inflated_unlock;
697 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
698 movptr(boxReg, Address(tmpReg, owner_offset));
699 testptr(boxReg, boxReg);
700 jccb(Assembler::notZero, L_regular_inflated_unlock);
701 xend();
702 jmpb(DONE_LABEL);
703 bind(L_regular_inflated_unlock);
704 }
705 #endif
706
707 // Despite our balanced locking property we still check that m->_owner == Self
708 // as java routines or native JNI code called by this thread might
709 // have released the lock.
710 // Refer to the comments in synchronizer.cpp for how we might encode extra
711 // state in _succ so we can avoid fetching EntryList|cxq.
722 // IA32's memory-model is SPO, so STs are ordered with respect to
723 // each other and there's no need for an explicit barrier (fence).
724 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
725 #ifndef _LP64
726 get_thread (boxReg);
727
728 // Note that we could employ various encoding schemes to reduce
729 // the number of loads below (currently 4) to just 2 or 3.
730 // Refer to the comments in synchronizer.cpp.
731 // In practice the chain of fetches doesn't seem to impact performance, however.
732 xorptr(boxReg, boxReg);
733 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
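// boxReg now holds _recursions; a non-zero count means a recursive inflated lock,
// so we exit with ZF == 0 and let the slow path handle it.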
734 jccb (Assembler::notZero, DONE_LABEL);
735 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
736 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
737 jccb (Assembler::notZero, CheckSucc);
738 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
739 jmpb (DONE_LABEL);
740
741 bind (Stacked);
742 // It's not inflated and it's not recursively stack-locked and it's not biased.
743 // It must be stack-locked.
744 // Try to reset the header to displaced header.
745 // The "box" value on the stack is stable, so we can reload
746 // and be assured we observe the same value as above.
747 movptr(tmpReg, Address(boxReg, 0));
748 lock();
749 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
750 // Intentional fall-through into DONE_LABEL
751
752 // DONE_LABEL is a hot target - we'd really like to place it at the
753 // start of a cache line by padding with NOPs.
754 // See the AMD and Intel software optimization manuals for the
755 // most efficient "long" NOP encodings.
756 // Unfortunately none of our alignment mechanisms suffice.
757 bind (CheckSucc);
758 #else // _LP64
759 // It's inflated
760 Label LNotRecursive, LSuccess, LGoSlowPath;
761
762 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
763 jccb(Assembler::equal, LNotRecursive);
764
765 // Recursive inflated unlock
766 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
767 jmpb(LSuccess);
768
769 bind(LNotRecursive);
814
815 // box is really RAX -- the following CMPXCHG depends on that binding
816 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
817 lock();
818 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
819 // There's no successor so we tried to regrab the lock.
820 // If that didn't work, then another thread grabbed the
821 // lock so we're done (and exit was a success).
822 jccb (Assembler::notEqual, LSuccess);
823 // Intentional fall-through into slow path
824
825 bind (LGoSlowPath);
826 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
827 jmpb (DONE_LABEL);
828
829 bind (LSuccess);
830 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
831 jmpb (DONE_LABEL);
832
833 bind (Stacked);
834 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
835 lock();
836 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
837
838 #endif
839 bind(DONE_LABEL);
840 }
841
842 //-------------------------------------------------------------------------------------------
843 // Generic instructions support for use in .ad files C2 code generation
844
845 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
846 if (dst != src) {
847 movdqu(dst, src);
848 }
849 if (opcode == Op_AbsVD) {
850 andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
851 } else {
852 assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
853 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
854 }
855 }
856
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "oops/methodData.hpp"
29 #include "opto/c2_CodeStubs.hpp"
30 #include "opto/c2_MacroAssembler.hpp"
31 #include "opto/intrinsicnode.hpp"
32 #include "opto/opcodes.hpp"
33 #include "opto/output.hpp"
34 #include "opto/subnode.hpp"
35 #include "runtime/biasedLocking.hpp"
36 #include "runtime/objectMonitor.hpp"
37 #include "runtime/stubRoutines.hpp"
38
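// Maps a vector length in bytes to the AVX vector-length encoding used when emitting
// vector instructions; 4- and 8-byte vectors are emitted with the 128-bit encoding.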
39 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
40 switch (vlen_in_bytes) {
41 case 4: // fall-through
42 case 8: // fall-through
43 case 16: return Assembler::AVX_128bit;
44 case 32: return Assembler::AVX_256bit;
45 case 64: return Assembler::AVX_512bit;
46
47 default: {
48 ShouldNotReachHere();
49 return Assembler::AVX_NoVec;
50 }
51 }
52 }
53
431 // But beware of excessive branch density on AMD Opterons.
432 //
433 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
434 // or failure of the fast path. If the fast path fails then we pass
435 // control to the slow path, typically in C. In fast_lock and
436 // fast_unlock we often branch to DONE_LABEL, just to find that C2
437 // will emit a conditional branch immediately after the node.
438 // So we have branches to branches and lots of ICC.ZF games.
439 // Instead, it might be better to have C2 pass a "FailureLabel"
440 // into fast_lock and fast_unlock. In the case of success, control
441 // will drop through the node. ICC.ZF is undefined at exit.
442 // In the case of failure, the node will branch directly to the
443 // FailureLabel
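// (Illustrative sketch only, not the exact code we emit: with the current scheme the
//  sequence around the node is roughly
//      fast_lock(...)      // leaves ZF = success/failure
//      jne   slow_path     // conditional branch emitted by C2 on that same ZF
//  so a jmp to DONE_LABEL inside fast_lock lands on yet another branch.)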
444
445
446 // obj: object to lock
447 // box: on-stack box address (displaced header location) - KILLED
448 // rax: tmp -- KILLED
449 // scr: tmp -- KILLED
450 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
451 Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
452 BiasedLockingCounters* counters,
453 RTMLockingCounters* rtm_counters,
454 RTMLockingCounters* stack_rtm_counters,
455 Metadata* method_data,
456 bool use_rtm, bool profile_rtm) {
457 // Ensure the register assignments are disjoint
458 assert(tmpReg == rax, "");
459
460 if (use_rtm) {
461 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
462 } else {
463 assert(cx2Reg == noreg, "");
464 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
465 }
466
467 if (counters != NULL) {
468 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
469 }
470
471 // Possible cases that we'll encounter in fast_lock
500 // order to reduce the number of conditional branches in the most common cases.
501 // Beware -- there's a subtle invariant that fetch of the markword
502 // at [FETCH], below, will never observe a biased encoding (*101b).
503 // If this invariant is not held we risk exclusion (safety) failure.
504 if (UseBiasedLocking && !UseOptoBiasInlining) {
505 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
506 }
507
508 #if INCLUDE_RTM_OPT
509 if (UseRTMForStackLocks && use_rtm) {
510 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
511 stack_rtm_counters, method_data, profile_rtm,
512 DONE_LABEL, IsInflated);
513 }
514 #endif // INCLUDE_RTM_OPT
515
516 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
517 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
518 jccb(Assembler::notZero, IsInflated);
519
520 if (UseFastLocking) {
521 #ifdef _LP64
522 fast_lock_impl(objReg, tmpReg, thread, scrReg, DONE_LABEL, false);
523 xorl(tmpReg, tmpReg); // Set ZF=1 to indicate success
524 #else
525 // We cannot emit the lock-stack check in verified_entry() because we don't have enough
526 // registers (for the thread ptr). Therefore we have to emit the lock-stack check in
527 // fast_lock_impl(). However, that check can take a slow path with ZF=1, so
528 // we need to handle it specially and force ZF=0 before taking the actual slow path.
529 Label slow;
530 fast_lock_impl(objReg, tmpReg, thread, scrReg, slow);
531 xorl(tmpReg, tmpReg);
532 jmp(DONE_LABEL);
533 bind(slow);
534 testptr(objReg, objReg); // ZF=0 to indicate failure
535 #endif
536 } else {
537 // Attempt stack-locking ...
538 orptr (tmpReg, markWord::unlocked_value);
539 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
540 lock();
541 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
542 if (counters != NULL) {
543 cond_inc32(Assembler::equal,
544 ExternalAddress((address)counters->fast_path_entry_count_addr()));
545 }
546 jcc(Assembler::equal, DONE_LABEL); // Success
547
548 // Recursive locking.
549 // The object is stack-locked: markword contains stack pointer to BasicLock.
550 // Locked by current thread if difference with current SP is less than one page.
551 subptr(tmpReg, rsp);
552 // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
553 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
554 movptr(Address(boxReg, 0), tmpReg);
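// On success (ZF == 1) tmpReg is zero, so a 0 displaced header is stored into the box;
// fast_unlock recognizes a 0 displaced header as a recursive stack-lock.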
555 if (counters != NULL) {
556 cond_inc32(Assembler::equal,
557 ExternalAddress((address)counters->fast_path_entry_count_addr()));
558 }
559 }
560 jmp(DONE_LABEL);
561
562 bind(IsInflated);
563 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
564
565 #if INCLUDE_RTM_OPT
566 // Use the same RTM locking code in 32- and 64-bit VM.
567 if (use_rtm) {
568 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
569 rtm_counters, method_data, profile_rtm, DONE_LABEL);
570 } else {
571 #endif // INCLUDE_RTM_OPT
572
573 #ifndef _LP64
574 // The object is inflated.
575
576 // boxReg refers to the on-stack BasicLock in the current frame.
577 // We'd like to write:
578 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
579 // This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
580 // additional latency as we have another ST in the store buffer that must drain.
581
582 // avoid ST-before-CAS
583 // register juggle because we need tmpReg for cmpxchgptr below
584 movptr(scrReg, boxReg);
585 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
586
587 // Optimistic form: consider XORL tmpReg,tmpReg
588 movptr(tmpReg, NULL_WORD);
589
590 // Appears unlocked - try to swing _owner from null to non-null.
591 // Ideally, I'd manifest "Self" with get_thread and then attempt
592 // to CAS the register containing Self into m->Owner.
593 // But we don't have enough registers, so instead we can either try to CAS
594 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
595 // we later store "Self" into m->Owner. Transiently storing a stack address
596 // (rsp or the address of the box) into m->owner is harmless.
597 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
598 lock();
599 cmpxchgptr(thread, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
600 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
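// movptr does not modify the flags, so the ZF produced by the CAS above survives
// into the fall-through to DONE_LABEL below.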
601 // If the CAS fails we can either retry or pass control to the slow path.
602 // We use the latter tactic.
603 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
604 // If the CAS was successful ...
605 // Self has acquired the lock
606 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
607 // Intentional fall-through into DONE_LABEL ...
608 #else // _LP64
609 // It's inflated and we use scrReg for ObjectMonitor* in this section.
610 movq(scrReg, tmpReg);
611 xorq(tmpReg, tmpReg);
612 lock();
613 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
614 // Unconditionally set box->_displaced_header = markWord::unused_mark().
615 // Without the cast to int32_t this style of movptr will destroy r10, which typically holds obj.
616 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
617 // Propagate ICC.ZF from CAS above into DONE_LABEL.
618 jcc(Assembler::equal, DONE_LABEL); // CAS above succeeded; propagate ZF = 1 (success)
619
620 cmpptr(r15_thread, rax); // Check if we are already the owner (recursive lock)
679 // Critically, the biased locking test must have precedence over
680 // and appear before the (box->dhw == 0) recursive stack-lock test.
681 if (UseBiasedLocking && !UseOptoBiasInlining) {
682 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
683 }
684
685 #if INCLUDE_RTM_OPT
686 if (UseRTMForStackLocks && use_rtm) {
687 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
688 Label L_regular_unlock;
689 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
690 andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits
691 cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked
692 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
693 xend(); // otherwise end...
694 jmp(DONE_LABEL); // ... and we're done
695 bind(L_regular_unlock);
696 }
697 #endif
698
699 if (!UseFastLocking) {
700 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
701 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
702 }
703 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
704 testptr(tmpReg, markWord::monitor_value); // Inflated?
705 jcc(Assembler::zero, Stacked);
706
707 if (UseFastLocking) {
708 // If the owner is ANONYMOUS, we need to fix it.
709 testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int) (intptr_t) ANONYMOUS_OWNER);
710 #ifdef _LP64
711 C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg);
712 Compile::current()->output()->add_stub(stub);
713 jcc(Assembler::notEqual, stub->entry());
714 bind(stub->continuation());
715 #else
716 // We can't easily implement this optimization on 32 bit because we don't have a thread register.
717 // Call the slow-path instead.
718 jcc(Assembler::notEqual, DONE_LABEL);
719 #endif
720 }
721
722 // It's inflated.
723 #if INCLUDE_RTM_OPT
724 if (use_rtm) {
725 Label L_regular_inflated_unlock;
726 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
727 movptr(boxReg, Address(tmpReg, owner_offset));
728 testptr(boxReg, boxReg);
729 jccb(Assembler::notZero, L_regular_inflated_unlock);
730 xend();
731 jmpb(DONE_LABEL);
732 bind(L_regular_inflated_unlock);
733 }
734 #endif
735
736 // Despite our balanced locking property we still check that m->_owner == Self
737 // as java routines or native JNI code called by this thread might
738 // have released the lock.
739 // Refer to the comments in synchronizer.cpp for how we might encode extra
740 // state in _succ so we can avoid fetching EntryList|cxq.
751 // IA32's memory-model is SPO, so STs are ordered with respect to
752 // each other and there's no need for an explicit barrier (fence).
753 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
754 #ifndef _LP64
755 get_thread (boxReg);
756
757 // Note that we could employ various encoding schemes to reduce
758 // the number of loads below (currently 4) to just 2 or 3.
759 // Refer to the comments in synchronizer.cpp.
760 // In practice the chain of fetches doesn't seem to impact performance, however.
761 xorptr(boxReg, boxReg);
762 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
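// boxReg now holds _recursions; a non-zero count means a recursive inflated lock,
// so we exit with ZF == 0 and let the slow path handle it.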
763 jccb (Assembler::notZero, DONE_LABEL);
764 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
765 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
766 jccb (Assembler::notZero, CheckSucc);
767 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
768 jmpb (DONE_LABEL);
769
770 bind (Stacked);
771 if (UseFastLocking) {
772 mov(boxReg, tmpReg);
773 fast_unlock_impl(objReg, boxReg, tmpReg, DONE_LABEL);
774 xorl(tmpReg, tmpReg);
775 } else {
776 // It's not inflated and it's not recursively stack-locked and it's not biased.
777 // It must be stack-locked.
778 // Try to reset the header to displaced header.
779 // The "box" value on the stack is stable, so we can reload
780 // and be assured we observe the same value as above.
781 movptr(tmpReg, Address(boxReg, 0));
782 lock();
783 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
784 }
785 // Intentional fall-through into DONE_LABEL
786
787 // DONE_LABEL is a hot target - we'd really like to place it at the
788 // start of a cache line by padding with NOPs.
789 // See the AMD and Intel software optimization manuals for the
790 // most efficient "long" NOP encodings.
791 // Unfortunately none of our alignment mechanisms suffice.
792 bind (CheckSucc);
793 #else // _LP64
794 // It's inflated
795 Label LNotRecursive, LSuccess, LGoSlowPath;
796
797 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
798 jccb(Assembler::equal, LNotRecursive);
799
800 // Recursive inflated unlock
801 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
802 jmpb(LSuccess);
803
804 bind(LNotRecursive);
849
850 // box is really RAX -- the following CMPXCHG depends on that binding
851 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
852 lock();
853 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
854 // There's no successor so we tried to regrab the lock.
855 // If that didn't work, then another thread grabbed the
856 // lock so we're done (and exit was a success).
857 jccb (Assembler::notEqual, LSuccess);
858 // Intentional fall-through into slow path
859
860 bind (LGoSlowPath);
861 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
862 jmpb (DONE_LABEL);
863
864 bind (LSuccess);
865 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
866 jmpb (DONE_LABEL);
867
868 bind (Stacked);
869
870 if (UseFastLocking) {
871 mov(boxReg, tmpReg);
872 fast_unlock_impl(objReg, boxReg, tmpReg, DONE_LABEL);
873 xorl(tmpReg, tmpReg);
874 } else {
875 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
876 lock();
877 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
878 }
879
880 #endif
881 bind(DONE_LABEL);
882 }
883
884 //-------------------------------------------------------------------------------------------
885 // Generic instructions support for use in .ad files C2 code generation
886
887 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
888 if (dst != src) {
889 movdqu(dst, src);
890 }
891 if (opcode == Op_AbsVD) {
892 andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
893 } else {
894 assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
895 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
896 }
897 }
898
|