28 #include "gc/shared/barrierSet.hpp"
29 #include "gc/shared/barrierSetAssembler.hpp"
30 #include "oops/methodData.hpp"
31 #include "opto/c2_MacroAssembler.hpp"
32 #include "opto/intrinsicnode.hpp"
33 #include "opto/output.hpp"
34 #include "opto/opcodes.hpp"
35 #include "opto/subnode.hpp"
36 #include "runtime/objectMonitor.hpp"
37 #include "runtime/stubRoutines.hpp"
38
39 #ifdef PRODUCT
40 #define BLOCK_COMMENT(str) /* nothing */
41 #define STOP(error) stop(error)
42 #else
43 #define BLOCK_COMMENT(str) block_comment(str)
44 #define STOP(error) block_comment(error); stop(error)
45 #endif
46
47 // C2 compiled method's prolog code.
48 void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
49
50 // WARNING: Initial instruction MUST be 5 bytes or longer so that
51 // NativeJump::patch_verified_entry will be able to patch out the entry
52 // code safely. The push to verify stack depth is ok at 5 bytes,
53 // the frame allocation can be either 3 or 6 bytes. So if we don't do
54 // stack bang then we must use the 6 byte frame allocation even if
55 // we have no frame. :-(
56 assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
57
58 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
59 // Remove word for return addr
60 framesize -= wordSize;
61 stack_bang_size -= wordSize;
62
63 // Calls to C2R adapters often do not accept exceptional returns.
64 // We require that their callers bang for them. But be careful, because
65 // some VM calls (such as call site linkage) can use several kilobytes of
66 // stack; the stack safety zone should account for that.
67 // See bugs 4446381, 4468289, 4497237.
68 if (stack_bang_size > 0) {
110 }
111 if (UseSSE >= 2 && VerifyFPU) {
112 verify_FPU(0, "FPU stack must be clean on entry");
113 }
114 #endif
115
116 #ifdef ASSERT
117 if (VerifyStackAtCalls) {
118 Label L;
119 push(rax);
120 mov(rax, rsp);
121 andptr(rax, StackAlignmentInBytes-1);
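// rax was pushed just above, so an aligned post-prolog SP is observed here as
// StackAlignmentInBytes - wordSize in the low bits.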
122 cmpptr(rax, StackAlignmentInBytes-wordSize);
123 pop(rax);
124 jcc(Assembler::equal, L);
125 STOP("Stack is not properly aligned!");
126 bind(L);
127 }
128 #endif
129
130 if (!is_stub) {
131 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
132 #ifdef _LP64
133 if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
134 // We put the non-hot code of the nmethod entry barrier out-of-line in a stub.
135 Label dummy_slow_path;
136 Label dummy_continuation;
137 Label* slow_path = &dummy_slow_path;
138 Label* continuation = &dummy_continuation;
139 if (!Compile::current()->output()->in_scratch_emit_size()) {
140 // Use real labels from actual stub when not emitting code for the purpose of measuring its size
141 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
142 Compile::current()->output()->add_stub(stub);
143 slow_path = &stub->entry();
144 continuation = &stub->continuation();
145 }
146 bs->nmethod_entry_barrier(this, slow_path, continuation);
147 }
148 #else
149 // Don't bother with out-of-line nmethod entry barrier stub for x86_32.
531 // But beware of excessive branch density on AMD Opterons.
532 //
533 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
534 // or failure of the fast path. If the fast path fails then we pass
535 // control to the slow path, typically in C. In fast_lock and
536 // fast_unlock we often branch to DONE_LABEL, just to find that C2
537 // will emit a conditional branch immediately after the node.
538 // So we have branches to branches and lots of ICC.ZF games.
539 // Instead, it might be better to have C2 pass a "FailureLabel"
540 // into fast_lock and fast_unlock. In the case of success, control
541 // will drop through the node. ICC.ZF is undefined at exit.
542 // In the case of failure, the node will branch directly to the
543 // FailureLabel
544
545
546 // obj: object to lock
547 // box: on-stack box address (displaced header location) - KILLED
548 // rax,: tmp -- KILLED
549 // scr: tmp -- KILLED
550 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
551 Register scrReg, Register cx1Reg, Register cx2Reg,
552 RTMLockingCounters* rtm_counters,
553 RTMLockingCounters* stack_rtm_counters,
554 Metadata* method_data,
555 bool use_rtm, bool profile_rtm) {
556 // Ensure the register assignments are disjoint
557 assert(tmpReg == rax, "");
558
559 if (use_rtm) {
560 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
561 } else {
562 assert(cx1Reg == noreg, "");
563 assert(cx2Reg == noreg, "");
564 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
565 }
566
567 // Possible cases that we'll encounter in fast_lock
568 // ------------------------------------------------
569 // * Inflated
570 // -- unlocked
571 // -- Locked
585 load_klass(tmpReg, objReg, scrReg);
586 movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
587 testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
588 jcc(Assembler::notZero, DONE_LABEL);
589 }
590
591 #if INCLUDE_RTM_OPT
592 if (UseRTMForStackLocks && use_rtm) {
593 assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive");
594 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
595 stack_rtm_counters, method_data, profile_rtm,
596 DONE_LABEL, IsInflated);
597 }
598 #endif // INCLUDE_RTM_OPT
599
600 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
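// Low two mark-word bits encode the lock state: 01 = neutral (unlocked),
// 00 = stack-locked (mark points at the on-stack BasicLock),
// 10 = inflated (mark is ObjectMonitor* | monitor_value).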
601 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
602 jccb(Assembler::notZero, IsInflated);
603
604 if (!UseHeavyMonitors) {
605 // Attempt stack-locking ...
606 orptr (tmpReg, markWord::unlocked_value);
607 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
608 lock();
609 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
610 jcc(Assembler::equal, COUNT); // Success
611
612 // Recursive locking.
613 // The object is stack-locked: markword contains stack pointer to BasicLock.
614 // Locked by current thread if difference with current SP is less than one page.
615 subptr(tmpReg, rsp);
616 // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
617 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
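// The zero result doubles as the recursive displaced header stored below;
// fast_unlock treats a zero displaced header as a recursive stack-lock exit.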
618 movptr(Address(boxReg, 0), tmpReg);
619 } else {
620 // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
621 testptr(objReg, objReg);
622 }
623 jmp(DONE_LABEL);
624
625 bind(IsInflated);
626 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
627
628 #if INCLUDE_RTM_OPT
629 // Use the same RTM locking code in 32- and 64-bit VM.
630 if (use_rtm) {
631 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
632 rtm_counters, method_data, profile_rtm, DONE_LABEL);
633 } else {
634 #endif // INCLUDE_RTM_OPT
635
636 #ifndef _LP64
637 // The object is inflated.
638
642 // This is convenient but results in an ST-before-CAS penalty. The following CAS suffers
643 // additional latency as we have another ST in the store buffer that must drain.
644
645 // avoid ST-before-CAS
646 // register juggle because we need tmpReg for cmpxchgptr below
647 movptr(scrReg, boxReg);
648 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
649
650 // Optimistic form: consider XORL tmpReg,tmpReg
651 movptr(tmpReg, NULL_WORD);
652
653 // Appears unlocked - try to swing _owner from null to non-null.
654 // Ideally, I'd manifest "Self" with get_thread and then attempt
655 // to CAS the register containing Self into m->Owner.
656 // But we don't have enough registers, so instead we can either try to CAS
657 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
658 // we later store "Self" into m->Owner. Transiently storing a stack address
659 // (rsp or the address of the box) into m->owner is harmless.
660 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
661 lock();
662 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
663 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
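// (Any non-zero value works here; it only has to keep fast_unlock from
// reading the box as a recursive stack-lock.)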
664 // If we weren't able to swing _owner from NULL to the BasicLock
665 // then take the slow path.
666 jccb (Assembler::notZero, NO_COUNT);
667 // update _owner from BasicLock to thread
668 get_thread (scrReg); // beware: clobbers ICCs
669 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
670 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
671
672 // If the CAS fails we can either retry or pass control to the slow path.
673 // We use the latter tactic.
674 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
675 // If the CAS was successful ...
676 // Self has acquired the lock
677 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
678 // Intentional fall-through into DONE_LABEL ...
679 #else // _LP64
680 // It's inflated and we use scrReg for ObjectMonitor* in this section.
681 movq(scrReg, tmpReg);
682 xorq(tmpReg, tmpReg);
683 lock();
684 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
685 // Unconditionally set box->_displaced_header = markWord::unused_mark().
686 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
687 movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
688 // Propagate ICC.ZF from CAS above into DONE_LABEL.
689 jccb(Assembler::equal, COUNT); // CAS above succeeded; propagate ZF = 1 (success)
690
756 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
757 assert(boxReg == rax, "");
758 assert_different_registers(objReg, boxReg, tmpReg);
759
760 Label DONE_LABEL, Stacked, COUNT, NO_COUNT;
761
762 #if INCLUDE_RTM_OPT
763 if (UseRTMForStackLocks && use_rtm) {
764 assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive");
765 Label L_regular_unlock;
766 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
767 andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits
768 cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked
769 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
770 xend(); // otherwise end...
771 jmp(DONE_LABEL); // ... and we're done
772 bind(L_regular_unlock);
773 }
774 #endif
775
776 if (!UseHeavyMonitors) {
777 cmpptr(Address(boxReg, 0), NULL_WORD); // Examine the displaced header
778 jcc (Assembler::zero, COUNT); // 0 indicates recursive stack-lock
779 }
780 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
781 if (!UseHeavyMonitors) {
782 testptr(tmpReg, markWord::monitor_value); // Inflated?
783 jccb (Assembler::zero, Stacked);
784 }
785
786 // It's inflated.
787 #if INCLUDE_RTM_OPT
788 if (use_rtm) {
789 Label L_regular_inflated_unlock;
790 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
791 movptr(boxReg, Address(tmpReg, owner_offset));
792 testptr(boxReg, boxReg);
793 jccb(Assembler::notZero, L_regular_inflated_unlock);
794 xend();
795 jmpb(DONE_LABEL);
796 bind(L_regular_inflated_unlock);
797 }
798 #endif
799
800 // Despite our balanced locking property we still check that m->_owner == Self
801 // as java routines or native JNI code called by this thread might
802 // have released the lock.
803 // Refer to the comments in synchronizer.cpp for how we might encode extra
804 // state in _succ so we can avoid fetching EntryList|cxq.
805 //
806 // If there's no contention try a 1-0 exit. That is, exit without
807 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
808 // we detect and recover from the race that the 1-0 exit admits.
809 //
810 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
811 // before it STs null into _owner, releasing the lock. Updates
812 // to data protected by the critical section must be visible before
813 // we drop the lock (and thus before any other thread could acquire
814 // the lock and observe the fields protected by the lock).
815 // IA32's memory-model is SPO, so STs are ordered with respect to
889 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
890 lock();
891 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
892 // There's no successor so we tried to regrab the lock.
893 // If that didn't work, then another thread grabbed the
894 // lock so we're done (and exit was a success).
895 jccb (Assembler::notEqual, LSuccess);
896 // Intentional fall-through into slow path
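// (The CAS above succeeded, i.e. we re-own the monitor, so we must enter
// the runtime so it can wake a successor.)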
897
898 bind (LGoSlowPath);
899 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
900 jmpb (DONE_LABEL);
901
902 bind (LSuccess);
903 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
904 jmpb (DONE_LABEL);
905
906 #endif
907 if (!UseHeavyMonitors) {
908 bind (Stacked);
909 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
910 lock();
911 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
912 // Intentional fall-thru into DONE_LABEL
913 }
914 bind(DONE_LABEL);
915
916 // ZFlag == 1 count in fast path
917 // ZFlag == 0 count in slow path
918 jccb(Assembler::notZero, NO_COUNT);
919
920 bind(COUNT);
921 // Count monitors in fast path
922 #ifndef _LP64
923 get_thread(tmpReg);
924 decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
925 #else // _LP64
926 decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
927 #endif
928
929 xorl(tmpReg, tmpReg); // Set ZF == 1
930
931 bind(NO_COUNT);
|
28 #include "gc/shared/barrierSet.hpp"
29 #include "gc/shared/barrierSetAssembler.hpp"
30 #include "oops/methodData.hpp"
31 #include "opto/c2_MacroAssembler.hpp"
32 #include "opto/intrinsicnode.hpp"
33 #include "opto/output.hpp"
34 #include "opto/opcodes.hpp"
35 #include "opto/subnode.hpp"
36 #include "runtime/objectMonitor.hpp"
37 #include "runtime/stubRoutines.hpp"
38
39 #ifdef PRODUCT
40 #define BLOCK_COMMENT(str) /* nothing */
41 #define STOP(error) stop(error)
42 #else
43 #define BLOCK_COMMENT(str) block_comment(str)
44 #define STOP(error) block_comment(error); stop(error)
45 #endif
46
47 // C2 compiled method's prolog code.
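// max_monitors is an upper bound on the number of monitors this method holds at
// any one time; it sizes the UseFastLocking lock-stack room check below.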
48 void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub, int max_monitors) {
49
50 // WARNING: Initial instruction MUST be 5 bytes or longer so that
51 // NativeJump::patch_verified_entry will be able to patch out the entry
52 // code safely. The push to verify stack depth is ok at 5 bytes,
53 // the frame allocation can be either 3 or 6 bytes. So if we don't do
54 // stack bang then we must use the 6 byte frame allocation even if
55 // we have no frame. :-(
56 assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
57
58 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
59 // Remove word for return addr
60 framesize -= wordSize;
61 stack_bang_size -= wordSize;
62
63 // Calls to C2R adapters often do not accept exceptional returns.
64 // We require that their callers bang for them. But be careful, because
65 // some VM calls (such as call site linkage) can use several kilobytes of
66 // stack; the stack safety zone should account for that.
67 // See bugs 4446381, 4468289, 4497237.
68 if (stack_bang_size > 0) {
110 }
111 if (UseSSE >= 2 && VerifyFPU) {
112 verify_FPU(0, "FPU stack must be clean on entry");
113 }
114 #endif
115
116 #ifdef ASSERT
117 if (VerifyStackAtCalls) {
118 Label L;
119 push(rax);
120 mov(rax, rsp);
121 andptr(rax, StackAlignmentInBytes-1);
122 cmpptr(rax, StackAlignmentInBytes-wordSize);
123 pop(rax);
124 jcc(Assembler::equal, L);
125 STOP("Stack is not properly aligned!");
126 bind(L);
127 }
128 #endif
129
130 #ifdef _LP64
131 if (UseFastLocking && max_monitors > 0) {
132 C2CheckLockStackStub* stub = new (Compile::current()->comp_arena()) C2CheckLockStackStub();
133 Compile::current()->output()->add_stub(stub);
134 assert(!is_stub, "only methods have monitors");
135 Register thread = r15_thread;
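// Check up front that the thread's lock stack has room for max_monitors more
// entries; if current + max_monitors * oopSize would reach the limit, take the
// out-of-line stub's slow path.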
136 movptr(rax, Address(thread, JavaThread::lock_stack_current_offset()));
137 addptr(rax, max_monitors * oopSize);
138 cmpptr(rax, Address(thread, JavaThread::lock_stack_limit_offset()));
139 jcc(Assembler::greaterEqual, stub->entry());
140 bind(stub->continuation());
141 }
142 #endif
143
144 if (!is_stub) {
145 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
146 #ifdef _LP64
147 if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
148 // We put the non-hot code of the nmethod entry barrier out-of-line in a stub.
149 Label dummy_slow_path;
150 Label dummy_continuation;
151 Label* slow_path = &dummy_slow_path;
152 Label* continuation = &dummy_continuation;
153 if (!Compile::current()->output()->in_scratch_emit_size()) {
154 // Use real labels from actual stub when not emitting code for the purpose of measuring its size
155 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
156 Compile::current()->output()->add_stub(stub);
157 slow_path = &stub->entry();
158 continuation = &stub->continuation();
159 }
160 bs->nmethod_entry_barrier(this, slow_path, continuation);
161 }
162 #else
163 // Don't bother with out-of-line nmethod entry barrier stub for x86_32.
545 // But beware of excessive branch density on AMD Opterons.
546 //
547 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
548 // or failure of the fast path. If the fast path fails then we pass
549 // control to the slow path, typically in C. In fast_lock and
550 // fast_unlock we often branch to DONE_LABEL, just to find that C2
551 // will emit a conditional branch immediately after the node.
552 // So we have branches to branches and lots of ICC.ZF games.
553 // Instead, it might be better to have C2 pass a "FailureLabel"
554 // into fast_lock and fast_unlock. In the case of success, control
555 // will drop through the node. ICC.ZF is undefined at exit.
556 // In the case of failure, the node will branch directly to the
557 // FailureLabel
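// For reference, the ZF-based contract at a use site looks roughly like this
// (a sketch, not the literal .ad expansion):
//   fast_lock(obj, box, ...)   // leaves success/failure in ZF
//   jne   slow_path            // ZF == 0 -> call the runtime monitorenter
//   ...                        // critical section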
558
559
560 // obj: object to lock
561 // box: on-stack box address (displaced header location) - KILLED
562 // rax,: tmp -- KILLED
563 // scr: tmp -- KILLED
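// thread: register holding the current JavaThread; used by the fast-locking
//         path and the 32-bit inflated path (r15_thread on x86_64)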
564 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
565 Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
566 RTMLockingCounters* rtm_counters,
567 RTMLockingCounters* stack_rtm_counters,
568 Metadata* method_data,
569 bool use_rtm, bool profile_rtm) {
570 // Ensure the register assignments are disjoint
571 assert(tmpReg == rax, "");
572
573 if (use_rtm) {
574 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
575 } else {
576 assert(cx1Reg == noreg, "");
577 assert(cx2Reg == noreg, "");
578 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
579 }
580
581 // Possible cases that we'll encounter in fast_lock
582 // ------------------------------------------------
583 // * Inflated
584 // -- unlocked
585 // -- Locked
599 load_klass(tmpReg, objReg, scrReg);
600 movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
601 testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
602 jcc(Assembler::notZero, DONE_LABEL);
603 }
604
605 #if INCLUDE_RTM_OPT
606 if (UseRTMForStackLocks && use_rtm) {
607 assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive");
608 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
609 stack_rtm_counters, method_data, profile_rtm,
610 DONE_LABEL, IsInflated);
611 }
612 #endif // INCLUDE_RTM_OPT
613
614 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
615 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
616 jccb(Assembler::notZero, IsInflated);
617
618 if (!UseHeavyMonitors) {
619 if (UseFastLocking) {
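// Fast-locking: CAS the mark from unlocked to locked and record the oop on the
// thread-local lock stack; no displaced header is written to the box
// (see fast_lock_impl).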
620 #ifdef _LP64
621 fast_lock_impl(objReg, tmpReg, thread, scrReg, NO_COUNT, false);
622 jmp(COUNT);
623 #else
624 // We cannot emit the lock-stack check in verified_entry() because we don't have a
625 // spare register for the thread pointer there, so we emit it in fast_lock_impl().
626 // However, that check can take its slow path with ZF=1, so we need to handle it
627 // specially and force ZF=0 before taking the actual slow path.
628 Label slow;
629 fast_lock_impl(objReg, tmpReg, thread, scrReg, slow);
630 jmp(COUNT);
631 bind(slow);
632 testptr(objReg, objReg); // ZF=0 to indicate failure
633 jmp(NO_COUNT);
634 #endif
635 } else {
636 // Attempt stack-locking ...
637 orptr (tmpReg, markWord::unlocked_value);
638 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
639 lock();
640 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
641 jcc(Assembler::equal, COUNT); // Success
642
643 // Recursive locking.
644 // The object is stack-locked: markword contains stack pointer to BasicLock.
645 // Locked by current thread if difference with current SP is less than one page.
646 subptr(tmpReg, rsp);
647 // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
648 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
649 movptr(Address(boxReg, 0), tmpReg);
650 }
651 } else {
652 // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
653 testptr(objReg, objReg);
654 }
655 jmp(DONE_LABEL);
656
657 bind(IsInflated);
658 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
659
660 #if INCLUDE_RTM_OPT
661 // Use the same RTM locking code in 32- and 64-bit VM.
662 if (use_rtm) {
663 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
664 rtm_counters, method_data, profile_rtm, DONE_LABEL);
665 } else {
666 #endif // INCLUDE_RTM_OPT
667
668 #ifndef _LP64
669 // The object is inflated.
670
674 // This is convenient but results in an ST-before-CAS penalty. The following CAS suffers
675 // additional latency as we have another ST in the store buffer that must drain.
676
677 // avoid ST-before-CAS
678 // register juggle because we need tmpReg for cmpxchgptr below
679 movptr(scrReg, boxReg);
680 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
681
682 // Optimistic form: consider XORL tmpReg,tmpReg
683 movptr(tmpReg, NULL_WORD);
684
685 // Appears unlocked - try to swing _owner from null to the current thread.
686 // The legacy code had no register available for the thread here, so it first
687 // CASed a stack address (rsp or the box, in scr) into &m->owner and only later
688 // stored the thread into m->Owner. Now that a thread register is passed in,
689 // we CAS the thread into &m->owner directly and let ZF from the CAS flow
690 // through to DONE_LABEL; the box is tagged with a non-zero displaced header
691 // right after the CAS (see the movptr below).
692 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
693 lock();
694 cmpxchgptr(thread, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
695 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
696
697 // If the CAS fails we can either retry or pass control to the slow path.
698 // We use the latter tactic.
699 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
700 // If the CAS was successful ...
701 // Self has acquired the lock
702 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
703 // Intentional fall-through into DONE_LABEL ...
704 #else // _LP64
705 // It's inflated and we use scrReg for ObjectMonitor* in this section.
706 movq(scrReg, tmpReg);
707 xorq(tmpReg, tmpReg);
708 lock();
709 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
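// tmpReg (rax) == 0 is the expected "unowned" value; on success r15_thread is
// installed as the owner and ZF = 1, which is propagated to COUNT below.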
710 // Unconditionally set box->_displaced_header = markWord::unused_mark().
711 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
712 movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
713 // Propagate ICC.ZF from CAS above into DONE_LABEL.
714 jccb(Assembler::equal, COUNT); // CAS above succeeded; propagate ZF = 1 (success)
715
781 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
782 assert(boxReg == rax, "");
783 assert_different_registers(objReg, boxReg, tmpReg);
784
785 Label DONE_LABEL, Stacked, COUNT, NO_COUNT;
786
787 #if INCLUDE_RTM_OPT
788 if (UseRTMForStackLocks && use_rtm) {
789 assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive");
790 Label L_regular_unlock;
791 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
792 andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits
793 cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked
794 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
795 xend(); // otherwise end...
796 jmp(DONE_LABEL); // ... and we're done
797 bind(L_regular_unlock);
798 }
799 #endif
800
801 if (!UseHeavyMonitors && !UseFastLocking) {
802 cmpptr(Address(boxReg, 0), NULL_WORD); // Examine the displaced header
803 jcc (Assembler::zero, COUNT); // 0 indicates recursive stack-lock
804 }
805 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
806 if (!UseHeavyMonitors) {
807 testptr(tmpReg, markWord::monitor_value); // Inflated?
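// The fast-locking additions below grow this region, so when the RTM block is
// also emitted the short rel8 branch may no longer reach Stacked; use the long
// jcc form in that case.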
808 #if INCLUDE_RTM_OPT
809 if (UseFastLocking && use_rtm) {
810 jcc(Assembler::zero, Stacked);
811 } else
812 #endif
813 jccb(Assembler::zero, Stacked);
814 if (UseFastLocking) {
815 // If the owner is ANONYMOUS, we need to fix it.
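// With fast-locking another thread may inflate a monitor that we hold only via
// our lock stack; the inflating thread cannot name the owner and records
// ANONYMOUS_OWNER, which must be replaced with the real owner (this thread)
// before the regular inflated unlock below.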
816 testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) (intptr_t) ANONYMOUS_OWNER);
817 #ifdef _LP64
818 C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg);
819 Compile::current()->output()->add_stub(stub);
820 jcc(Assembler::notEqual, stub->entry());
821 bind(stub->continuation());
822 #else
823 // We can't easily implement this optimization on 32 bit because we don't have a thread register.
824 // Call the slow-path instead.
825 jcc(Assembler::notEqual, NO_COUNT);
826 #endif
827 }
828 }
829
830 // It's inflated.
831 #if INCLUDE_RTM_OPT
832 if (use_rtm) {
833 Label L_regular_inflated_unlock;
834 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
835 movptr(boxReg, Address(tmpReg, owner_offset));
836 testptr(boxReg, boxReg);
837 jccb(Assembler::notZero, L_regular_inflated_unlock);
838 xend();
839 jmp(DONE_LABEL);
840 bind(L_regular_inflated_unlock);
841 }
842 #endif
843
844 // Despite our balanced locking property we still check that m->_owner == Self
845 // as java routines or native JNI code called by this thread might
846 // have released the lock.
847 // Refer to the comments in synchronizer.cpp for how we might encode extra
848 // state in _succ so we can avoid fetching EntryList|cxq.
849 //
850 // If there's no contention try a 1-0 exit. That is, exit without
851 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
852 // we detect and recover from the race that the 1-0 exit admits.
853 //
854 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
855 // before it STs null into _owner, releasing the lock. Updates
856 // to data protected by the critical section must be visible before
857 // we drop the lock (and thus before any other thread could acquire
858 // the lock and observe the fields protected by the lock).
859 // IA32's memory-model is SPO, so STs are ordered with respect to
933 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
934 lock();
935 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
936 // There's no successor so we tried to regrab the lock.
937 // If that didn't work, then another thread grabbed the
938 // lock so we're done (and exit was a success).
939 jccb (Assembler::notEqual, LSuccess);
940 // Intentional fall-through into slow path
941
942 bind (LGoSlowPath);
943 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
944 jmpb (DONE_LABEL);
945
946 bind (LSuccess);
947 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
948 jmpb (DONE_LABEL);
949
950 #endif
951 if (!UseHeavyMonitors) {
952 bind (Stacked);
953 if (UseFastLocking) {
954 mov(boxReg, tmpReg);
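// boxReg now holds the fetched mark word; fast_unlock_impl restores an unlocked
// mark and pops the lock-stack entry, branching to NO_COUNT if that fails
// (see fast_unlock_impl).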
955 fast_unlock_impl(objReg, boxReg, tmpReg, NO_COUNT);
956 jmp(COUNT);
957 } else {
958 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
959 lock();
960 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
961 }
962 // Intentional fall-thru into DONE_LABEL
963 }
964 bind(DONE_LABEL);
965
966 // ZFlag == 1 count in fast path
967 // ZFlag == 0 count in slow path
968 jccb(Assembler::notZero, NO_COUNT);
969
970 bind(COUNT);
971 // Count monitors in fast path
972 #ifndef _LP64
973 get_thread(tmpReg);
974 decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
975 #else // _LP64
976 decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
977 #endif
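// Keep this decrement balanced with the increment performed on fast_lock()'s
// COUNT path.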
978
979 xorl(tmpReg, tmpReg); // Set ZF == 1
980
981 bind(NO_COUNT);