 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "oops/methodData.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"
#include "opto/output.hpp"
#include "opto/opcodes.hpp"
#include "opto/subnode.hpp"
#include "runtime/globals.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/sizes.hpp"

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

// C2 compiled method's prolog code.
void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {

  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  // NativeJump::patch_verified_entry will be able to patch out the entry
  // code safely. The push to verify stack depth is ok at 5 bytes,
  // the frame allocation can be either 3 or 6 bytes. So if we don't do
  // stack bang then we must use the 6 byte frame allocation even if
  // we have no frame. :-(
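  // (Hedged aside on where 3 vs 6 bytes comes from: on 32-bit x86,
  // "sub esp, imm8" encodes as 0x83 0xEC ib (3 bytes) while
  // "sub esp, imm32" is 0x81 0xEC id (6 bytes); the REX.W prefix adds
  // one more byte for the 64-bit forms.)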
  assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");

// fast_unlock we often branch to DONE_LABEL, just to find that C2
// will emit a conditional branch immediately after the node.
// So we have branches to branches and lots of ICC.ZF games.
// Instead, it might be better to have C2 pass a "FailureLabel"
// into fast_lock and fast_unlock. In the case of success, control
// will drop through the node. ICC.ZF is undefined at exit.
// In the case of failure, the node will branch directly to the
// FailureLabel.


// obj: object to lock
// box: on-stack box address (displaced header location) - KILLED
// rax: tmp -- KILLED
// scr: tmp -- KILLED
void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
                                  Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
                                  RTMLockingCounters* rtm_counters,
                                  RTMLockingCounters* stack_rtm_counters,
                                  Metadata* method_data,
                                  bool use_rtm, bool profile_rtm) {
  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
  // Ensure the register assignments are disjoint
  assert(tmpReg == rax, "");

  if (use_rtm) {
    assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
  } else {
    assert(cx1Reg == noreg, "");
    assert(cx2Reg == noreg, "");
    assert_different_registers(objReg, boxReg, tmpReg, scrReg);
  }

  // Possible cases that we'll encounter in fast_lock
  // ------------------------------------------------
  // * Inflated
  //    -- unlocked
  //    -- Locked
  //       = by self
  //       = by other
  // * neutral
  // * stack-locked

  Label IsInflated, DONE_LABEL, NO_COUNT, COUNT;

  if (DiagnoseSyncOnValueBasedClasses != 0) {
    load_klass(tmpReg, objReg, scrReg);
    movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
    testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
    jcc(Assembler::notZero, DONE_LABEL);
  }

#if INCLUDE_RTM_OPT
  if (UseRTMForStackLocks && use_rtm) {
    assert(LockingMode != LM_MONITOR, "LockingMode == 0 (LM_MONITOR) and +UseRTMForStackLocks are mutually exclusive");
    rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
                      stack_rtm_counters, method_data, profile_rtm,
                      DONE_LABEL, IsInflated);
  }
#endif // INCLUDE_RTM_OPT

  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));  // [FETCH]
  testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
  jcc(Assembler::notZero, IsInflated);
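  // Mark word tag bits (low two bits of the mark word) encode the lock
  // state: 0b01 unlocked/neutral, 0b00 stack-locked (legacy), 0b10 inflated
  // monitor (markWord::monitor_value), 0b11 GC-marked. A single testptr
  // against monitor_value therefore separates inflated from the other
  // fast-path states.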

  if (LockingMode == LM_MONITOR) {
    // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
    testptr(objReg, objReg);
  } else {
    assert(LockingMode == LM_LEGACY, "must be");
    // Attempt stack-locking ...
    orptr (tmpReg, markWord::unlocked_value);
    movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
    lock();
    cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
    jcc(Assembler::equal, COUNT);           // Success

    // Recursive locking.
    // The object is stack-locked: markword contains stack pointer to BasicLock.
    // Locked by current thread if difference with current SP is less than one page.
    subptr(tmpReg, rsp);
    // Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
    andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
    movptr(Address(boxReg, 0), tmpReg);
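    // Worked example for the 64-bit mask (assuming a 4K page):
    // 7 - 4096 == -4089 == 0xFFFFF007, so the andptr clears bits 3..11 of
    // (mark - rsp). The result is zero -- ZF set, success -- exactly when
    // the difference is 8-byte aligned and smaller than one page, i.e. the
    // displaced header points into our own frame.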
  }
  jmp(DONE_LABEL);

  bind(IsInflated);
  // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value

#if INCLUDE_RTM_OPT
  // Use the same RTM locking code in 32- and 64-bit VM.
  if (use_rtm) {
    rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
                         rtm_counters, method_data, profile_rtm, DONE_LABEL);
  } else {
#endif // INCLUDE_RTM_OPT

#ifndef _LP64
  // The object is inflated.

  // boxReg refers to the on-stack BasicLock in the current frame.
  // We'd like to write:
  //   set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
//    a frame.
// I2: If a method attempts to unlock an object that is not held by the
//     frame the interpreter throws IMSX.
//
// Let's say A(), which has provably balanced locking, acquires O and then calls B().
// B() doesn't have provably balanced locking so it runs in the interpreter.
// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
// is still locked by A().
//
// The only other source of unbalanced locking would be JNI. The "Java Native Interface:
// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
// should not be unlocked by "normal" java-level locking and vice-versa. The specification
// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
// Arguably, given that the spec legislates the JNI case as undefined, our implementation
// could reasonably *avoid* checking owner in fast_unlock().
// In the interest of performance we elide the m->Owner==Self check in unlock.
// A perfectly viable alternative is to elide the owner check except when
// Xcheck:jni is enabled.

void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
  assert(boxReg == rax, "");
  assert_different_registers(objReg, boxReg, tmpReg);

  Label DONE_LABEL, Stacked, COUNT, NO_COUNT;

#if INCLUDE_RTM_OPT
  if (UseRTMForStackLocks && use_rtm) {
    assert(LockingMode != LM_MONITOR, "LockingMode == 0 (LM_MONITOR) and +UseRTMForStackLocks are mutually exclusive");
    Label L_regular_unlock;
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // fetch markword
    andptr(tmpReg, markWord::lock_mask_in_place);                       // look at 2 lock bits
    cmpptr(tmpReg, markWord::unlocked_value);                           // bits = 01 unlocked
    jccb(Assembler::notEqual, L_regular_unlock);                        // if !HLE RegularLock
    xend();                                                             // otherwise end...
    jmp(DONE_LABEL);                                                    // ... and we're done
    bind(L_regular_unlock);
  }
#endif

  if (LockingMode == LM_LEGACY) {
    cmpptr(Address(boxReg, 0), NULL_WORD);                            // Examine the displaced header
    jcc   (Assembler::zero, COUNT);                                   // 0 indicates recursive stack-lock
  }
  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // Examine the object's markword
  if (LockingMode != LM_MONITOR) {
    testptr(tmpReg, markWord::monitor_value);                         // Inflated?
    jcc(Assembler::zero, Stacked);
  }

  // It's inflated.

#if INCLUDE_RTM_OPT
  if (use_rtm) {
    Label L_regular_inflated_unlock;
    int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
    movptr(boxReg, Address(tmpReg, owner_offset));
    testptr(boxReg, boxReg);
    jccb(Assembler::notZero, L_regular_inflated_unlock);
    xend();
    jmp(DONE_LABEL);
    bind(L_regular_inflated_unlock);
  }
#endif

  // Despite our balanced locking property we still check that m->_owner == Self
  // as java routines or native JNI code called by this thread might
  // have released the lock.
  // Refer to the comments in synchronizer.cpp for how we might encode extra
  // state in _succ so we can avoid fetching EntryList|cxq.
  //

  // box is really RAX -- the following CMPXCHG depends on that binding
  // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
  lock();
  cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  // There's no successor so we tried to regrab the lock.
  // If that didn't work, then another thread grabbed the
  // lock so we're done (and exit was a success).
  jccb  (Assembler::notEqual, LSuccess);
  // Intentional fall-through into slow path

  bind  (LGoSlowPath);
  orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
  jmpb  (DONE_LABEL);

  bind  (LSuccess);
  testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
  jmpb  (DONE_LABEL);

#endif
  if (LockingMode == LM_LEGACY) {
    bind  (Stacked);
    movptr(tmpReg, Address (boxReg, 0));      // re-fetch
    lock();
    cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
    // Intentional fall-thru into DONE_LABEL
  }

  bind(DONE_LABEL);

  // ZFlag == 1 count in fast path
  // ZFlag == 0 count in slow path
  jccb(Assembler::notZero, NO_COUNT);

  bind(COUNT);
  // Count monitors in fast path
#ifndef _LP64
  get_thread(tmpReg);
  decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
#else // _LP64
  decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
#endif

  xorl(tmpReg, tmpReg); // Set ZF == 1

  bind(NO_COUNT);
}

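// For orientation, a hedged C-like sketch of the legacy (LM_LEGACY)
// stack-lock protocol the two routines above implement; names such as
// "basic_lock" are illustrative, not HotSpot API:
//
//   lock(obj, basic_lock):
//     mark = obj->mark | unlocked_value;       // expected: neutral mark
//     basic_lock->displaced_header = mark;     // save it in the box
//     if (CAS(&obj->mark, mark, basic_lock)) return SUCCESS;
//     if (obj->mark - SP < page_size)          // already ours: recursion
//       { basic_lock->displaced_header = 0; return SUCCESS; }
//     return SLOW_PATH;                        // contended or inflated
//
//   unlock(obj, basic_lock):
//     if (basic_lock->displaced_header == 0) return SUCCESS;  // recursive
//     if (CAS(&obj->mark, basic_lock, basic_lock->displaced_header))
//       return SUCCESS;
//     return SLOW_PATH;                        // inflated meanwhile
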
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register rax_reg,
                                              Register t, Register thread) {
  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
  assert(rax_reg == rax, "Used for CAS");
  assert_different_registers(obj, box, rax_reg, t, thread);

  // Handle inflated monitor.
  Label inflated;
  // Finish fast lock successfully. ZF value is irrelevant.
  Label locked;
  // Finish fast lock unsuccessfully. MUST jump with ZF == 0
  Label slow_path;

  if (DiagnoseSyncOnValueBasedClasses != 0) {
    load_klass(rax_reg, obj, t);
    movl(rax_reg, Address(rax_reg, Klass::access_flags_offset()));
    testl(rax_reg, JVM_ACC_IS_VALUE_BASED_CLASS);
    jcc(Assembler::notZero, slow_path);
  }

  const Register mark = t;

  { // Lightweight Lock

    Label push;

    const Register top = box;

    // Load the mark.
    movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));

    // Prefetch top.
    movl(top, Address(thread, JavaThread::lock_stack_top_offset()));

    // Check for monitor (0b10).
    testptr(mark, markWord::monitor_value);
    jcc(Assembler::notZero, inflated);

    // Check if lock-stack is full.
    cmpl(top, LockStack::end_offset() - 1);
    jcc(Assembler::greater, slow_path);

    // Check if recursive.
    cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
    jccb(Assembler::equal, push);

    // Try to lock. Transition lock bits 0b01 => 0b00
    movptr(rax_reg, mark);
    orptr(rax_reg, markWord::unlocked_value);
    andptr(mark, ~(int32_t)markWord::unlocked_value);
    lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
    jcc(Assembler::notEqual, slow_path);

    bind(push);
    // After successful lock, push object on lock-stack.
    movptr(Address(thread, top), obj);
    addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
    jmpb(locked);
  }

  { // Handle inflated monitor.
    bind(inflated);

    const Register tagged_monitor = mark;

    // CAS owner (null => current thread).
    xorptr(rax_reg, rax_reg);
    lock(); cmpxchgptr(thread, Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    jccb(Assembler::equal, locked);

    // Check if recursive.
    cmpptr(thread, rax_reg);
    jccb(Assembler::notEqual, slow_path);

    // Recursive.
    increment(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  }

  bind(locked);
  increment(Address(thread, JavaThread::held_monitor_count_offset()));
  // Set ZF = 1
  xorl(rax_reg, rax_reg);

#ifdef ASSERT
  // Check that locked label is reached with ZF set.
  Label zf_correct;
  jccb(Assembler::zero, zf_correct);
  stop("Fast Lock ZF != 1");
#endif

  bind(slow_path);
#ifdef ASSERT
  // Check that slow_path label is reached with ZF not set.
  jccb(Assembler::notZero, zf_correct);
  stop("Fast Lock ZF != 0");
  bind(zf_correct);
#endif
  // C2 uses the value of ZF to determine the continuation.
}
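
// A hedged, C-like summary of the lightweight fast-lock above (illustrative
// pseudo-code only; "lock_stack" stands in for the per-thread lock-stack
// fields the assembly manipulates):
//
//   fast_lock_lightweight(obj):
//     mark = obj->mark;
//     if (mark is inflated)                       // tag bits 0b10
//       return CAS(&monitor->owner, null, thread) || owner == thread
//              ? LOCKED : SLOW_PATH;              // recursion bumps a counter
//     if (lock_stack.is_full())     return SLOW_PATH;
//     if (lock_stack.top() == obj)  { lock_stack.push(obj); return LOCKED; }
//     if (!CAS(&obj->mark, mark|0b01, mark&~0b01)) return SLOW_PATH;
//     lock_stack.push(obj); return LOCKED;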

void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread) {
  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
  assert(reg_rax == rax, "Used for CAS");
  assert_different_registers(obj, reg_rax, t);

  // Handle inflated monitor.
  Label inflated, inflated_check_lock_stack;
  // Finish fast unlock successfully. MUST jump with ZF == 1
  Label unlocked;

  // Assume success.
  decrement(Address(thread, JavaThread::held_monitor_count_offset()));

  const Register mark = t;
  const Register top = reg_rax;

  Label dummy;
  C2FastUnlockLightweightStub* stub = nullptr;

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, mark, reg_rax, thread);
    Compile::current()->output()->add_stub(stub);
  }

  Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
  Label& check_successor = stub == nullptr ? dummy : stub->check_successor();

  { // Lightweight Unlock

    // Load top.
    movl(top, Address(thread, JavaThread::lock_stack_top_offset()));

    // Prefetch mark.
    movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));

    // Check if obj is top of lock-stack.
    cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
    // Top of lock stack was not obj. Must be monitor.
    jcc(Assembler::notEqual, inflated_check_lock_stack);

    // Pop lock-stack.
    DEBUG_ONLY(movptr(Address(thread, top, Address::times_1, -oopSize), 0);)
    subl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);

    // Check if recursive.
    cmpptr(obj, Address(thread, top, Address::times_1, -2 * oopSize));
    jcc(Assembler::equal, unlocked);

    // We elide the monitor check, let the CAS fail instead.

    // Try to unlock. Transition lock bits 0b00 => 0b01
    movptr(reg_rax, mark);
    andptr(reg_rax, ~(int32_t)markWord::lock_mask);
    orptr(mark, markWord::unlocked_value);
    lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
    jcc(Assembler::notEqual, push_and_slow_path);
    jmp(unlocked);
  }

  { // Handle inflated monitor.
    bind(inflated_check_lock_stack);
#ifdef ASSERT
    Label check_done;
    subl(top, oopSize);
    cmpl(top, in_bytes(JavaThread::lock_stack_base_offset()));
    jcc(Assembler::below, check_done);
    cmpptr(obj, Address(thread, top));
    jccb(Assembler::notEqual, inflated_check_lock_stack);
    stop("Fast Unlock lock on stack");
    bind(check_done);
    testptr(mark, markWord::monitor_value);
    jccb(Assembler::notZero, inflated);
    stop("Fast Unlock not monitor");
#endif

    bind(inflated);

    // mark contains the tagged ObjectMonitor*.
    const Register monitor = mark;

#ifndef _LP64
    // Check if recursive.
    xorptr(reg_rax, reg_rax);
    orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    jcc(Assembler::notZero, check_successor);

    // Check if the entry lists are empty.
    movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
    jcc(Assembler::notZero, check_successor);

    // Release lock.
    movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
#else // _LP64
    Label recursive;

    // Check if recursive.
    cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
    jccb(Assembler::notEqual, recursive);

    // Check if the entry lists are empty.
    movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
    orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    jcc(Assembler::notZero, check_successor);

    // Release lock.
    movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
    jmpb(unlocked);

    // Recursive unlock.
    bind(recursive);
    decrement(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    xorl(t, t);
#endif
  }

  bind(unlocked);
  if (stub != nullptr) {
    bind(stub->unlocked_continuation());
  }

#ifdef ASSERT
  // Check that unlocked label is reached with ZF set.
  Label zf_correct;
  jccb(Assembler::zero, zf_correct);
  stop("Fast Unlock ZF != 1");
#endif

  if (stub != nullptr) {
    bind(stub->slow_path_continuation());
  }
#ifdef ASSERT
  // Check that stub->slow_path_continuation() label is reached with ZF not set.
  jccb(Assembler::notZero, zf_correct);
  stop("Fast Unlock ZF != 0");
  bind(zf_correct);
#endif
  // C2 uses the value of ZF to determine the continuation.
}
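
// Correspondingly, a hedged C-like summary of the lightweight fast-unlock
// (illustrative pseudo-code, not HotSpot API):
//
//   fast_unlock_lightweight(obj):
//     if (lock_stack.top() != obj) goto inflated;    // must be a monitor
//     lock_stack.pop();
//     if (lock_stack.top() == obj) return UNLOCKED;  // recursive case
//     if (!CAS(&obj->mark, mark&~0b11, mark|0b01))   // 0b00 => 0b01
//       { lock_stack.push(obj); return SLOW_PATH; }  // e.g. inflated racily
//     return UNLOCKED;
//   inflated:
//     if (monitor->recursions != 0) { monitor->recursions--; return UNLOCKED; }
//     if (monitor->cxq == null && monitor->EntryList == null)
//       { monitor->owner = null; return UNLOCKED; }
//     return CHECK_SUCCESSOR_STUB;                   // out-of-line wakeup path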

//-------------------------------------------------------------------------------------------
// Generic instructions support for use in .ad files C2 code generation

void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src) {
  if (dst != src) {
    movdqu(dst, src);
  }
  if (opcode == Op_AbsVD) {
    andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), noreg);
  } else {
    assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
    xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), noreg);
  }
}
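
// Both forms of vabsnegd rely on bit tricks on the IEEE-754 sign bit: abs
// clears it (AND with 0x7FFFFFFFFFFFFFFF in each 64-bit lane) and neg flips
// it (XOR with 0x8000000000000000 per lane). The lane constants live in the
// StubRoutines::x86 tables referenced above; the values quoted here are the
// conventional sign-bit masks, stated for orientation rather than read from
// this file.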

void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len) {
  if (opcode == Op_AbsVD) {
    vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, noreg);
  } else {
    assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
  // Perform above steps with lane comparison expression as INDEX >= 48 && INDEX < 64
  // and broadcasting 128-bit lane 3 (bytes 48..63).
  evpcmpb(ktmp, k0, shuffle, xtmp1, Assembler::nlt, true, vlen_enc);
  vpsllq(xtmp2, xtmp2, 0x1, vlen_enc);
  evpcmpb(ktmp, ktmp, shuffle, xtmp2, Assembler::lt, true, vlen_enc);
  evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc);
  evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
}

void C2_MacroAssembler::vector_rearrange_int_float(BasicType bt, XMMRegister dst,
                                                   XMMRegister shuffle, XMMRegister src, int vlen_enc) {
  if (vlen_enc == AVX_128bit) {
    vpermilps(dst, src, shuffle, vlen_enc);
  } else if (bt == T_INT) {
    vpermd(dst, shuffle, src, vlen_enc);
  } else {
    assert(bt == T_FLOAT, "");
    vpermps(dst, shuffle, src, vlen_enc);
  }
}
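
// Note the operand-order difference above: vpermilps takes (dst, src, shuffle)
// and permutes within each 128-bit lane, which suffices for 4-element vectors,
// while the cross-lane AVX2 forms vpermd/vpermps take the index vector first,
// (dst, shuffle, src).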

#ifdef _LP64
void C2_MacroAssembler::load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp) {
  C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
  Compile::current()->output()->add_stub(stub);

  // Note: Don't clobber obj anywhere in this method!

  // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract
  // obj-start, so that we can load from the object's mark-word instead. Usually the address
  // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
  // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and
  // then passes that register as obj and 0 in disp. The following code extracts the base
  // and offset to load the mark-word.
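  // For example (hedged, assuming the usual x86-64 layout with the mark word
  // at offset 0 and the narrow klass at offset 8): with disp ==
  // klass_offset_in_bytes the computed offset below is 0 + 8 - 8 == 0, and
  // with disp == 0 (pre-computed base) it is 0 + 0 - 8 == -8, stepping the
  // address back from the klass field to the mark word.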
  int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
  movq(dst, Address(obj, index, scale, offset));
  testb(dst, markWord::monitor_value);
  jcc(Assembler::notZero, stub->entry());
  bind(stub->continuation());
  shrq(dst, markWord::klass_shift);
}
#endif