9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "oops/methodData.hpp"
29 #include "opto/c2_MacroAssembler.hpp"
30 #include "opto/intrinsicnode.hpp"
31 #include "opto/opcodes.hpp"
32 #include "opto/subnode.hpp"
33 #include "runtime/biasedLocking.hpp"
34 #include "runtime/objectMonitor.hpp"
35 #include "runtime/stubRoutines.hpp"
36
37 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
38 switch (vlen_in_bytes) {
39 case 4: // fall-through
40 case 8: // fall-through
41 case 16: return Assembler::AVX_128bit;
42 case 32: return Assembler::AVX_256bit;
43 case 64: return Assembler::AVX_512bit;
44
45 default: {
46 ShouldNotReachHere();
47 return Assembler::AVX_NoVec;
48 }
49 }
50 }
51
52 void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
53 guarantee(PostLoopMultiversioning, "must be");
54 Assembler::movl(dst, 1);
55 Assembler::shlxl(dst, dst, src);
429 // But beware of excessive branch density on AMD Opterons.
430 //
431 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
432 // or failure of the fast path. If the fast path fails then we pass
433 // control to the slow path, typically in C. In fast_lock and
434 // fast_unlock we often branch to DONE_LABEL, just to find that C2
435 // will emit a conditional branch immediately after the node.
436 // So we have branches to branches and lots of ICC.ZF games.
437 // Instead, it might be better to have C2 pass a "FailureLabel"
438 // into fast_lock and fast_unlock. In the case of success, control
439 // will drop through the node. ICC.ZF is undefined at exit.
440 // In the case of failure, the node will branch directly to the
441 // FailureLabel
442
443
444 // obj: object to lock
445 // box: on-stack box address (displaced header location) - KILLED
446 // rax,: tmp -- KILLED
447 // scr: tmp -- KILLED
448 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
449 Register scrReg, Register cx1Reg, Register cx2Reg,
450 BiasedLockingCounters* counters,
451 RTMLockingCounters* rtm_counters,
452 RTMLockingCounters* stack_rtm_counters,
453 Metadata* method_data,
454 bool use_rtm, bool profile_rtm) {
455 // Ensure the register assignments are disjoint
456 assert(tmpReg == rax, "");
457
458 if (use_rtm) {
459 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
460 } else {
461 assert(cx2Reg == noreg, "");
462 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
463 }
464
465 if (counters != NULL) {
466 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
467 }
468
469 // Possible cases that we'll encounter in fast_lock
470 // ------------------------------------------------
471 // * Inflated
472 // -- unlocked
473 // -- Locked
474 // = by self
496 // it's stack-locked, biased or neutral
497 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
498 // order to reduce the number of conditional branches in the most common cases.
499 // Beware -- there's a subtle invariant that fetch of the markword
500 // at [FETCH], below, will never observe a biased encoding (*101b).
501 // If this invariant is not held we risk exclusion (safety) failure.
502 if (UseBiasedLocking && !UseOptoBiasInlining) {
503 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
504 }
505
506 #if INCLUDE_RTM_OPT
507 if (UseRTMForStackLocks && use_rtm) {
508 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
509 stack_rtm_counters, method_data, profile_rtm,
510 DONE_LABEL, IsInflated);
511 }
512 #endif // INCLUDE_RTM_OPT
513
514 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
515 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
516 jccb(Assembler::notZero, IsInflated);
517
518 // Attempt stack-locking ...
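  // Roughly, the sequence below does (pseudo-code sketch):
  //   box->_displaced_header = mark | unlocked_value;            // guess: object is unlocked
  //   if (CAS(&obj->mark, mark | unlocked_value, box) succeeds)  // install box ptr as the mark
  //     goto DONE;                                               // ZF = 1: stack-locked
  //   // on failure, fall through to the recursive stack-lock check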
519 orptr (tmpReg, markWord::unlocked_value);
520 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
521 lock();
522 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
523 if (counters != NULL) {
524 cond_inc32(Assembler::equal,
525 ExternalAddress((address)counters->fast_path_entry_count_addr()));
526 }
527 jcc(Assembler::equal, DONE_LABEL); // Success
528
529 // Recursive locking.
530 // The object is stack-locked: markword contains stack pointer to BasicLock.
531 // Locked by current thread if difference with current SP is less than one page.
532 subptr(tmpReg, rsp);
533   // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
534 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
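  // When ZF == 1 tmpReg is 0, so the store below leaves a 0 displaced header in the box;
  // fast_unlock treats a 0 displaced header as a recursive stack-lock and exits without a CAS.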
535 movptr(Address(boxReg, 0), tmpReg);
536 if (counters != NULL) {
537 cond_inc32(Assembler::equal,
538 ExternalAddress((address)counters->fast_path_entry_count_addr()));
539 }
540 jmp(DONE_LABEL);
541
542 bind(IsInflated);
543 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
544
545 #if INCLUDE_RTM_OPT
546 // Use the same RTM locking code in 32- and 64-bit VM.
547 if (use_rtm) {
548 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
549 rtm_counters, method_data, profile_rtm, DONE_LABEL);
550 } else {
551 #endif // INCLUDE_RTM_OPT
552
553 #ifndef _LP64
554 // The object is inflated.
555
556 // boxReg refers to the on-stack BasicLock in the current frame.
557 // We'd like to write:
558 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
642 // a frame.
643 // I2: If a method attempts to unlock an object that is not held by the
644 //     frame, the interpreter throws IMSX.
645 //
646 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
647 // B() doesn't have provably balanced locking so it runs in the interpreter.
648 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
649 // is still locked by A().
650 //
651 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
652 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
653 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
654 // doesn't say what will occur if a program engages in such mixed-mode locking, however.
655 // Arguably given that the spec legislates the JNI case as undefined our implementation
656 // could reasonably *avoid* checking owner in fast_unlock().
657 // In the interest of performance we elide m->Owner==Self check in unlock.
658 // A perfectly viable alternative is to elide the owner check except when
659 // Xcheck:jni is enabled.
660
661 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
662 assert(boxReg == rax, "");
663 assert_different_registers(objReg, boxReg, tmpReg);
664
665 Label DONE_LABEL, Stacked, CheckSucc;
666
667 // Critically, the biased locking test must have precedence over
668 // and appear before the (box->dhw == 0) recursive stack-lock test.
669 if (UseBiasedLocking && !UseOptoBiasInlining) {
670 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
671 }
672
673 #if INCLUDE_RTM_OPT
674 if (UseRTMForStackLocks && use_rtm) {
675 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
676 Label L_regular_unlock;
677 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
678 andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits
679 cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked
680 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
681 xend(); // otherwise end...
682 jmp(DONE_LABEL); // ... and we're done
683 bind(L_regular_unlock);
684 }
685 #endif
686
687 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
688 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
689 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
690 testptr(tmpReg, markWord::monitor_value); // Inflated?
691 jccb (Assembler::zero, Stacked);
692
693 // It's inflated.
694 #if INCLUDE_RTM_OPT
695 if (use_rtm) {
696 Label L_regular_inflated_unlock;
697 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
698 movptr(boxReg, Address(tmpReg, owner_offset));
699 testptr(boxReg, boxReg);
700 jccb(Assembler::notZero, L_regular_inflated_unlock);
701 xend();
702 jmpb(DONE_LABEL);
703 bind(L_regular_inflated_unlock);
704 }
705 #endif
706
707 // Despite our balanced locking property we still check that m->_owner == Self
708 // as java routines or native JNI code called by this thread might
709 // have released the lock.
710 // Refer to the comments in synchronizer.cpp for how we might encode extra
711 // state in _succ so we can avoid fetching EntryList|cxq.
712 //
713 // If there's no contention try a 1-0 exit. That is, exit without
714 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
715 // we detect and recover from the race that the 1-0 exit admits.
716 //
717 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
718 // before it STs null into _owner, releasing the lock. Updates
719 // to data protected by the critical section must be visible before
720 // we drop the lock (and thus before any other thread could acquire
721 // the lock and observe the fields protected by the lock).
722   // IA32's memory model keeps stores in program order (TSO), so STs are ordered with respect to
723 // each other and there's no need for an explicit barrier (fence).
724 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
725 #ifndef _LP64
726 get_thread (boxReg);
727
728 // Note that we could employ various encoding schemes to reduce
729 // the number of loads below (currently 4) to just 2 or 3.
730 // Refer to the comments in synchronizer.cpp.
731 // In practice the chain of fetches doesn't seem to impact performance, however.
732 xorptr(boxReg, boxReg);
733 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
734 jccb (Assembler::notZero, DONE_LABEL);
735 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
736 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
737 jccb (Assembler::notZero, CheckSucc);
738 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
739 jmpb (DONE_LABEL);
740
741 bind (Stacked);
742 // It's not inflated and it's not recursively stack-locked and it's not biased.
743 // It must be stack-locked.
744 // Try to reset the header to displaced header.
745 // The "box" value on the stack is stable, so we can reload
746 // and be assured we observe the same value as above.
747 movptr(tmpReg, Address(boxReg, 0));
748 lock();
749 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
750   // Intentional fall-thru into DONE_LABEL
751
752 // DONE_LABEL is a hot target - we'd really like to place it at the
753   // start of a cache line by padding with NOPs.
754 // See the AMD and Intel software optimization manuals for the
755 // most efficient "long" NOP encodings.
756 // Unfortunately none of our alignment mechanisms suffice.
757 bind (CheckSucc);
758 #else // _LP64
759 // It's inflated
760 Label LNotRecursive, LSuccess, LGoSlowPath;
761
762 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
763 jccb(Assembler::equal, LNotRecursive);
764
765 // Recursive inflated unlock
766 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
767 jmpb(LSuccess);
768
769 bind(LNotRecursive);
813 // length while by virtue of passing control into the slow path.
814
815 // box is really RAX -- the following CMPXCHG depends on that binding
816 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
817 lock();
818 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
819 // There's no successor so we tried to regrab the lock.
820 // If that didn't work, then another thread grabbed the
821 // lock so we're done (and exit was a success).
822 jccb (Assembler::notEqual, LSuccess);
823 // Intentional fall-through into slow path
824
825 bind (LGoSlowPath);
826 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
827 jmpb (DONE_LABEL);
828
829 bind (LSuccess);
830 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
831 jmpb (DONE_LABEL);
832
833 bind (Stacked);
834 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
835 lock();
836 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
837
838 #endif
839 bind(DONE_LABEL);
840 }
841
842 //-------------------------------------------------------------------------------------------
843 // Generic instruction support for C2 code generation in .ad files
844
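// vabsnegd computes vector double abs/neg with bitwise constants: Op_AbsVD clears the
// IEEE-754 sign bit by ANDing with vector_double_sign_mask (0x7FFFFFFFFFFFFFFF per lane),
// while Op_NegVD flips it by XORing with vector_double_sign_flip (0x8000000000000000 per lane).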
845 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
846 if (dst != src) {
847 movdqu(dst, src);
848 }
849 if (opcode == Op_AbsVD) {
850 andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
851 } else {
852     assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
853 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
854 }
855 }
856
857 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
858 if (opcode == Op_AbsVD) {
859 vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr);
860 } else {
861     assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
3957 evpcmpb(ktmp, ktmp, shuffle, xtmp2, Assembler::lt, true, vlen_enc);
3958 evshufi64x2(xtmp3, src, src, 0x55, vlen_enc);
3959 evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
3960
3961 // Perform above steps with lane comparison expression as INDEX >= 32 && INDEX < 48
3962 // and broadcasting third 128 bit lane.
3963 evpcmpb(ktmp, k0, shuffle, xtmp2, Assembler::nlt, true, vlen_enc);
3964 vpaddb(xtmp1, xtmp1, xtmp2, vlen_enc);
3965 evpcmpb(ktmp, ktmp, shuffle, xtmp1, Assembler::lt, true, vlen_enc);
3966 evshufi64x2(xtmp3, src, src, 0xAA, vlen_enc);
3967 evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
3968
3969 // Perform above steps with lane comparison expression as INDEX >= 48 && INDEX < 64
3970   // and broadcasting fourth 128 bit lane.
3971 evpcmpb(ktmp, k0, shuffle, xtmp1, Assembler::nlt, true, vlen_enc);
3972 vpsllq(xtmp2, xtmp2, 0x1, vlen_enc);
3973 evpcmpb(ktmp, ktmp, shuffle, xtmp2, Assembler::lt, true, vlen_enc);
3974 evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc);
3975 evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
3976 }
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "oops/methodData.hpp"
29 #include "opto/c2_CodeStubs.hpp"
30 #include "opto/c2_MacroAssembler.hpp"
31 #include "opto/intrinsicnode.hpp"
32 #include "opto/opcodes.hpp"
33 #include "opto/output.hpp"
34 #include "opto/subnode.hpp"
35 #include "runtime/biasedLocking.hpp"
36 #include "runtime/globals.hpp"
37 #include "runtime/objectMonitor.hpp"
38 #include "runtime/stubRoutines.hpp"
39 #include "utilities/globalDefinitions.hpp"
40 #include "utilities/powerOfTwo.hpp"
41 #include "utilities/sizes.hpp"
42
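// Maps a vector length in bytes to the AVX encoding the assembler expects. 4- and 8-byte
// vectors fall through to AVX_128bit because XMM is the narrowest available SIMD encoding.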
43 inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
44 switch (vlen_in_bytes) {
45 case 4: // fall-through
46 case 8: // fall-through
47 case 16: return Assembler::AVX_128bit;
48 case 32: return Assembler::AVX_256bit;
49 case 64: return Assembler::AVX_512bit;
50
51 default: {
52 ShouldNotReachHere();
53 return Assembler::AVX_NoVec;
54 }
55 }
56 }
57
58 void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
59 guarantee(PostLoopMultiversioning, "must be");
60 Assembler::movl(dst, 1);
61 Assembler::shlxl(dst, dst, src);
435 // But beware of excessive branch density on AMD Opterons.
436 //
437 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
438 // or failure of the fast path. If the fast path fails then we pass
439 // control to the slow path, typically in C. In fast_lock and
440 // fast_unlock we often branch to DONE_LABEL, just to find that C2
441 // will emit a conditional branch immediately after the node.
442 // So we have branches to branches and lots of ICC.ZF games.
443 // Instead, it might be better to have C2 pass a "FailureLabel"
444 // into fast_lock and fast_unlock. In the case of success, control
445 // will drop through the node. ICC.ZF is undefined at exit.
446 // In the case of failure, the node will branch directly to the
447 // FailureLabel
448
449
450 // obj: object to lock
451 // box: on-stack box address (displaced header location) - KILLED
452 // rax,: tmp -- KILLED
453 // scr: tmp -- KILLED
454 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
455 Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
456 BiasedLockingCounters* counters,
457 RTMLockingCounters* rtm_counters,
458 RTMLockingCounters* stack_rtm_counters,
459 Metadata* method_data,
460 bool use_rtm, bool profile_rtm) {
461 assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
462 // Ensure the register assignments are disjoint
463 assert(tmpReg == rax, "");
464
465 if (use_rtm) {
466 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
467 } else {
468 assert(cx2Reg == noreg, "");
469 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
470 }
471
472 if (counters != NULL) {
473 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
474 }
475
476 // Possible cases that we'll encounter in fast_lock
477 // ------------------------------------------------
478 // * Inflated
479 // -- unlocked
480 // -- Locked
481 // = by self
503 // it's stack-locked, biased or neutral
504 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
505 // order to reduce the number of conditional branches in the most common cases.
506 // Beware -- there's a subtle invariant that fetch of the markword
507 // at [FETCH], below, will never observe a biased encoding (*101b).
508 // If this invariant is not held we risk exclusion (safety) failure.
509 if (UseBiasedLocking && !UseOptoBiasInlining) {
510 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters);
511 }
512
513 #if INCLUDE_RTM_OPT
514 if (UseRTMForStackLocks && use_rtm) {
515 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
516 stack_rtm_counters, method_data, profile_rtm,
517 DONE_LABEL, IsInflated);
518 }
519 #endif // INCLUDE_RTM_OPT
520
521 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
522 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased
523 jcc(Assembler::notZero, IsInflated);
524
525 if (LockingMode == LM_MONITOR) {
526 // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
527 testptr(objReg, objReg);
528 } else {
529 assert(LockingMode == LM_LEGACY, "must be");
530 // Attempt stack-locking ...
531 orptr (tmpReg, markWord::unlocked_value);
532 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
533 lock();
534 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
535 if (counters != NULL) {
536 cond_inc32(Assembler::equal,
537 ExternalAddress((address)counters->fast_path_entry_count_addr()));
538 }
539 jcc(Assembler::equal, DONE_LABEL); // Success
540
541 // Recursive locking.
542 // The object is stack-locked: markword contains stack pointer to BasicLock.
543 // Locked by current thread if difference with current SP is less than one page.
544 subptr(tmpReg, rsp);
545     // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
546 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
547 movptr(Address(boxReg, 0), tmpReg);
548 if (counters != NULL) {
549 cond_inc32(Assembler::equal,
550 ExternalAddress((address)counters->fast_path_entry_count_addr()));
551 }
552 }
553 jmp(DONE_LABEL);
554
555 bind(IsInflated);
556 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
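  // The OM_OFFSET_NO_MONITOR_VALUE_TAG(field) offsets used on this path are assumed to
  // already subtract markWord::monitor_value, so the tagged pointer in tmpReg can be used
  // directly as the base register without stripping the tag first.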
557
558 #if INCLUDE_RTM_OPT
559 // Use the same RTM locking code in 32- and 64-bit VM.
560 if (use_rtm) {
561 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
562 rtm_counters, method_data, profile_rtm, DONE_LABEL);
563 } else {
564 #endif // INCLUDE_RTM_OPT
565
566 #ifndef _LP64
567 // The object is inflated.
568
569 // boxReg refers to the on-stack BasicLock in the current frame.
570 // We'd like to write:
571 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
655 // a frame.
656 // I2: If a method attempts to unlock an object that is not held by the
657 //     frame, the interpreter throws IMSX.
658 //
659 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
660 // B() doesn't have provably balanced locking so it runs in the interpreter.
661 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
662 // is still locked by A().
663 //
664 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
665 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
666 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
667 // doesn't say what will occur if a program engages in such mixed-mode locking, however.
668 // Arguably given that the spec legislates the JNI case as undefined our implementation
669 // could reasonably *avoid* checking owner in fast_unlock().
670 // In the interest of performance we elide m->Owner==Self check in unlock.
671 // A perfectly viable alternative is to elide the owner check except when
672 // Xcheck:jni is enabled.
673
674 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
675 assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
676 assert(boxReg == rax, "");
677 assert_different_registers(objReg, boxReg, tmpReg);
678
679 Label DONE_LABEL, Stacked, CheckSucc;
680
681 // Critically, the biased locking test must have precedence over
682 // and appear before the (box->dhw == 0) recursive stack-lock test.
683 if (UseBiasedLocking && !UseOptoBiasInlining) {
684 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
685 }
686
687 #if INCLUDE_RTM_OPT
688 if (UseRTMForStackLocks && use_rtm) {
689 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
690 Label L_regular_unlock;
691 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
692 andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits
693 cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked
694 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
695 xend(); // otherwise end...
696 jmp(DONE_LABEL); // ... and we're done
697 bind(L_regular_unlock);
698 }
699 #endif
700
701 if (LockingMode == LM_LEGACY) {
702 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
703 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
704 }
705 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
706 if (LockingMode != LM_MONITOR) {
707 testptr(tmpReg, markWord::monitor_value); // Inflated?
708 jcc(Assembler::zero, Stacked);
709 }
710
711 // It's inflated.
712
713 #if INCLUDE_RTM_OPT
714 if (use_rtm) {
715 Label L_regular_inflated_unlock;
716 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
717 movptr(boxReg, Address(tmpReg, owner_offset));
718 testptr(boxReg, boxReg);
719 jccb(Assembler::notZero, L_regular_inflated_unlock);
720 xend();
721 jmp(DONE_LABEL);
722 bind(L_regular_inflated_unlock);
723 }
724 #endif
725
726 // Despite our balanced locking property we still check that m->_owner == Self
727 // as java routines or native JNI code called by this thread might
728 // have released the lock.
729 // Refer to the comments in synchronizer.cpp for how we might encode extra
730 // state in _succ so we can avoid fetching EntryList|cxq.
731 //
732 // If there's no contention try a 1-0 exit. That is, exit without
733 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
734 // we detect and recover from the race that the 1-0 exit admits.
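  // A rough sketch of the 1-0 exit (see synchronizer.cpp; the exact ordering may differ):
  //   m->_owner = NULL;                                      // release; TSO keeps the ST ordered
  //   if (m->_succ != NULL) return;                          // a successor will re-check the queues
  //   if (m->_cxq == NULL && m->_EntryList == NULL) return;  // nobody is waiting
  //   if (CAS(&m->_owner, NULL, Self) != NULL) return;       // another thread took the lock
  //   goto slow_path;                                        // we re-own it and must wake a successor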
735 //
736 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
737 // before it STs null into _owner, releasing the lock. Updates
738 // to data protected by the critical section must be visible before
739 // we drop the lock (and thus before any other thread could acquire
740 // the lock and observe the fields protected by the lock).
741   // IA32's memory model keeps stores in program order (TSO), so STs are ordered with respect to
742 // each other and there's no need for an explicit barrier (fence).
743 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
744 #ifndef _LP64
745 get_thread (boxReg);
746
747 // Note that we could employ various encoding schemes to reduce
748 // the number of loads below (currently 4) to just 2 or 3.
749 // Refer to the comments in synchronizer.cpp.
750 // In practice the chain of fetches doesn't seem to impact performance, however.
751 xorptr(boxReg, boxReg);
752 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
753 jccb (Assembler::notZero, DONE_LABEL);
754 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
755 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
756 jccb (Assembler::notZero, DONE_LABEL);
757 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
758 jmpb (DONE_LABEL);
759
760   // Intentional fall-thru into DONE_LABEL
761
762 // DONE_LABEL is a hot target - we'd really like to place it at the
763   // start of a cache line by padding with NOPs.
764 // See the AMD and Intel software optimization manuals for the
765 // most efficient "long" NOP encodings.
766 // Unfortunately none of our alignment mechanisms suffice.
767 bind (CheckSucc);
768 #else // _LP64
769 // It's inflated
770 Label LNotRecursive, LSuccess, LGoSlowPath;
771
772 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
773 jccb(Assembler::equal, LNotRecursive);
774
775 // Recursive inflated unlock
776 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
777 jmpb(LSuccess);
778
779 bind(LNotRecursive);
823 // length while by virtue of passing control into the slow path.
824
825 // box is really RAX -- the following CMPXCHG depends on that binding
826 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
827 lock();
828 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
829 // There's no successor so we tried to regrab the lock.
830 // If that didn't work, then another thread grabbed the
831 // lock so we're done (and exit was a success).
832 jccb (Assembler::notEqual, LSuccess);
833 // Intentional fall-through into slow path
834
835 bind (LGoSlowPath);
836 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
837 jmpb (DONE_LABEL);
838
839 bind (LSuccess);
840 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
841 jmpb (DONE_LABEL);
842
843 #endif
844 if (LockingMode == LM_LEGACY) {
845 bind (Stacked);
846 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
847 lock();
848 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
849 // Intentional fall-thru into DONE_LABEL
850 }
851
852 bind(DONE_LABEL);
853 }
854
855 void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register rax_reg,
856 Register t, Register thread) {
857 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
858 assert(rax_reg == rax, "Used for CAS");
859 assert_different_registers(obj, box, rax_reg, t, thread);
860
861 // Handle inflated monitor.
862 Label inflated;
863 // Finish fast lock successfully. ZF value is irrelevant.
864 Label locked;
865 // Finish fast lock unsuccessfully. MUST jump with ZF == 0
866 Label slow_path;
867
868 if (DiagnoseSyncOnValueBasedClasses != 0) {
869 load_klass(rax_reg, obj, t);
870 movl(rax_reg, Address(rax_reg, Klass::access_flags_offset()));
871 testl(rax_reg, JVM_ACC_IS_VALUE_BASED_CLASS);
872 jcc(Assembler::notZero, slow_path);
873 }
874
875 const Register mark = t;
876
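  // Informal sketch of the lock-stack fast path emitted below (names as used in this method):
  //   if (mark has the monitor bit set)      goto inflated;
  //   if (the thread's lock-stack is full)   goto slow_path;
  //   if (lock-stack top == obj)             { push(obj); goto locked; }   // recursive
  //   if (!CAS(&obj->mark, mark | unlocked_value, mark & ~unlocked_value)) goto slow_path;
  //   push(obj); goto locked;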
877 { // Lightweight Lock
878
879 Label push;
880
881 const Register top = box;
882
883 // Load the mark.
884 movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
885
886 // Prefetch top.
887 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
888
889 // Check for monitor (0b10).
890 testptr(mark, markWord::monitor_value);
891 jcc(Assembler::notZero, inflated);
892
893 // Check if lock-stack is full.
894 cmpl(top, LockStack::end_offset() - 1);
895 jcc(Assembler::greater, slow_path);
896
897 // Check if recursive.
898 cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
899 jccb(Assembler::equal, push);
900
901 // Try to lock. Transition lock bits 0b01 => 0b00
902 movptr(rax_reg, mark);
903 orptr(rax_reg, markWord::unlocked_value);
904 andptr(mark, ~(int32_t)markWord::unlocked_value);
905 lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
906 jcc(Assembler::notEqual, slow_path);
907
908 bind(push);
909 // After successful lock, push object on lock-stack.
910 movptr(Address(thread, top), obj);
911 addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
912 jmpb(locked);
913 }
914
915 { // Handle inflated monitor.
916 bind(inflated);
917
918 const Register tagged_monitor = mark;
919
920 // CAS owner (null => current thread).
921 xorptr(rax_reg, rax_reg);
922 lock(); cmpxchgptr(thread, Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
923 jccb(Assembler::equal, locked);
924
925 // Check if recursive.
926 cmpptr(thread, rax_reg);
927 jccb(Assembler::notEqual, slow_path);
928
929 // Recursive.
930 increment(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
931 }
932
933 bind(locked);
934 // Set ZF = 1
935 xorl(rax_reg, rax_reg);
936
937 #ifdef ASSERT
938 // Check that locked label is reached with ZF set.
939 Label zf_correct;
940 jccb(Assembler::zero, zf_correct);
941 stop("Fast Lock ZF != 1");
942 #endif
943
944 bind(slow_path);
945 #ifdef ASSERT
946 // Check that slow_path label is reached with ZF not set.
947 jccb(Assembler::notZero, zf_correct);
948 stop("Fast Lock ZF != 0");
949 bind(zf_correct);
950 #endif
951 // C2 uses the value of ZF to determine the continuation.
952 }
953
954 void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread) {
955 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
956 assert(reg_rax == rax, "Used for CAS");
957 assert_different_registers(obj, reg_rax, t);
958
959 // Handle inflated monitor.
960 Label inflated, inflated_check_lock_stack;
961 // Finish fast unlock successfully. MUST jump with ZF == 1
962 Label unlocked;
963
964 const Register mark = t;
965 const Register top = reg_rax;
966
967 Label dummy;
968 C2FastUnlockLightweightStub* stub = nullptr;
969
970 if (!Compile::current()->output()->in_scratch_emit_size()) {
971 stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, mark, reg_rax, thread);
972 Compile::current()->output()->add_stub(stub);
973 }
974
975 Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
976 Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
977
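  // Informal sketch of the lock-stack fast unlock emitted below:
  //   if (lock-stack top != obj)             goto inflated;      // not stack-locked by us
  //   pop();
  //   if (new lock-stack top == obj)         goto unlocked;      // recursive unlock
  //   if (!CAS(&obj->mark, mark & ~lock_mask, mark | unlocked_value)) { push(obj); go slow; }
  //   goto unlocked;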
978 { // Lightweight Unlock
979
980 // Load top.
981 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
982
983 // Prefetch mark.
984 movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
985
986 // Check if obj is top of lock-stack.
987 cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
988 // Top of lock stack was not obj. Must be monitor.
989 jcc(Assembler::notEqual, inflated_check_lock_stack);
990
991 // Pop lock-stack.
992 DEBUG_ONLY(movptr(Address(thread, top, Address::times_1, -oopSize), 0);)
993 subl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
994
995 // Check if recursive.
996 cmpptr(obj, Address(thread, top, Address::times_1, -2 * oopSize));
997 jcc(Assembler::equal, unlocked);
998
999 // We elide the monitor check, let the CAS fail instead.
1000
1001 // Try to unlock. Transition lock bits 0b00 => 0b01
1002 movptr(reg_rax, mark);
1003 andptr(reg_rax, ~(int32_t)markWord::lock_mask);
1004 orptr(mark, markWord::unlocked_value);
1005 lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
1006 jcc(Assembler::notEqual, push_and_slow_path);
1007 jmp(unlocked);
1008 }
1009
1010
1011 { // Handle inflated monitor.
1012 bind(inflated_check_lock_stack);
1013 #ifdef ASSERT
1014 Label check_done;
1015 subl(top, oopSize);
1016 cmpl(top, in_bytes(JavaThread::lock_stack_base_offset()));
1017 jcc(Assembler::below, check_done);
1018 cmpptr(obj, Address(thread, top));
1019 jccb(Assembler::notEqual, inflated_check_lock_stack);
1020 stop("Fast Unlock lock on stack");
1021 bind(check_done);
1022 testptr(mark, markWord::monitor_value);
1023 jccb(Assembler::notZero, inflated);
1024 stop("Fast Unlock not monitor");
1025 #endif
1026
1027 bind(inflated);
1028
1029 // mark contains the tagged ObjectMonitor*.
1030 const Register monitor = mark;
1031
1032 #ifndef _LP64
1033 // Check if recursive.
1034 xorptr(reg_rax, reg_rax);
1035 orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1036 jcc(Assembler::notZero, check_successor);
1037
1038 // Check if the entry lists are empty.
1039 movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1040 orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1041 jcc(Assembler::notZero, check_successor);
1042
1043 // Release lock.
1044 movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
1045 #else // _LP64
1046 Label recursive;
1047
1048 // Check if recursive.
1049 cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
1050 jccb(Assembler::notEqual, recursive);
1051
1052 // Check if the entry lists are empty.
1053 movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1054 orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1055 jcc(Assembler::notZero, check_successor);
1056
1057 // Release lock.
1058 movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
1059 jmpb(unlocked);
1060
1061 // Recursive unlock.
1062 bind(recursive);
1063 decrement(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1064 xorl(t, t);
1065 #endif
1066 }
1067
1068 bind(unlocked);
1069 if (stub != nullptr) {
1070 bind(stub->unlocked_continuation());
1071 }
1072
1073 #ifdef ASSERT
1074 // Check that unlocked label is reached with ZF set.
1075 Label zf_correct;
1076 jccb(Assembler::zero, zf_correct);
1077 stop("Fast Unlock ZF != 1");
1078 #endif
1079
1080 if (stub != nullptr) {
1081 bind(stub->slow_path_continuation());
1082 }
1083 #ifdef ASSERT
1084   // Check that stub->slow_path_continuation() label is reached with ZF not set.
1085 jccb(Assembler::notZero, zf_correct);
1086 stop("Fast Unlock ZF != 0");
1087 bind(zf_correct);
1088 #endif
1089 // C2 uses the value of ZF to determine the continuation.
1090 }
1091
1092 //-------------------------------------------------------------------------------------------
1093 // Generic instruction support for C2 code generation in .ad files
1094
1095 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
1096 if (dst != src) {
1097 movdqu(dst, src);
1098 }
1099 if (opcode == Op_AbsVD) {
1100 andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
1101 } else {
1102     assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
1103 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
1104 }
1105 }
1106
1107 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
1108 if (opcode == Op_AbsVD) {
1109 vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr);
1110 } else {
1111     assert((opcode == Op_NegVD), "opcode should be Op_NegVD");
4207 evpcmpb(ktmp, ktmp, shuffle, xtmp2, Assembler::lt, true, vlen_enc);
4208 evshufi64x2(xtmp3, src, src, 0x55, vlen_enc);
4209 evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
4210
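  // evpshufb only permutes bytes within each 128-bit lane, so each source lane is first
  // broadcast across the whole register with evshufi64x2 and then merged into dst under
  // a mask selecting exactly the shuffle indices that fall into that lane.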
4211 // Perform above steps with lane comparison expression as INDEX >= 32 && INDEX < 48
4212 // and broadcasting third 128 bit lane.
4213 evpcmpb(ktmp, k0, shuffle, xtmp2, Assembler::nlt, true, vlen_enc);
4214 vpaddb(xtmp1, xtmp1, xtmp2, vlen_enc);
4215 evpcmpb(ktmp, ktmp, shuffle, xtmp1, Assembler::lt, true, vlen_enc);
4216 evshufi64x2(xtmp3, src, src, 0xAA, vlen_enc);
4217 evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
4218
4219 // Perform above steps with lane comparison expression as INDEX >= 48 && INDEX < 64
4220   // and broadcasting fourth 128 bit lane.
4221 evpcmpb(ktmp, k0, shuffle, xtmp1, Assembler::nlt, true, vlen_enc);
4222 vpsllq(xtmp2, xtmp2, 0x1, vlen_enc);
4223 evpcmpb(ktmp, ktmp, shuffle, xtmp2, Assembler::lt, true, vlen_enc);
4224 evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc);
4225 evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
4226 }
4227
4228 #ifdef _LP64
4229 void C2_MacroAssembler::load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp) {
4230 C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
4231 Compile::current()->output()->add_stub(stub);
4232
4233   // Note: Don't clobber obj anywhere in this method!
4234
4235 // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract
4236 // obj-start, so that we can load from the object's mark-word instead. Usually the address
4237 // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
4238 // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and
4239 // then passes that register as obj and 0 in disp. The following code extracts the base
4240 // and offset to load the mark-word.
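  // If the low mark bits are 0b10 the header has been displaced into an ObjectMonitor;
  // the stub's slow path entered below is assumed to recover the narrow klass from the
  // displaced header before returning to continuation().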
4241 int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
4242 movq(dst, Address(obj, index, scale, offset));
4243 testb(dst, markWord::monitor_value);
4244 jcc(Assembler::notZero, stub->entry());
4245 bind(stub->continuation());
4246 shrq(dst, markWord::klass_shift);
4247 }
4248 #endif
|