src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "gc/shared/barrierSet.hpp"
  29 #include "gc/shared/barrierSetAssembler.hpp"
  30 #include "oops/methodData.hpp"
  31 #include "opto/c2_MacroAssembler.hpp"
  32 #include "opto/intrinsicnode.hpp"
  33 #include "opto/output.hpp"
  34 #include "opto/opcodes.hpp"
  35 #include "opto/subnode.hpp"

  36 #include "runtime/objectMonitor.hpp"
  37 #include "runtime/stubRoutines.hpp"
  38 
  39 #ifdef PRODUCT
  40 #define BLOCK_COMMENT(str) /* nothing */
  41 #define STOP(error) stop(error)
  42 #else
  43 #define BLOCK_COMMENT(str) block_comment(str)
  44 #define STOP(error) block_comment(error); stop(error)
  45 #endif
  46 
  47 // C2 compiled method's prolog code.
  48 void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
  49 
  50   // WARNING: Initial instruction MUST be 5 bytes or longer so that
  51   // NativeJump::patch_verified_entry will be able to patch out the entry
  52   // code safely. The push to verify stack depth is ok at 5 bytes,
  53   // the frame allocation can be either 3 or 6 bytes. So if we don't do
  54   // stack bang then we must use the 6 byte frame allocation even if
  55   // we have no frame. :-(
  56   assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
  57 

 536 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
 537 //    will emit a conditional branch immediately after the node.
 538 //    So we have branches to branches and lots of ICC.ZF games.
 539 //    Instead, it might be better to have C2 pass a "FailureLabel"
 540 //    into fast_lock and fast_unlock.  In the case of success, control
 541 //    will drop through the node.  ICC.ZF is undefined at exit.
 542 //    In the case of failure, the node will branch directly to the
 543 //    FailureLabel
 544 
 545 
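// As an illustrative sketch only (not the actual .ad rule), the protocol above
// is consumed roughly like this: the node leaves ZF == 1 on success and
// ZF == 0 on failure, and C2 emits the conditional branch right after it:
//
//   fast_lock(obj, box, rax, ...);        // sets ZF per the protocol above
//   jcc(Assembler::notZero, slow_path);   // ZF == 0 -> call the runtime
//   // fall through: lock acquired on the fast path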
 546 // obj: object to lock
 547 // box: on-stack box address (displaced header location) - KILLED
 548 // rax: tmp -- KILLED
 549 // scr: tmp -- KILLED
 550 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
 551                                  Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
 552                                  RTMLockingCounters* rtm_counters,
 553                                  RTMLockingCounters* stack_rtm_counters,
 554                                  Metadata* method_data,
 555                                  bool use_rtm, bool profile_rtm) {

 556   // Ensure the register assignments are disjoint
 557   assert(tmpReg == rax, "");
 558 
 559   if (use_rtm) {
 560     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
 561   } else {
 562     assert(cx1Reg == noreg, "");
 563     assert(cx2Reg == noreg, "");
 564     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
 565   }
 566 
 567   // Possible cases that we'll encounter in fast_lock
 568   // ------------------------------------------------
 569   // * Inflated
 570   //    -- unlocked
 571   //    -- Locked
 572   //       = by self
 573   //       = by other
 574   // * neutral
 575   // * stack-locked

 587     testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
 588     jcc(Assembler::notZero, DONE_LABEL);
 589   }
 590 
 591 #if INCLUDE_RTM_OPT
 592   if (UseRTMForStackLocks && use_rtm) {
 593     assert(LockingMode != LM_MONITOR, "LockingMode == 0 (LM_MONITOR) and +UseRTMForStackLocks are mutually exclusive");
 594     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
 595                       stack_rtm_counters, method_data, profile_rtm,
 596                       DONE_LABEL, IsInflated);
 597   }
 598 #endif // INCLUDE_RTM_OPT
 599 
 600   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
 601   testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
 602   jcc(Assembler::notZero, IsInflated);
 603 
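// For reference (values from markWord, listed here as an illustrative aid),
// the two low lock bits of the header distinguish the states tested above:
//   0b01  unlocked / neutral                  (markWord::unlocked_value)
//   0b00  stack-locked (mark is a BasicLock* on the owner's stack)
//   0b10  inflated (mark is ObjectMonitor* | markWord::monitor_value)
//   0b11  marked by the GC
// so testing markWord::monitor_value (0b10) separates inflated from the rest.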
 604   if (LockingMode == LM_MONITOR) {
 605     // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
 606     testptr(objReg, objReg);
 607   } else if (LockingMode == LM_LEGACY) {

 608     // Attempt stack-locking ...
 609     orptr (tmpReg, markWord::unlocked_value);
 610     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
 611     lock();
 612     cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
 613     jcc(Assembler::equal, COUNT);           // Success
 614 
 615     // Recursive locking.
 616     // The object is stack-locked: markword contains stack pointer to BasicLock.
 617     // Locked by current thread if difference with current SP is less than one page.
 618     subptr(tmpReg, rsp);
 619     // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
 620     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
 621     movptr(Address(boxReg, 0), tmpReg);
 622   } else {
 623     assert(LockingMode == LM_LIGHTWEIGHT, "");
 624     lightweight_lock(objReg, tmpReg, thread, scrReg, NO_COUNT);
 625     jmp(COUNT);
 626   }
 627   jmp(DONE_LABEL);
 628 
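// A minimal C-level model of the recursive stack-lock test above (illustrative
// only; assumes LP64 and a 4 KiB page -- names below are not from this file):
//
//   bool recursive_stack_lock(intptr_t mark, intptr_t sp) {
//     intptr_t delta = mark - sp;        // mark holds the outer BasicLock*
//     // 7 - 4096 == 0x...FFFFF007 keeps the low lock bits and every bit at or
//     // above the page bit; all of them must be zero for a same-page BasicLock.
//     return (delta & (7 - 4096)) == 0;  // true  <=>  ZF == 1 above
//   }
//
// On success the masked (zero) value stored into the box marks it as a
// recursive, displaced-header == 0 lock, which fast_unlock recognizes below.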
 629   bind(IsInflated);
 630   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
 631 
 632 #if INCLUDE_RTM_OPT
 633   // Use the same RTM locking code in 32- and 64-bit VM.
 634   if (use_rtm) {
 635     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
 636                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
 637   } else {
 638 #endif // INCLUDE_RTM_OPT
 639 
 640 #ifndef _LP64
 641   // The object is inflated.
 642 
 643   // boxReg refers to the on-stack BasicLock in the current frame.
 644   // We'd like to write:
 645   //   set box->_displaced_header = markWord::unused_mark().  Any non-0 value suffices.

 736 //      a frame.
 737 // I2:  If a method attempts to unlock an object that is not held by the
 738 //      frame, the interpreter throws IMSX.
 739 //
 740 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
 741 // B() doesn't have provably balanced locking so it runs in the interpreter.
 742 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
 743 // is still locked by A().
 744 //
 745 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
 746 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
 747 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
 748 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
 749 // Arguably, given that the spec legislates the JNI case as undefined, our implementation
 750 // could reasonably *avoid* checking owner in fast_unlock().
 751 // In the interest of performance we elide the m->Owner==Self check in unlock.
 752 // A perfectly viable alternative is to elide the owner check except when
 753 // Xcheck:jni is enabled.
 754 
 755 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {

 756   assert(boxReg == rax, "");
 757   assert_different_registers(objReg, boxReg, tmpReg);
 758 
 759   Label DONE_LABEL, Stacked, COUNT, NO_COUNT;
 760 
 761 #if INCLUDE_RTM_OPT
 762   if (UseRTMForStackLocks && use_rtm) {
 763     assert(LockingMode != LM_MONITOR, "LockingMode == 0 (LM_MONITOR) and +UseRTMForStackLocks are mutually exclusive");
 764     Label L_regular_unlock;
 765     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
 766     andptr(tmpReg, markWord::lock_mask_in_place);                     // look at 2 lock bits
 767     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 01 unlocked
 768     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
 769     xend();                                                           // otherwise end...
 770     jmp(DONE_LABEL);                                                  // ... and we're done
 771     bind(L_regular_unlock);
 772   }
 773 #endif
 774 
 775   if (LockingMode == LM_LEGACY) {
 776     cmpptr(Address(boxReg, 0), NULL_WORD);                            // Examine the displaced header
 777     jcc   (Assembler::zero, COUNT);                                   // 0 indicates recursive stack-lock
 778   }
 779   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // Examine the object's markword
 780   if (LockingMode != LM_MONITOR) {
 781     testptr(tmpReg, markWord::monitor_value);                         // Inflated?
 782     jcc(Assembler::zero, Stacked);
 783   }
 784 
 785   // It's inflated.
 786   if (LockingMode == LM_LIGHTWEIGHT) {
 787     // If the owner is ANONYMOUS, we need to fix it - in an out-of-line stub.
 788     testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) ObjectMonitor::ANONYMOUS_OWNER);
 789 #ifdef _LP64
 790     if (!Compile::current()->output()->in_scratch_emit_size()) {
 791       C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg, boxReg);
 792       Compile::current()->output()->add_stub(stub);
 793       jcc(Assembler::notEqual, stub->entry());
 794       bind(stub->continuation());
 795     } else
 796 #endif
 797     {
 798       // We can't easily implement this optimization on 32 bit because we don't have a thread register.
 799       // Call the slow-path instead.
 800       jcc(Assembler::notEqual, NO_COUNT);
 801     }
 802   }
 803 
 804 #if INCLUDE_RTM_OPT
 805   if (use_rtm) {
 806     Label L_regular_inflated_unlock;
 807     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 808     movptr(boxReg, Address(tmpReg, owner_offset));
 809     testptr(boxReg, boxReg);
 810     jccb(Assembler::notZero, L_regular_inflated_unlock);
 811     xend();
 812     jmp(DONE_LABEL);
 813     bind(L_regular_inflated_unlock);
 814   }
 815 #endif
 816 
 817   // Despite our balanced locking property we still check that m->_owner == Self
 818   // as java routines or native JNI code called by this thread might
 819   // have released the lock.
 820   // Refer to the comments in synchronizer.cpp for how we might encode extra
 821   // state in _succ so we can avoid fetching EntryList|cxq.
 822   //

 904 
 905   // box is really RAX -- the following CMPXCHG depends on that binding
 906   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
 907   lock();
 908   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 909   // There's no successor so we tried to regrab the lock.
 910   // If that didn't work, then another thread grabbed the
 911   // lock so we're done (and exit was a success).
 912   jccb  (Assembler::notEqual, LSuccess);
 913   // Intentional fall-through into slow path
 914 
 915   bind  (LGoSlowPath);
 916   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
 917   jmpb  (DONE_LABEL);
 918 
 919   bind  (LSuccess);
 920   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
 921   jmpb  (DONE_LABEL);
 922 
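// Flag idioms used above: orl(reg, 1) forces a non-zero result and therefore
// leaves ZF == 0 (failure for the caller's branch), while testl(reg, 0)
// computes reg & 0 == 0 and so always sets ZF == 1 (success) without
// modifying reg.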
 923 #endif
 924   if (LockingMode != LM_MONITOR) {
 925     bind  (Stacked);
 926     if (LockingMode == LM_LIGHTWEIGHT) {
 927       mov(boxReg, tmpReg);
 928       lightweight_unlock(objReg, boxReg, tmpReg, NO_COUNT);
 929       jmp(COUNT);
 930     } else if (LockingMode == LM_LEGACY) {
 931       movptr(tmpReg, Address (boxReg, 0));      // re-fetch
 932       lock();
 933       cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 934     }
 935     // Intentional fall-thru into DONE_LABEL
 936   }

 937   bind(DONE_LABEL);
 938 
 939   // ZFlag == 1 count in fast path
 940   // ZFlag == 0 count in slow path
 941   jccb(Assembler::notZero, NO_COUNT);
 942 
 943   bind(COUNT);
 944   // Count monitors in fast path
 945 #ifndef _LP64
 946   get_thread(tmpReg);
 947   decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
 948 #else // _LP64
 949   decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
 950 #endif
 951 
 952   xorl(tmpReg, tmpReg); // Set ZF == 1
 953 
 954   bind(NO_COUNT);
 955 }
 956 
 957 //-------------------------------------------------------------------------------------------
 958 // Generic instruction support for C2 code generation from .ad files
 959 
 960 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src) {
 961   if (dst != src) {
 962     movdqu(dst, src);
 963   }
 964   if (opcode == Op_AbsVD) {
 965     andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), noreg);
 966   } else {
 967     assert((opcode == Op_NegVD),"opcode should be Op_NegVD");
 968     xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), noreg);
 969   }
 970 }
 971 
 972 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len) {
 973   if (opcode == Op_AbsVD) {
 974     vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, noreg);
 975   } else {
 976     assert((opcode == Op_NegVD),"opcode should be Op_NegVD");

6162   // Perform above steps with lane comparison expression as INDEX >= 48 && INDEX < 64
6163   // and broadcasting third 128 bit lane.
6164   evpcmpb(ktmp, k0, shuffle,  xtmp1, Assembler::nlt, true, vlen_enc);
6165   vpsllq(xtmp2, xtmp2, 0x1, vlen_enc);
6166   evpcmpb(ktmp, ktmp, shuffle,  xtmp2, Assembler::lt, true, vlen_enc);
6167   evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc);
6168   evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
6169 }
6170 
6171 void C2_MacroAssembler::vector_rearrange_int_float(BasicType bt, XMMRegister dst,
6172                                                    XMMRegister shuffle, XMMRegister src, int vlen_enc) {
6173   if (vlen_enc == AVX_128bit) {
6174     vpermilps(dst, src, shuffle, vlen_enc);
6175   } else if (bt == T_INT) {
6176     vpermd(dst, shuffle, src, vlen_enc);
6177   } else {
6178     assert(bt == T_FLOAT, "");
6179     vpermps(dst, shuffle, src, vlen_enc);
6180   }
6181 }

  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "gc/shared/barrierSet.hpp"
  29 #include "gc/shared/barrierSetAssembler.hpp"
  30 #include "oops/methodData.hpp"
  31 #include "opto/c2_MacroAssembler.hpp"
  32 #include "opto/intrinsicnode.hpp"
  33 #include "opto/output.hpp"
  34 #include "opto/opcodes.hpp"
  35 #include "opto/subnode.hpp"
  36 #include "runtime/globals.hpp"
  37 #include "runtime/objectMonitor.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #include "utilities/globalDefinitions.hpp"
  40 #include "utilities/powerOfTwo.hpp"
  41 #include "utilities/sizes.hpp"
  42 
  43 #ifdef PRODUCT
  44 #define BLOCK_COMMENT(str) /* nothing */
  45 #define STOP(error) stop(error)
  46 #else
  47 #define BLOCK_COMMENT(str) block_comment(str)
  48 #define STOP(error) block_comment(error); stop(error)
  49 #endif
  50 
  51 // C2 compiled method's prolog code.
  52 void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
  53 
  54   // WARNING: Initial instruction MUST be 5 bytes or longer so that
  55   // NativeJump::patch_verified_entry will be able to patch out the entry
  56   // code safely. The push to verify stack depth is ok at 5 bytes,
  57   // the frame allocation can be either 3 or 6 bytes. So if we don't do
  58   // stack bang then we must use the 6 byte frame allocation even if
  59   // we have no frame. :-(
  60   assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
  61 

 540 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
 541 //    will emit a conditional branch immediately after the node.
 542 //    So we have branches to branches and lots of ICC.ZF games.
 543 //    Instead, it might be better to have C2 pass a "FailureLabel"
 544 //    into fast_lock and fast_unlock.  In the case of success, control
 545 //    will drop through the node.  ICC.ZF is undefined at exit.
 546 //    In the case of failure, the node will branch directly to the
 547 //    FailureLabel
 548 
 549 
 550 // obj: object to lock
 551 // box: on-stack box address (displaced header location) - KILLED
 552 // rax: tmp -- KILLED
 553 // scr: tmp -- KILLED
 554 void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
 555                                  Register scrReg, Register cx1Reg, Register cx2Reg, Register thread,
 556                                  RTMLockingCounters* rtm_counters,
 557                                  RTMLockingCounters* stack_rtm_counters,
 558                                  Metadata* method_data,
 559                                  bool use_rtm, bool profile_rtm) {
 560   assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
 561   // Ensure the register assignments are disjoint
 562   assert(tmpReg == rax, "");
 563 
 564   if (use_rtm) {
 565     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
 566   } else {
 567     assert(cx1Reg == noreg, "");
 568     assert(cx2Reg == noreg, "");
 569     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
 570   }
 571 
 572   // Possible cases that we'll encounter in fast_lock
 573   // ------------------------------------------------
 574   // * Inflated
 575   //    -- unlocked
 576   //    -- Locked
 577   //       = by self
 578   //       = by other
 579   // * neutral
 580   // * stack-locked

 592     testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
 593     jcc(Assembler::notZero, DONE_LABEL);
 594   }
 595 
 596 #if INCLUDE_RTM_OPT
 597   if (UseRTMForStackLocks && use_rtm) {
 598     assert(LockingMode != LM_MONITOR, "LockingMode == 0 (LM_MONITOR) and +UseRTMForStackLocks are mutually exclusive");
 599     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
 600                       stack_rtm_counters, method_data, profile_rtm,
 601                       DONE_LABEL, IsInflated);
 602   }
 603 #endif // INCLUDE_RTM_OPT
 604 
 605   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
 606   testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
 607   jcc(Assembler::notZero, IsInflated);
 608 
 609   if (LockingMode == LM_MONITOR) {
 610     // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
 611     testptr(objReg, objReg);
 612   } else {
 613     assert(LockingMode == LM_LEGACY, "must be");
 614     // Attempt stack-locking ...
 615     orptr (tmpReg, markWord::unlocked_value);
 616     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
 617     lock();
 618     cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
 619     jcc(Assembler::equal, COUNT);           // Success
 620 
 621     // Recursive locking.
 622     // The object is stack-locked: markword contains stack pointer to BasicLock.
 623     // Locked by current thread if difference with current SP is less than one page.
 624     subptr(tmpReg, rsp);
 625     // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
 626     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
 627     movptr(Address(boxReg, 0), tmpReg);
 628   }
 629   jmp(DONE_LABEL);
 630 
 631   bind(IsInflated);
 632   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
 633 
 634 #if INCLUDE_RTM_OPT
 635   // Use the same RTM locking code in 32- and 64-bit VM.
 636   if (use_rtm) {
 637     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
 638                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
 639   } else {
 640 #endif // INCLUDE_RTM_OPT
 641 
 642 #ifndef _LP64
 643   // The object is inflated.
 644 
 645   // boxReg refers to the on-stack BasicLock in the current frame.
 646   // We'd like to write:
 647   //   set box->_displaced_header = markWord::unused_mark().  Any non-0 value suffices.

 738 //      a frame.
 739 // I2:  If a method attempts to unlock an object that is not held by the
 740 //      frame, the interpreter throws IMSX.
 741 //
 742 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
 743 // B() doesn't have provably balanced locking so it runs in the interpreter.
 744 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
 745 // is still locked by A().
 746 //
 747 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
 748 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
 749 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
 750 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
 751 // Arguably, given that the spec legislates the JNI case as undefined, our implementation
 752 // could reasonably *avoid* checking owner in fast_unlock().
 753 // In the interest of performance we elide the m->Owner==Self check in unlock.
 754 // A perfectly viable alternative is to elide the owner check except when
 755 // Xcheck:jni is enabled.
 756 
 757 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
 758   assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
 759   assert(boxReg == rax, "");
 760   assert_different_registers(objReg, boxReg, tmpReg);
 761 
 762   Label DONE_LABEL, Stacked, COUNT, NO_COUNT;
 763 
 764 #if INCLUDE_RTM_OPT
 765   if (UseRTMForStackLocks && use_rtm) {
 766     assert(LockingMode != LM_MONITOR, "LockingMode == 0 (LM_MONITOR) and +UseRTMForStackLocks are mutually exclusive");
 767     Label L_regular_unlock;
 768     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
 769     andptr(tmpReg, markWord::lock_mask_in_place);                     // look at 2 lock bits
 770     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 01 unlocked
 771     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
 772     xend();                                                           // otherwise end...
 773     jmp(DONE_LABEL);                                                  // ... and we're done
 774     bind(L_regular_unlock);
 775   }
 776 #endif
 777 
 778   if (LockingMode == LM_LEGACY) {
 779     cmpptr(Address(boxReg, 0), NULL_WORD);                            // Examine the displaced header
 780     jcc   (Assembler::zero, COUNT);                                   // 0 indicates recursive stack-lock
 781   }
 782   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // Examine the object's markword
 783   if (LockingMode != LM_MONITOR) {
 784     testptr(tmpReg, markWord::monitor_value);                         // Inflated?
 785     jcc(Assembler::zero, Stacked);
 786   }
 787 
 788   // It's inflated.
 789 
 790 #if INCLUDE_RTM_OPT
 791   if (use_rtm) {
 792     Label L_regular_inflated_unlock;
 793     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 794     movptr(boxReg, Address(tmpReg, owner_offset));
 795     testptr(boxReg, boxReg);
 796     jccb(Assembler::notZero, L_regular_inflated_unlock);
 797     xend();
 798     jmp(DONE_LABEL);
 799     bind(L_regular_inflated_unlock);
 800   }
 801 #endif
 802 
 803   // Despite our balanced locking property we still check that m->_owner == Self
 804   // as java routines or native JNI code called by this thread might
 805   // have released the lock.
 806   // Refer to the comments in synchronizer.cpp for how we might encode extra
 807   // state in _succ so we can avoid fetching EntryList|cxq.
 808   //

 890 
 891   // box is really RAX -- the following CMPXCHG depends on that binding
 892   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
 893   lock();
 894   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 895   // There's no successor so we tried to regrab the lock.
 896   // If that didn't work, then another thread grabbed the
 897   // lock so we're done (and exit was a success).
 898   jccb  (Assembler::notEqual, LSuccess);
 899   // Intentional fall-through into slow path
 900 
 901   bind  (LGoSlowPath);
 902   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
 903   jmpb  (DONE_LABEL);
 904 
 905   bind  (LSuccess);
 906   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
 907   jmpb  (DONE_LABEL);
 908 
 909 #endif
 910   if (LockingMode == LM_LEGACY) {
 911     bind  (Stacked);
 912     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
 913     lock();
 914     cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
 915     // Intentional fall-thru into DONE_LABEL
 916   }
 917 
 918   bind(DONE_LABEL);
 919 
 920   // ZFlag == 1 count in fast path
 921   // ZFlag == 0 count in slow path
 922   jccb(Assembler::notZero, NO_COUNT);
 923 
 924   bind(COUNT);
 925   // Count monitors in fast path
 926 #ifndef _LP64
 927   get_thread(tmpReg);
 928   decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
 929 #else // _LP64
 930   decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
 931 #endif
 932 
 933   xorl(tmpReg, tmpReg); // Set ZF == 1
 934 
 935   bind(NO_COUNT);
 936 }
 937 
 938 void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register rax_reg,
 939                                               Register t, Register thread) {
 940   assert(LockingMode == LM_LIGHTWEIGHT, "must be");
 941   assert(rax_reg == rax, "Used for CAS");
 942   assert_different_registers(obj, box, rax_reg, t, thread);
 943 
 944   // Handle inflated monitor.
 945   Label inflated;
 946   // Finish fast lock successfully. ZF value is irrelevant.
 947   Label locked;
 948   // Finish fast lock unsuccessfully. MUST jump with ZF == 0
 949   Label slow_path;
 950 
 951   if (DiagnoseSyncOnValueBasedClasses != 0) {
 952     load_klass(rax_reg, obj, t);
 953     movl(rax_reg, Address(rax_reg, Klass::access_flags_offset()));
 954     testl(rax_reg, JVM_ACC_IS_VALUE_BASED_CLASS);
 955     jcc(Assembler::notZero, slow_path);
 956   }
 957 
 958   const Register mark = t;
 959 
 960   { // Lightweight Lock
 961 
 962     Label push;
 963 
 964     const Register top = box;
 965 
 966     // Load the mark.
 967     movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
 968 
 969     // Prefetch top.
 970     movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
 971 
 972     // Check for monitor (0b10).
 973     testptr(mark, markWord::monitor_value);
 974     jcc(Assembler::notZero, inflated);
 975 
 976     // Check if lock-stack is full.
 977     cmpl(top, LockStack::end_offset() - 1);
 978     jcc(Assembler::greater, slow_path);
 979 
 980     // Check if recursive.
 981     cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
 982     jccb(Assembler::equal, push);
 983 
 984     // Try to lock. Transition lock bits 0b01 => 0b00
 985     movptr(rax_reg, mark);
 986     orptr(rax_reg, markWord::unlocked_value);
 987     andptr(mark, ~(int32_t)markWord::unlocked_value);
 988     lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
 989     jcc(Assembler::notEqual, slow_path);
 990 
 991     bind(push);
 992     // After successful lock, push object on lock-stack.
 993     movptr(Address(thread, top), obj);
 994     addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
 995     jmpb(locked);
 996   }
 997 
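// A compact model of the fast path above (illustrative pseudo-code only): the
// lock-stack is a small per-thread array of oops and lock_stack_top_offset()
// is the byte offset of its next free slot.  Locking either re-pushes obj when
// it is already the top entry (recursive case) or CASes the header lock bits
// from 0b01 (unlocked) to 0b00 (fast-locked) and then pushes obj:
//
//   expected = mark |  markWord::unlocked_value;   // 0b01
//   desired  = mark & ~markWord::unlocked_value;   // 0b00
//   if (CAS(obj->mark_addr(), expected, desired)) push(obj);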
 998   { // Handle inflated monitor.
 999     bind(inflated);
1000 
1001     const Register tagged_monitor = mark;
1002 
1003     // CAS owner (null => current thread).
1004     xorptr(rax_reg, rax_reg);
1005     lock(); cmpxchgptr(thread, Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1006     jccb(Assembler::equal, locked);
1007 
1008     // Check if recursive.
1009     cmpptr(thread, rax_reg);
1010     jccb(Assembler::notEqual, slow_path);
1011 
1012     // Recursive.
1013     increment(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1014   }
1015 
1016   bind(locked);
1017   increment(Address(thread, JavaThread::held_monitor_count_offset()));
1018   // Set ZF = 1
1019   xorl(rax_reg, rax_reg);
1020 
1021 #ifdef ASSERT
1022   // Check that locked label is reached with ZF set.
1023   Label zf_correct;
1024   jccb(Assembler::zero, zf_correct);
1025   stop("Fast Lock ZF != 1");
1026 #endif
1027 
1028   bind(slow_path);
1029 #ifdef ASSERT
1030   // Check that slow_path label is reached with ZF not set.
1031   jccb(Assembler::notZero, zf_correct);
1032   stop("Fast Lock ZF != 0");
1033   bind(zf_correct);
1034 #endif
1035   // C2 uses the value of ZF to determine the continuation.
1036 }
1037 
1038 void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread) {
1039   assert(LockingMode == LM_LIGHTWEIGHT, "must be");
1040   assert(reg_rax == rax, "Used for CAS");
1041   assert_different_registers(obj, reg_rax, t);
1042 
1043   // Handle inflated monitor.
1044   Label inflated, inflated_check_lock_stack;
1045   // Finish fast unlock successfully.  MUST jump with ZF == 1
1046   Label unlocked;
1047 
1048   // Assume success.
1049   decrement(Address(thread, JavaThread::held_monitor_count_offset()));
1050 
1051   const Register mark = t;
1052   const Register top = reg_rax;
1053 
1054   Label dummy;
1055   C2FastUnlockLightweightStub* stub = nullptr;
1056 
1057   if (!Compile::current()->output()->in_scratch_emit_size()) {
1058     stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, mark, reg_rax, thread);
1059     Compile::current()->output()->add_stub(stub);
1060   }
1061 
1062   Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
1063   Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
1064 
1065   { // Lightweight Unlock
1066 
1067     // Load top.
1068     movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
1069 
1070     // Prefetch mark.
1071     movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
1072 
1073     // Check if obj is top of lock-stack.
1074     cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
1075     // Top of lock stack was not obj. Must be monitor.
1076     jcc(Assembler::notEqual, inflated_check_lock_stack);
1077 
1078     // Pop lock-stack.
1079     DEBUG_ONLY(movptr(Address(thread, top, Address::times_1, -oopSize), 0);)
1080     subl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
1081 
1082     // Check if recursive.
1083     cmpptr(obj, Address(thread, top, Address::times_1, -2 * oopSize));
1084     jcc(Assembler::equal, unlocked);
1085 
1086     // We elide the monitor check, let the CAS fail instead.
1087 
1088     // Try to unlock. Transition lock bits 0b00 => 0b01
1089     movptr(reg_rax, mark);
1090     andptr(reg_rax, ~(int32_t)markWord::lock_mask);
1091     orptr(mark, markWord::unlocked_value);
1092     lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
1093     jcc(Assembler::notEqual, push_and_slow_path);
1094     jmp(unlocked);
1095   }
1096 
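// The unlock CAS above is the mirror image of the locking CAS (illustrative
// pseudo-code, derived from the instructions above):
//
//   expected = mark & ~markWord::lock_mask;        // 0b00, fast-locked
//   desired  = mark |  markWord::unlocked_value;   // 0b01, unlocked
//   CAS(obj->mark_addr(), expected, desired)
//
// If the header was inflated to a monitor in the meantime (lock bits 0b10) the
// CAS simply fails, which is why the explicit monitor check can be elided; the
// failure branches to the stub's push_and_slow_path continuation which, as the
// name suggests, restores the popped lock-stack entry before the slow path.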
1097 
1098   { // Handle inflated monitor.
1099     bind(inflated_check_lock_stack);
1100 #ifdef ASSERT
1101     Label check_done;
1102     subl(top, oopSize);
1103     cmpl(top, in_bytes(JavaThread::lock_stack_base_offset()));
1104     jcc(Assembler::below, check_done);
1105     cmpptr(obj, Address(thread, top));
1106     jccb(Assembler::notEqual, inflated_check_lock_stack);
1107     stop("Fast Unlock lock on stack");
1108     bind(check_done);
1109     testptr(mark, markWord::monitor_value);
1110     jccb(Assembler::notZero, inflated);
1111     stop("Fast Unlock not monitor");
1112 #endif
1113 
1114     bind(inflated);
1115 
1116     // mark contains the tagged ObjectMonitor*.
1117     const Register monitor = mark;
1118 
1119 #ifndef _LP64
1120     // Check if recursive.
1121     xorptr(reg_rax, reg_rax);
1122     orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1123     jcc(Assembler::notZero, check_successor);
1124 
1125     // Check if the entry lists are empty.
1126     movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1127     orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1128     jcc(Assembler::notZero, check_successor);
1129 
1130     // Release lock.
1131     movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
1132 #else // _LP64
1133     Label recursive;
1134 
1135     // Check if recursive.
1136     cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
1137     jccb(Assembler::notEqual, recursive);
1138 
1139     // Check if the entry lists are empty.
1140     movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1141     orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1142     jcc(Assembler::notZero, check_successor);
1143 
1144     // Release lock.
1145     movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
1146     jmpb(unlocked);
1147 
1148     // Recursive unlock.
1149     bind(recursive);
1150     decrement(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1151     xorl(t, t);
1152 #endif
1153   }
1154 
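// As a C-level sketch (illustrative only), the LP64 inflated path above has
// the usual ObjectMonitor fast-exit shape:
//
//   if (m->_recursions != 0)           { m->_recursions--; /* ZF = 1 */ }
//   else if (m->_cxq == nullptr
//         && m->_EntryList == nullptr) { m->_owner = nullptr; /* ZF = 1 */ }
//   else                               goto check_successor;  // stub / slow path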
1155   bind(unlocked);
1156   if (stub != nullptr) {
1157     bind(stub->unlocked_continuation());
1158   }
1159 
1160 #ifdef ASSERT
1161   // Check that unlocked label is reached with ZF set.
1162   Label zf_correct;
1163   jccb(Assembler::zero, zf_correct);
1164   stop("Fast Unlock ZF != 1");
1165 #endif
1166 
1167   if (stub != nullptr) {
1168     bind(stub->slow_path_continuation());
1169   }
1170 #ifdef ASSERT
1171   // Check that stub->slow_path_continuation() label is reached with ZF not set.
1172   jccb(Assembler::notZero, zf_correct);
1173   stop("Fast Unlock ZF != 0");
1174   bind(zf_correct);
1175 #endif
1176   // C2 uses the value of ZF to determine the continuation.
1177 }
1178 
1179 //-------------------------------------------------------------------------------------------
1180 // Generic instruction support for C2 code generation from .ad files
1181 
1182 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src) {
1183   if (dst != src) {
1184     movdqu(dst, src);
1185   }
1186   if (opcode == Op_AbsVD) {
1187     andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), noreg);
1188   } else {
1189     assert((opcode == Op_NegVD),"opcode should be Op_NegVD");
1190     xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), noreg);
1191   }
1192 }
1193 
1194 void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len) {
1195   if (opcode == Op_AbsVD) {
1196     vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, noreg);
1197   } else {
1198     assert((opcode == Op_NegVD),"opcode should be Op_NegVD");

6384   // Perform above steps with lane comparison expression as INDEX >= 48 && INDEX < 64
6385   // and broadcasting third 128 bit lane.
6386   evpcmpb(ktmp, k0, shuffle,  xtmp1, Assembler::nlt, true, vlen_enc);
6387   vpsllq(xtmp2, xtmp2, 0x1, vlen_enc);
6388   evpcmpb(ktmp, ktmp, shuffle,  xtmp2, Assembler::lt, true, vlen_enc);
6389   evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc);
6390   evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
6391 }
6392 
6393 void C2_MacroAssembler::vector_rearrange_int_float(BasicType bt, XMMRegister dst,
6394                                                    XMMRegister shuffle, XMMRegister src, int vlen_enc) {
6395   if (vlen_enc == AVX_128bit) {
6396     vpermilps(dst, src, shuffle, vlen_enc);
6397   } else if (bt == T_INT) {
6398     vpermd(dst, shuffle, src, vlen_enc);
6399   } else {
6400     assert(bt == T_FLOAT, "");
6401     vpermps(dst, shuffle, src, vlen_enc);
6402   }
6403 }
6404 
6405 #ifdef _LP64
6406 void C2_MacroAssembler::load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp) {
6407   C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
6408   Compile::current()->output()->add_stub(stub);
6409 
6410   // Note: Don't clobber obj anywhere in that method!
6411 
6412   // The incoming address points to obj-start + klass_offset_in_bytes. We need to extract
6413   // obj-start, so that we can load from the object's mark-word instead. Usually the address
6414   // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
6415   // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and
6416   // then passes that register as obj and 0 in disp. The following code extracts the base
6417   // and offset to load the mark-word.
6418   int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
6419   movq(dst, Address(obj, index, scale, offset));
6420   testb(dst, markWord::monitor_value);
6421   jcc(Assembler::notZero, stub->entry());
6422   bind(stub->continuation());
6423   shrq(dst, markWord::klass_shift);
6424 }
6425 #endif
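// A worked example of the offset arithmetic above (illustrative; it relies on
// mark_offset_in_bytes() == 0):
//
//   disp == klass_offset_in_bytes()  ->  offset == 0, i.e. load the mark word
//                                        directly at obj-start
//   disp == 0 (pre-computed address) ->  offset == -klass_offset_in_bytes(),
//                                        stepping back from obj-start +
//                                        klass_offset_in_bytes() to obj-start
//
// Either way the movq reads the mark word, whose upper bits hold the narrow
// klass in this layout (hence the shrq by markWord::klass_shift after the
// monitor check).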