
src/hotspot/cpu/ppc/macroAssembler_ppc.cpp


  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "gc/shared/barrierSet.hpp"
  31 #include "gc/shared/barrierSetAssembler.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "nativeInst_ppc.hpp"
  35 #include "oops/klass.inline.hpp"
  36 #include "oops/methodData.hpp"
  37 #include "prims/methodHandles.hpp"

  38 #include "runtime/icache.hpp"
  39 #include "runtime/interfaceSupport.inline.hpp"
  40 #include "runtime/objectMonitor.hpp"
  41 #include "runtime/os.hpp"
  42 #include "runtime/safepoint.hpp"
  43 #include "runtime/safepointMechanism.hpp"
  44 #include "runtime/sharedRuntime.hpp"
  45 #include "runtime/stubRoutines.hpp"
  46 #include "runtime/vm_version.hpp"
  47 #include "utilities/macros.hpp"
  48 #include "utilities/powerOfTwo.hpp"
  49 
  50 #ifdef PRODUCT
  51 #define BLOCK_COMMENT(str) // nothing
  52 #else
  53 #define BLOCK_COMMENT(str) block_comment(str)
  54 #endif
  55 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  56 
  57 #ifdef ASSERT

2156   }
2157 
2158   ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, Rtoc, false);
2159   mtctr(reg_scratch);
2160   bctr();
2161 
2162   const address stub_start_addr = addr_at(stub_start_offset);
2163 
2164   // Assert that the encoded destination_toc_offset can be identified and that it is correct.
2165   assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
2166          "encoded offset into the constant pool must match");
2167   // Trampoline_stub_size should be good.
2168   assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
2169   assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
2170 
2171   // End the stub.
2172   end_a_stub();
2173   return stub;
2174 }
2175 
2176 // TM on PPC64.
2177 void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
2178   Label retry;
2179   bind(retry);
2180   ldarx(result, addr, /*hint*/ false);
2181   addi(result, result, simm16);
2182   stdcx_(result, addr);
2183   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
2184     bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
2185   } else {
2186     bne(                  CCR0, retry); // stXcx_ sets CCR0
2187   }
2188 }
2189 
2190 void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
2191   Label retry;
2192   bind(retry);
2193   lwarx(result, addr, /*hint*/ false);
2194   ori(result, result, uimm16);
2195   stwcx_(result, addr);
2196   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
2197     bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
2198   } else {
2199     bne(                  CCR0, retry); // stXcx_ sets CCR0
2200   }
2201 }
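Both helpers are load-reserve/store-conditional retry loops: the reservation is taken by ldarx/lwarx, and stdcx_/stwcx_ sets CCR0 NE if the reservation was lost, in which case the loop retries. As a rough, portable illustration of the effect (a sketch, not HotSpot code; names are invented):

#include <atomic>
#include <cstdint>

// Portable illustration of what atomic_inc_ptr achieves: retry until the
// 64-bit word is updated without interference.
uint64_t atomic_add_sketch(std::atomic<uint64_t>& word, int64_t simm16) {
  uint64_t old_val = word.load(std::memory_order_relaxed);          // ldarx
  uint64_t new_val;
  do {
    new_val = old_val + (uint64_t)simm16;                           // addi
  } while (!word.compare_exchange_weak(old_val, new_val,
                                       std::memory_order_relaxed)); // stdcx_ + bne(CCR0, retry)
  return new_val;
}

// Portable illustration of atomic_ori_int on a 32-bit word.
uint32_t atomic_or_sketch(std::atomic<uint32_t>& word, uint32_t uimm16) {
  return word.fetch_or(uimm16, std::memory_order_relaxed) | uimm16; // lwarx / ori / stwcx_ loop
}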
2202 
2203 #if INCLUDE_RTM_OPT
2204 
2205 // Update rtm_counters based on abort status
2206 // input: abort_status
2207 //        rtm_counters_Reg (RTMLockingCounters*)
2208 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
2209   // Mapping to keep PreciseRTMLockingStatistics similar to x86.
2210   // x86 ppc (! means inverted, ? means not the same)
2211   //  0   31  Set if abort caused by XABORT instruction.
2212   //  1  ! 7  If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
2213   //  2   13  Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
2214   //  3   10  Set if an internal buffer overflowed.
2215   //  4  ?12  Set if a debug breakpoint was hit.
2216   //  5  ?32  Set if an abort occurred during execution of a nested transaction.
2217   const int failure_bit[] = {tm_tabort, // Signal handler will set this too.
2218                              tm_failure_persistent,
2219                              tm_non_trans_cf,
2220                              tm_trans_cf,
2221                              tm_footprint_of,
2222                              tm_failure_code,
2223                              tm_transaction_level};
2224 
2225   const int num_failure_bits = sizeof(failure_bit) / sizeof(int);
2226   const int num_counters = RTMLockingCounters::ABORT_STATUS_LIMIT;
2227 
2228   const int bit2counter_map[][num_counters] =
2229   // 0 = no map; 1 = mapped, no inverted logic; -1 = mapped, inverted logic
2230   // Inverted logic means that if a bit is set don't count it, or vice-versa.
2231   // Care must be taken when mapping bits to counters as bits for a given
2232   // counter must be mutually exclusive. Otherwise, the counter will be
2233   // incremented more than once.
2234   // counters:
2235   // 0        1        2         3         4         5
2236   // abort  , persist, conflict, overflow, debug   , nested         bits:
2237   {{ 1      , 0      , 0       , 0       , 0       , 0      },   // abort
2238    { 0      , -1     , 0       , 0       , 0       , 0      },   // failure_persistent
2239    { 0      , 0      , 1       , 0       , 0       , 0      },   // non_trans_cf
2240    { 0      , 0      , 1       , 0       , 0       , 0      },   // trans_cf
2241    { 0      , 0      , 0       , 1       , 0       , 0      },   // footprint_of
2242    { 0      , 0      , 0       , 0       , -1      , 0      },   // failure_code = 0xD4
2243    { 0      , 0      , 0       , 0       , 0       , 1      }};  // transaction_level > 1
2244   // ...
2245 
2246   // Move abort_status value to R0 and use abort_status register as a
2247   // temporary register because R0 as third operand in ld/std is treated
2248   // as base address zero (i.e. the literal value 0, not the register content).
2249   // Likewise, R0 as second operand in addi is problematic because it amounts to li.
2250   const Register temp_Reg = abort_status;
2251   const Register abort_status_R0 = R0;
2252   mr(abort_status_R0, abort_status);
2253 
2254   // Increment total abort counter.
2255   int counters_offs = RTMLockingCounters::abort_count_offset();
2256   ld(temp_Reg, counters_offs, rtm_counters_Reg);
2257   addi(temp_Reg, temp_Reg, 1);
2258   std(temp_Reg, counters_offs, rtm_counters_Reg);
2259 
2260   // Increment specific abort counters.
2261   if (PrintPreciseRTMLockingStatistics) {
2262 
2263     // #0 counter offset.
2264     int abortX_offs = RTMLockingCounters::abortX_count_offset();
2265 
2266     for (int nbit = 0; nbit < num_failure_bits; nbit++) {
2267       for (int ncounter = 0; ncounter < num_counters; ncounter++) {
2268         if (bit2counter_map[nbit][ncounter] != 0) {
2269           Label check_abort;
2270           int abort_counter_offs = abortX_offs + (ncounter << 3);
2271 
2272           if (failure_bit[nbit] == tm_transaction_level) {
2273             // Don't check outer transaction, TL = 1 (bit 63). Hence only
2274             // 11 bits in the TL field are checked to find out if failure
2275             // occurred in a nested transaction. This check also matches
2276             // the case when nesting_of = 1 (nesting overflow).
2277             rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 10);
2278           } else if (failure_bit[nbit] == tm_failure_code) {
2279             // Check failure code for trap or illegal caught in TM.
2280             // Bits 0:7 are tested as bit 7 (persistent) is copied from
2281             // tabort or treclaim source operand.
2282             // On Linux: trap or illegal is TM_CAUSE_SIGNAL (0xD4).
2283             rldicl(temp_Reg, abort_status_R0, 8, 56);
2284             cmpdi(CCR0, temp_Reg, 0xD4);
2285           } else {
2286             rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 0);
2287           }
2288 
2289           if (bit2counter_map[nbit][ncounter] == 1) {
2290             beq(CCR0, check_abort);
2291           } else {
2292             bne(CCR0, check_abort);
2293           }
2294 
2295           // We don't increment atomically.
2296           ld(temp_Reg, abort_counter_offs, rtm_counters_Reg);
2297           addi(temp_Reg, temp_Reg, 1);
2298           std(temp_Reg, abort_counter_offs, rtm_counters_Reg);
2299 
2300           bind(check_abort);
2301         }
2302       }
2303     }
2304   }
2305   // Restore abort_status.
2306   mr(abort_status, abort_status_R0);
2307 }
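At code-generation time the nested loops above unroll into one test-and-increment sequence per non-zero map entry. The following standalone sketch shows the same mapping logic in plain C++; it deliberately simplifies the bit test and omits the special rldicl/cmpdi handling of tm_failure_code and the nesting-depth test for tm_transaction_level:

#include <cstdint>
#include <cstddef>

// Plain-C++ shape of the unrolled mapping loop: for every non-zero entry in
// bit2counter_map, test the corresponding failure bit and bump the counter.
// -1 entries use inverted logic (count when the bit is NOT set). The real
// code uses rldicr_/rldicl with IBM bit numbering; that detail is elided.
void update_rtm_counters_sketch(uint64_t abort_status,
                                const int* failure_bit, size_t num_failure_bits,
                                const signed char* bit2counter_map, size_t num_counters,
                                uint64_t* counters) {
  for (size_t nbit = 0; nbit < num_failure_bits; nbit++) {
    bool bit_set = ((abort_status >> failure_bit[nbit]) & 1) != 0;  // simplified test
    for (size_t ncounter = 0; ncounter < num_counters; ncounter++) {
      signed char m = bit2counter_map[nbit * num_counters + ncounter];
      if ((m == 1 && bit_set) || (m == -1 && !bit_set)) {
        counters[ncounter]++;   // like the generated ld/addi/std, not atomic
      }
    }
  }
}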
2308 
2309 // Branch if (random & (count-1) != 0), count is 2^n
2310 // tmp and CR0 are killed
2311 void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
2312   mftb(tmp);
2313   andi_(tmp, tmp, count-1);
2314   bne(CCR0, brLabel);
2315 }
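Because the count passed in is a power of two (as the comment above requires), the andi_ is a cheap sampling test on the time base register; callers skip their counter update unless the low log2(count) bits happen to be zero. A one-line sketch of that test:

#include <cstdint>

// The low bits of the time base are zero for roughly one out of every
// 'count' values, so the counter update only runs on that fraction of calls.
bool should_increment_sketch(uint64_t time_base, int count) {
  return (time_base & (uint64_t)(count - 1)) == 0;  // otherwise branch to brLabel
}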
2316 
2317 // Perform abort ratio calculation, set no_rtm bit if high ratio.
2318 // input:  rtm_counters_Reg (RTMLockingCounters* address) - KILLED
2319 void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
2320                                                  RTMLockingCounters* rtm_counters,
2321                                                  Metadata* method_data) {
2322   Label L_done, L_check_always_rtm1, L_check_always_rtm2;
2323 
2324   if (RTMLockingCalculationDelay > 0) {
2325     // Delay calculation.
2326     ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
2327     cmpdi(CCR0, rtm_counters_Reg, 0);
2328     beq(CCR0, L_done);
2329     load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
2330   }
2331   // Abort ratio calculation only if abort_count > RTMAbortThreshold.
2332   //   Aborted transactions = abort_count * 100
2333   //   All transactions = total_count *  RTMTotalCountIncrRate
2334   //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
2335   ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
2336   if (is_simm(RTMAbortThreshold, 16)) {   // cmpdi can handle 16bit immediate only.
2337     cmpdi(CCR0, R0, RTMAbortThreshold);
2338     blt(CCR0, L_check_always_rtm2);  // reload of rtm_counters_Reg not necessary
2339   } else {
2340     load_const_optimized(rtm_counters_Reg, RTMAbortThreshold);
2341     cmpd(CCR0, R0, rtm_counters_Reg);
2342     blt(CCR0, L_check_always_rtm1);  // reload of rtm_counters_Reg required
2343   }
2344   mulli(R0, R0, 100);
2345 
2346   const Register tmpReg = rtm_counters_Reg;
2347   ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
2348   mulli(tmpReg, tmpReg, RTMTotalCountIncrRate); // allowable range: int16
2349   mulli(tmpReg, tmpReg, RTMAbortRatio);         // allowable range: int16
2350   cmpd(CCR0, R0, tmpReg);
2351   blt(CCR0, L_check_always_rtm1); // jump to reload
2352   if (method_data != nullptr) {
2353     // Set rtm_state to "no rtm" in MDO.
2354     // Not using a metadata relocation. Method and Class Loader are kept alive anyway.
2355     // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
2356     load_const(R0, (address)method_data + in_bytes(MethodData::rtm_state_offset()), tmpReg);
2357     atomic_ori_int(R0, tmpReg, NoRTM);
2358   }
2359   b(L_done);
2360 
2361   bind(L_check_always_rtm1);
2362   load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
2363   bind(L_check_always_rtm2);
2364   ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
2365   int64_t thresholdValue = RTMLockingThreshold / RTMTotalCountIncrRate;
2366   if (is_simm(thresholdValue, 16)) {   // cmpdi can handle 16bit immediate only.
2367     cmpdi(CCR0, tmpReg, thresholdValue);
2368   } else {
2369     load_const_optimized(R0, thresholdValue);
2370     cmpd(CCR0, tmpReg, R0);
2371   }
2372   blt(CCR0, L_done);
2373   if (method_data != nullptr) {
2374     // Set rtm_state to "always rtm" in MDO.
2375     // Not using a metadata relocation. See above.
2376     load_const(R0, (address)method_data + in_bytes(MethodData::rtm_state_offset()), tmpReg);
2377     atomic_ori_int(R0, tmpReg, UseRTM);
2378   }
2379   bind(L_done);
2380 }
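A worked instance of the comparison documented at the top of this function, using hypothetical counter and flag values (a sketch of the arithmetic only, not of the generated code):

#include <cstdint>
#include <iostream>

// RTMAbortRatio is a percentage, which is why the abort count is scaled by
// 100 before the compare. All numbers here are hypothetical.
int main() {
  const int64_t RTMTotalCountIncrRate = 64;   // hypothetical sampling rate
  const int64_t RTMAbortRatio         = 50;   // hypothetical percent threshold
  int64_t abort_count = 4000;                 // every abort is counted
  int64_t total_count = 100;                  // only every 64th transaction is counted

  int64_t aborted = abort_count * 100;                    // 400000
  int64_t all     = total_count * RTMTotalCountIncrRate;  // ~6400 transactions
  bool no_rtm     = (aborted >= all * RTMAbortRatio);     // 400000 >= 320000 -> true
  std::cout << (no_rtm ? "set NoRTM in the MDO\n" : "keep using RTM\n");
  return 0;
}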
2381 
2382 // Update counters and perform abort ratio calculation.
2383 // input: abort_status_Reg
2384 void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
2385                                    RTMLockingCounters* rtm_counters,
2386                                    Metadata* method_data,
2387                                    bool profile_rtm) {
2388 
2389   assert(rtm_counters != nullptr, "should not be null when profiling RTM");
2390   // Update rtm counters based on state at abort.
2391   // Reads abort_status_Reg, updates flags.
2392   assert_different_registers(abort_status_Reg, temp_Reg);
2393   load_const_optimized(temp_Reg, (address)rtm_counters, R0);
2394   rtm_counters_update(abort_status_Reg, temp_Reg);
2395   if (profile_rtm) {
2396     assert(rtm_counters != nullptr, "should not be null when profiling RTM");
2397     rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
2398   }
2399 }
2400 
2401 // Retry on abort if abort's status indicates non-persistent failure.
2402 // inputs: retry_count_Reg
2403 //       : abort_status_Reg
2404 // output: retry_count_Reg decremented by 1
2405 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
2406                                              Label& retryLabel, Label* checkRetry) {
2407   Label doneRetry;
2408 
2409   // Don't retry if failure is persistent.
2410   // The persistent bit is set when a (A) Disallowed operation is performed in
2411   // transactional state, like for instance trying to write the TFHAR after a
2412   // transaction is started; or when there is (B) a Nesting Overflow (too many
2413   // nested transactions); or when (C) the Footprint overflows (too many
2414   // addresses touched in TM state so there is no more space in the footprint
2415   // area to track them); or in case of (D) a Self-Induced Conflict, i.e. a
2416   // store is performed to a given address in TM state, then once in suspended
2417   // state the same address is accessed. Failure (A) is very unlikely to occur
2418   // in the JVM. Failure (D) will never occur because Suspended state is never
2419   // used in the JVM. Thus mostly (B) a Nesting Overflow or (C) a Footprint
2420   // Overflow will set the persistent bit.
2421   rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
2422   bne(CCR0, doneRetry);
2423 
2424   // Don't retry if transaction was deliberately aborted, i.e. caused by a
2425   // tabort instruction.
2426   rldicr_(R0, abort_status_Reg, tm_tabort, 0);
2427   bne(CCR0, doneRetry);
2428 
2429   // Retry if transaction aborted due to a conflict with another thread.
2430   if (checkRetry) { bind(*checkRetry); }
2431   addic_(retry_count_Reg, retry_count_Reg, -1);
2432   blt(CCR0, doneRetry);
2433   b(retryLabel);
2434   bind(doneRetry);
2435 }
2436 
2437 // Spin and retry if lock is busy.
2438 // inputs: owner_addr_Reg (monitor address)
2439 //       : retry_count_Reg
2440 // output: retry_count_Reg decremented by 1
2441 // CTR is killed
2442 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
2443   Label SpinLoop, doneRetry, doRetry;
2444   addic_(retry_count_Reg, retry_count_Reg, -1);
2445   blt(CCR0, doneRetry);
2446 
2447   if (RTMSpinLoopCount > 1) {
2448     li(R0, RTMSpinLoopCount);
2449     mtctr(R0);
2450   }
2451 
2452   // low thread priority
2453   smt_prio_low();
2454   bind(SpinLoop);
2455 
2456   if (RTMSpinLoopCount > 1) {
2457     bdz(doRetry);
2458     ld(R0, 0, owner_addr_Reg);
2459     cmpdi(CCR0, R0, 0);
2460     bne(CCR0, SpinLoop);
2461   }
2462 
2463   bind(doRetry);
2464 
2465   // restore thread priority to default in userspace
2466 #ifdef LINUX
2467   smt_prio_medium_low();
2468 #else
2469   smt_prio_medium();
2470 #endif
2471 
2472   b(retryLabel);
2473 
2474   bind(doneRetry);
2475 }
2476 
2477 // Use RTM for normal stack locks.
2478 // Input: objReg (object to lock)
2479 void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
2480                                        Register obj, Register mark_word, Register tmp,
2481                                        Register retry_on_abort_count_Reg,
2482                                        RTMLockingCounters* stack_rtm_counters,
2483                                        Metadata* method_data, bool profile_rtm,
2484                                        Label& DONE_LABEL, Label& IsInflated) {
2485   assert(UseRTMForStackLocks, "why call this otherwise?");
2486   Label L_rtm_retry, L_decrement_retry, L_on_abort;
2487 
2488   if (RTMRetryCount > 0) {
2489     load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
2490     bind(L_rtm_retry);
2491   }
2492   andi_(R0, mark_word, markWord::monitor_value);  // inflated vs stack-locked|neutral
2493   bne(CCR0, IsInflated);
2494 
2495   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2496     Label L_noincrement;
2497     if (RTMTotalCountIncrRate > 1) {
2498       branch_on_random_using_tb(tmp, RTMTotalCountIncrRate, L_noincrement);
2499     }
2500     assert(stack_rtm_counters != nullptr, "should not be null when profiling RTM");
2501     load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
2502     //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
2503     ldx(mark_word, tmp);
2504     addi(mark_word, mark_word, 1);
2505     stdx(mark_word, tmp);
2506     bind(L_noincrement);
2507   }
2508   tbegin_();
2509   beq(CCR0, L_on_abort);
2510   ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);   // Reload in transaction, conflicts need to be tracked.
2511   andi(R0, mark_word, markWord::lock_mask_in_place);     // look at 2 lock bits
2512   cmpwi(flag, R0, markWord::unlocked_value);             // bits = 01 unlocked
2513   beq(flag, DONE_LABEL);                                 // all done if unlocked
2514 
2515   if (UseRTMXendForLockBusy) {
2516     tend_();
2517     b(L_decrement_retry);
2518   } else {
2519     tabort_();
2520   }
2521   bind(L_on_abort);
2522   const Register abort_status_Reg = tmp;
2523   mftexasr(abort_status_Reg);
2524   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2525     rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
2526   }
2527   ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
2528   if (RTMRetryCount > 0) {
2529     // Retry on lock abort if abort status is not permanent.
2530     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
2531   } else {
2532     bind(L_decrement_retry);
2533   }
2534 }
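The fast path above keys off the two low bits of the mark word (andi_/cmpwi). A standalone sketch of those two tests with the standard HotSpot bit values:

#include <cstdint>

// Standard HotSpot encoding of the low mark word bits: 0b01 = unlocked
// (neutral), 0b00 = stack-locked, 0b10 = inflated to an ObjectMonitor
// (the monitor pointer with the tag bit set).
constexpr uintptr_t lock_mask_in_place = 0x3;
constexpr uintptr_t unlocked_value     = 0x1;
constexpr uintptr_t monitor_value      = 0x2;

bool is_inflated_sketch(uintptr_t mark) {   // andi_(R0, mark_word, monitor_value)
  return (mark & monitor_value) != 0;
}
bool is_neutral_sketch(uintptr_t mark) {    // andi/cmpwi against unlocked_value
  return (mark & lock_mask_in_place) == unlocked_value;
}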
2535 
2536 // Use RTM for inflating locks
2537 // inputs: obj       (object to lock)
2538 //         mark_word (current header - KILLED)
2539 //         boxReg    (on-stack box address (displaced header location) - KILLED)
2540 void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
2541                                           Register obj, Register mark_word, Register boxReg,
2542                                           Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
2543                                           RTMLockingCounters* rtm_counters,
2544                                           Metadata* method_data, bool profile_rtm,
2545                                           Label& DONE_LABEL) {
2546   assert(UseRTMLocking, "why call this otherwise?");
2547   Label L_rtm_retry, L_decrement_retry, L_on_abort;
2548   // Clean monitor_value bit to get valid pointer.
2549   int owner_offset = in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value;
2550 
2551   // Store non-null, using boxReg instead of (intptr_t)markWord::unused_mark().
2552   std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
2553   const Register tmpReg = boxReg;
2554   const Register owner_addr_Reg = mark_word;
2555   addi(owner_addr_Reg, mark_word, owner_offset);
2556 
2557   if (RTMRetryCount > 0) {
2558     load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy.
2559     load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
2560     bind(L_rtm_retry);
2561   }
2562   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2563     Label L_noincrement;
2564     if (RTMTotalCountIncrRate > 1) {
2565       branch_on_random_using_tb(R0, RTMTotalCountIncrRate, L_noincrement);
2566     }
2567     assert(rtm_counters != nullptr, "should not be null when profiling RTM");
2568     load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
2569     //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
2570     ldx(tmpReg, R0);
2571     addi(tmpReg, tmpReg, 1);
2572     stdx(tmpReg, R0);
2573     bind(L_noincrement);
2574   }
2575   tbegin_();
2576   beq(CCR0, L_on_abort);
2577   // We don't reload mark word. Will only be reset at safepoint.
2578   ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
2579   cmpdi(flag, R0, 0);
2580   beq(flag, DONE_LABEL);
2581 
2582   if (UseRTMXendForLockBusy) {
2583     tend_();
2584     b(L_decrement_retry);
2585   } else {
2586     tabort_();
2587   }
2588   bind(L_on_abort);
2589   const Register abort_status_Reg = tmpReg;
2590   mftexasr(abort_status_Reg);
2591   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2592     rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
2593     // Restore owner_addr_Reg
2594     ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
2595 #ifdef ASSERT
2596     andi_(R0, mark_word, markWord::monitor_value);
2597     asm_assert_ne("must be inflated"); // Deflating only allowed at safepoint.
2598 #endif
2599     addi(owner_addr_Reg, mark_word, owner_offset);
2600   }
2601   if (RTMRetryCount > 0) {
2602     // Retry on lock abort if abort status is not permanent.
2603     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
2604   }
2605 
2606   // Appears unlocked - try to swing _owner from null to non-null.
2607   cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
2608            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2609            MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
2610 
2611   if (RTMRetryCount > 0) {
2612     // success done else retry
2613     b(DONE_LABEL);
2614     bind(L_decrement_retry);
2615     // Spin and retry if lock is busy.
2616     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
2617   } else {
2618     bind(L_decrement_retry);
2619   }
2620 }
2621 
2622 #endif //  INCLUDE_RTM_OPT
2623 
2624 // "The box" is the space on the stack where we copy the object mark.
2625 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
2626                                                Register temp, Register displaced_header, Register current_header,
2627                                                RTMLockingCounters* rtm_counters,
2628                                                RTMLockingCounters* stack_rtm_counters,
2629                                                Metadata* method_data,
2630                                                bool use_rtm, bool profile_rtm) {
2631   assert_different_registers(oop, box, temp, displaced_header, current_header);
2632   assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
2633   Label object_has_monitor;
2634   Label cas_failed;
2635   Label success, failure;
2636 
2637   // Load markWord from object into displaced_header.
2638   ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
2639 
2640   if (DiagnoseSyncOnValueBasedClasses != 0) {
2641     load_klass(temp, oop);
2642     lwz(temp, in_bytes(Klass::access_flags_offset()), temp);
2643     testbitdi(flag, R0, temp, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
2644     bne(flag, failure);
2645   }
2646 
2647 #if INCLUDE_RTM_OPT
2648   if (UseRTMForStackLocks && use_rtm) {
2649     rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
2650                       stack_rtm_counters, method_data, profile_rtm,
2651                       success, object_has_monitor);
2652   }
2653 #endif // INCLUDE_RTM_OPT
2654 
2655   // Handle existing monitor.
2656   // The object has an existing monitor iff (mark & monitor_value) != 0.
2657   andi_(temp, displaced_header, markWord::monitor_value);
2658   bne(CCR0, object_has_monitor);
2659 
2660   if (LockingMode == LM_MONITOR) {
2661     // Set NE to indicate 'failure' -> take slow-path.
2662     crandc(flag, Assembler::equal, flag, Assembler::equal);
2663     b(failure);
2664   } else if (LockingMode == LM_LEGACY) {

2665     // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
2666     ori(displaced_header, displaced_header, markWord::unlocked_value);
2667 
2668     // Load Compare Value application register.
2669 
2670     // Initialize the box. (Must happen before we update the object mark!)
2671     std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2672 
2673     // Must fence, otherwise, preceding store(s) may float below cmpxchg.
2674     // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
2675     cmpxchgd(/*flag=*/flag,
2676              /*current_value=*/current_header,
2677              /*compare_value=*/displaced_header,
2678              /*exchange_value=*/box,
2679              /*where=*/oop,
2680              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2681              MacroAssembler::cmpxchgx_hint_acquire_lock(),
2682              noreg,
2683              &cas_failed,
2684              /*check without membar and ldarx first*/true);

2688     b(success);
2689 
2690     bind(cas_failed);
2691     // We did not see an unlocked object so try the fast recursive case.
2692 
2693     // Check if the owner is self by comparing the value in the markWord of object
2694     // (current_header) with the stack pointer.
2695     sub(current_header, current_header, R1_SP);
2696     load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
2697 
2698     and_(R0/*==0?*/, current_header, temp);
2699     // If the condition is true the mark points into our own stack, hence we can store 0 as the
2700     // displaced header in the box, which indicates that it is a recursive lock.
2701     std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
2702 
2703     if (flag != CCR0) {
2704       mcrf(flag, CCR0);
2705     }
2706     beq(CCR0, success);
2707     b(failure);
2708   } else {
2709     assert(LockingMode == LM_LIGHTWEIGHT, "must be");
2710     lightweight_lock(oop, displaced_header, temp, failure);
2711     b(success);
2712   }
2713 
2714   // Handle existing monitor.
2715   bind(object_has_monitor);
2716   // The object's monitor m is unlocked iff m->owner is null,
2717   // otherwise m->owner may contain a thread or a stack address.
2718 
2719 #if INCLUDE_RTM_OPT
2720   // Use the same RTM locking code in 32- and 64-bit VM.
2721   if (use_rtm) {
2722     rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
2723                          rtm_counters, method_data, profile_rtm, success);
2724     bne(flag, failure);
2725   } else {
2726 #endif // INCLUDE_RTM_OPT
2727 
2728   // Try to CAS m->owner from null to current thread.
2729   addi(temp, displaced_header, in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value);
2730   cmpxchgd(/*flag=*/flag,
2731            /*current_value=*/current_header,
2732            /*compare_value=*/(intptr_t)0,
2733            /*exchange_value=*/R16_thread,
2734            /*where=*/temp,
2735            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2736            MacroAssembler::cmpxchgx_hint_acquire_lock());
2737 
2738   if (LockingMode != LM_LIGHTWEIGHT) {
2739     // Store a non-null value into the box.
2740     std(box, BasicLock::displaced_header_offset_in_bytes(), box);
2741   }
2742   beq(flag, success);
2743 
2744   // Check for recursive locking.
2745   cmpd(flag, current_header, R16_thread);
2746   bne(flag, failure);
2747 
2748   // Current thread already owns the lock. Just increment recursions.
2749   Register recursions = displaced_header;
2750   ld(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
2751   addi(recursions, recursions, 1);
2752   std(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
2753 
2754 #if INCLUDE_RTM_OPT
2755   } // use_rtm()
2756 #endif
2757 
2758   // flag == EQ indicates success, increment held monitor count
2759   // flag == NE indicates failure
2760   bind(success);
2761   inc_held_monitor_count(temp);
2762   bind(failure);
2763 }
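The recursive-lock test in the LM_LEGACY branch above can be hard to read in assembly form. A minimal standalone sketch of the same computation, with hypothetical addresses:

#include <cstdint>

// After the failed CAS, current_header holds the object's mark word, which
// for a stack-locked object is the address of the owning frame's BasicLock.
// If (mark - SP) is smaller than one page and the low lock bits are clear,
// the mark points into the current thread's own stack, i.e. the lock is
// already held by this thread (recursive).
bool is_recursive_stack_lock_sketch(uintptr_t mark, uintptr_t sp, uintptr_t page_size) {
  const uintptr_t lock_mask_in_place = 0x3;
  return ((mark - sp) & (~(page_size - 1) | lock_mask_in_place)) == 0;
}

int main() {
  uintptr_t sp   = 0x00007fff00010000;  // hypothetical stack pointer
  uintptr_t mark = 0x00007fff000100a8;  // hypothetical BasicLock address on the same page
  return is_recursive_stack_lock_sketch(mark, sp, 4096) ? 0 : 1;  // recursive -> returns 0
}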
2764 
2765 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
2766                                                  Register temp, Register displaced_header, Register current_header,
2767                                                  bool use_rtm) {
2768   assert_different_registers(oop, box, temp, displaced_header, current_header);
2769   assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
2770   Label success, failure, object_has_monitor, notRecursive;
2771 
2772 #if INCLUDE_RTM_OPT
2773   if (UseRTMForStackLocks && use_rtm) {
2774     Label L_regular_unlock;
2775     ld(current_header, oopDesc::mark_offset_in_bytes(), oop);   // fetch markword
2776     andi(R0, current_header, markWord::lock_mask_in_place);     // look at 2 lock bits
2777     cmpwi(flag, R0, markWord::unlocked_value);                  // bits = 01 unlocked
2778     bne(flag, L_regular_unlock);                                // else RegularLock
2779     tend_();                                                    // otherwise end...
2780     b(success);                                                 // ... and we're done
2781     bind(L_regular_unlock);
2782   }
2783 #endif
2784 
2785   if (LockingMode == LM_LEGACY) {
2786     // Find the lock address and load the displaced header from the stack.
2787     ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2788 
2789     // If the displaced header is 0, we have a recursive unlock.
2790     cmpdi(flag, displaced_header, 0);
2791     beq(flag, success);
2792   }
2793 
2794   // Handle existing monitor.
2795   // The object has an existing monitor iff (mark & monitor_value) != 0.
2796   RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
2797   ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
2798   andi_(R0, current_header, markWord::monitor_value);
2799   bne(CCR0, object_has_monitor);
2800 
2801   if (LockingMode == LM_MONITOR) {
2802     // Set NE to indicate 'failure' -> take slow-path.
2803     crandc(flag, Assembler::equal, flag, Assembler::equal);
2804     b(failure);
2805   } else if (LockingMode == LM_LEGACY) {

2806     // Check if it is still a lightweight lock, this is true if we see
2807     // the stack address of the basicLock in the markWord of the object.
2808     // Cmpxchg sets flag to cmpd(current_header, box).
2809     cmpxchgd(/*flag=*/flag,
2810              /*current_value=*/current_header,
2811              /*compare_value=*/box,
2812              /*exchange_value=*/displaced_header,
2813              /*where=*/oop,
2814              MacroAssembler::MemBarRel,
2815              MacroAssembler::cmpxchgx_hint_release_lock(),
2816              noreg,
2817              &failure);
2818     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
2819     b(success);
2820   } else {
2821     assert(LockingMode == LM_LIGHTWEIGHT, "must be");
2822     lightweight_unlock(oop, current_header, failure);
2823     b(success);
2824   }
2825 
2826   // Handle existing monitor.
2827   bind(object_has_monitor);
2828   STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
2829   addi(current_header, current_header, -(int)markWord::monitor_value); // monitor
2830   ld(temp,             in_bytes(ObjectMonitor::owner_offset()), current_header);
2831 
2832   // It's inflated.
2833 #if INCLUDE_RTM_OPT
2834   if (use_rtm) {
2835     Label L_regular_inflated_unlock;
2836     // Clean monitor_value bit to get valid pointer
2837     cmpdi(flag, temp, 0);
2838     bne(flag, L_regular_inflated_unlock);
2839     tend_();
2840     b(success);
2841     bind(L_regular_inflated_unlock);
2842   }
2843 #endif
2844 
2845   // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0.
2846   // This is handled like owner thread mismatches: We take the slow path.
2847   cmpd(flag, temp, R16_thread);
2848   bne(flag, failure);
2849 
2850   ld(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
2851 
2852   addic_(displaced_header, displaced_header, -1);
2853   blt(CCR0, notRecursive); // Not recursive if negative after decrement.
2854   std(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
2855   if (flag == CCR0) { // Otherwise, flag is already EQ, here.
2856     crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set CCR0 EQ
2857   }
2858   b(success);
2859 
2860   bind(notRecursive);
2861   ld(temp,             in_bytes(ObjectMonitor::EntryList_offset()), current_header);
2862   ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header);
2863   orr(temp, temp, displaced_header); // Will be 0 if both are 0.
2864   cmpdi(flag, temp, 0);
2865   bne(flag, failure);
2866   release();
2867   std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
2868 
2869   // flag == EQ indicates success, decrement held monitor count
2870   // flag == NE indicates failure
2871   bind(success);
2872   dec_held_monitor_count(temp);
2873   bind(failure);
2874 }
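The notRecursive path may only clear the owner on the fast path when both monitor queues are empty, otherwise a queued thread could be stranded. A minimal sketch of that exit condition (the release() barrier is intentionally not modeled):

#include <cstdint>

// Shape of the fast-exit condition: release the owner only when no thread
// is queued on the monitor (EntryList and cxq both null); otherwise take the
// slow path so a successor can be woken.
struct MonitorSketch {
  uintptr_t EntryList;
  uintptr_t cxq;
  uintptr_t owner;
};

bool try_fast_exit_sketch(MonitorSketch& m) {
  if ((m.EntryList | m.cxq) != 0) {  // orr + cmpdi -> bne(flag, failure)
    return false;
  }
  m.owner = 0;                       // release(); std(temp /*== 0*/, owner_offset)
  return true;
}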
2875 
2876 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod) {
2877   ld(temp, in_bytes(JavaThread::polling_word_offset()), R16_thread);
2878 
2879   if (at_return) {
2880     if (in_nmethod) {
2881       if (UseSIGTRAP) {
2882         // Use Signal Handler.
2883         relocate(relocInfo::poll_return_type);
2884         td(traptoGreaterThanUnsigned, R1_SP, temp);
2885       } else {
2886         cmpld(CCR0, R1_SP, temp);
2887         // Stub may be out of range for short conditional branch.
2888         bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_path);
2889       }
2890     } else { // Not in nmethod.
2891       // Frame still on stack, need to get fp.
2892       Register fp = R0;
2893       ld(fp, _abi0(callers_sp), R1_SP);
2894       cmpld(CCR0, fp, temp);
2895       bgt(CCR0, slow_path);

4475     xori(tmp, tmp, markWord::unlocked_value); // flip unlocked bit
4476     andi_(R0, tmp, markWord::lock_mask_in_place);
4477     bne(CCR0, failed); // failed if new header doesn't contain locked_value (which is 0)
4478   } else {
4479     ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_release_lock());
4480     andi_(R0, tmp, markWord::lock_mask_in_place);
4481     bne(CCR0, failed); // failed if old header doesn't contain locked_value (which is 0)
4482     ori(tmp, tmp, markWord::unlocked_value); // set unlocked bit
4483   }
4484   stdcx_(tmp, obj);
4485   bne(CCR0, retry);
4486 
4487   if (semantics & MemBarFenceAfter) {
4488     fence();
4489   } else if (semantics & MemBarAcq) {
4490     isync();
4491   }
4492 }
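The lines above are the tail of atomically_flip_locked_state, which toggles markWord::unlocked_value under the ldarx/stdcx_ reservation (see the calls from lightweight_lock and lightweight_unlock below). The essence of the bit manipulation, as a standalone sketch:

#include <cstdint>

// Locking clears markWord::unlocked_value (0b01 -> 0b00) and fails if the
// header was not neutral; unlocking requires the header to be fast-locked
// (low bits 0b00) and sets the bit again. Sketch only; the reservation and
// the MemBar handling are not modeled.
constexpr uintptr_t unlocked_value     = 0x1;
constexpr uintptr_t lock_mask_in_place = 0x3;

uintptr_t flip_locked_state_sketch(uintptr_t mark, bool is_unlock, bool& failed) {
  if (!is_unlock) {
    uintptr_t new_mark = mark ^ unlocked_value;        // xori: 0b01 -> 0b00
    failed = (new_mark & lock_mask_in_place) != 0;     // header was not neutral
    return new_mark;
  }
  failed = (mark & lock_mask_in_place) != 0;           // header was not fast-locked
  return mark | unlocked_value;                        // ori: 0b00 -> 0b01
}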
4493 
4494 // Implements lightweight-locking.
4495 // Branches to slow upon failure to lock the object, with CCR0 NE.
4496 // Falls through upon success with CCR0 EQ.
4497 //
4498 //  - obj: the object to be locked
4499 //  - hdr: the header, already loaded from obj, will be destroyed
4500 //  - t1: temporary register
4501 void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register t1, Label& slow) {
4502   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4503   assert_different_registers(obj, hdr, t1);













4504 
4505   // Check if we would have space on lock-stack for the object.
4506   lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4507   cmplwi(CCR0, t1, LockStack::end_offset() - 1);
4508   bgt(CCR0, slow);

4509 
4510   // Quick check: Do not reserve cache line for atomic update if not unlocked.
4511   // (Similar to contention_hint in cmpxchg solutions.)
4512   xori(R0, hdr, markWord::unlocked_value); // flip unlocked bit
4513   andi_(R0, R0, markWord::lock_mask_in_place);
4514   bne(CCR0, slow); // failed if new header doesn't contain locked_value (which is 0)
4515 
4516   // Note: We're not publishing anything (like the displaced header in LM_LEGACY)
4517   // to other threads at this point. Hence, no release barrier, here.
4518   // (The obj has been written to the BasicObjectLock at obj_offset() within the own thread stack.)
4519   atomically_flip_locked_state(/* is_unlock */ false, obj, hdr, slow, MacroAssembler::MemBarAcq);
4520 

4521   // After successful lock, push object on lock-stack
4522   stdx(obj, t1, R16_thread);
4523   addi(t1, t1, oopSize);
4524   stw(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4525 }
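The lock-stack bookkeeping above keeps a byte offset (lock_stack_top) in the thread and stores the locked object addresses below it. A minimal sketch of the push, with a plain array standing in for the JavaThread fields (capacity and layout are illustrative assumptions):

#include <cstdint>
#include <cstddef>

// 'top' is a byte offset into the lock stack, exactly as in the real code;
// oopSize is 8 on PPC64, the capacity of 8 entries is hypothetical.
struct LockStackSketch {
  static const size_t oop_size = 8;
  static const size_t capacity = 8;
  uintptr_t entry[capacity];
  size_t top = 0;                            // byte offset of the next free slot

  bool push(uintptr_t obj) {
    if (top >= capacity * oop_size) {        // cmplwi/bgt -> slow path ("no space left")
      return false;
    }
    entry[top / oop_size] = obj;             // stdx(obj, t1, R16_thread)
    top += oop_size;                         // addi(t1, t1, oopSize); stw(...)
    return true;
  }
};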
4526 
4527 // Implements lightweight-unlocking.
4528 // Branches to slow upon failure, with CCR0 NE.
4529 // Falls through upon success, with CCR0 EQ.
4530 //
4531 // - obj: the object to be unlocked
4532 // - hdr: the (pre-loaded) header of the object, will be destroyed
4533 void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Label& slow) {
4534   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4535   assert_different_registers(obj, hdr);
4536 
4537 #ifdef ASSERT
4538   {
4539     // Check that hdr is fast-locked.
4540     Label hdr_ok;
4541     andi_(R0, hdr, markWord::lock_mask_in_place);
4542     beq(CCR0, hdr_ok);
4543     stop("Header is not fast-locked");
4544     bind(hdr_ok);
4545   }
4546   Register t1 = hdr; // Reuse in debug build.
4547   {
4548     // The following checks rely on the fact that LockStack is only ever modified by
4549     // its owning thread, even if the lock got inflated concurrently; removal of LockStack
4550     // entries after inflation will happen delayed in that case.
4551 
4552     // Check for lock-stack underflow.
4553     Label stack_ok;
4554     lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4555     cmplwi(CCR0, t1, LockStack::start_offset());
4556     bgt(CCR0, stack_ok);
4557     stop("Lock-stack underflow");
4558     bind(stack_ok);
4559   }
4560   {
4561     // Check if the top of the lock-stack matches the unlocked object.
4562     Label tos_ok;
4563     addi(t1, t1, -oopSize);
4564     ldx(t1, t1, R16_thread);
4565     cmpd(CCR0, t1, obj);
4566     beq(CCR0, tos_ok);
4567     stop("Top of lock-stack does not match the unlocked object");
4568     bind(tos_ok);
4569   }
4570 #endif
4571 
4572   // Release the lock.
4573   atomically_flip_locked_state(/* is_unlock */ true, obj, hdr, slow, MacroAssembler::MemBarRel);































4574 
4575   // After successful unlock, pop object from lock-stack
4576   Register t2 = hdr;
4577   lwz(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4578   addi(t2, t2, -oopSize);
4579 #ifdef ASSERT
4580   li(R0, 0);
4581   stdx(R0, t2, R16_thread);




4582 #endif
4583   stw(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);














4584 }

  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "gc/shared/barrierSet.hpp"
  31 #include "gc/shared/barrierSetAssembler.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "nativeInst_ppc.hpp"
  35 #include "oops/klass.inline.hpp"
  36 #include "oops/methodData.hpp"
  37 #include "prims/methodHandles.hpp"
  38 #include "register_ppc.hpp"
  39 #include "runtime/icache.hpp"
  40 #include "runtime/interfaceSupport.inline.hpp"
  41 #include "runtime/objectMonitor.hpp"
  42 #include "runtime/os.hpp"
  43 #include "runtime/safepoint.hpp"
  44 #include "runtime/safepointMechanism.hpp"
  45 #include "runtime/sharedRuntime.hpp"
  46 #include "runtime/stubRoutines.hpp"
  47 #include "runtime/vm_version.hpp"
  48 #include "utilities/macros.hpp"
  49 #include "utilities/powerOfTwo.hpp"
  50 
  51 #ifdef PRODUCT
  52 #define BLOCK_COMMENT(str) // nothing
  53 #else
  54 #define BLOCK_COMMENT(str) block_comment(str)
  55 #endif
  56 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  57 
  58 #ifdef ASSERT

2157   }
2158 
2159   ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, Rtoc, false);
2160   mtctr(reg_scratch);
2161   bctr();
2162 
2163   const address stub_start_addr = addr_at(stub_start_offset);
2164 
2165   // Assert that the encoded destination_toc_offset can be identified and that it is correct.
2166   assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
2167          "encoded offset into the constant pool must match");
2168   // Trampoline_stub_size should be good.
2169   assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
2170   assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
2171 
2172   // End the stub.
2173   end_a_stub();
2174   return stub;
2175 }
2176 
2177 // "The box" is the space on the stack where we copy the object mark.
2178 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
2179                                                Register temp, Register displaced_header, Register current_header) {
2180   assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_lock_lightweight");



2181   assert_different_registers(oop, box, temp, displaced_header, current_header);

2182   Label object_has_monitor;
2183   Label cas_failed;
2184   Label success, failure;
2185 
2186   // Load markWord from object into displaced_header.
2187   ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
2188 
2189   if (DiagnoseSyncOnValueBasedClasses != 0) {
2190     load_klass(temp, oop);
2191     lwz(temp, in_bytes(Klass::access_flags_offset()), temp);
2192     testbitdi(flag, R0, temp, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
2193     bne(flag, failure);
2194   }
2195 








2196   // Handle existing monitor.
2197   // The object has an existing monitor iff (mark & monitor_value) != 0.
2198   andi_(temp, displaced_header, markWord::monitor_value);
2199   bne(CCR0, object_has_monitor);
2200 
2201   if (LockingMode == LM_MONITOR) {
2202     // Set NE to indicate 'failure' -> take slow-path.
2203     crandc(flag, Assembler::equal, flag, Assembler::equal);
2204     b(failure);
2205   } else {
2206     assert(LockingMode == LM_LEGACY, "must be");
2207     // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
2208     ori(displaced_header, displaced_header, markWord::unlocked_value);
2209 
2210     // Load Compare Value application register.
2211 
2212     // Initialize the box. (Must happen before we update the object mark!)
2213     std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2214 
2215     // Must fence, otherwise, preceding store(s) may float below cmpxchg.
2216     // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
2217     cmpxchgd(/*flag=*/flag,
2218              /*current_value=*/current_header,
2219              /*compare_value=*/displaced_header,
2220              /*exchange_value=*/box,
2221              /*where=*/oop,
2222              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2223              MacroAssembler::cmpxchgx_hint_acquire_lock(),
2224              noreg,
2225              &cas_failed,
2226              /*check without membar and ldarx first*/true);

2230     b(success);
2231 
2232     bind(cas_failed);
2233     // We did not see an unlocked object so try the fast recursive case.
2234 
2235     // Check if the owner is self by comparing the value in the markWord of object
2236     // (current_header) with the stack pointer.
2237     sub(current_header, current_header, R1_SP);
2238     load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
2239 
2240     and_(R0/*==0?*/, current_header, temp);
2241     // If the condition is true the mark points into our own stack, hence we can store 0 as the
2242     // displaced header in the box, which indicates that it is a recursive lock.
2243     std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
2244 
2245     if (flag != CCR0) {
2246       mcrf(flag, CCR0);
2247     }
2248     beq(CCR0, success);
2249     b(failure);




2250   }
2251 
2252   // Handle existing monitor.
2253   bind(object_has_monitor);
2254   // The object's monitor m is unlocked iff m->owner is null,
2255   // otherwise m->owner may contain a thread or a stack address.
2256 









2257   // Try to CAS m->owner from null to current thread.
2258   addi(temp, displaced_header, in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value);
2259   cmpxchgd(/*flag=*/flag,
2260            /*current_value=*/current_header,
2261            /*compare_value=*/(intptr_t)0,
2262            /*exchange_value=*/R16_thread,
2263            /*where=*/temp,
2264            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2265            MacroAssembler::cmpxchgx_hint_acquire_lock());
2266 
2267   // Store a non-null value into the box.
2268   std(box, BasicLock::displaced_header_offset_in_bytes(), box);


2269   beq(flag, success);
2270 
2271   // Check for recursive locking.
2272   cmpd(flag, current_header, R16_thread);
2273   bne(flag, failure);
2274 
2275   // Current thread already owns the lock. Just increment recursions.
2276   Register recursions = displaced_header;
2277   ld(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
2278   addi(recursions, recursions, 1);
2279   std(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
2280 




2281   // flag == EQ indicates success, increment held monitor count
2282   // flag == NE indicates failure
2283   bind(success);
2284   inc_held_monitor_count(temp);
2285   bind(failure);
2286 }
2287 
2288 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
2289                                                  Register temp, Register displaced_header, Register current_header) {
2290   assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_unlock_lightweight");
2291   assert_different_registers(oop, box, temp, displaced_header, current_header);

2292   Label success, failure, object_has_monitor, notRecursive;
2293 













2294   if (LockingMode == LM_LEGACY) {
2295     // Find the lock address and load the displaced header from the stack.
2296     ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2297 
2298     // If the displaced header is 0, we have a recursive unlock.
2299     cmpdi(flag, displaced_header, 0);
2300     beq(flag, success);
2301   }
2302 
2303   // Handle existing monitor.
2304   // The object has an existing monitor iff (mark & monitor_value) != 0.

2305   ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
2306   andi_(R0, current_header, markWord::monitor_value);
2307   bne(CCR0, object_has_monitor);
2308 
2309   if (LockingMode == LM_MONITOR) {
2310     // Set NE to indicate 'failure' -> take slow-path.
2311     crandc(flag, Assembler::equal, flag, Assembler::equal);
2312     b(failure);
2313   } else {
2314     assert(LockingMode == LM_LEGACY, "must be");
2315     // Check if it is still a lightweight lock, this is true if we see
2316     // the stack address of the basicLock in the markWord of the object.
2317     // Cmpxchg sets flag to cmpd(current_header, box).
2318     cmpxchgd(/*flag=*/flag,
2319              /*current_value=*/current_header,
2320              /*compare_value=*/box,
2321              /*exchange_value=*/displaced_header,
2322              /*where=*/oop,
2323              MacroAssembler::MemBarRel,
2324              MacroAssembler::cmpxchgx_hint_release_lock(),
2325              noreg,
2326              &failure);
2327     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
2328     b(success);




2329   }
2330 
2331   // Handle existing monitor.
2332   bind(object_has_monitor);
2333   STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
2334   addi(current_header, current_header, -(int)markWord::monitor_value); // monitor
2335   ld(temp,             in_bytes(ObjectMonitor::owner_offset()), current_header);
2336 













2337   // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0.
2338   // This is handled like owner thread mismatches: We take the slow path.
2339   cmpd(flag, temp, R16_thread);
2340   bne(flag, failure);
2341 
2342   ld(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
2343 
2344   addic_(displaced_header, displaced_header, -1);
2345   blt(CCR0, notRecursive); // Not recursive if negative after decrement.
2346   std(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
2347   if (flag == CCR0) { // Otherwise, flag is already EQ, here.
2348     crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set CCR0 EQ
2349   }
2350   b(success);
2351 
2352   bind(notRecursive);
2353   ld(temp,             in_bytes(ObjectMonitor::EntryList_offset()), current_header);
2354   ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header);
2355   orr(temp, temp, displaced_header); // Will be 0 if both are 0.
2356   cmpdi(flag, temp, 0);
2357   bne(flag, failure);
2358   release();
2359   std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
2360 
2361   // flag == EQ indicates success, decrement held monitor count
2362   // flag == NE indicates failure
2363   bind(success);
2364   dec_held_monitor_count(temp);
2365   bind(failure);
2366 }
2367 
2368 void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
2369                                                            Register tmp2, Register tmp3) {
2370   assert_different_registers(obj, tmp1, tmp2, tmp3);
2371   assert(flag == CCR0, "bad condition register");
2372 
2373   // Handle inflated monitor.
2374   Label inflated;
2375   // Finish fast lock successfully. MUST reach this with flag == EQ.
2376   Label locked;
2377   // Finish fast lock unsuccessfully. MUST branch to this with flag == NE.
2378   Label slow_path;
2379 
2380   if (DiagnoseSyncOnValueBasedClasses != 0) {
2381     load_klass(tmp1, obj);
2382     lwz(tmp1, in_bytes(Klass::access_flags_offset()), tmp1);
2383     testbitdi(flag, R0, tmp1, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
2384     bne(flag, slow_path);
2385   }
2386 
2387   const Register mark = tmp1;
2388   const Register t = tmp3; // Usage of R0 allowed!
2389 
2390   { // Lightweight locking
2391 
2392     // Push lock to the lock stack and finish successfully. MUST reach to with flag == EQ
2393     Label push;
2394 
2395     const Register top = tmp2;
2396 
2397     // Check if lock-stack is full.
2398     lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2399     cmplwi(flag, top, LockStack::end_offset() - 1);
2400     bgt(flag, slow_path);
2401 
2402     // The underflow check is elided. The recursive check will always fail
2403     // when the lock stack is empty because of the _bad_oop_sentinel field.
2404 
2405     // Check if recursive.
2406     subi(t, top, oopSize);
2407     ldx(t, R16_thread, t);
2408     cmpd(flag, obj, t);
2409     beq(flag, push);
2410 
2411     // Check for monitor (0b10) or locked (0b00).
2412     ld(mark, oopDesc::mark_offset_in_bytes(), obj);
2413     andi_(t, mark, markWord::lock_mask_in_place);
2414     cmpldi(flag, t, markWord::unlocked_value);
2415     bgt(flag, inflated);
2416     bne(flag, slow_path);
2417 
2418     // Not inflated.
2419 
2420     // Try to lock. Transition lock bits 0b01 => 0b00
2421     assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
2422     atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow_path, MacroAssembler::MemBarAcq);
2423 
2424     bind(push);
2425     // After successful lock, push object on lock-stack.
2426     stdx(obj, R16_thread, top);
2427     addi(top, top, oopSize);
2428     stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2429     b(locked);
2430   }
2431 
2432   { // Handle inflated monitor.
2433     bind(inflated);
2434 
2435     // mark contains the tagged ObjectMonitor*.
2436     const Register tagged_monitor = mark;
2437     const uintptr_t monitor_tag = markWord::monitor_value;
2438     const Register owner_addr = tmp2;
2439 
2440     // Compute owner address.
2441     addi(owner_addr, tagged_monitor, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag);
2442 
2443     // CAS owner (null => current thread).
2444     cmpxchgd(/*flag=*/flag,
2445             /*current_value=*/t,
2446             /*compare_value=*/(intptr_t)0,
2447             /*exchange_value=*/R16_thread,
2448             /*where=*/owner_addr,
2449             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2450             MacroAssembler::cmpxchgx_hint_acquire_lock());
2451     beq(flag, locked);
2452 
2453     // Check if recursive.
2454     cmpd(flag, t, R16_thread);
2455     bne(flag, slow_path);
2456 
2457     // Recursive.
2458     ld(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
2459     addi(tmp1, tmp1, 1);
2460     std(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
2461   }
2462 
2463   bind(locked);
2464   inc_held_monitor_count(tmp1);
2465 
2466 #ifdef ASSERT
2467   // Check that locked label is reached with flag == EQ.
2468   Label flag_correct;
2469   beq(flag, flag_correct);
2470   stop("Fast Lock Flag != EQ");
2471 #endif
2472   bind(slow_path);
2473 #ifdef ASSERT
2474   // Check that slow_path label is reached with flag == NE.
2475   bne(flag, flag_correct);
2476   stop("Fast Lock Flag != NE");
2477   bind(flag_correct);
2478 #endif
2479   // C2 uses the value of flag (NE vs EQ) to determine the continuation.
2480 }
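The elided-underflow argument above (see the comment about the _bad_oop_sentinel field) can be demonstrated with a small standalone sketch; the sentinel value and layout here are stand-ins, only the shape of the comparison matches the generated code:

#include <cstdint>

// The word placed directly before the lock stack never equals a real object
// address, so the "is the slot below top equal to obj?" comparison simply
// fails on an empty stack and no explicit underflow check is needed.
int main() {
  const uintptr_t bad_oop_sentinel = ~uintptr_t(0);  // hypothetical sentinel value
  uintptr_t lock_area[1 + 8];                        // [0] = sentinel, then 8 lock-stack slots
  lock_area[0] = bad_oop_sentinel;
  int top = 0;                                       // number of occupied slots

  uintptr_t obj = 0x00007f0000001234;                // hypothetical oop
  bool recursive = (lock_area[top] == obj);          // empty stack: hits sentinel -> false

  lock_area[++top] = obj;                            // first lock: push obj
  recursive = (lock_area[top] == obj);               // locking again: recursive -> true
  return recursive ? 0 : 1;
}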
2481 
2482 void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
2483                                                              Register tmp2, Register tmp3) {
2484   assert_different_registers(obj, tmp1, tmp2, tmp3);
2485   assert(flag == CCR0, "bad condition register");
2486 
2487   // Handle inflated monitor.
2488   Label inflated, inflated_load_monitor;
2489   // Finish fast unlock successfully. MUST reach this with flag == EQ.
2490   Label unlocked;
2491   // Finish fast unlock unsuccessfully. MUST branch to this with flag == NE.
2492   Label slow_path;
2493 
2494   const Register mark = tmp1;
2495   const Register top = tmp2;
2496   const Register t = tmp3;
2497 
2498   { // Lightweight unlock
2499     Label push_and_slow;
2500 
2501     // Check if obj is top of lock-stack.
2502     lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2503     subi(top, top, oopSize);
2504     ldx(t, R16_thread, top);
2505     cmpd(flag, obj, t);
2506     // Top of lock stack was not obj. Must be monitor.
2507     bne(flag, inflated_load_monitor);
2508 
2509     // Pop lock-stack.
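         // Debug builds zero the vacated slot so a stale entry cannot satisfy later checks.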
2510     DEBUG_ONLY(li(t, 0);)
2511     DEBUG_ONLY(stdx(t, R16_thread, top);)
2512     stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2513 
2514     // The underflow check is elided. The recursive check will always fail
2515     // when the lock stack is empty because of the _bad_oop_sentinel field.
2516 
2517     // Check if recursive.
2518     subi(t, top, oopSize);
2519     ldx(t, R16_thread, t);
2520     cmpd(flag, obj, t);
2521     beq(flag, unlocked);
2522 
2523     // Not recursive.
2524 
2525     // Check for monitor (0b10).
2526     ld(mark, oopDesc::mark_offset_in_bytes(), obj);
2527     andi_(t, mark, markWord::monitor_value);
2528     bne(CCR0, inflated);
2529 
2530 #ifdef ASSERT
2531     // Check header not unlocked (0b01).
2532     Label not_unlocked;
2533     andi_(t, mark, markWord::unlocked_value);
2534     beq(CCR0, not_unlocked);
2535     stop("lightweight_unlock already unlocked");
2536     bind(not_unlocked);
2537 #endif
2538 
2539     // Try to unlock. Transition lock bits 0b00 => 0b01
2540     atomically_flip_locked_state(/* is_unlock */ true, obj, mark, push_and_slow, MacroAssembler::MemBarRel);
2541     b(unlocked);
2542 
2543     bind(push_and_slow);
2544     // Restore lock-stack and handle the unlock in runtime.
2545     DEBUG_ONLY(stdx(obj, R16_thread, top);)
2546     addi(top, top, oopSize);
2547     stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2548     b(slow_path);
2549   }
2550 
2551   { // Handle inflated monitor.
2552     bind(inflated_load_monitor);
2553     ld(mark, oopDesc::mark_offset_in_bytes(), obj);
2554 #ifdef ASSERT
2555     andi_(t, mark, markWord::monitor_value);
2556     bne(CCR0, inflated);
2557     stop("Fast Unlock not monitor");
2558 #endif
2559 
2560     bind(inflated);
2561 
2562 #ifdef ASSERT
2563     Label check_done;
2564     subi(top, top, oopSize);
2565     cmplwi(CCR0, top, in_bytes(JavaThread::lock_stack_base_offset()));
2566     blt(CCR0, check_done);
2567     ldx(t, R16_thread, top);
2568     cmpd(flag, obj, t);
2569     bne(flag, inflated);
2570     stop("Fast Unlock lock on stack");
2571     bind(check_done);
2572 #endif
2573 
2574     // mark contains the tagged ObjectMonitor*.
2575     const Register monitor = mark;
2576     const uintptr_t monitor_tag = markWord::monitor_value;
2577 
2578     // Untag the monitor.
2579     subi(monitor, mark, monitor_tag);
2580 
2581     const Register recursions = tmp2;
2582     Label not_recursive;
2583 
2584     // Check if recursive.
2585     ld(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
2586     addic_(recursions, recursions, -1);
2587     blt(CCR0, not_recursive);
2588 
2589     // Recursive unlock.
2590     std(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
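         // Force CCR0 to EQ (eq |= ~eq) so the unlocked continuation observes success.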
2591     crorc(CCR0, Assembler::equal, CCR0, Assembler::equal);
2592     b(unlocked);
2593 
2594     bind(not_recursive);
2595 
2596     Label release_;
2597     const Register t2 = tmp2;
2598 
2599     // Check if the entry lists are empty.
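         // If either list is non-empty the exit must complete in the runtime so a waiting
         // thread can be woken; an uncontended monitor is released by just clearing the owner.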
2600     ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor);
2601     ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor);
2602     orr(t, t, t2);
2603     cmpdi(flag, t, 0);
2604     beq(flag, release_);
2605 
2606     // The owner may be anonymous and we removed the last obj entry in
2607     // the lock-stack. This loses the information about the owner.
2608     // Write the thread to the owner field so the runtime knows the owner.
2609     std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor);
2610     b(slow_path);
2611 
2612     bind(release_);
2613     // Set owner to null.
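         // The release barrier orders the critical section's accesses before the store that
         // clears the owner, so the next owner observes them.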
2614     release();
2615     // t contains 0
2616     std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
2617   }
2618 
2619   bind(unlocked);
2620   dec_held_monitor_count(t);
2621 
2622 #ifdef ASSERT
2623   // Check that unlocked label is reached with flag == EQ.
2624   Label flag_correct;
2625   beq(flag, flag_correct);
2626   stop("Fast Unlock Flag != EQ");
2627 #endif
2628   bind(slow_path);
2629 #ifdef ASSERT
2630   // Check that slow_path label is reached with flag == NE.
2631   bne(flag, flag_correct);
2632   stop("Fast Unlock Flag != NE");
2633   bind(flag_correct);
2634 #endif
2635   // C2 uses the value of flag (NE vs EQ) to determine the continuation.
2636 }
2637 
2638 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod) {
2639   ld(temp, in_bytes(JavaThread::polling_word_offset()), R16_thread);
2640 
2641   if (at_return) {
2642     if (in_nmethod) {
2643       if (UseSIGTRAP) {
2644         // Use Signal Handler.
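             // td traps when R1_SP is greater (unsigned) than the polling word, the same
             // condition as the explicit compare below; the SIGTRAP handler takes the slow path.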
2645         relocate(relocInfo::poll_return_type);
2646         td(traptoGreaterThanUnsigned, R1_SP, temp);
2647       } else {
2648         cmpld(CCR0, R1_SP, temp);
2649         // Stub may be out of range for short conditional branch.
2650         bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_path);
2651       }
2652     } else { // Not in nmethod.
2653       // Frame still on stack, need to get fp.
2654       Register fp = R0;
2655       ld(fp, _abi0(callers_sp), R1_SP);
2656       cmpld(CCR0, fp, temp);
2657       bgt(CCR0, slow_path);

4237     xori(tmp, tmp, markWord::unlocked_value); // flip unlocked bit
4238     andi_(R0, tmp, markWord::lock_mask_in_place);
4239     bne(CCR0, failed); // failed if new header doesn't contain locked_value (which is 0)
4240   } else {
4241     ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_release_lock());
4242     andi_(R0, tmp, markWord::lock_mask_in_place);
4243     bne(CCR0, failed); // failed if old header doesn't contain locked_value (which is 0)
4244     ori(tmp, tmp, markWord::unlocked_value); // set unlocked bit
4245   }
4246   stdcx_(tmp, obj);
4247   bne(CCR0, retry);
4248 
4249   if (semantics & MemBarFenceAfter) {
4250     fence();
4251   } else if (semantics & MemBarAcq) {
4252     isync();
4253   }
4254 }
4255 
4256 // Implements lightweight-locking.


4257 //
4258 //  - obj: the object to be locked
4259 //  - t1, t2: temporary registers
4260 void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Label& slow) {

4261   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4262   assert_different_registers(obj, t1, t2);
4263 
4264   Label push;
4265   const Register top = t1;
4266   const Register mark = t2;
4267   const Register t = R0;
4268 
4269   // Check if the lock-stack is full.
4270   lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4271   cmplwi(CCR0, top, LockStack::end_offset());
4272   bge(CCR0, slow);
4273 
4274   // The underflow check is elided. The recursive check will always fail
4275   // when the lock stack is empty because of the _bad_oop_sentinel field.
4276 
4277   // Check for recursion.
4278   subi(t, top, oopSize);
4279   ldx(t, R16_thread, t);
4280   cmpd(CCR0, obj, t);
4281   beq(CCR0, push);
4282 
4283   // Check header for monitor (0b10) or locked (0b00).
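       // Flipping the unlocked bit makes an unlocked header (low bits 0b01) yield zero under
       // lock_mask_in_place; any non-zero result (locked 0b00 or monitor 0b1x) goes slow.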
4284   ld(mark, oopDesc::mark_offset_in_bytes(), obj);
4285   xori(t, mark, markWord::unlocked_value);
4286   andi_(t, t, markWord::lock_mask_in_place);
4287   bne(CCR0, slow);
4288 
4289   // Try to lock. Transition lock bits 0b01 => 0b00
4290   atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow, MacroAssembler::MemBarAcq);


4291 
4292   bind(push);
4293   // After successful lock, push object on lock-stack
4294   stdx(obj, R16_thread, top);
4295   addi(top, top, oopSize);
4296   stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4297 }
4298 
4299 // Implements lightweight-unlocking.


4300 //
4301 //  - obj: the object to be unlocked
4302 //  - t1: temporary register
4303 void MacroAssembler::lightweight_unlock(Register obj, Register t1, Label& slow) {
4304   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4305   assert_different_registers(obj, t1);
4306 
4307 #ifdef ASSERT









4308   {
4309     // The following checks rely on the fact that LockStack is only ever modified by
4310     // its owning thread, even if the lock got inflated concurrently; removal of LockStack
4311     // entries after inflation will happen delayed in that case.
4312 
4313     // Check for lock-stack underflow.
4314     Label stack_ok;
4315     lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4316     cmplwi(CCR0, t1, LockStack::start_offset());
4317     bge(CCR0, stack_ok);
4318     stop("Lock-stack underflow");
4319     bind(stack_ok);
4320   }










4321 #endif
4322 
4323   Label unlocked, push_and_slow;
4324   const Register top = t1;
4325   const Register mark = R0;
4326   Register t = R0;
4327 
4328   // Check if obj is top of lock-stack.
4329   lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4330   subi(top, top, oopSize);
4331   ldx(t, R16_thread, top);
4332   cmpd(CCR0, obj, t);
4333   bne(CCR0, slow);
4334 
4335   // Pop lock-stack.
4336   DEBUG_ONLY(li(t, 0);)
4337   DEBUG_ONLY(stdx(t, R16_thread, top);)
4338   stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4339 
4340   // The underflow check is elided. The recursive check will always fail
4341   // when the lock stack is empty because of the _bad_oop_sentinel field.
4342 
4343   // Check if recursive.
4344   subi(t, top, oopSize);
4345   ldx(t, R16_thread, t);
4346   cmpd(CCR0, obj, t);
4347   beq(CCR0, unlocked);
4348 
4349   // Use top as tmp
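       // mark and t both alias R0 up to this point; the saved top offset is no longer needed
       // here and is reloaded on the push_and_slow path.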
4350   t = top;
4351 
4352   // Not recursive. Check header for monitor (0b10).
4353   ld(mark, oopDesc::mark_offset_in_bytes(), obj);
4354   andi_(t, mark, markWord::monitor_value);
4355   bne(CCR0, push_and_slow);
4356 




4357 #ifdef ASSERT
4358   // Check header not unlocked (0b01).
4359   Label not_unlocked;
4360   andi_(t, mark, markWord::unlocked_value);
4361   beq(CCR0, not_unlocked);
4362   stop("lightweight_unlock already unlocked");
4363   bind(not_unlocked);
4364 #endif
4365 
4366   // Try to unlock. Transition lock bits 0b00 => 0b01
4367   atomically_flip_locked_state(/* is_unlock */ true, obj, t, push_and_slow, MacroAssembler::MemBarRel);
4368   b(unlocked);
4369 
4370   bind(push_and_slow);
4371 
4372   // Restore lock-stack and handle the unlock in runtime.
4373   lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4374   DEBUG_ONLY(stdx(obj, R16_thread, top);)
4375   addi(top, top, oopSize);
4376   stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4377   b(slow);
4378 
4379   bind(unlocked);
4380 }
< prev index next >