18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "compiler/disassembler.hpp"
29 #include "gc/shared/collectedHeap.inline.hpp"
30 #include "gc/shared/barrierSet.hpp"
31 #include "gc/shared/barrierSetAssembler.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "nativeInst_ppc.hpp"
35 #include "oops/klass.inline.hpp"
36 #include "oops/methodData.hpp"
37 #include "prims/methodHandles.hpp"
38 #include "runtime/icache.hpp"
39 #include "runtime/interfaceSupport.inline.hpp"
40 #include "runtime/objectMonitor.hpp"
41 #include "runtime/os.hpp"
42 #include "runtime/safepoint.hpp"
43 #include "runtime/safepointMechanism.hpp"
44 #include "runtime/sharedRuntime.hpp"
45 #include "runtime/stubRoutines.hpp"
46 #include "runtime/vm_version.hpp"
47 #include "utilities/macros.hpp"
48 #include "utilities/powerOfTwo.hpp"
49
50 #ifdef PRODUCT
51 #define BLOCK_COMMENT(str) // nothing
52 #else
53 #define BLOCK_COMMENT(str) block_comment(str)
54 #endif
55 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
56
57 #ifdef ASSERT
2156 }
2157
2158 ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, Rtoc, false);
2159 mtctr(reg_scratch);
2160 bctr();
2161
2162 const address stub_start_addr = addr_at(stub_start_offset);
2163
2164 // Assert that the encoded destination_toc_offset can be identified and that it is correct.
2165 assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
2166 "encoded offset into the constant pool must match");
2167 // Trampoline_stub_size should be good.
2168 assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
2169 assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
2170
2171 // End the stub.
2172 end_a_stub();
2173 return stub;
2174 }
2175
2176 // TM on PPC64.
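     // The two helpers below are plain ldarx/stdcx_ resp. lwarx/stwcx_ retry loops
     // (the store-conditional sets CCR0). They are used by the RTM statistics code
     // further down, e.g. atomic_ori_int() sets the rtm_state bits in the MDO.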
2177 void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
2178 Label retry;
2179 bind(retry);
2180 ldarx(result, addr, /*hint*/ false);
2181 addi(result, result, simm16);
2182 stdcx_(result, addr);
2183 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
2184 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
2185 } else {
2186 bne( CCR0, retry); // stXcx_ sets CCR0
2187 }
2188 }
2189
2190 void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
2191 Label retry;
2192 bind(retry);
2193 lwarx(result, addr, /*hint*/ false);
2194 ori(result, result, uimm16);
2195 stwcx_(result, addr);
2196 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
2197 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
2198 } else {
2199 bne( CCR0, retry); // stXcx_ sets CCR0
2200 }
2201 }
2202
2203 #if INCLUDE_RTM_OPT
2204
2205 // Update rtm_counters based on abort status
2206 // input: abort_status
2207 // rtm_counters_Reg (RTMLockingCounters*)
2208 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
2209 // Mapping to keep PreciseRTMLockingStatistics similar to x86.
2210 // x86 ppc (! means inverted, ? means not the same)
2211 // 0 31 Set if abort caused by XABORT instruction.
2212 // 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
2213 // 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
2214 // 3 10 Set if an internal buffer overflowed.
2215 // 4 ?12 Set if a debug breakpoint was hit.
2216 // 5 ?32 Set if an abort occurred during execution of a nested transaction.
2217 const int failure_bit[] = {tm_tabort, // Signal handler will set this too.
2218 tm_failure_persistent,
2219 tm_non_trans_cf,
2220 tm_trans_cf,
2221 tm_footprint_of,
2222 tm_failure_code,
2223 tm_transaction_level};
2224
2225 const int num_failure_bits = sizeof(failure_bit) / sizeof(int);
2226 const int num_counters = RTMLockingCounters::ABORT_STATUS_LIMIT;
2227
2228 const int bit2counter_map[][num_counters] =
2229 // 0 = no map; 1 = mapped, no inverted logic; -1 = mapped, inverted logic
2230 // Inverted logic means that if a bit is set don't count it, or vice-versa.
2231 // Care must be taken when mapping bits to counters as bits for a given
2232 // counter must be mutually exclusive. Otherwise, the counter will be
2233 // incremented more than once.
2234 // counters:
2235 // 0 1 2 3 4 5
2236 // abort , persist, conflict, overflow, debug , nested bits:
2237 {{ 1 , 0 , 0 , 0 , 0 , 0 }, // abort
2238 { 0 , -1 , 0 , 0 , 0 , 0 }, // failure_persistent
2239 { 0 , 0 , 1 , 0 , 0 , 0 }, // non_trans_cf
2240 { 0 , 0 , 1 , 0 , 0 , 0 }, // trans_cf
2241 { 0 , 0 , 0 , 1 , 0 , 0 }, // footprint_of
2242 { 0 , 0 , 0 , 0 , -1 , 0 }, // failure_code = 0xD4
2243 { 0 , 0 , 0 , 0 , 0 , 1 }}; // transaction_level > 1
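     // Read together with the branch selection below: a '1' entry increments its
     // counter when the tested bit is set (e.g. tm_non_trans_cf and tm_trans_cf both
     // feed the 'conflict' counter), while a '-1' entry uses the inverted sense:
     // the 'persist' counter keeps the x86 meaning of bit 1 (retry may succeed) and
     // is therefore incremented when the PPC persistent bit is clear, and the
     // 'debug' counter is incremented only when the failure code equals 0xD4.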
2244 // ...
2245
2246 // Move abort_status value to R0 and use abort_status register as a
2247 // temporary register because R0 as third operand in ld/std is treated
2248 // as base address zero (value). Likewise, R0 as second operand in addi
2249 // is problematic because it amounts to li.
2250 const Register temp_Reg = abort_status;
2251 const Register abort_status_R0 = R0;
2252 mr(abort_status_R0, abort_status);
2253
2254 // Increment total abort counter.
2255 int counters_offs = RTMLockingCounters::abort_count_offset();
2256 ld(temp_Reg, counters_offs, rtm_counters_Reg);
2257 addi(temp_Reg, temp_Reg, 1);
2258 std(temp_Reg, counters_offs, rtm_counters_Reg);
2259
2260 // Increment specific abort counters.
2261 if (PrintPreciseRTMLockingStatistics) {
2262
2263 // #0 counter offset.
2264 int abortX_offs = RTMLockingCounters::abortX_count_offset();
2265
2266 for (int nbit = 0; nbit < num_failure_bits; nbit++) {
2267 for (int ncounter = 0; ncounter < num_counters; ncounter++) {
2268 if (bit2counter_map[nbit][ncounter] != 0) {
2269 Label check_abort;
2270 int abort_counter_offs = abortX_offs + (ncounter << 3);
2271
2272 if (failure_bit[nbit] == tm_transaction_level) {
2273 // Don't check outer transaction, TL = 1 (bit 63). Hence only
2274 // 11 bits in the TL field are checked to find out if failure
2275 // occurred in a nested transaction. This check also matches
2276 // the case when nesting_of = 1 (nesting overflow).
2277 rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 10);
2278 } else if (failure_bit[nbit] == tm_failure_code) {
2279 // Check failure code for trap or illegal caught in TM.
2280 // Bits 0:7 are tested as bit 7 (persistent) is copied from
2281 // tabort or treclaim source operand.
2282 // On Linux: trap or illegal is TM_CAUSE_SIGNAL (0xD4).
2283 rldicl(temp_Reg, abort_status_R0, 8, 56);
2284 cmpdi(CCR0, temp_Reg, 0xD4);
2285 } else {
2286 rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 0);
2287 }
2288
2289 if (bit2counter_map[nbit][ncounter] == 1) {
2290 beq(CCR0, check_abort);
2291 } else {
2292 bne(CCR0, check_abort);
2293 }
2294
2295 // We don't increment atomically.
2296 ld(temp_Reg, abort_counter_offs, rtm_counters_Reg);
2297 addi(temp_Reg, temp_Reg, 1);
2298 std(temp_Reg, abort_counter_offs, rtm_counters_Reg);
2299
2300 bind(check_abort);
2301 }
2302 }
2303 }
2304 }
2305 // Restore abort_status.
2306 mr(abort_status, abort_status_R0);
2307 }
2308
2309 // Branch if (random & (count-1) != 0), count is 2^n
2310 // tmp and CR0 are killed
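// Example use (see rtm_stack_locking/rtm_inflated_locking below):
//   branch_on_random_using_tb(tmp, RTMTotalCountIncrRate, L_noincrement);
// skips the statistics update in roughly (count-1)/count of all executions, using
// the low time base bits as a cheap pseudo-random source.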
2311 void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
2312 mftb(tmp);
2313 andi_(tmp, tmp, count-1);
2314 bne(CCR0, brLabel);
2315 }
2316
2317 // Perform abort ratio calculation, set no_rtm bit if high ratio.
2318 // input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED
2319 void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
2320 RTMLockingCounters* rtm_counters,
2321 Metadata* method_data) {
2322 Label L_done, L_check_always_rtm1, L_check_always_rtm2;
2323
2324 if (RTMLockingCalculationDelay > 0) {
2325 // Delay calculation.
2326 ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
2327 cmpdi(CCR0, rtm_counters_Reg, 0);
2328 beq(CCR0, L_done);
2329 load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
2330 }
2331 // Abort ratio calculation only if abort_count > RTMAbortThreshold.
2332 // Aborted transactions = abort_count * 100
2333 // All transactions = total_count * RTMTotalCountIncrRate
2334 // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
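  // I.e. the no_rtm bit is set when
  //   abort_count * 100 >= total_count * RTMTotalCountIncrRate * RTMAbortRatio
  // (RTMAbortRatio is interpreted as a percentage, hence the factor 100).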
2335 ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
2336 if (is_simm(RTMAbortThreshold, 16)) { // cmpdi can handle 16bit immediate only.
2337 cmpdi(CCR0, R0, RTMAbortThreshold);
2338 blt(CCR0, L_check_always_rtm2); // reload of rtm_counters_Reg not necessary
2339 } else {
2340 load_const_optimized(rtm_counters_Reg, RTMAbortThreshold);
2341 cmpd(CCR0, R0, rtm_counters_Reg);
2342 blt(CCR0, L_check_always_rtm1); // reload of rtm_counters_Reg required
2343 }
2344 mulli(R0, R0, 100);
2345
2346 const Register tmpReg = rtm_counters_Reg;
2347 ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
2348 mulli(tmpReg, tmpReg, RTMTotalCountIncrRate); // allowable range: int16
2349 mulli(tmpReg, tmpReg, RTMAbortRatio); // allowable range: int16
2350 cmpd(CCR0, R0, tmpReg);
2351 blt(CCR0, L_check_always_rtm1); // jump to reload
2352 if (method_data != nullptr) {
2353 // Set rtm_state to "no rtm" in MDO.
2354 // Not using a metadata relocation. Method and Class Loader are kept alive anyway.
2355 // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
2356 load_const(R0, (address)method_data + in_bytes(MethodData::rtm_state_offset()), tmpReg);
2357 atomic_ori_int(R0, tmpReg, NoRTM);
2358 }
2359 b(L_done);
2360
2361 bind(L_check_always_rtm1);
2362 load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
2363 bind(L_check_always_rtm2);
2364 ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
2365 int64_t thresholdValue = RTMLockingThreshold / RTMTotalCountIncrRate;
2366 if (is_simm(thresholdValue, 16)) { // cmpdi can handle 16bit immediate only.
2367 cmpdi(CCR0, tmpReg, thresholdValue);
2368 } else {
2369 load_const_optimized(R0, thresholdValue);
2370 cmpd(CCR0, tmpReg, R0);
2371 }
2372 blt(CCR0, L_done);
2373 if (method_data != nullptr) {
2374 // Set rtm_state to "always rtm" in MDO.
2375 // Not using a metadata relocation. See above.
2376 load_const(R0, (address)method_data + in_bytes(MethodData::rtm_state_offset()), tmpReg);
2377 atomic_ori_int(R0, tmpReg, UseRTM);
2378 }
2379 bind(L_done);
2380 }
2381
2382 // Update counters and perform abort ratio calculation.
2383 // input: abort_status_Reg
2384 void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
2385 RTMLockingCounters* rtm_counters,
2386 Metadata* method_data,
2387 bool profile_rtm) {
2388
2389 assert(rtm_counters != nullptr, "should not be null when profiling RTM");
2390 // Update rtm counters based on state at abort.
2391 // Reads abort_status_Reg, updates flags.
2392 assert_different_registers(abort_status_Reg, temp_Reg);
2393 load_const_optimized(temp_Reg, (address)rtm_counters, R0);
2394 rtm_counters_update(abort_status_Reg, temp_Reg);
2395 if (profile_rtm) {
2396 assert(rtm_counters != nullptr, "should not be null when profiling RTM");
2397 rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
2398 }
2399 }
2400
2401 // Retry on abort if abort's status indicates non-persistent failure.
2402 // inputs: retry_count_Reg
2403 // : abort_status_Reg
2404 // output: retry_count_Reg decremented by 1
2405 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
2406 Label& retryLabel, Label* checkRetry) {
2407 Label doneRetry;
2408
2409 // Don't retry if failure is persistent.
2410   // The persistent bit is set when (A) a disallowed operation is performed in
2411   // transactional state, for instance trying to write the TFHAR after a
2412 // transaction is started; or when there is (B) a Nesting Overflow (too many
2413 // nested transactions); or when (C) the Footprint overflows (too many
2414 // addresses touched in TM state so there is no more space in the footprint
2415 // area to track them); or in case of (D) a Self-Induced Conflict, i.e. a
2416 // store is performed to a given address in TM state, then once in suspended
2417 // state the same address is accessed. Failure (A) is very unlikely to occur
2418 // in the JVM. Failure (D) will never occur because Suspended state is never
2419 // used in the JVM. Thus mostly (B) a Nesting Overflow or (C) a Footprint
2420 // Overflow will set the persistent bit.
2421 rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
2422 bne(CCR0, doneRetry);
2423
2424 // Don't retry if transaction was deliberately aborted, i.e. caused by a
2425 // tabort instruction.
2426 rldicr_(R0, abort_status_Reg, tm_tabort, 0);
2427 bne(CCR0, doneRetry);
2428
2429 // Retry if transaction aborted due to a conflict with another thread.
2430 if (checkRetry) { bind(*checkRetry); }
2431 addic_(retry_count_Reg, retry_count_Reg, -1);
2432 blt(CCR0, doneRetry);
2433 b(retryLabel);
2434 bind(doneRetry);
2435 }
2436
2437 // Spin and retry if lock is busy.
2438 // inputs: owner_addr_Reg (monitor address)
2439 // : retry_count_Reg
2440 // output: retry_count_Reg decremented by 1
2441 // CTR is killed
2442 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
2443 Label SpinLoop, doneRetry, doRetry;
2444 addic_(retry_count_Reg, retry_count_Reg, -1);
2445 blt(CCR0, doneRetry);
2446
2447 if (RTMSpinLoopCount > 1) {
2448 li(R0, RTMSpinLoopCount);
2449 mtctr(R0);
2450 }
2451
2452 // low thread priority
2453 smt_prio_low();
2454 bind(SpinLoop);
2455
2456 if (RTMSpinLoopCount > 1) {
2457 bdz(doRetry);
2458 ld(R0, 0, owner_addr_Reg);
2459 cmpdi(CCR0, R0, 0);
2460 bne(CCR0, SpinLoop);
2461 }
2462
2463 bind(doRetry);
2464
2465 // restore thread priority to default in userspace
2466 #ifdef LINUX
2467 smt_prio_medium_low();
2468 #else
2469 smt_prio_medium();
2470 #endif
2471
2472 b(retryLabel);
2473
2474 bind(doneRetry);
2475 }
2476
2477 // Use RTM for normal stack locks.
2478 // Input: objReg (object to lock)
2479 void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
2480 Register obj, Register mark_word, Register tmp,
2481 Register retry_on_abort_count_Reg,
2482 RTMLockingCounters* stack_rtm_counters,
2483 Metadata* method_data, bool profile_rtm,
2484 Label& DONE_LABEL, Label& IsInflated) {
2485 assert(UseRTMForStackLocks, "why call this otherwise?");
2486 Label L_rtm_retry, L_decrement_retry, L_on_abort;
2487
2488 if (RTMRetryCount > 0) {
2489 load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
2490 bind(L_rtm_retry);
2491 }
2492 andi_(R0, mark_word, markWord::monitor_value); // inflated vs stack-locked|neutral
2493 bne(CCR0, IsInflated);
2494
2495 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2496 Label L_noincrement;
2497 if (RTMTotalCountIncrRate > 1) {
2498 branch_on_random_using_tb(tmp, RTMTotalCountIncrRate, L_noincrement);
2499 }
2500 assert(stack_rtm_counters != nullptr, "should not be null when profiling RTM");
2501 load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
2502 //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
2503 ldx(mark_word, tmp);
2504 addi(mark_word, mark_word, 1);
2505 stdx(mark_word, tmp);
2506 bind(L_noincrement);
2507 }
2508 tbegin_();
2509 beq(CCR0, L_on_abort);
2510 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked.
2511 andi(R0, mark_word, markWord::lock_mask_in_place); // look at 2 lock bits
2512 cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked
2513 beq(flag, DONE_LABEL); // all done if unlocked
2514
2515 if (UseRTMXendForLockBusy) {
2516 tend_();
2517 b(L_decrement_retry);
2518 } else {
2519 tabort_();
2520 }
2521 bind(L_on_abort);
2522 const Register abort_status_Reg = tmp;
2523 mftexasr(abort_status_Reg);
2524 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2525 rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
2526 }
2527 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
2528 if (RTMRetryCount > 0) {
2529 // Retry on lock abort if abort status is not permanent.
2530 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
2531 } else {
2532 bind(L_decrement_retry);
2533 }
2534 }
2535
2536 // Use RTM for inflated locks
2537 // inputs: obj (object to lock)
2538 // mark_word (current header - KILLED)
2539 // boxReg (on-stack box address (displaced header location) - KILLED)
2540 void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
2541 Register obj, Register mark_word, Register boxReg,
2542 Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
2543 RTMLockingCounters* rtm_counters,
2544 Metadata* method_data, bool profile_rtm,
2545 Label& DONE_LABEL) {
2546 assert(UseRTMLocking, "why call this otherwise?");
2547 Label L_rtm_retry, L_decrement_retry, L_on_abort;
2548 // Clean monitor_value bit to get valid pointer.
2549 int owner_offset = in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value;
2550
2551 // Store non-null, using boxReg instead of (intptr_t)markWord::unused_mark().
2552 std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
2553 const Register tmpReg = boxReg;
2554 const Register owner_addr_Reg = mark_word;
2555 addi(owner_addr_Reg, mark_word, owner_offset);
2556
2557 if (RTMRetryCount > 0) {
2558 load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy.
2559 load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
2560 bind(L_rtm_retry);
2561 }
2562 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2563 Label L_noincrement;
2564 if (RTMTotalCountIncrRate > 1) {
2565 branch_on_random_using_tb(R0, RTMTotalCountIncrRate, L_noincrement);
2566 }
2567 assert(rtm_counters != nullptr, "should not be null when profiling RTM");
2568 load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
2569 //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
2570 ldx(tmpReg, R0);
2571 addi(tmpReg, tmpReg, 1);
2572 stdx(tmpReg, R0);
2573 bind(L_noincrement);
2574 }
2575 tbegin_();
2576 beq(CCR0, L_on_abort);
2577 // We don't reload mark word. Will only be reset at safepoint.
2578 ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
2579 cmpdi(flag, R0, 0);
2580 beq(flag, DONE_LABEL);
2581
2582 if (UseRTMXendForLockBusy) {
2583 tend_();
2584 b(L_decrement_retry);
2585 } else {
2586 tabort_();
2587 }
2588 bind(L_on_abort);
2589 const Register abort_status_Reg = tmpReg;
2590 mftexasr(abort_status_Reg);
2591 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2592 rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
2593 // Restore owner_addr_Reg
2594 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
2595 #ifdef ASSERT
2596 andi_(R0, mark_word, markWord::monitor_value);
2597 asm_assert_ne("must be inflated"); // Deflating only allowed at safepoint.
2598 #endif
2599 addi(owner_addr_Reg, mark_word, owner_offset);
2600 }
2601 if (RTMRetryCount > 0) {
2602 // Retry on lock abort if abort status is not permanent.
2603 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
2604 }
2605
2606 // Appears unlocked - try to swing _owner from null to non-null.
2607 cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
2608 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2609 MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
2610
2611 if (RTMRetryCount > 0) {
2612 // success done else retry
2613 b(DONE_LABEL);
2614 bind(L_decrement_retry);
2615 // Spin and retry if lock is busy.
2616 rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
2617 } else {
2618 bind(L_decrement_retry);
2619 }
2620 }
2621
2622 #endif // INCLUDE_RTM_OPT
2623
2624 // "The box" is the space on the stack where we copy the object mark.
2625 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
2626 Register temp, Register displaced_header, Register current_header,
2627 RTMLockingCounters* rtm_counters,
2628 RTMLockingCounters* stack_rtm_counters,
2629 Metadata* method_data,
2630 bool use_rtm, bool profile_rtm) {
2631 assert_different_registers(oop, box, temp, displaced_header, current_header);
2632 assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
2633 Label object_has_monitor;
2634 Label cas_failed;
2635 Label success, failure;
2636
2637 // Load markWord from object into displaced_header.
2638 ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
2639
2640 if (DiagnoseSyncOnValueBasedClasses != 0) {
2641 load_klass(temp, oop);
2642 lwz(temp, in_bytes(Klass::access_flags_offset()), temp);
2643 testbitdi(flag, R0, temp, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
2644 bne(flag, failure);
2645 }
2646
2647 #if INCLUDE_RTM_OPT
2648 if (UseRTMForStackLocks && use_rtm) {
2649 rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
2650 stack_rtm_counters, method_data, profile_rtm,
2651 success, object_has_monitor);
2652 }
2653 #endif // INCLUDE_RTM_OPT
2654
2655 // Handle existing monitor.
2656 // The object has an existing monitor iff (mark & monitor_value) != 0.
2657 andi_(temp, displaced_header, markWord::monitor_value);
2658 bne(CCR0, object_has_monitor);
2659
2660 if (LockingMode == LM_MONITOR) {
2661 // Set NE to indicate 'failure' -> take slow-path.
2662 crandc(flag, Assembler::equal, flag, Assembler::equal);
2663 b(failure);
2664 } else if (LockingMode == LM_LEGACY) {
2665 // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
2666 ori(displaced_header, displaced_header, markWord::unlocked_value);
2667
2668 // Load Compare Value application register.
2669
2670 // Initialize the box. (Must happen before we update the object mark!)
2671 std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2672
2673 // Must fence, otherwise, preceding store(s) may float below cmpxchg.
2674 // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
2675 cmpxchgd(/*flag=*/flag,
2676 /*current_value=*/current_header,
2677 /*compare_value=*/displaced_header,
2678 /*exchange_value=*/box,
2679 /*where=*/oop,
2680 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2681 MacroAssembler::cmpxchgx_hint_acquire_lock(),
2682 noreg,
2683 &cas_failed,
2684 /*check without membar and ldarx first*/true);
2688 b(success);
2689
2690 bind(cas_failed);
2691 // We did not see an unlocked object so try the fast recursive case.
2692
2693 // Check if the owner is self by comparing the value in the markWord of object
2694 // (current_header) with the stack pointer.
2695 sub(current_header, current_header, R1_SP);
2696 load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
2697
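       // The masked difference is zero iff the lock bits are clear (stack-locked) and
       // the BasicLock lies on this thread's stack, less than one page above SP.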
2698 and_(R0/*==0?*/, current_header, temp);
2699       // If the condition holds (masked difference is zero), the current thread already
2700       // owns the lock and we can store 0 as the displaced header in the box, which indicates a recursive lock.
2701 std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
2702
2703 if (flag != CCR0) {
2704 mcrf(flag, CCR0);
2705 }
2706 beq(CCR0, success);
2707 b(failure);
2708 } else {
2709 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
2710 lightweight_lock(oop, displaced_header, temp, failure);
2711 b(success);
2712 }
2713
2714 // Handle existing monitor.
2715 bind(object_has_monitor);
2716 // The object's monitor m is unlocked iff m->owner is null,
2717 // otherwise m->owner may contain a thread or a stack address.
2718
2719 #if INCLUDE_RTM_OPT
2720 // Use the same RTM locking code in 32- and 64-bit VM.
2721 if (use_rtm) {
2722 rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
2723 rtm_counters, method_data, profile_rtm, success);
2724 bne(flag, failure);
2725 } else {
2726 #endif // INCLUDE_RTM_OPT
2727
2728 // Try to CAS m->owner from null to current thread.
2729 addi(temp, displaced_header, in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value);
2730 cmpxchgd(/*flag=*/flag,
2731 /*current_value=*/current_header,
2732 /*compare_value=*/(intptr_t)0,
2733 /*exchange_value=*/R16_thread,
2734 /*where=*/temp,
2735 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2736 MacroAssembler::cmpxchgx_hint_acquire_lock());
2737
2738 if (LockingMode != LM_LIGHTWEIGHT) {
2739 // Store a non-null value into the box.
2740 std(box, BasicLock::displaced_header_offset_in_bytes(), box);
2741 }
2742 beq(flag, success);
2743
2744 // Check for recursive locking.
2745 cmpd(flag, current_header, R16_thread);
2746 bne(flag, failure);
2747
2748 // Current thread already owns the lock. Just increment recursions.
2749 Register recursions = displaced_header;
2750 ld(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
2751 addi(recursions, recursions, 1);
2752 std(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
2753
2754 #if INCLUDE_RTM_OPT
2755 } // use_rtm()
2756 #endif
2757
2758 // flag == EQ indicates success, increment held monitor count
2759 // flag == NE indicates failure
2760 bind(success);
2761 inc_held_monitor_count(temp);
2762 bind(failure);
2763 }
2764
2765 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
2766 Register temp, Register displaced_header, Register current_header,
2767 bool use_rtm) {
2768 assert_different_registers(oop, box, temp, displaced_header, current_header);
2769 assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
2770 Label success, failure, object_has_monitor, notRecursive;
2771
2772 #if INCLUDE_RTM_OPT
2773 if (UseRTMForStackLocks && use_rtm) {
2774 Label L_regular_unlock;
2775 ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword
2776 andi(R0, current_header, markWord::lock_mask_in_place); // look at 2 lock bits
2777 cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked
2778 bne(flag, L_regular_unlock); // else RegularLock
2779 tend_(); // otherwise end...
2780 b(success); // ... and we're done
2781 bind(L_regular_unlock);
2782 }
2783 #endif
2784
2785 if (LockingMode == LM_LEGACY) {
2786 // Find the lock address and load the displaced header from the stack.
2787 ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2788
2789 // If the displaced header is 0, we have a recursive unlock.
2790 cmpdi(flag, displaced_header, 0);
2791 beq(flag, success);
2792 }
2793
2794 // Handle existing monitor.
2795 // The object has an existing monitor iff (mark & monitor_value) != 0.
2796 RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
2797 ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
2798 andi_(R0, current_header, markWord::monitor_value);
2799 bne(CCR0, object_has_monitor);
2800
2801 if (LockingMode == LM_MONITOR) {
2802 // Set NE to indicate 'failure' -> take slow-path.
2803 crandc(flag, Assembler::equal, flag, Assembler::equal);
2804 b(failure);
2805 } else if (LockingMode == LM_LEGACY) {
2806      // Check if it is still a lightweight lock; this is true if we see
2807 // the stack address of the basicLock in the markWord of the object.
2808 // Cmpxchg sets flag to cmpd(current_header, box).
2809 cmpxchgd(/*flag=*/flag,
2810 /*current_value=*/current_header,
2811 /*compare_value=*/box,
2812 /*exchange_value=*/displaced_header,
2813 /*where=*/oop,
2814 MacroAssembler::MemBarRel,
2815 MacroAssembler::cmpxchgx_hint_release_lock(),
2816 noreg,
2817 &failure);
2818 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
2819 b(success);
2820 } else {
2821 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
2822 lightweight_unlock(oop, current_header, failure);
2823 b(success);
2824 }
2825
2826 // Handle existing monitor.
2827 bind(object_has_monitor);
2828 STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
2829 addi(current_header, current_header, -(int)markWord::monitor_value); // monitor
2830 ld(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
2831
2832 // It's inflated.
2833 #if INCLUDE_RTM_OPT
2834 if (use_rtm) {
2835 Label L_regular_inflated_unlock;
2836 // Clean monitor_value bit to get valid pointer
2837 cmpdi(flag, temp, 0);
2838 bne(flag, L_regular_inflated_unlock);
2839 tend_();
2840 b(success);
2841 bind(L_regular_inflated_unlock);
2842 }
2843 #endif
2844
2845 // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0.
2846 // This is handled like owner thread mismatches: We take the slow path.
2847 cmpd(flag, temp, R16_thread);
2848 bne(flag, failure);
2849
2850 ld(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
2851
2852 addic_(displaced_header, displaced_header, -1);
2853 blt(CCR0, notRecursive); // Not recursive if negative after decrement.
2854 std(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
2855 if (flag == CCR0) { // Otherwise, flag is already EQ, here.
2856 crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set CCR0 EQ
2857 }
2858 b(success);
2859
2860 bind(notRecursive);
2861 ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header);
2862 ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header);
2863 orr(temp, temp, displaced_header); // Will be 0 if both are 0.
2864 cmpdi(flag, temp, 0);
2865 bne(flag, failure);
2866 release();
2867 std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
2868
2869 // flag == EQ indicates success, decrement held monitor count
2870 // flag == NE indicates failure
2871 bind(success);
2872 dec_held_monitor_count(temp);
2873 bind(failure);
2874 }
2875
2876 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod) {
2877 ld(temp, in_bytes(JavaThread::polling_word_offset()), R16_thread);
2878
2879 if (at_return) {
2880 if (in_nmethod) {
2881 if (UseSIGTRAP) {
2882 // Use Signal Handler.
2883 relocate(relocInfo::poll_return_type);
2884 td(traptoGreaterThanUnsigned, R1_SP, temp);
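         // Traps, and thus enters the slow path via the signal handler, when SP is
         // unsigned-greater than the polling word, i.e. the same condition as the
         // explicit cmpld/bgt variant below.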
2885 } else {
2886 cmpld(CCR0, R1_SP, temp);
2887 // Stub may be out of range for short conditional branch.
2888 bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_path);
2889 }
2890 } else { // Not in nmethod.
2891 // Frame still on stack, need to get fp.
2892 Register fp = R0;
2893 ld(fp, _abi0(callers_sp), R1_SP);
2894 cmpld(CCR0, fp, temp);
2895 bgt(CCR0, slow_path);
4475 xori(tmp, tmp, markWord::unlocked_value); // flip unlocked bit
4476 andi_(R0, tmp, markWord::lock_mask_in_place);
4477 bne(CCR0, failed); // failed if new header doesn't contain locked_value (which is 0)
4478 } else {
4479 ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_release_lock());
4480 andi_(R0, tmp, markWord::lock_mask_in_place);
4481 bne(CCR0, failed); // failed if old header doesn't contain locked_value (which is 0)
4482 ori(tmp, tmp, markWord::unlocked_value); // set unlocked bit
4483 }
4484 stdcx_(tmp, obj);
4485 bne(CCR0, retry);
4486
4487 if (semantics & MemBarFenceAfter) {
4488 fence();
4489 } else if (semantics & MemBarAcq) {
4490 isync();
4491 }
4492 }
4493
4494 // Implements lightweight-locking.
4495 // Branches to slow upon failure to lock the object, with CCR0 NE.
4496 // Falls through upon success with CCR0 EQ.
4497 //
4498 // - obj: the object to be locked
4499 // - hdr: the header, already loaded from obj, will be destroyed
4500 // - t1: temporary register
4501 void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register t1, Label& slow) {
4502 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4503 assert_different_registers(obj, hdr, t1);
4504
4505 // Check if we would have space on lock-stack for the object.
4506 lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4507 cmplwi(CCR0, t1, LockStack::end_offset() - 1);
4508 bgt(CCR0, slow);
4509
4510 // Quick check: Do not reserve cache line for atomic update if not unlocked.
4511 // (Similar to contention_hint in cmpxchg solutions.)
4512 xori(R0, hdr, markWord::unlocked_value); // flip unlocked bit
4513 andi_(R0, R0, markWord::lock_mask_in_place);
4514 bne(CCR0, slow); // failed if new header doesn't contain locked_value (which is 0)
4515
4516 // Note: We're not publishing anything (like the displaced header in LM_LEGACY)
4517 // to other threads at this point. Hence, no release barrier, here.
4518 // (The obj has been written to the BasicObjectLock at obj_offset() within the own thread stack.)
4519 atomically_flip_locked_state(/* is_unlock */ false, obj, hdr, slow, MacroAssembler::MemBarAcq);
4520
4521 // After successful lock, push object on lock-stack
4522 stdx(obj, t1, R16_thread);
4523 addi(t1, t1, oopSize);
4524 stw(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4525 }
4526
4527 // Implements lightweight-unlocking.
4528 // Branches to slow upon failure, with CCR0 NE.
4529 // Falls through upon success, with CCR0 EQ.
4530 //
4531 // - obj: the object to be unlocked
4532 // - hdr: the (pre-loaded) header of the object, will be destroyed
4533 void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Label& slow) {
4534 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4535 assert_different_registers(obj, hdr);
4536
4537 #ifdef ASSERT
4538 {
4539 // Check that hdr is fast-locked.
4540 Label hdr_ok;
4541 andi_(R0, hdr, markWord::lock_mask_in_place);
4542 beq(CCR0, hdr_ok);
4543 stop("Header is not fast-locked");
4544 bind(hdr_ok);
4545 }
4546 Register t1 = hdr; // Reuse in debug build.
4547 {
4548 // The following checks rely on the fact that LockStack is only ever modified by
4549 // its owning thread, even if the lock got inflated concurrently; removal of LockStack
4550 // entries after inflation will happen delayed in that case.
4551
4552 // Check for lock-stack underflow.
4553 Label stack_ok;
4554 lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4555 cmplwi(CCR0, t1, LockStack::start_offset());
4556 bgt(CCR0, stack_ok);
4557 stop("Lock-stack underflow");
4558 bind(stack_ok);
4559 }
4560 {
4561 // Check if the top of the lock-stack matches the unlocked object.
4562 Label tos_ok;
4563 addi(t1, t1, -oopSize);
4564 ldx(t1, t1, R16_thread);
4565 cmpd(CCR0, t1, obj);
4566 beq(CCR0, tos_ok);
4567 stop("Top of lock-stack does not match the unlocked object");
4568 bind(tos_ok);
4569 }
4570 #endif
4571
4572 // Release the lock.
4573 atomically_flip_locked_state(/* is_unlock */ true, obj, hdr, slow, MacroAssembler::MemBarRel);
4574
4575 // After successful unlock, pop object from lock-stack
4576 Register t2 = hdr;
4577 lwz(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4578 addi(t2, t2, -oopSize);
4579 #ifdef ASSERT
4580 li(R0, 0);
4581 stdx(R0, t2, R16_thread);
4582 #endif
4583 stw(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4584 }
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "compiler/disassembler.hpp"
29 #include "gc/shared/collectedHeap.inline.hpp"
30 #include "gc/shared/barrierSet.hpp"
31 #include "gc/shared/barrierSetAssembler.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "nativeInst_ppc.hpp"
35 #include "oops/klass.inline.hpp"
36 #include "oops/methodData.hpp"
37 #include "prims/methodHandles.hpp"
38 #include "register_ppc.hpp"
39 #include "runtime/icache.hpp"
40 #include "runtime/interfaceSupport.inline.hpp"
41 #include "runtime/objectMonitor.hpp"
42 #include "runtime/os.hpp"
43 #include "runtime/safepoint.hpp"
44 #include "runtime/safepointMechanism.hpp"
45 #include "runtime/sharedRuntime.hpp"
46 #include "runtime/stubRoutines.hpp"
47 #include "runtime/vm_version.hpp"
48 #include "utilities/macros.hpp"
49 #include "utilities/powerOfTwo.hpp"
50
51 #ifdef PRODUCT
52 #define BLOCK_COMMENT(str) // nothing
53 #else
54 #define BLOCK_COMMENT(str) block_comment(str)
55 #endif
56 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
57
58 #ifdef ASSERT
2157 }
2158
2159 ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, Rtoc, false);
2160 mtctr(reg_scratch);
2161 bctr();
2162
2163 const address stub_start_addr = addr_at(stub_start_offset);
2164
2165 // Assert that the encoded destination_toc_offset can be identified and that it is correct.
2166 assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
2167 "encoded offset into the constant pool must match");
2168 // Trampoline_stub_size should be good.
2169 assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
2170 assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
2171
2172 // End the stub.
2173 end_a_stub();
2174 return stub;
2175 }
2176
2177 // "The box" is the space on the stack where we copy the object mark.
2178 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
2179 Register temp, Register displaced_header, Register current_header) {
2180 assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_lock_lightweight");
2181 assert_different_registers(oop, box, temp, displaced_header, current_header);
2182 Label object_has_monitor;
2183 Label cas_failed;
2184 Label success, failure;
2185
2186 // Load markWord from object into displaced_header.
2187 ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
2188
2189 if (DiagnoseSyncOnValueBasedClasses != 0) {
2190 load_klass(temp, oop);
2191 lwz(temp, in_bytes(Klass::access_flags_offset()), temp);
2192 testbitdi(flag, R0, temp, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
2193 bne(flag, failure);
2194 }
2195
2196 // Handle existing monitor.
2197 // The object has an existing monitor iff (mark & monitor_value) != 0.
2198 andi_(temp, displaced_header, markWord::monitor_value);
2199 bne(CCR0, object_has_monitor);
2200
2201 if (LockingMode == LM_MONITOR) {
2202 // Set NE to indicate 'failure' -> take slow-path.
2203 crandc(flag, Assembler::equal, flag, Assembler::equal);
2204 b(failure);
2205 } else {
2206 assert(LockingMode == LM_LEGACY, "must be");
2207 // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
2208 ori(displaced_header, displaced_header, markWord::unlocked_value);
2209
2210 // Load Compare Value application register.
2211
2212 // Initialize the box. (Must happen before we update the object mark!)
2213 std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2214
2215 // Must fence, otherwise, preceding store(s) may float below cmpxchg.
2216 // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
2217 cmpxchgd(/*flag=*/flag,
2218 /*current_value=*/current_header,
2219 /*compare_value=*/displaced_header,
2220 /*exchange_value=*/box,
2221 /*where=*/oop,
2222 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2223 MacroAssembler::cmpxchgx_hint_acquire_lock(),
2224 noreg,
2225 &cas_failed,
2226 /*check without membar and ldarx first*/true);
2230 b(success);
2231
2232 bind(cas_failed);
2233 // We did not see an unlocked object so try the fast recursive case.
2234
2235 // Check if the owner is self by comparing the value in the markWord of object
2236 // (current_header) with the stack pointer.
2237 sub(current_header, current_header, R1_SP);
2238 load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
2239
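      // The masked difference is zero iff the lock bits are clear (stack-locked) and
      // the BasicLock lies on this thread's stack, less than one page above SP.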
2240 and_(R0/*==0?*/, current_header, temp);
2241      // If the condition holds (masked difference is zero), the current thread already
2242      // owns the lock and we can store 0 as the displaced header in the box, which indicates a recursive lock.
2243 std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
2244
2245 if (flag != CCR0) {
2246 mcrf(flag, CCR0);
2247 }
2248 beq(CCR0, success);
2249 b(failure);
2250 }
2251
2252 // Handle existing monitor.
2253 bind(object_has_monitor);
2254 // The object's monitor m is unlocked iff m->owner is null,
2255 // otherwise m->owner may contain a thread or a stack address.
2256
2257 // Try to CAS m->owner from null to current thread.
2258 addi(temp, displaced_header, in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value);
2259 cmpxchgd(/*flag=*/flag,
2260 /*current_value=*/current_header,
2261 /*compare_value=*/(intptr_t)0,
2262 /*exchange_value=*/R16_thread,
2263 /*where=*/temp,
2264 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2265 MacroAssembler::cmpxchgx_hint_acquire_lock());
2266
2267 // Store a non-null value into the box.
2268 std(box, BasicLock::displaced_header_offset_in_bytes(), box);
2269 beq(flag, success);
2270
2271 // Check for recursive locking.
2272 cmpd(flag, current_header, R16_thread);
2273 bne(flag, failure);
2274
2275 // Current thread already owns the lock. Just increment recursions.
2276 Register recursions = displaced_header;
2277 ld(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
2278 addi(recursions, recursions, 1);
2279 std(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
2280
2281 // flag == EQ indicates success, increment held monitor count
2282 // flag == NE indicates failure
2283 bind(success);
2284 inc_held_monitor_count(temp);
2285 bind(failure);
2286 }
2287
2288 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
2289 Register temp, Register displaced_header, Register current_header) {
2290 assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_unlock_lightweight");
2291 assert_different_registers(oop, box, temp, displaced_header, current_header);
2292 Label success, failure, object_has_monitor, notRecursive;
2293
2294 if (LockingMode == LM_LEGACY) {
2295 // Find the lock address and load the displaced header from the stack.
2296 ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2297
2298 // If the displaced header is 0, we have a recursive unlock.
2299 cmpdi(flag, displaced_header, 0);
2300 beq(flag, success);
2301 }
2302
2303 // Handle existing monitor.
2304 // The object has an existing monitor iff (mark & monitor_value) != 0.
2305 ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
2306 andi_(R0, current_header, markWord::monitor_value);
2307 bne(CCR0, object_has_monitor);
2308
2309 if (LockingMode == LM_MONITOR) {
2310 // Set NE to indicate 'failure' -> take slow-path.
2311 crandc(flag, Assembler::equal, flag, Assembler::equal);
2312 b(failure);
2313 } else {
2314 assert(LockingMode == LM_LEGACY, "must be");
2315      // Check if it is still a lightweight lock; this is true if we see
2316 // the stack address of the basicLock in the markWord of the object.
2317 // Cmpxchg sets flag to cmpd(current_header, box).
2318 cmpxchgd(/*flag=*/flag,
2319 /*current_value=*/current_header,
2320 /*compare_value=*/box,
2321 /*exchange_value=*/displaced_header,
2322 /*where=*/oop,
2323 MacroAssembler::MemBarRel,
2324 MacroAssembler::cmpxchgx_hint_release_lock(),
2325 noreg,
2326 &failure);
2327 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
2328 b(success);
2329 }
2330
2331 // Handle existing monitor.
2332 bind(object_has_monitor);
2333 STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
2334 addi(current_header, current_header, -(int)markWord::monitor_value); // monitor
2335 ld(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
2336
2337 // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0.
2338 // This is handled like owner thread mismatches: We take the slow path.
2339 cmpd(flag, temp, R16_thread);
2340 bne(flag, failure);
2341
2342 ld(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
2343
2344 addic_(displaced_header, displaced_header, -1);
2345 blt(CCR0, notRecursive); // Not recursive if negative after decrement.
2346 std(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
2347 if (flag == CCR0) { // Otherwise, flag is already EQ, here.
2348 crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set CCR0 EQ
2349 }
2350 b(success);
2351
2352 bind(notRecursive);
2353 ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header);
2354 ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header);
2355 orr(temp, temp, displaced_header); // Will be 0 if both are 0.
2356 cmpdi(flag, temp, 0);
2357 bne(flag, failure);
2358 release();
2359 std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
2360
2361 // flag == EQ indicates success, decrement held monitor count
2362 // flag == NE indicates failure
2363 bind(success);
2364 dec_held_monitor_count(temp);
2365 bind(failure);
2366 }
2367
2368 void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
2369 Register tmp2, Register tmp3) {
2370 assert_different_registers(obj, tmp1, tmp2, tmp3);
2371 assert(flag == CCR0, "bad condition register");
2372
2373 // Handle inflated monitor.
2374 Label inflated;
2375   // Finish fast lock successfully. MUST be reached with flag == EQ
2376 Label locked;
2377   // Finish fast lock unsuccessfully. MUST be reached with flag == NE
2378 Label slow_path;
2379
2380 if (DiagnoseSyncOnValueBasedClasses != 0) {
2381 load_klass(tmp1, obj);
2382 lwz(tmp1, in_bytes(Klass::access_flags_offset()), tmp1);
2383 testbitdi(flag, R0, tmp1, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
2384 bne(flag, slow_path);
2385 }
2386
2387 const Register mark = tmp1;
2388 const Register t = tmp3; // Usage of R0 allowed!
2389
2390 { // Lightweight locking
2391
2392     // Push lock to the lock stack and finish successfully. MUST be reached with flag == EQ
2393 Label push;
2394
2395 const Register top = tmp2;
2396
2397 // Check if lock-stack is full.
2398 lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2399 cmplwi(flag, top, LockStack::end_offset() - 1);
2400 bgt(flag, slow_path);
2401
2402 // The underflow check is elided. The recursive check will always fail
2403 // when the lock stack is empty because of the _bad_oop_sentinel field.
2404
2405 // Check if recursive.
2406 subi(t, top, oopSize);
2407 ldx(t, R16_thread, t);
2408 cmpd(flag, obj, t);
2409 beq(flag, push);
2410
2411 // Check for monitor (0b10) or locked (0b00).
2412 ld(mark, oopDesc::mark_offset_in_bytes(), obj);
2413 andi_(t, mark, markWord::lock_mask_in_place);
2414 cmpldi(flag, t, markWord::unlocked_value);
2415 bgt(flag, inflated);
2416 bne(flag, slow_path);
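      // Here the lock bits must be 0b01 (unlocked): anything greater was dispatched
      // to the inflated path above, 0b00 (already locked) to the slow path.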
2417
2418 // Not inflated.
2419
2420      // Try to lock. Transition lock bits 0b01 => 0b00
2421 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
2422 atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow_path, MacroAssembler::MemBarAcq);
2423
2424 bind(push);
2425 // After successful lock, push object on lock-stack.
2426 stdx(obj, R16_thread, top);
2427 addi(top, top, oopSize);
2428 stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2429 b(locked);
2430 }
2431
2432 { // Handle inflated monitor.
2433 bind(inflated);
2434
2435 // mark contains the tagged ObjectMonitor*.
2436 const Register tagged_monitor = mark;
2437 const uintptr_t monitor_tag = markWord::monitor_value;
2438 const Register owner_addr = tmp2;
2439
2440 // Compute owner address.
2441 addi(owner_addr, tagged_monitor, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag);
2442
2443 // CAS owner (null => current thread).
2444 cmpxchgd(/*flag=*/flag,
2445 /*current_value=*/t,
2446 /*compare_value=*/(intptr_t)0,
2447 /*exchange_value=*/R16_thread,
2448 /*where=*/owner_addr,
2449 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2450 MacroAssembler::cmpxchgx_hint_acquire_lock());
2451 beq(flag, locked);
2452
2453 // Check if recursive.
2454 cmpd(flag, t, R16_thread);
2455 bne(flag, slow_path);
2456
2457 // Recursive.
2458 ld(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
2459 addi(tmp1, tmp1, 1);
2460 std(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
2461 }
2462
2463 bind(locked);
2464 inc_held_monitor_count(tmp1);
2465
2466 #ifdef ASSERT
2467 // Check that locked label is reached with flag == EQ.
2468 Label flag_correct;
2469 beq(flag, flag_correct);
2470 stop("Fast Lock Flag != EQ");
2471 #endif
2472 bind(slow_path);
2473 #ifdef ASSERT
2474 // Check that slow_path label is reached with flag == NE.
2475 bne(flag, flag_correct);
2476 stop("Fast Lock Flag != NE");
2477 bind(flag_correct);
2478 #endif
2479 // C2 uses the value of flag (NE vs EQ) to determine the continuation.
2480 }
2481
2482 void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
2483 Register tmp2, Register tmp3) {
2484 assert_different_registers(obj, tmp1, tmp2, tmp3);
2485 assert(flag == CCR0, "bad condition register");
2486
2487 // Handle inflated monitor.
2488 Label inflated, inflated_load_monitor;
2489   // Finish fast unlock successfully. MUST be reached with flag == EQ.
2490 Label unlocked;
2491   // Finish fast unlock unsuccessfully. MUST be reached with flag == NE.
2492 Label slow_path;
2493
2494 const Register mark = tmp1;
2495 const Register top = tmp2;
2496 const Register t = tmp3;
2497
2498 { // Lightweight unlock
2499 Label push_and_slow;
2500
2501 // Check if obj is top of lock-stack.
2502 lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2503 subi(top, top, oopSize);
2504 ldx(t, R16_thread, top);
2505 cmpd(flag, obj, t);
2506 // Top of lock stack was not obj. Must be monitor.
2507 bne(flag, inflated_load_monitor);
2508
2509 // Pop lock-stack.
2510 DEBUG_ONLY(li(t, 0);)
2511 DEBUG_ONLY(stdx(t, R16_thread, top);)
2512 stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2513
2514 // The underflow check is elided. The recursive check will always fail
2515 // when the lock stack is empty because of the _bad_oop_sentinel field.
2516
2517 // Check if recursive.
2518 subi(t, top, oopSize);
2519 ldx(t, R16_thread, t);
2520 cmpd(flag, obj, t);
2521 beq(flag, unlocked);
2522
2523 // Not recursive.
2524
2525 // Check for monitor (0b10).
2526 ld(mark, oopDesc::mark_offset_in_bytes(), obj);
2527 andi_(t, mark, markWord::monitor_value);
2528 bne(CCR0, inflated);
2529
2530 #ifdef ASSERT
2531 // Check header not unlocked (0b01).
2532 Label not_unlocked;
2533 andi_(t, mark, markWord::unlocked_value);
2534 beq(CCR0, not_unlocked);
2535 stop("lightweight_unlock already unlocked");
2536 bind(not_unlocked);
2537 #endif
2538
2539 // Try to unlock. Transition lock bits 0b00 => 0b01
2540 atomically_flip_locked_state(/* is_unlock */ true, obj, mark, push_and_slow, MacroAssembler::MemBarRel);
2541 b(unlocked);
2542
2543 bind(push_and_slow);
2544 // Restore lock-stack and handle the unlock in runtime.
2545 DEBUG_ONLY(stdx(obj, R16_thread, top);)
2546 addi(top, top, oopSize);
2547 stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
2548 b(slow_path);
2549 }
2550
2551 { // Handle inflated monitor.
2552 bind(inflated_load_monitor);
2553 ld(mark, oopDesc::mark_offset_in_bytes(), obj);
2554 #ifdef ASSERT
2555 andi_(t, mark, markWord::monitor_value);
2556 bne(CCR0, inflated);
2557 stop("Fast Unlock not monitor");
2558 #endif
2559
2560 bind(inflated);
2561
2562 #ifdef ASSERT
2563 Label check_done;
2564 subi(top, top, oopSize);
2565 cmplwi(CCR0, top, in_bytes(JavaThread::lock_stack_base_offset()));
2566 blt(CCR0, check_done);
2567 ldx(t, R16_thread, top);
2568 cmpd(flag, obj, t);
2569 bne(flag, inflated);
2570 stop("Fast Unlock lock on stack");
2571 bind(check_done);
2572 #endif
2573
2574 // mark contains the tagged ObjectMonitor*.
2575 const Register monitor = mark;
2576 const uintptr_t monitor_tag = markWord::monitor_value;
2577
2578 // Untag the monitor.
2579 subi(monitor, mark, monitor_tag);
2580
2581 const Register recursions = tmp2;
2582 Label not_recursive;
2583
2584 // Check if recursive.
2585 ld(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
2586 addic_(recursions, recursions, -1);
2587 blt(CCR0, not_recursive);
2588
2589 // Recursive unlock.
2590 std(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
2591 crorc(CCR0, Assembler::equal, CCR0, Assembler::equal);
2592 b(unlocked);
2593
2594 bind(not_recursive);
2595
2596 Label release_;
2597 const Register t2 = tmp2;
2598
2599 // Check if the entry lists are empty.
2600 ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor);
2601 ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor);
2602 orr(t, t, t2);
2603 cmpdi(flag, t, 0);
2604 beq(flag, release_);
2605
2606 // The owner may be anonymous and we removed the last obj entry in
2607 // the lock-stack. This loses the information about the owner.
2608 // Write the thread to the owner field so the runtime knows the owner.
2609 std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor);
2610 b(slow_path);
2611
2612 bind(release_);
2613 // Set owner to null.
2614 release();
2615 // t contains 0
2616 std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
2617 }
2618
2619 bind(unlocked);
2620 dec_held_monitor_count(t);
2621
2622 #ifdef ASSERT
2623 // Check that unlocked label is reached with flag == EQ.
2624 Label flag_correct;
2625 beq(flag, flag_correct);
2626   stop("Fast Unlock Flag != EQ");
2627 #endif
2628 bind(slow_path);
2629 #ifdef ASSERT
2630 // Check that slow_path label is reached with flag == NE.
2631 bne(flag, flag_correct);
2632   stop("Fast Unlock Flag != NE");
2633 bind(flag_correct);
2634 #endif
2635 // C2 uses the value of flag (NE vs EQ) to determine the continuation.
2636 }
2637
2638 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod) {
2639 ld(temp, in_bytes(JavaThread::polling_word_offset()), R16_thread);
2640
2641 if (at_return) {
2642 if (in_nmethod) {
2643 if (UseSIGTRAP) {
2644 // Use Signal Handler.
2645 relocate(relocInfo::poll_return_type);
2646 td(traptoGreaterThanUnsigned, R1_SP, temp);
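        // Traps, and thus enters the slow path via the signal handler, when SP is
        // unsigned-greater than the polling word, i.e. the same condition as the
        // explicit cmpld/bgt variant below.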
2647 } else {
2648 cmpld(CCR0, R1_SP, temp);
2649 // Stub may be out of range for short conditional branch.
2650 bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_path);
2651 }
2652 } else { // Not in nmethod.
2653 // Frame still on stack, need to get fp.
2654 Register fp = R0;
2655 ld(fp, _abi0(callers_sp), R1_SP);
2656 cmpld(CCR0, fp, temp);
2657 bgt(CCR0, slow_path);
4237 xori(tmp, tmp, markWord::unlocked_value); // flip unlocked bit
4238 andi_(R0, tmp, markWord::lock_mask_in_place);
4239 bne(CCR0, failed); // failed if new header doesn't contain locked_value (which is 0)
4240 } else {
4241 ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_release_lock());
4242 andi_(R0, tmp, markWord::lock_mask_in_place);
4243 bne(CCR0, failed); // failed if old header doesn't contain locked_value (which is 0)
4244 ori(tmp, tmp, markWord::unlocked_value); // set unlocked bit
4245 }
4246 stdcx_(tmp, obj);
4247 bne(CCR0, retry);
4248
4249 if (semantics & MemBarFenceAfter) {
4250 fence();
4251 } else if (semantics & MemBarAcq) {
4252 isync();
4253 }
4254 }
4255
4256 // Implements lightweight-locking.
4257 //
4258 // - obj: the object to be locked
4259 // - t1, t2: temporary register
4260 void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Label& slow) {
4261 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4262 assert_different_registers(obj, t1, t2);
4263
4264 Label push;
4265 const Register top = t1;
4266 const Register mark = t2;
4267 const Register t = R0;
4268
4269 // Check if the lock-stack is full.
4270 lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4271 cmplwi(CCR0, top, LockStack::end_offset());
4272 bge(CCR0, slow);
4273
4274 // The underflow check is elided. The recursive check will always fail
4275 // when the lock stack is empty because of the _bad_oop_sentinel field.
4276
4277 // Check for recursion.
4278 subi(t, top, oopSize);
4279 ldx(t, R16_thread, t);
4280 cmpd(CCR0, obj, t);
4281 beq(CCR0, push);
4282
4283 // Check header for monitor (0b10) or locked (0b00).
4284 ld(mark, oopDesc::mark_offset_in_bytes(), obj);
4285 xori(t, mark, markWord::unlocked_value);
4286 andi_(t, t, markWord::lock_mask_in_place);
4287 bne(CCR0, slow);
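    // The xor flips the unlocked bit, so the masked result is zero only if the header
    // was unlocked (0b01); a locked (0b00) or monitor (0b10) header takes the slow path.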
4288
4289    // Try to lock. Transition lock bits 0b01 => 0b00
4290 atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow, MacroAssembler::MemBarAcq);
4291
4292 bind(push);
4293 // After successful lock, push object on lock-stack
4294 stdx(obj, R16_thread, top);
4295 addi(top, top, oopSize);
4296 stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4297 }
4298
4299 // Implements lightweight-unlocking.
4300 //
4301 // - obj: the object to be unlocked
4302 // - t1: temporary register
4303 void MacroAssembler::lightweight_unlock(Register obj, Register t1, Label& slow) {
4304 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4305 assert_different_registers(obj, t1);
4306
4307 #ifdef ASSERT
4308 {
4309 // The following checks rely on the fact that LockStack is only ever modified by
4310 // its owning thread, even if the lock got inflated concurrently; removal of LockStack
4311 // entries after inflation will happen delayed in that case.
4312
4313 // Check for lock-stack underflow.
4314 Label stack_ok;
4315 lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4316 cmplwi(CCR0, t1, LockStack::start_offset());
4317 bge(CCR0, stack_ok);
4318 stop("Lock-stack underflow");
4319 bind(stack_ok);
4320 }
4321 #endif
4322
4323 Label unlocked, push_and_slow;
4324 const Register top = t1;
4325 const Register mark = R0;
4326 Register t = R0;
4327
4328 // Check if obj is top of lock-stack.
4329 lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4330 subi(top, top, oopSize);
4331 ldx(t, R16_thread, top);
4332 cmpd(CCR0, obj, t);
4333 bne(CCR0, slow);
4334
4335 // Pop lock-stack.
4336 DEBUG_ONLY(li(t, 0);)
4337 DEBUG_ONLY(stdx(t, R16_thread, top);)
4338 stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4339
4340 // The underflow check is elided. The recursive check will always fail
4341 // when the lock stack is empty because of the _bad_oop_sentinel field.
4342
4343 // Check if recursive.
4344 subi(t, top, oopSize);
4345 ldx(t, R16_thread, t);
4346 cmpd(CCR0, obj, t);
4347 beq(CCR0, unlocked);
4348
4349 // Use top as tmp
4350 t = top;
4351
4352 // Not recursive. Check header for monitor (0b10).
4353 ld(mark, oopDesc::mark_offset_in_bytes(), obj);
4354 andi_(t, mark, markWord::monitor_value);
4355 bne(CCR0, push_and_slow);
4356
4357 #ifdef ASSERT
4358 // Check header not unlocked (0b01).
4359 Label not_unlocked;
4360 andi_(t, mark, markWord::unlocked_value);
4361 beq(CCR0, not_unlocked);
4362 stop("lightweight_unlock already unlocked");
4363 bind(not_unlocked);
4364 #endif
4365
4366 // Try to unlock. Transition lock bits 0b00 => 0b01
4367 atomically_flip_locked_state(/* is_unlock */ true, obj, t, push_and_slow, MacroAssembler::MemBarRel);
4368 b(unlocked);
4369
4370 bind(push_and_slow);
4371
4372 // Restore lock-stack and handle the unlock in runtime.
4373 lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4374 DEBUG_ONLY(stdx(obj, R16_thread, top);)
4375 addi(top, top, oopSize);
4376 stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
4377 b(slow);
4378
4379 bind(unlocked);
4380 }