1 /*
   2  * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
   4  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  33 #include "gc/shenandoah/shenandoahRuntime.hpp"
  34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  35 #include "interpreter/interp_masm.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "runtime/javaThread.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_LIRAssembler.hpp"
  41 #include "c1/c1_MacroAssembler.hpp"
  42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  43 #endif
  44 #ifdef COMPILER2
  45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  46 #include "opto/output.hpp"
  47 #endif
  48 
  49 #define __ masm->
  50 
  51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
  52                                                        Register src, Register dst, Register count, RegSet saved_regs) {
  53   if (is_oop) {
  54     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  55     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  56 
  57       Label done;
  58 
  59       // Avoid calling runtime if count == 0
  60       __ cbz(count, done);
  61 
  62       // Is GC active?
  63       assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
  64       assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
  65       Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  66       __ ldrb(rscratch1, gc_state);
  67       if (ShenandoahSATBBarrier && dest_uninitialized) {
  68         __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
  69       } else {
  70         __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
  71         __ tst(rscratch1, rscratch2);
  72         __ br(Assembler::EQ, done);
  73       }
  74 
  75       __ push_call_clobbered_registers();
  76       // If arguments are not in proper places, shuffle them.
  77       // Doing this via the stack is the most straight-forward way to avoid
  78       // accidentally smashing any register.
  79       if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
  80         __ push(RegSet::of(src), sp);
  81         __ push(RegSet::of(dst), sp);
  82         __ push(RegSet::of(count), sp);
  83         __ pop(RegSet::of(c_rarg2), sp);
  84         __ pop(RegSet::of(c_rarg1), sp);
  85         __ pop(RegSet::of(c_rarg0), sp);
  86       }
  87       address target = nullptr;
  88       if (UseCompressedOops) {
  89         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
  90       } else {
  91         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
  92       }
  93       __ call_VM_leaf(target, 3);
  94       __ pop_call_clobbered_registers();
  95       __ bind(done);
  96     }
  97   }
  98 }
  99 
 100 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
 101                                                        Register start, Register count, Register tmp) {
 102   if (ShenandoahCardBarrier && is_oop) {
 103     gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
 104   }
 105 }
 106 
 107 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
 108                                                  Register obj,
 109                                                  Register pre_val,
 110                                                  Register thread,
 111                                                  Register tmp1,
 112                                                  Register tmp2) {
 113   assert(ShenandoahSATBBarrier, "Should be checked by caller");
 114   assert(thread == rthread, "must be");
 115 
 116   Label done;
 117   Label runtime;
 118 
 119   assert_different_registers(obj, pre_val, tmp1, tmp2);
 120   assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
 121 
 122   Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 123   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 124 
 125   // Is marking active?
 126   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 127   __ ldrb(tmp1, gc_state);
 128   __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, done);
 129 
 130   // Do we need to load the previous value?
 131   if (obj != noreg) {
 132     if (UseCompressedOops) {
 133       __ ldrw(pre_val, Address(obj, 0));
 134       __ decode_heap_oop(pre_val);
 135     } else {
 136       __ ldr(pre_val, Address(obj, 0));
 137     }
 138   }
 139 
 140   // Is the previous value null?
 141   __ cbz(pre_val, done);
 142 
 143   // Can we store original value in the thread's buffer?
 144   // Is index == 0?
 145   // (The index field is typed as size_t.)
 146 
 147   __ ldr(tmp1, index);                      // tmp := *index_adr
 148   __ cbz(tmp1, runtime);                    // tmp == 0?
 149                                         // If yes, goto runtime
 150 
 151   __ sub(tmp1, tmp1, wordSize);             // tmp := tmp - wordSize
 152   __ str(tmp1, index);                      // *index_adr := tmp
 153   __ ldr(tmp2, buffer);
 154   __ add(tmp1, tmp1, tmp2);                 // tmp := tmp + *buffer_adr
 155 
 156   // Record the previous value
 157   __ str(pre_val, Address(tmp1, 0));
 158   __ b(done);
 159 
 160   __ bind(runtime);
 161 
 162   // Slow-path call
 163   __ enter(/* strip_ret_addr = */ true);
 164   __ push_call_clobbered_registers();
 165   if (c_rarg0 != pre_val) {
 166     __ mov(c_rarg0, pre_val);
 167   }
 168   // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
 169   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
 170   __ pop_call_clobbered_registers();
 171   __ leave();
 172 
 173   __ bind(done);
 174 }
 175 
// Emits the load-reference barrier (LRB) for an oop held in dst that was
// loaded from load_addr.
//
// The generated code first checks the thread-local GC state and skips the
// barrier entirely when the heap is stable. For strong accesses it then
// checks the in-cset table and skips the runtime call when the tested bit
// is clear. Otherwise one of the ShenandoahRuntime::load_reference_barrier_*
// entry points is called with (r0 = object, r1 = load address) and its
// result ends up in dst.
//
//   dst        in/out: the loaded oop; must not be rscratch2
//   load_addr  location the oop was loaded from; its base/index must not be
//              rscratch1 or rscratch2
//   decorators select strong/weak/phantom strength and native vs. heap access
//
// Clobbers rscratch1 and rscratch2.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");
  assert(dst != rscratch2, "need rscratch2");
  assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
  // Narrow runtime entry points are only used for non-native accesses.
  bool is_narrow  = UseCompressedOops && !is_native;

  Label heap_stable, not_cset;
  Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(rscratch2, gc_state);

  // Check for heap stability
  if (is_strong) {
    __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
  } else {
    // Non-strong accesses additionally run the barrier whenever the
    // WEAK_ROOTS bit is set, regardless of HAS_FORWARDED.
    Label lrb;
    __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, lrb);
    __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
    __ bind(lrb);
  }

  // r1 is about to receive the load address; if the oop currently lives in
  // r1, park it in rscratch1 first.
  Register result_dst = dst;
  if (dst == r1) {
    __ mov(rscratch1, dst);
    dst = rscratch1;
  }

  // Save r0 and r1, unless it is an output register
  RegSet to_save = RegSet::of(r0, r1) - result_dst;
  __ push(to_save, sp);
  __ lea(r1, load_addr);
  __ mov(r0, dst);

  // Test for in-cset
  if (is_strong) {
    if (AOTCodeCache::is_on_for_dump()) {
      // Read the table base and the shift from memory rather than embedding
      // them as immediates.
      __ lea(rscratch2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
      __ ldr(rscratch2, Address(rscratch2));
      __ lea(rscratch1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
      __ ldrw(rscratch1, Address(rscratch1));
      __ lsrv(rscratch1, r0, rscratch1);
    } else {
      __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
      __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    }
    // rscratch2 = table[object address >> shift]; bit 0 clear => skip the call.
    __ ldrb(rscratch2, Address(rscratch2, rscratch1));
    __ tbz(rscratch2, 0, not_cset);
  }

  // Slow-path call
  __ enter(/* strip_ret_addr = */ true);
  __ push_call_clobbered_registers();
  address target = nullptr;
  if (is_strong) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    }
  } else if (is_weak) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }
  // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
  __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
  // Carry the result in rscratch1 across the register restore, then put it
  // back into r0.
  __ mov(rscratch1, r0);
  __ pop_call_clobbered_registers();
  __ mov(r0, rscratch1);
  __ leave();

  __ bind(not_cset);

  // r0 holds the (possibly updated) oop on both paths; move it to the
  // caller's register and restore whichever of r0/r1 were saved.
  __ mov(result_dst, r0);
  __ pop(to_save, sp);

  __ bind(heap_stable);
}
 265 
 266 //
 267 // Arguments:
 268 //
 269 // Inputs:
 270 //   src:        oop location to load from, might be clobbered
 271 //
 272 // Output:
 273 //   dst:        oop loaded from src location
 274 //
 275 // Kill:
 276 //   rscratch1 (scratch reg)
 277 //
 278 // Alias:
 279 //   dst: rscratch1 (might use rscratch1 as temporary output register to avoid clobbering src)
 280 //
 281 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 282                                             Register dst, Address src, Register tmp1, Register tmp2) {
 283   // 1: non-reference load, no additional barrier is needed
 284   if (!is_reference_type(type)) {
 285     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
 286     return;
 287   }
 288 
 289   // 2: load a reference from src location and apply LRB if needed
 290   if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
 291     Register result_dst = dst;
 292 
 293     // Preserve src location for LRB
 294     if (dst == src.base() || dst == src.index()) {
 295       dst = rscratch1;
 296     }
 297     assert_different_registers(dst, src.base(), src.index());
 298 
 299     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
 300 
 301     load_reference_barrier(masm, dst, src, decorators);
 302 
 303     if (dst != result_dst) {
 304       __ mov(result_dst, dst);
 305       dst = result_dst;
 306     }
 307   } else {
 308     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
 309   }
 310 
 311   // 3: apply keep-alive barrier if needed
 312   if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
 313     satb_barrier(masm /* masm */,
 314                  noreg /* obj */,
 315                  dst /* pre_val */,
 316                  rthread /* thread */,
 317                  tmp1 /* tmp1 */,
 318                  tmp2 /* tmp2 */);
 319   }
 320 }
 321 
 322 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
 323   assert(ShenandoahCardBarrier, "Should have been checked by caller");
 324 
 325   __ lsr(obj, obj, CardTable::card_shift());
 326 
 327   assert(CardTable::dirty_card_val() == 0, "must be");
 328 
 329   Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 330   __ ldr(rscratch1, curr_ct_holder_addr);
 331 
 332   if (UseCondCardMark) {
 333     Label L_already_dirty;
 334     __ ldrb(rscratch2, Address(obj, rscratch1));
 335     __ cbz(rscratch2, L_already_dirty);
 336     __ strb(zr, Address(obj, rscratch1));
 337     __ bind(L_already_dirty);
 338   } else {
 339     __ strb(zr, Address(obj, rscratch1));
 340   }
 341 }
 342 
 343 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 344                                              Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
 345   // 1: non-reference types require no barriers
 346   if (!is_reference_type(type)) {
 347     BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
 348     return;
 349   }
 350 
 351   // Flatten object address right away for simplicity: likely needed by barriers
 352   if (dst.index() == noreg && dst.offset() == 0) {
 353     if (dst.base() != tmp3) {
 354       __ mov(tmp3, dst.base());
 355     }
 356   } else {
 357     __ lea(tmp3, dst);
 358   }
 359 
 360   // 2: pre-barrier: SATB needs the previous value
 361   if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
 362     satb_barrier(masm,
 363                  tmp3 /* obj */,
 364                  tmp2 /* pre_val */,
 365                  rthread /* thread */,
 366                  tmp1 /* tmp */,
 367                  rscratch1 /* tmp2 */);
 368   }
 369 
 370   // Store!
 371   BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
 372 
 373   // 3: post-barrier: card barrier needs store address
 374   bool storing_non_null = (val != noreg);
 375   if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
 376     card_barrier(masm, tmp3);
 377   }
 378 }
 379 
 380 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
 381                                                                   Register obj, Register tmp, Label& slowpath) {
 382   Label done;
 383   // Resolve jobject
 384   BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
 385 
 386   // Check for null.
 387   __ cbz(obj, done);
 388 
 389   assert(obj != rscratch2, "need rscratch2");
 390   Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
 391   __ lea(rscratch2, gc_state);
 392   __ ldrb(rscratch2, Address(rscratch2));
 393 
 394   // Check for heap in evacuation phase
 395   __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, slowpath);
 396 
 397   __ bind(done);
 398 }
 399 
// Peeks at the referent of a weak handle from compiled code.
//
// Delegates to the shared implementation, then branches to slow_path when the
// result in obj is non-null and the thread-local GC state has the WEAK_ROOTS
// bit set. A null result falls through without consulting the GC state.
//
//   weak_handle  passed through to the shared implementation; must differ from tmp
//   obj          receives the peeked reference; must differ from tmp
//   tmp          scratch register, clobbered
//   slow_path    taken when the fast path cannot be used
void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
                                                                    Register tmp, Label& slow_path) {
  assert_different_registers(weak_handle, tmp, noreg);
  assert_different_registers(obj, tmp, noreg);

  Label done;

  // Peek weak handle using the standard implementation.
  BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, tmp, slow_path);

  // Check if the reference is null, and if it is, take the fast path.
  __ cbz(obj, done);

  Address gc_state(rthread, ShenandoahThreadLocalData::gc_state_offset());
  // NOTE(review): the value this lea leaves in tmp does not appear to be
  // consumed - the following load addresses gc_state directly (with tmp only
  // as legitimize_address scratch) and then overwrites tmp. Confirm whether
  // the lea can be dropped.
  __ lea(tmp, gc_state);
  __ ldrb(tmp, __ legitimize_address(gc_state, 1, tmp));

  // Check if the heap is under weak-reference/roots processing, in
  // which case we need to take the slow path.
  __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, slow_path);
  __ bind(done);
}
 422 
// Emits the card-table post barrier for a reference array range: every card
// from the one covering the first element to the one covering the last
// element is set to zero (the dirty value).
//
//   start    address of the first element; clobbered
//   count    number of elements; clobbered (reused as 'end' and loop counter)
//   scratch  clobbered; receives the thread-local card table pointer
void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register start, Register count, Register scratch) {
  assert(ShenandoahCardBarrier, "Should have been checked by caller");

  Label L_loop, L_done;
  // 'end' is an alias for the count register, not a separate register.
  const Register end = count;

  // Zero count? Nothing to do.
  __ cbz(count, L_done);

  // end = start + count << LogBytesPerHeapOop
  // last element address to make inclusive
  __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
  __ sub(end, end, BytesPerHeapOop);
  // Convert both addresses to card indices.
  __ lsr(start, start, CardTable::card_shift());
  __ lsr(end, end, CardTable::card_shift());

  // count = last card index - first card index, i.e. the number of cards to
  // dirty minus one; it doubles as the byte offset used by the loop below.
  __ sub(count, end, start);

  Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
  __ ldr(scratch, curr_ct_holder_addr);
  // start = address of the first card's table entry
  __ add(start, start, scratch);
  // Dirty entries from offset 'count' down to offset 0, inclusive.
  __ bind(L_loop);
  __ strb(zr, Address(start, count));
  __ subs(count, count, 1);
  __ br(Assembler::GE, L_loop);
  __ bind(L_done);
}
 452 
 453 #undef __
 454 
 455 #ifdef COMPILER1
 456 
 457 #define __ ce->masm()->
 458 
 459 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
 460   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 461   // At this point we know that marking is in progress.
 462   // If do_load() is true then we have to emit the
 463   // load of the previous value; otherwise it has already
 464   // been loaded into _pre_val.
 465 
 466   __ bind(*stub->entry());
 467 
 468   assert(stub->pre_val()->is_register(), "Precondition.");
 469 
 470   Register pre_val_reg = stub->pre_val()->as_register();
 471 
 472   if (stub->do_load()) {
 473     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
 474   }
 475   __ cbz(pre_val_reg, *stub->continuation());
 476   ce->store_parameter(stub->pre_val()->as_register(), 0);
 477   __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
 478   __ b(*stub->continuation());
 479 }
 480 
 481 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
 482   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 483   __ bind(*stub->entry());
 484 
 485   DecoratorSet decorators = stub->decorators();
 486   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 487   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 488   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 489   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 490 
 491   Register obj = stub->obj()->as_register();
 492   Register res = stub->result()->as_register();
 493   Register addr = stub->addr()->as_pointer_register();
 494   Register tmp1 = stub->tmp1()->as_register();
 495   Register tmp2 = stub->tmp2()->as_register();
 496 
 497   assert(res == r0, "result must arrive in r0");
 498 
 499   if (res != obj) {
 500     __ mov(res, obj);
 501   }
 502 
 503   if (is_strong) {
 504     // Check for object in cset.
 505     if (AOTCodeCache::is_on_for_dump()) {
 506       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
 507       __ ldr(tmp2, Address(tmp2));
 508       __ lea(tmp1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
 509       __ ldrw(tmp1, Address(tmp1));
 510       __ lsrv(tmp1, res, tmp1);
 511     } else {
 512       __ mov(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
 513       __ lsr(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 514     }
 515     __ ldrb(tmp2, Address(tmp2, tmp1));
 516     __ cbz(tmp2, *stub->continuation());
 517   }
 518 
 519   ce->store_parameter(res, 0);
 520   ce->store_parameter(addr, 1);
 521   if (is_strong) {
 522     if (is_native) {
 523       __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
 524     } else {
 525       __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
 526     }
 527   } else if (is_weak) {
 528     __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
 529   } else {
 530     assert(is_phantom, "only remaining strength");
 531     __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
 532   }
 533 
 534   __ b(*stub->continuation());
 535 }
 536 
 537 #undef __
 538 
 539 #define __ sasm->
 540 
 541 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
 542   __ prologue("shenandoah_pre_barrier", false);
 543 
 544   // arg0 : previous value of memory
 545 
 546   BarrierSet* bs = BarrierSet::barrier_set();
 547 
 548   const Register pre_val = r0;
 549   const Register thread = rthread;
 550   const Register tmp = rscratch1;
 551 
 552   Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 553   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 554 
 555   Label done;
 556   Label runtime;
 557 
 558   // Is marking still active?
 559   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 560   __ ldrb(tmp, gc_state);
 561   __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, done);
 562 
 563   // Can we store original value in the thread's buffer?
 564   __ ldr(tmp, queue_index);
 565   __ cbz(tmp, runtime);
 566 
 567   __ sub(tmp, tmp, wordSize);
 568   __ str(tmp, queue_index);
 569   __ ldr(rscratch2, buffer);
 570   __ add(tmp, tmp, rscratch2);
 571   __ load_parameter(0, rscratch2);
 572   __ str(rscratch2, Address(tmp, 0));
 573   __ b(done);
 574 
 575   __ bind(runtime);
 576   __ push_call_clobbered_registers();
 577   __ load_parameter(0, pre_val);
 578   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
 579   __ pop_call_clobbered_registers();
 580   __ bind(done);
 581 
 582   __ epilogue();
 583 }
 584 
 585 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
 586   __ prologue("shenandoah_load_reference_barrier", false);
 587   // arg0 : object to be resolved
 588 
 589   __ push_call_clobbered_registers();
 590   __ load_parameter(0, r0);
 591   __ load_parameter(1, r1);
 592 
 593   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 594   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 595   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 596   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 597   if (is_strong) {
 598     if (is_native) {
 599       __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
 600     } else {
 601       if (UseCompressedOops) {
 602         __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)));
 603       } else {
 604         __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
 605       }
 606     }
 607   } else if (is_weak) {
 608     assert(!is_native, "weak must not be called off-heap");
 609     if (UseCompressedOops) {
 610       __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
 611     } else {
 612       __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
 613     }
 614   } else {
 615     assert(is_phantom, "only remaining strength");
 616     assert(is_native, "phantom must only be called off-heap");
 617     __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
 618   }
 619   __ blr(lr);
 620   __ mov(rscratch1, r0);
 621   __ pop_call_clobbered_registers();
 622   __ mov(r0, rscratch1);
 623 
 624   __ epilogue();
 625 }
 626 
 627 #undef __
 628 
 629 #endif // COMPILER1
 630 
 631 #ifdef COMPILER2
 632 
 633 #undef __
 634 #define __ masm->
 635 
 636 
 637 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire) {
 638   // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
 639   if (is_narrow) {
 640     if (is_acquire) {
 641       assert(src.getMode() == Address::base_plus_offset && src.offset() == 0,
 642           "is_acquire path requires address to be base-only");
 643       __ ldarw(dst, src.base());
 644     } else {
 645       __ ldrw(dst, src);
 646     }
 647   } else {
 648     if (is_acquire) {
 649       assert(src.getMode() == Address::base_plus_offset && src.offset() == 0,
 650           "is_acquire path requires address to be base-only");
 651       __ ldar(dst, src.base());
 652     } else {
 653       __ ldr(dst, src);
 654     }
 655   }
 656 
 657   ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);
 658 }
 659 
 660 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
 661     Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3, bool is_volatile) {
 662 
 663   ShenandoahBarrierStubC2::store_pre(masm, node, dst, tmp1, tmp2, tmp3, dst_narrow);
 664 
 665   // Do the actual store
 666   if (dst_narrow) {
 667     if (!src_narrow) {
 668       // Need to encode into rscratch, because we cannot clobber src.
 669       if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
 670         __ encode_heap_oop(tmp2, src);
 671       } else {
 672         __ encode_heap_oop_not_null(tmp2, src);
 673       }
 674       src = tmp2;
 675     }
 676 
 677     if (is_volatile) {
 678       assert(dst.getMode() == Address::base_plus_offset && dst.offset() == 0,
 679           "is_acquire path requires address to be base-only");
 680       __ stlrw(src, dst.base());
 681     } else {
 682       __ strw(src, dst);
 683     }
 684   } else {
 685     if (is_volatile) {
 686       assert(dst.getMode() == Address::base_plus_offset && dst.offset() == 0,
 687           "is_acquire path requires address to be base-only");
 688       __ stlr(src, dst.base());
 689     } else {
 690       __ str(src, dst);
 691     }
 692   }
 693 
 694   ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);
 695 }
 696 
 697 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
 698     Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool weak, bool acquire) {
 699   Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;
 700 
 701   ShenandoahBarrierStubC2::load_store_pre(masm, node, addr, tmp1, tmp2, tmp3, narrow);
 702 
 703   // CAS!
 704   __ cmpxchg(addr, oldval, newval, op_size, acquire, /* release */ true, weak, exchange ? res : noreg);
 705 
 706   // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
 707   if (!exchange) {
 708     assert(res != noreg, "need result register");
 709     __ cset(res, Assembler::EQ);
 710   }
 711 
 712   ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
 713 }
 714 
 715 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
 716     Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
 717   bool is_narrow = node->bottom_type()->isa_narrowoop();
 718 
 719   ShenandoahBarrierStubC2::load_store_pre(masm, node, addr, tmp1, tmp2, tmp3, is_narrow);
 720 
 721   if (is_narrow) {
 722     if (is_acquire) {
 723       __ atomic_xchgalw(preval, newval, addr);
 724     } else {
 725       __ atomic_xchgw(preval, newval, addr);
 726     }
 727   } else {
 728     if (is_acquire) {
 729       __ atomic_xchgal(preval, newval, addr);
 730     } else {
 731       __ atomic_xchg(preval, newval, addr);
 732     }
 733   }
 734 
 735   ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
 736 }
 737 
 738 #undef __
 739 #define __ masm.
 740 
 741 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
 742   assert(CardTable::dirty_card_val() == 0, "must be");
 743   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 744 
 745   // tmp1 = card table base (holder)
 746   Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 747   __ ldr(tmp1, curr_ct_holder_addr);
 748 
 749   // tmp2 = effective address
 750   __ lea(tmp2, address);
 751 
 752   // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
 753   __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
 754 
 755   if (UseCondCardMark) {
 756     Label L_already_dirty;
 757     __ ldrb(tmp1, Address(tmp2));
 758     __ cbz(tmp1, L_already_dirty);
 759     __ strb(zr, Address(tmp2));
 760     __ bind(L_already_dirty);
 761   } else {
 762     __ strb(zr, Address(tmp2));
 763   }
 764 }
 765 
 766 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
 767   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 768   PhaseOutput* const output = Compile::current()->output();
 769   Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
 770 
 771   // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
 772   // We'll use that information to decide whether we need a far jump to the
 773   // stub entry point or not. In scratch_emit_size mode we don't bind entry()
 774   // because otherwise it will be rebound when we later emit the instructions
 775   // for real.
 776   if (_needs_far_jump) {
 777     __ ldrb(tmp, gc_state_fast);
 778     __ cbz(tmp, *continuation());
 779     __ b(output->in_scratch_emit_size() ? *continuation() : *entry());
 780   } else {
 781     __ ldrb(tmp, gc_state_fast);
 782     __ cbnz(tmp, output->in_scratch_emit_size() ? *continuation() : *entry());
 783   }
 784 
 785   // This is were the slowpath stub will return to or the code above will
 786   // jump to if the checks are false
 787   __ bind(*continuation());
 788 }
 789 
 790 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
 791   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 792   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
 793   PhaseOutput* const output = Compile::current()->output();
 794 
 795   // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
 796   // We'll use that information to decide whether we need a far jump to the
 797   // stub entry point or not. In scratch_emit_size mode we don't bind entry()
 798   // because otherwise it will be rebound when we later emit the instructions
 799   // for real.
 800   if (!output->in_scratch_emit_size()) {
 801     __ bind(*entry());
 802   }
 803 
 804   // If we need to load ourselves, do it here.
 805   if (_do_load) {
 806     if (_narrow) {
 807       __ ldrw(_obj, _addr);
 808     } else {
 809       __ ldr(_obj, _addr);
 810     }
 811   }
 812 
 813   // If the object is null, there is no point in applying barriers.
 814   maybe_far_jump_if_zero(masm, _obj);
 815 
 816   // We need to make sure that loads done by callers survive across slow-path calls.
 817   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
 818   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
 819   if (!_do_load || needs_both_barriers) {
 820     preserve(_obj);
 821   }
 822 
 823   // Go for barriers. Barriers can return straight to continuation, as long
 824   // as another barrier is not needed and we can reach the fastpath.
 825   if (needs_both_barriers) {
 826     // The Load match rule in the .ad file may have legitimized the load
 827     // address using a TEMP register and in that case we need to explicitly
 828     // preserve them here, because the RA does not consider TEMP as live-in,
 829     // and the KA runtime call may clobber them and cause a crash on the
 830     // subsequent LRB stub.
 831     if (_addr.base() != noreg) {
 832       preserve(_addr.base());
 833     }
 834     if (_addr.index() != noreg) {
 835       preserve(_addr.index());
 836     }
 837     keepalive(masm, nullptr);
 838     lrb(masm);
 839   } else if (_needs_keep_alive_barrier) {
 840     keepalive(masm, continuation());
 841   } else if (_needs_load_ref_barrier) {
 842     lrb(masm);
 843   } else {
 844     ShouldNotReachHere();
 845   }
 846 }
 847 
 848 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
 849   if (_needs_far_jump) {
 850     Label L_short_jump;
 851     __ cbnz(reg, L_short_jump);
 852     __ b(*continuation());
 853     __ bind(L_short_jump);
 854   } else {
 855     __ cbz(reg, *continuation());
 856   }
 857 }
 858 
 859 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
 860   Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
 861   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 862   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 863   Label L_through, L_slowpath;
 864 
 865   // If another barrier is enabled as well, do a runtime check for a specific barrier.
 866   if (_needs_load_ref_barrier) {
 867     assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
 868     __ ldrb(_tmp1, gcstate);
 869     __ cbz(_tmp1, L_through);
 870   }
 871 
 872   // Fast-path: put object into buffer.
 873   // If buffer is already full, go slow.
 874   __ ldr(_tmp1, index);
 875   __ cbz(_tmp1, L_slowpath);
 876   __ sub(_tmp1, _tmp1, wordSize);
 877   __ str(_tmp1, index);
 878   __ ldr(_tmp2, buffer);
 879 
 880   // Store the object in queue.
 881   // If object is narrow, we need to decode it before inserting.
 882   if (_narrow) {
 883     __ add(_tmp2, _tmp2, _tmp1);
 884     __ decode_heap_oop_not_null(_tmp1, _obj);
 885     __ str(_tmp1, Address(_tmp2));
 886   } else {
 887     // Buffer is 64-bit address, must be in base register.
 888     __ str(_obj, Address(_tmp2, _tmp1));
 889   }
 890 
 891   // Fast-path exits here.
 892   if (L_done != nullptr) {
 893     __ b(*L_done);
 894   } else {
 895     __ b(L_through);
 896   }
 897 
 898   // Slow-path: call runtime to handle.
 899   __ bind(L_slowpath);
 900 
 901   {
 902     SaveLiveRegisters slr(&masm, this);
 903 
 904     // Go to runtime and handle the rest there.
 905     __ mov(c_rarg0, _obj);
 906     __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
 907     __ blr(lr);
 908   }
 909   if (L_done != nullptr) {
 910     __ b(*L_done);
 911   } else {
 912     __ bind(L_through);
 913   }
 914 }
 915 
 916 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
 917   Label L_slow;
 918 
 919   // If another barrier is enabled as well, do a runtime check for a specific barrier.
 920   if (_needs_keep_alive_barrier) {
 921     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
 922     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
 923     __ ldrb(_tmp1, gc_state_fast);
 924     maybe_far_jump_if_zero(masm, _tmp1);
 925   }
 926 
 927   // If weak references are being processed, weak/phantom loads need to go slow,
 928   // regardless of their cset status.
 929   if (_needs_load_ref_weak_barrier) {
 930     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
 931     __ ldrb(_tmp1, gc_state_fast);
 932     __ cbnz(_tmp1, L_slow);
 933   }
 934 
 935   // Cset-check. Fall-through to slow if in collection set.
 936   bool is_aot = AOTCodeCache::is_on_for_dump();
 937   if (!is_aot) {
 938     __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
 939     if (_narrow) {
 940       __ decode_heap_oop_not_null(_tmp2, _obj);
 941       __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 942     } else {
 943       __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 944     }
 945   } else {
 946     // Generating AOT code, pull the cset bitmap and region shift from AOT table.
 947     if (_narrow) {
 948       __ decode_heap_oop_not_null(_tmp1, _obj);
 949     } else {
 950       __ mov(_tmp1, _obj);
 951     }
 952     __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
 953     __ ldrw(_tmp2, Address(_tmp2));
 954     __ lsrv(_tmp2, _tmp1, _tmp2);
 955     __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
 956     __ ldr(_tmp1, Address(_tmp1));
 957     __ add(_tmp1, _tmp1, _tmp2);
 958   }
 959   __ ldrb(_tmp1, Address(_tmp1, 0));
 960   maybe_far_jump_if_zero(masm, _tmp1);
 961 
 962   // Slow path
 963   __ bind(L_slow);
 964 
 965   // Obj is the result, need to temporarily stop preserving it.
 966   bool is_obj_preserved = is_preserved(_obj);
 967   if (is_obj_preserved) {
 968     dont_preserve(_obj);
 969   }
 970   {
 971     SaveLiveRegisters slr(&masm, this);
 972 
 973     // Shuffle in the arguments. The end result should be:
 974     //   c_rarg0 <-- obj
 975     //   c_rarg1 <-- lea(addr)
 976     if (c_rarg0 == _obj) {
 977       __ lea(c_rarg1, _addr);
 978     } else if (c_rarg1 == _obj) {
 979       __ mov(_tmp1, c_rarg1);
 980       __ lea(c_rarg1, _addr);
 981       __ mov(c_rarg0, _tmp1);
 982     } else {
 983       assert_different_registers(c_rarg1, _obj);
 984       __ lea(c_rarg1, _addr);
 985       __ mov(c_rarg0, _obj);
 986     }
 987 
 988     // Go to runtime and handle the rest there.
 989     __ lea(lr, RuntimeAddress(lrb_runtime_entry_addr()));
 990     __ blr(lr);
 991 
 992     // Save the result where needed. Narrow entries return narrowOop (32 bits)
 993     // and AAPCS does not guarantee the upper 32 bits of x0 are zero.
 994     if (_narrow) {
 995       __ movw(_obj, r0);
 996     } else if (_obj != r0) {
 997       __ mov(_obj, r0);
 998     }
 999   }
1000   if (is_obj_preserved) {
1001     preserve(_obj);
1002   }
1003 
1004   __ b(*continuation());
1005 }
1006 
1007 int ShenandoahBarrierStubC2::available_gp_registers() {
1008   Unimplemented(); // Not used
1009   return 0;
1010 }
1011 
1012 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1013   Unimplemented(); // Not used
1014   return true;
1015 }
1016 
1017 static ShenandoahBarrierSetC2State* barrier_set_state() {
1018   return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
1019 }
1020 
1021 static int get_stub_size(ShenandoahBarrierStubC2* stub) {
1022   PhaseOutput* const output = Compile::current()->output();
1023   assert(output->in_scratch_emit_size(), "only used when in scratch_emit_size.");
1024   BufferBlob* const blob = output->scratch_buffer_blob();
1025   CodeBuffer cb(blob->content_begin(), (address)output->scratch_locs_memory() - blob->content_begin());
1026   MacroAssembler masm(&cb);
1027   stub->emit_code(masm);
1028   return cb.insts_size();
1029 }
1030 
1031 void ShenandoahBarrierStubC2::post_init() {
1032   // If we are in scratch emit mode we assume worst case, and force the use of
1033   // far branches.
1034   PhaseOutput* const output = Compile::current()->output();
1035   ShenandoahBarrierSetC2State* state = barrier_set_state();
1036   if (output->in_scratch_emit_size()) {
1037     state->inc_stubs_current_total_size(get_stub_size(this));
1038     _needs_far_jump = true;
1039     return;
1040   }
1041 
1042   // The logic implemented in this stub only uses short jumps (cbz, cbnz) if
1043   // the aggregation of all relevant code sections of a method is less than 1MB
1044   // - 2KB. We could be more aggressive and try and compute the distance
1045   // between the fastpath branch and the stub entry but in practice not many
1046   // methods reach the 1MB size.
1047   const BufferSizingData* sizing = output->buffer_sizing_data();
1048   const int code_size = sizing->_code + state->stubs_current_total_size();
1049 
1050   // Maximum backward range is 1M. Maximum forward reach is 1M - 4bytes.
1051   // Subtract 2K to be ultra conservative.
1052   const int cond_branch_max_reach = (int)(1*M - 2*K);
1053   _needs_far_jump = code_size >= cond_branch_max_reach;
1054 }
1055 
1056 #endif // COMPILER2