1 /*
   2  * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
   3  * Copyright (c) 2012, 2022 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "gc/shared/gcArguments.hpp"
  27 #include "gc/shared/gc_globals.hpp"
  28 #include "macroAssembler_ppc.hpp"
  29 #include "precompiled.hpp"
  30 #include "asm/macroAssembler.inline.hpp"
  31 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  32 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  33 #include "gc/shenandoah/shenandoahForwarding.hpp"
  34 #include "gc/shenandoah/shenandoahHeap.hpp"
  35 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  36 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  37 #include "gc/shenandoah/shenandoahRuntime.hpp"
  38 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  39 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  40 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  41 #include "interpreter/interpreter.hpp"
  42 #include "runtime/javaThread.hpp"
  43 #include "runtime/sharedRuntime.hpp"
  44 #include "utilities/globalDefinitions.hpp"
  45 #include "vm_version_ppc.hpp"
  46 #ifdef COMPILER1
  47 #include "c1/c1_LIRAssembler.hpp"
  48 #include "c1/c1_MacroAssembler.hpp"
  49 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  50 #endif
  51 
  52 #define __ masm->
  53 
  54 void ShenandoahBarrierSetAssembler::satb_write_barrier(MacroAssembler *masm,
  55                                                        Register base, RegisterOrConstant ind_or_offs,
  56                                                        Register tmp1, Register tmp2, Register tmp3,
  57                                                        MacroAssembler::PreservationLevel preservation_level) {
  58   if (ShenandoahSATBBarrier) {
  59     __ block_comment("satb_write_barrier (shenandoahgc) {");
  60     satb_write_barrier_impl(masm, 0, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level);
  61     __ block_comment("} satb_write_barrier (shenandoahgc)");
  62   }
  63 }
  64 
  65 void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler *masm,
  66                                                Register val,
  67                                                Register tmp1, Register tmp2,
  68                                                MacroAssembler::PreservationLevel preservation_level,
  69                                                DecoratorSet decorators) {
  70   // IU barriers are also employed to avoid resurrection of weak references,
  71   // even if Shenandoah does not operate in incremental update mode.
  72   if (ShenandoahIUBarrier || ShenandoahSATBBarrier) {
  73     __ block_comment("iu_barrier (shenandoahgc) {");
  74     satb_write_barrier_impl(masm, decorators, noreg, noreg, val, tmp1, tmp2, preservation_level);
  75     __ block_comment("} iu_barrier (shenandoahgc)");
  76   }
  77 }
  78 
  79 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler *masm, DecoratorSet decorators,
  80                                                            Register base, RegisterOrConstant ind_or_offs,
  81                                                            Register dst,
  82                                                            Register tmp1, Register tmp2,
  83                                                            MacroAssembler::PreservationLevel preservation_level) {
  84   if (ShenandoahLoadRefBarrier) {
  85     __ block_comment("load_reference_barrier (shenandoahgc) {");
  86     load_reference_barrier_impl(masm, decorators, base, ind_or_offs, dst, tmp1, tmp2, preservation_level);
  87     __ block_comment("} load_reference_barrier (shenandoahgc)");
  88   }
  89 }
  90 
// Emits the barrier code that must run before an arraycopy of reference
// elements.  Depending on the enabled barrier types and the current gc state,
// a leaf call into the Shenandoah runtime is emitted that applies the
// required barriers to the whole source range.
// preserve1/preserve2: Caller registers that must survive this code; only
//                      volatile registers actually need to be saved/restored.
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler *masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count,
                                                       Register preserve1, Register preserve2) {
  Register R11_tmp = R11_scratch1;

  assert_different_registers(src, dst, count, R11_tmp, noreg);
  if (preserve1 != noreg) {
    // Technically not required, but likely to indicate an error.
    assert_different_registers(preserve1, preserve2);
  }

  /* ==== Check whether barrier is required (optimizations) ==== */
  // Fast path: Component type of array is not a reference type.
  if (!is_reference_type(type)) {
    return;
  }

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  // Fast path: No barrier required if for every barrier type, it is either disabled or would not store
  // any useful information.
  if ((!ShenandoahSATBBarrier || dest_uninitialized) && !ShenandoahIUBarrier && !ShenandoahLoadRefBarrier) {
    return;
  }

  __ block_comment("arraycopy_prologue (shenandoahgc) {");
  Label skip_prologue;

  // Fast path: Array is of length zero.
  __ cmpdi(CCR0, count, 0);
  __ beq(CCR0, skip_prologue);

  /* ==== Check whether barrier is required (gc state) ==== */
  __ lbz(R11_tmp, in_bytes(ShenandoahThreadLocalData::gc_state_offset()),
         R16_thread);

  // The set of garbage collection states requiring barriers depends on the available barrier types and the
  // type of the reference in question.
  // For instance, satb barriers may be skipped if it is certain that the overridden values are not relevant
  // for the garbage collector.
  const int required_states = ShenandoahSATBBarrier && dest_uninitialized
                              ? ShenandoahHeap::HAS_FORWARDED
                              : ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;

  __ andi_(R11_tmp, R11_tmp, required_states);
  __ beq(CCR0, skip_prologue);

  /* ==== Invoke runtime ==== */
  // Save to-be-preserved registers.
  // Registers are stacked below SP (negative offsets), counted by
  // 'highest_preserve_register_index'; the frame pushed afterwards is sized
  // to cover this save area.
  int highest_preserve_register_index = 0;
  {
    if (preserve1 != noreg && preserve1->is_volatile()) {
      __ std(preserve1, -BytesPerWord * ++highest_preserve_register_index, R1_SP);
    }
    if (preserve2 != noreg && preserve2 != preserve1 && preserve2->is_volatile()) {
      __ std(preserve2, -BytesPerWord * ++highest_preserve_register_index, R1_SP);
    }

    // 'src', 'dst' and 'count' are saved unconditionally: they are needed again after the call.
    __ std(src, -BytesPerWord * ++highest_preserve_register_index, R1_SP);
    __ std(dst, -BytesPerWord * ++highest_preserve_register_index, R1_SP);
    __ std(count, -BytesPerWord * ++highest_preserve_register_index, R1_SP);

    __ save_LR_CR(R11_tmp);
    __ push_frame_reg_args(-BytesPerWord * highest_preserve_register_index,
                           R11_tmp);
  }

  // Invoke runtime.
  // The runtime entry depends on the in-memory encoding of oops in the array.
  address jrt_address = nullptr;
  if (UseCompressedOops) {
    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry);
  } else {
    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry);
  }
  assert(jrt_address != nullptr, "jrt routine cannot be found");

  __ call_VM_leaf(jrt_address, src, dst, count);

  // Restore to-be-preserved registers.
  // Must mirror the save sequence above in exact reverse order.
  {
    __ pop_frame();
    __ restore_LR_CR(R11_tmp);

    __ ld(count, -BytesPerWord * highest_preserve_register_index--, R1_SP);
    __ ld(dst, -BytesPerWord * highest_preserve_register_index--, R1_SP);
    __ ld(src, -BytesPerWord * highest_preserve_register_index--, R1_SP);

    if (preserve2 != noreg && preserve2 != preserve1 && preserve2->is_volatile()) {
      __ ld(preserve2, -BytesPerWord * highest_preserve_register_index--, R1_SP);
    }
    if (preserve1 != noreg && preserve1->is_volatile()) {
      __ ld(preserve1, -BytesPerWord * highest_preserve_register_index--, R1_SP);
    }
  }

  __ bind(skip_prologue);
  __ block_comment("} arraycopy_prologue (shenandoahgc)");
}
 189 
 190 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 191                                                        Register dst, Register count,
 192                                                        Register preserve) {
 193   if (ShenandoahCardBarrier && is_reference_type(type)) {
 194     __ block_comment("arraycopy_epilogue (shenandoahgc) {");
 195     gen_write_ref_array_post_barrier(masm, decorators, dst, count, preserve);
 196     __ block_comment("} arraycopy_epilogue (shenandoahgc)");
 197   }
 198 }
 199 
 200 // The to-be-enqueued value can either be determined
 201 // - dynamically by passing the reference's address information (load mode) or
 202 // - statically by passing a register the value is stored in (preloaded mode)
 203 //   - for performance optimizations in cases where the previous value is known (currently not implemented) and
 204 //   - for incremental-update barriers.
 205 //
 206 // decorators:  The previous value's decorator set.
 207 //              In "load mode", the value must equal '0'.
 208 // base:        Base register of the reference's address (load mode).
 209 //              In "preloaded mode", the register must equal 'noreg'.
 210 // ind_or_offs: Index or offset of the reference's address (load mode).
 211 //              If 'base' equals 'noreg' (preloaded mode), the passed value is ignored.
 212 // pre_val:     Register holding the to-be-stored value (preloaded mode).
 213 //              In "load mode", this register acts as a temporary register and must
 214 //              thus not be 'noreg'.  In "preloaded mode", its content will be sustained.
 215 // tmp1/tmp2:   Temporary registers, one of which must be non-volatile in "preloaded mode".
void ShenandoahBarrierSetAssembler::satb_write_barrier_impl(MacroAssembler *masm, DecoratorSet decorators,
                                                            Register base, RegisterOrConstant ind_or_offs,
                                                            Register pre_val,
                                                            Register tmp1, Register tmp2,
                                                            MacroAssembler::PreservationLevel preservation_level) {
  assert_different_registers(tmp1, tmp2, pre_val, noreg);

  Label skip_barrier;

  /* ==== Determine necessary runtime invocation preservation measures ==== */
  // The preservation levels form a strict hierarchy: each level includes everything below it.
  const bool needs_frame           = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
  const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
  const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;

  // Check whether marking is active.  The whole barrier only applies while marking.
  __ lbz(tmp1, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread);

  __ andi_(tmp1, tmp1, ShenandoahHeap::MARKING);
  __ beq(CCR0, skip_barrier);

  /* ==== Determine the reference's previous value ==== */
  bool preloaded_mode = base == noreg;
  Register pre_val_save = noreg;

  if (preloaded_mode) {
    // Previous value has been passed to the method, so it must not be determined manually.
    // In case 'pre_val' is a volatile register, it must be saved across the C-call
    // as callers may depend on its value.
    // Unless the general purposes registers are saved anyway, one of the temporary registers
    // (i.e., 'tmp1' and 'tmp2') is used to preserve 'pre_val'.
    if (!preserve_gp_registers && pre_val->is_volatile()) {
      pre_val_save = !tmp1->is_volatile() ? tmp1 : tmp2;
      assert(!pre_val_save->is_volatile(), "at least one of the temporary registers must be non-volatile");
    }

    if ((decorators & IS_NOT_NULL) != 0) {
#ifdef ASSERT
      // Caller guarantees non-null; verify in debug builds instead of emitting a null check.
      __ cmpdi(CCR0, pre_val, 0);
      __ asm_assert_ne("null oop is not allowed");
#endif // ASSERT
    } else {
      // A null previous value carries no information; skip the barrier.
      __ cmpdi(CCR0, pre_val, 0);
      __ beq(CCR0, skip_barrier);
    }
  } else {
    // Load from the reference address to determine the reference's current value (before the store is being performed).
    // Contrary to the given value in "preloaded mode", it is not necessary to preserve it.
    assert(decorators == 0, "decorator set must be empty");
    assert(base != noreg, "base must be a register");
    assert(!ind_or_offs.is_register() || ind_or_offs.as_register() != noreg, "ind_or_offs must be a register");
    if (UseCompressedOops) {
      __ lwz(pre_val, ind_or_offs, base);
    } else {
      __ ld(pre_val, ind_or_offs, base);
    }

    // A null previous value carries no information; skip the barrier.
    __ cmpdi(CCR0, pre_val, 0);
    __ beq(CCR0, skip_barrier);

    if (UseCompressedOops) {
      __ decode_heap_oop_not_null(pre_val);
    }
  }

  /* ==== Try to enqueue the to-be-stored value directly into thread's local SATB mark queue ==== */
  {
    Label runtime;
    Register Rbuffer = tmp1, Rindex = tmp2;

    // Check whether the queue has enough capacity to store another oop.
    // If not, jump to the runtime to commit the buffer and to allocate a new one.
    // (The buffer's index corresponds to the amount of remaining free space.)
    __ ld(Rindex, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
    __ cmpdi(CCR0, Rindex, 0);
    __ beq(CCR0, runtime); // If index == 0 (buffer is full), goto runtime.

    // Capacity suffices.  Decrement the queue's size by the size of one oop.
    // (The buffer is filled contrary to the heap's growing direction, i.e., it is filled downwards.)
    __ addi(Rindex, Rindex, -wordSize);
    __ std(Rindex, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);

    // Enqueue the previous value and skip the invocation of the runtime.
    __ ld(Rbuffer, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);
    __ stdx(pre_val, Rbuffer, Rindex);
    __ b(skip_barrier);

    __ bind(runtime);
  }

  /* ==== Invoke runtime to commit SATB mark queue to gc and allocate a new buffer ==== */
  // Save to-be-preserved registers.
  // Size of the register save area below SP: all volatile gp regs, plus the volatile fp regs if requested.
  int nbytes_save = 0;

  if (needs_frame) {
    if (preserve_gp_registers) {
      nbytes_save = (preserve_fp_registers
                     ? MacroAssembler::num_volatile_gp_regs + MacroAssembler::num_volatile_fp_regs
                     : MacroAssembler::num_volatile_gp_regs) * BytesPerWord;
      __ save_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
    }

    __ save_LR_CR(tmp1);
    __ push_frame_reg_args(nbytes_save, tmp2);
  }

  if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) {
    assert(pre_val_save != noreg, "nv_save must not be noreg");

    // 'pre_val' register must be saved manually unless general-purpose registers are preserved in general.
    __ mr(pre_val_save, pre_val);
  }

  // Invoke runtime.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, R16_thread);

  // Restore to-be-preserved registers.
  if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) {
    __ mr(pre_val, pre_val_save);
  }

  if (needs_frame) {
    __ pop_frame();
    __ restore_LR_CR(tmp1);

    if (preserve_gp_registers) {
      __ restore_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
    }
  }

  __ bind(skip_barrier);
}
 347 
// Replaces the (non-null) oop in 'dst' with its forwardee if the object has
// been evacuated; leaves 'dst' unchanged otherwise.  Clobbers 'tmp' and R0.
void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler *masm, Register dst, Register tmp) {
  __ block_comment("resolve_forward_pointer_not_null (shenandoahgc) {");

  Register tmp1 = tmp,
           R0_tmp2 = R0;
  assert_different_registers(dst, tmp1, R0_tmp2, noreg);

  // If the object has been evacuated, the mark word layout is as follows:
  // | forwarding pointer (62-bit) | '11' (2-bit) |

  // The invariant that stack/thread pointers have the lowest two bits cleared permits retrieving
  // the forwarding pointer solely by inverting the lowest two bits.
  // This invariant follows inevitably from hotspot's minimal alignment.
  assert(markWord::marked_value <= (unsigned long) MinObjAlignmentInBytes,
         "marked value must not be higher than hotspot's minimal alignment");

  Label done;

  // Load the object's mark word.
  __ ld(tmp1, oopDesc::mark_offset_in_bytes(), dst);

  // Load the bit mask for the lock bits.
  __ li(R0_tmp2, markWord::lock_mask_in_place);

  // Check whether all bits matching the bit mask are set.
  // If that is the case, the object has been evacuated and the most significant bits form the forward pointer.
  // ('andc_' computes mask AND NOT mark; the result is zero iff all mask bits are set in the mark word.)
  __ andc_(R0_tmp2, R0_tmp2, tmp1);

  assert(markWord::lock_mask_in_place == markWord::marked_value,
         "marked value must equal the value obtained when all lock bits are being set");
  if (VM_Version::has_isel()) {
    // Branch-free variant: unconditionally compute the decoded pointer (clear the '11' bits
    // via xor) and conditionally select it into 'dst' only when the object was forwarded.
    __ xori(tmp1, tmp1, markWord::lock_mask_in_place);
    __ isel(dst, CCR0, Assembler::equal, false, tmp1);
  } else {
    __ bne(CCR0, done);
    // Clearing the set lock bits ('11') via xor yields the forwarding pointer.
    __ xori(dst, tmp1, markWord::lock_mask_in_place);
  }

  __ bind(done);
  __ block_comment("} resolve_forward_pointer_not_null (shenandoahgc)");
}
 389 
// base:        Base register of the reference's address.
// ind_or_offs: Index or offset of the reference's address (load mode).
// dst:         Reference's address.  In case the object has been evacuated, this is the to-space version
//              of that object.
void ShenandoahBarrierSetAssembler::load_reference_barrier_impl(
    MacroAssembler *masm, DecoratorSet decorators,
    Register base, RegisterOrConstant ind_or_offs,
    Register dst,
    Register tmp1, Register tmp2,
    MacroAssembler::PreservationLevel preservation_level) {
  if (ind_or_offs.is_register()) {
    assert_different_registers(tmp1, tmp2, base, ind_or_offs.as_register(), dst, noreg);
  } else {
    assert_different_registers(tmp1, tmp2, base, dst, noreg);
  }

  Label skip_barrier;

  // Reference strength and access mode determine which runtime entry is called
  // and which gc states require the barrier.
  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
  bool is_narrow  = UseCompressedOops && !is_native;

  /* ==== Check whether heap is stable ==== */
  __ lbz(tmp2, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread);

  if (is_strong) {
    // For strong references, the heap is considered stable if "has forwarded" is not active.
    __ andi_(tmp1, tmp2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION);
    __ beq(CCR0, skip_barrier);
#ifdef ASSERT
    // "evacuation" -> (implies) "has forwarded".  If we reach this code, "has forwarded" must thus be set.
    __ andi_(tmp1, tmp1, ShenandoahHeap::HAS_FORWARDED);
    __ asm_assert_ne("'has forwarded' is missing");
#endif // ASSERT
  } else {
    // For all non-strong references, the heap is considered stable if not any of "has forwarded",
    // "root set processing", and "weak reference processing" is active.
    // The additional phase conditions are in place to avoid the resurrection of weak references (see JDK-8266440).
    Label skip_fastpath;
    __ andi_(tmp1, tmp2, ShenandoahHeap::WEAK_ROOTS);
    __ bne(CCR0, skip_fastpath);

    __ andi_(tmp1, tmp2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION);
    __ beq(CCR0, skip_barrier);
#ifdef ASSERT
    // "evacuation" -> (implies) "has forwarded".  If we reach this code, "has forwarded" must thus be set.
    __ andi_(tmp1, tmp1, ShenandoahHeap::HAS_FORWARDED);
    __ asm_assert_ne("'has forwarded' is missing");
#endif // ASSERT

    __ bind(skip_fastpath);
  }

  /* ==== Check whether region is in collection set ==== */
  if (is_strong) {
    // Shenandoah stores metadata on regions in a continuous area of memory in which a single byte corresponds to
    // an entire region of the shenandoah heap.  At present, only the least significant bit is of significance
    // and indicates whether the region is part of the collection set.
    //
    // All regions are of the same size and are always aligned by a power of two.
    // Any address can thus be shifted by a fixed number of bits to retrieve the address prefix shared by
    // all objects within that region (region identification bits).
    //
    //  | unused bits | region identification bits | object identification bits |
    //  (Region size depends on a couple of criteria, such as page size, user-provided arguments and the max heap size.
    //   The number of object identification bits can thus not be determined at compile time.)
    //
    // -------------------------------------------------------  <--- cs (collection set) base address
    // | lost space due to heap space base address                   -> 'ShenandoahHeap::in_cset_fast_test_addr()'
    // | (region identification bits contain heap base offset)
    // |------------------------------------------------------  <--- cs base address + (heap_base >> region size shift)
    // | collection set in the proper                                -> shift: 'region_size_bytes_shift_jint()'
    // |
    // |------------------------------------------------------  <--- cs base address + (heap_base >> region size shift)
    //                                                                               + number of regions
    __ load_const_optimized(tmp2, ShenandoahHeap::in_cset_fast_test_addr(), tmp1);
    __ srdi(tmp1, dst, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ lbzx(tmp2, tmp1, tmp2);
    __ andi_(tmp2, tmp2, 1);
    __ beq(CCR0, skip_barrier);
  }

  /* ==== Invoke runtime ==== */
  // Save to-be-preserved registers.
  // Size of the register save area below SP: all volatile gp regs, plus the volatile fp regs if requested.
  int nbytes_save = 0;

  const bool needs_frame           = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
  const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
  const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;

  if (needs_frame) {
    if (preserve_gp_registers) {
      nbytes_save = (preserve_fp_registers
                     ? MacroAssembler::num_volatile_gp_regs + MacroAssembler::num_volatile_fp_regs
                     : MacroAssembler::num_volatile_gp_regs) * BytesPerWord;
      __ save_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
    }

    __ save_LR_CR(tmp1);
    __ push_frame_reg_args(nbytes_save, tmp1);
  }

  // Calculate the reference's absolute address.
  __ add(R4_ARG2, ind_or_offs, base);

  // Invoke runtime.
  address jrt_address = nullptr;

  if (is_strong) {
    if (is_narrow) {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
    } else {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    }
  } else if (is_weak) {
    if (is_narrow) {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }
  assert(jrt_address != nullptr, "jrt routine cannot be found");

  __ call_VM_leaf(jrt_address, dst /* reference */, R4_ARG2 /* reference address */);

  // Restore to-be-preserved registers.
  if (preserve_gp_registers) {
    // R3_RET would be overwritten by the volatile-register restore below;
    // stash the result in R0 (not covered by the restore) and move it into
    // 'dst' after the restore.
    __ mr(R0, R3_RET);
  } else {
    __ mr_if_needed(dst, R3_RET);
  }

  if (needs_frame) {
    __ pop_frame();
    __ restore_LR_CR(tmp1);

    if (preserve_gp_registers) {
      __ restore_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
      __ mr(dst, R0);
    }
  }

  __ bind(skip_barrier);
}
 540 
 541 // base:           Base register of the reference's address.
 542 // ind_or_offs:    Index or offset of the reference's address.
 543 // L_handle_null:  An optional label that will be jumped to if the reference is null.
 544 void ShenandoahBarrierSetAssembler::load_at(
 545     MacroAssembler *masm, DecoratorSet decorators, BasicType type,
 546     Register base, RegisterOrConstant ind_or_offs, Register dst,
 547     Register tmp1, Register tmp2,
 548     MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null) {
 549   // Register must not clash, except 'base' and 'dst'.
 550   if (ind_or_offs.is_register()) {
 551     if (base != noreg) {
 552       assert_different_registers(tmp1, tmp2, base, ind_or_offs.register_or_noreg(), R0, noreg);
 553     }
 554     assert_different_registers(tmp1, tmp2, dst, ind_or_offs.register_or_noreg(), R0, noreg);
 555   } else {
 556     if (base == noreg) {
 557       assert_different_registers(tmp1, tmp2, base, R0, noreg);
 558     }
 559     assert_different_registers(tmp1, tmp2, dst, R0, noreg);
 560   }
 561 
 562   /* ==== Apply load barrier, if required ==== */
 563   if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
 564     assert(is_reference_type(type), "need_load_reference_barrier must check whether type is a reference type");
 565 
 566     // If 'dst' clashes with either 'base' or 'ind_or_offs', use an intermediate result register
 567     // to keep the values of those alive until the load reference barrier is applied.
 568     Register intermediate_dst = (dst == base || (ind_or_offs.is_register() && dst == ind_or_offs.as_register()))
 569                                 ? tmp2
 570                                 : dst;
 571 
 572     BarrierSetAssembler::load_at(masm, decorators, type,
 573                                  base, ind_or_offs,
 574                                  intermediate_dst,
 575                                  tmp1, noreg,
 576                                  preservation_level, L_handle_null);
 577 
 578     load_reference_barrier(masm, decorators,
 579                            base, ind_or_offs,
 580                            intermediate_dst,
 581                            tmp1, R0,
 582                            preservation_level);
 583 
 584     __ mr_if_needed(dst, intermediate_dst);
 585   } else {
 586     BarrierSetAssembler::load_at(masm, decorators, type,
 587                                  base, ind_or_offs,
 588                                  dst,
 589                                  tmp1, tmp2,
 590                                  preservation_level, L_handle_null);
 591   }
 592 
 593   /* ==== Apply keep-alive barrier, if required (e.g., to inhibit weak reference resurrection) ==== */
 594   if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
 595     iu_barrier(masm, dst, tmp1, tmp2, preservation_level);
 596   }
 597 }
 598 
// Card-table post barrier: dirties the card covering the just-stored-to
// address 'base + ind_or_offs'.
// NOTE: 'base' is used as a scratch register and is clobbered.
void ShenandoahBarrierSetAssembler::store_check(MacroAssembler* masm, Register base, RegisterOrConstant ind_or_offs, Register tmp) {
  assert(ShenandoahCardBarrier, "Should have been checked by caller");

  ShenandoahBarrierSet* ctbs = ShenandoahBarrierSet::barrier_set();
  CardTable* ct = ctbs->card_table();
  assert_different_registers(base, tmp, R0);

  // Compute the absolute store address into 'base' (clobbering it).
  if (ind_or_offs.is_constant()) {
    __ add_const_optimized(base, base, ind_or_offs.as_constant(), tmp);
  } else {
    __ add(base, ind_or_offs.as_register(), base);
  }

  // Card index = address >> card_shift; mark that card dirty.
  __ load_const_optimized(tmp, (address)ct->byte_map_base(), R0);
  __ srdi(base, base, CardTable::card_shift());
  __ li(R0, CardTable::dirty_card_val());
  __ stbx(R0, tmp, base);
}
 617 
// base:        Base register of the reference's address.
// ind_or_offs: Index or offset of the reference's address.
// val:         To-be-stored value/reference's new value.
//              'noreg' encodes storing null (see the card-barrier condition below).
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler *masm, DecoratorSet decorators, BasicType type,
                                             Register base, RegisterOrConstant ind_or_offs, Register val,
                                             Register tmp1, Register tmp2, Register tmp3,
                                             MacroAssembler::PreservationLevel preservation_level) {
  if (is_reference_type(type)) {
    // SATB (pre-write) barrier: enqueue the reference's previous value.
    if (ShenandoahSATBBarrier) {
      satb_write_barrier(masm, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level);
    }

    // IU barrier: enqueue the to-be-stored value itself.
    if (ShenandoahIUBarrier && val != noreg) {
      iu_barrier(masm, val, tmp1, tmp2, preservation_level, decorators);
    }
  }

  // Emit the actual store.
  BarrierSetAssembler::store_at(masm, decorators, type,
                                base, ind_or_offs,
                                val,
                                tmp1, tmp2, tmp3,
                                preservation_level);

  // No need for post barrier if storing NULL
  if (ShenandoahCardBarrier && is_reference_type(type) && val != noreg) {
    store_check(masm, base, ind_or_offs, tmp1);
  }
}
 646 
// Resolves a jobject into 'dst' in native (JNI) context.  Branches to
// 'slowpath' whenever the heap may contain from-space references (evacuation
// or forwarding in progress), so the resolved oop can be fixed up there.
void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler *masm,
                                                                  Register dst, Register jni_env, Register obj,
                                                                  Register tmp, Label &slowpath) {
  __ block_comment("try_resolve_jobject_in_native (shenandoahgc) {");

  assert_different_registers(jni_env, obj, tmp);

  Label done;

  // Fast path: Reference is null (JNI tags are zero for null pointers).
  __ cmpdi(CCR0, obj, 0);
  __ beq(CCR0, done);

  // Resolve jobject using standard implementation.
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, dst, jni_env, obj, tmp, slowpath);

  // Check whether heap is stable.
  // (The gc state is reached from 'jni_env' by subtracting the environment's
  // offset within the thread object.)
  __ lbz(tmp,
         in_bytes(ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()),
         jni_env);

  __ andi_(tmp, tmp, ShenandoahHeap::EVACUATION | ShenandoahHeap::HAS_FORWARDED);
  __ bne(CCR0, slowpath);

  __ bind(done);
  __ block_comment("} try_resolve_jobject_in_native (shenandoahgc)");
}
 674 
// Special shenandoah CAS implementation that handles false negatives due
// to concurrent evacuation.  That is, the CAS operation is intended to succeed in
// the following scenarios (success criteria):
//  s1) The reference pointer ('base_addr') equals the expected ('expected') pointer.
//  s2) The reference pointer refers to the from-space version of an already-evacuated
//      object, whereas the expected pointer refers to the to-space version of the same object.
// Situations in which the reference pointer refers to the to-space version of an object
// and the expected pointer refers to the from-space version of the same object can not occur due to
// shenandoah's strong to-space invariant.  This also implies that the reference stored in 'new_val'
// can not refer to the from-space version of an already-evacuated object.
//
// To guarantee correct behavior in concurrent environments, two races must be addressed:
//  r1) A concurrent thread may heal the reference pointer (i.e., it is no longer referring to the
//      from-space version but to the to-space version of the object in question).
//      In this case, the CAS operation should succeed.
//  r2) A concurrent thread may mutate the reference (i.e., the reference pointer refers to an entirely different object).
//      In this case, the CAS operation should fail.
//
// By default, the value held in the 'result' register is zero to indicate failure of CAS,
// non-zero to indicate success.  If 'is_cae' is set, the result is the most recently fetched
// value from 'base_addr' rather than a boolean success indicator.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler *masm, Register base_addr,
                                                Register expected, Register new_val, Register tmp1, Register tmp2,
                                                bool is_cae, Register result) {
  __ block_comment("cmpxchg_oop (shenandoahgc) {");

  assert_different_registers(base_addr, new_val, tmp1, tmp2, result, R0);
  assert_different_registers(base_addr, expected, tmp1, tmp2, result, R0);

  // Potential clash of 'success_flag' and 'tmp' is being accounted for.
  // In CAS mode, 'result' carries the fetched value and 'tmp1' is a scratch register;
  // in CAE mode, the roles of 'result' and 'tmp1' are swapped.
  Register success_flag  = is_cae ? noreg  : result,
           current_value = is_cae ? result : tmp1,
           tmp           = is_cae ? tmp1   : result,
           initial_value = tmp2;

  Label done, step_four;

  // 'step_four' is bound at the very top: step 4 (retry after healing) re-executes
  // steps 1-3 instead of emitting a separate CAS sequence (see comments before the
  // final branch below).
  __ bind(step_four);

  /* ==== Step 1 ("Standard" CAS) ==== */
  // Fast path: The values stored in 'expected' and 'base_addr' are equal.
  // Given that 'expected' must refer to the to-space object of an evacuated object (strong to-space invariant),
  // no special processing is required.
  if (UseCompressedOops) {
    __ cmpxchgw(CCR0, current_value, expected, new_val, base_addr, MacroAssembler::MemBarNone,
                false, success_flag, true);
  } else {
    __ cmpxchgd(CCR0, current_value, expected, new_val, base_addr, MacroAssembler::MemBarNone,
                false, success_flag, nullptr, true);
  }

  // Skip the rest of the barrier if the CAS operation succeeds immediately.
  // If it does not, the value stored at the address is either the from-space pointer of the
  // referenced object (success criteria s2)) or simply another object.
  __ beq(CCR0, done);

  /* ==== Step 2 (Null check) ==== */
  // The success criteria s2) cannot be matched with a null pointer
  // (null pointers cannot be subject to concurrent evacuation).  The failure of the CAS operation is thus legitimate.
  __ cmpdi(CCR0, current_value, 0);
  __ beq(CCR0, done);

  /* ==== Step 3 (reference pointer refers to from-space version; success criteria s2)) ==== */
  // To check whether the reference pointer refers to the from-space version, the forward
  // pointer of the object referred to by the reference is resolved and compared against the expected pointer.
  // If this check succeed, another CAS operation is issued with the from-space pointer being the expected pointer.
  //
  // Save the potential from-space pointer.
  __ mr(initial_value, current_value);

  // Resolve forward pointer.
  // Compressed oops must be decoded before chasing the forwarding pointer and
  // re-encoded afterwards for the comparison against the (compressed) 'expected'.
  if (UseCompressedOops) { __ decode_heap_oop_not_null(current_value); }
  resolve_forward_pointer_not_null(masm, current_value, tmp);
  if (UseCompressedOops) { __ encode_heap_oop_not_null(current_value); }

  if (!is_cae) {
    // 'success_flag' was overwritten by call to 'resolve_forward_pointer_not_null'
    // ('tmp' aliases 'result' in CAS mode).
    // Load zero into register for the potential failure case.
    __ li(success_flag, 0);
  }
  __ cmpd(CCR0, current_value, expected);
  __ bne(CCR0, done);

  // Discard fetched value as it might be a reference to the from-space version of an object.
  // R0 receives the fetched value here and is intentionally ignored.
  if (UseCompressedOops) {
    __ cmpxchgw(CCR0, R0, initial_value, new_val, base_addr, MacroAssembler::MemBarNone,
                false, success_flag);
  } else {
    __ cmpxchgd(CCR0, R0, initial_value, new_val, base_addr, MacroAssembler::MemBarNone,
                false, success_flag);
  }

  /* ==== Step 4 (Retry CAS with to-space pointer (success criteria s2) under race r1)) ==== */
  // The reference pointer could have been healed whilst the previous CAS operation was being performed.
  // Another CAS operation must thus be issued with the to-space pointer being the expected pointer.
  // If that CAS operation fails as well, race r2) must have occurred, indicating that
  // the operation failure is legitimate.
  //
  // To keep the code's size small and thus improving cache (icache) performance, this highly
  // unlikely case should be handled by the smallest possible code.  Instead of emitting a third,
  // explicit CAS operation, the code jumps back and reuses the first CAS operation (step 1)
  // (passed arguments are identical).
  //
  // A failure of the CAS operation in step 1 would imply that the overall CAS operation is supposed
  // to fail.  Jumping back to step 1 requires, however, that step 2 and step 3 are re-executed as well.
  // It is thus important to ensure that a re-execution of those steps does not put program correctness
  // at risk:
  // - Step 2: Either terminates in failure (desired result) or falls through to step 3.
  // - Step 3: Terminates if the comparison between the forwarded, fetched pointer and the expected value
  //           fails.  Unless the reference has been updated in the meanwhile once again, this is
  //           guaranteed to be the case.
  //           In case of a concurrent update, the CAS would be retried again. This is legitimate
  //           in terms of program correctness (even though it is not desired).
  __ bne(CCR0, step_four);

  __ bind(done);
  __ block_comment("} cmpxchg_oop (shenandoahgc)");
}
 793 
// Card-table post barrier for reference array copies: dirty every card that
// covers the 'count' heap oops starting at 'addr'.  Clobbers 'addr', 'count'
// and R0; 'preserve' is left untouched.
void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register addr, Register count, Register preserve) {
  assert(ShenandoahCardBarrier, "Should have been checked by caller");

  ShenandoahBarrierSet* bs = ShenandoahBarrierSet::barrier_set();
  CardTable* ct = bs->card_table();
  assert_different_registers(addr, count, R0);

  Label L_skip_loop, L_store_loop;

  // Convert element count to byte length; the record form ('sldi_') sets CR0
  // so the zero-length check below needs no extra compare.
  __ sldi_(count, count, LogBytesPerHeapOop);

  // Zero length? Skip.
  __ beq(CCR0, L_skip_loop);

  // Compute the address of the last covered byte, then derive the inclusive
  // card index range [addr >> shift, count >> shift].
  __ addi(count, count, -BytesPerHeapOop);
  __ add(count, addr, count);
  // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
  __ srdi(addr, addr, CardTable::card_shift());
  __ srdi(count, count, CardTable::card_shift());
  // 'count' becomes the number of cards to dirty; 'addr' the address of the first card entry.
  __ subf(count, addr, count);
  __ add_const_optimized(addr, addr, (address)ct->byte_map_base(), R0);
  __ addi(count, count, 1);
  // R0 holds the value stored into each card (0 == CardTable::dirty_card_val()).
  __ li(R0, 0);
  __ mtctr(count);

  // Byte store loop
  __ bind(L_store_loop);
  __ stb(R0, 0, addr);
  __ addi(addr, addr, 1);
  __ bdnz(L_store_loop);
  __ bind(L_skip_loop);
}
 827 
 828 #undef __
 829 
 830 #ifdef COMPILER1
 831 
 832 #define __ ce->masm()->
 833 
// C1 slow-path stub for the SATB pre-barrier: loads the previous value (if requested)
// and hands it to the shared pre-barrier runtime code blob.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler *ce, ShenandoahPreBarrierStub *stub) {
  __ block_comment("gen_pre_barrier_stub (shenandoahgc) {");

  ShenandoahBarrierSetC1 *bs = (ShenandoahBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  // GC status has already been verified by 'ShenandoahBarrierSetC1::pre_barrier'.
  // This stub is the slowpath of that function.

  assert(stub->pre_val()->is_register(), "pre_val must be a register");
  Register pre_val = stub->pre_val()->as_register();

  // If 'do_load()' returns false, the to-be-stored value is already available in 'stub->pre_val()'
  // ("preloaded mode" of the store barrier).
  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false);
  }

  // Fast path: Reference is null.
  // 'bc_far_optimized' is used because the continuation may be out of reach of a short branch.
  __ cmpdi(CCR0, pre_val, 0);
  __ bc_far_optimized(Assembler::bcondCRbiIs1_bhintNoHint, __ bi0(CCR0, Assembler::equal), *stub->continuation());

  // Argument passing via the stack.
  // The runtime code blob ('generate_c1_pre_barrier_runtime_stub') picks 'pre_val' up from this slot.
  __ std(pre_val, -8, R1_SP);

  __ load_const_optimized(R0, bs->pre_barrier_c1_runtime_code_blob()->code_begin());
  __ call_stub(R0);

  __ b(*stub->continuation());
  __ block_comment("} gen_pre_barrier_stub (shenandoahgc)");
}
 865 
// C1 slow-path stub for the load reference barrier: checks the collection set
// (strong accesses only) and calls the appropriate runtime code blob to return
// the canonical (to-space) reference in 'res'.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler *ce,
                                                                    ShenandoahLoadReferenceBarrierStub *stub) {
  __ block_comment("gen_load_reference_barrier_stub (shenandoahgc) {");

  ShenandoahBarrierSetC1 *bs = (ShenandoahBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj  = stub->obj()->as_register();
  Register res  = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(addr, res, tmp1, tmp2);

#ifdef ASSERT
  // Ensure that 'res' is 'R3_ARG1' and contains the same value as 'obj' to reduce the number of required
  // copy instructions.
  assert(R3_RET == res, "res must be r3");
  __ cmpd(CCR0, res, obj);
  __ asm_assert_eq("result register must contain the reference stored in obj");
#endif

  DecoratorSet decorators = stub->decorators();

  /* ==== Check whether region is in collection set ==== */
  // GC status (unstable) has already been verified by 'ShenandoahBarrierSetC1::load_reference_barrier_impl'.
  // This stub is the slowpath of that function.

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);

  if (is_strong) {
    // Check whether object is in collection set.
    // Index the in-cset byte map with the object's heap-region number; a zero entry
    // means the object does not need healing and the stub can return immediately.
    __ load_const_optimized(tmp2, ShenandoahHeap::in_cset_fast_test_addr(), tmp1);
    __ srdi(tmp1, obj, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ lbzx(tmp2, tmp1, tmp2);

    __ andi_(tmp2, tmp2, 1);
    __ bc_far_optimized(Assembler::bcondCRbiIs1_bhintNoHint, __ bi0(CCR0, Assembler::equal), *stub->continuation());
  }

  // Select the runtime code blob matching the access strength/native-ness.
  address blob_addr = nullptr;

  if (is_strong) {
    if (is_native) {
      blob_addr = bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin();
    } else {
      blob_addr = bs->load_reference_barrier_strong_rt_code_blob()->code_begin();
    }
  } else if (is_weak) {
    blob_addr = bs->load_reference_barrier_weak_rt_code_blob()->code_begin();
  } else {
    assert(is_phantom, "only remaining strength");
    blob_addr = bs->load_reference_barrier_phantom_rt_code_blob()->code_begin();
  }

  assert(blob_addr != nullptr, "code blob cannot be found");

  // Argument passing via the stack.  'obj' is passed implicitly (as asserted above).
  // The runtime code blob loads 'addr' back from this slot.
  __ std(addr, -8, R1_SP);

  __ load_const_optimized(tmp1, blob_addr, tmp2);
  __ call_stub(tmp1);

  // 'res' is 'R3_RET'.  The result is thus already in the correct register.

  __ b(*stub->continuation());
  __ block_comment("} gen_load_reference_barrier_stub (shenandoahgc)");
}
 937 
 938 #undef __
 939 
 940 #define __ sasm->
 941 
// Shared C1 runtime stub for the SATB pre-barrier.  Expects the previous value
// in the caller's stack slot at -8(R1_SP) (stored by 'gen_pre_barrier_stub').
// Enqueues the value into the thread-local SATB mark queue, calling into the
// runtime only when the queue is full.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler *sasm) {
  __ block_comment("generate_c1_pre_barrier_runtime_stub (shenandoahgc) {");

  Label runtime, skip_barrier;
  BarrierSet *bs = BarrierSet::barrier_set();

  // Argument passing via the stack.
  // Three caller stack slots are in use: the argument at -8 and the two register spills below.
  const int caller_stack_slots = 3;

  // Load the argument and spill the two scratch registers this stub clobbers.
  Register R0_pre_val = R0;
  __ ld(R0, -8, R1_SP);
  Register R11_tmp1 = R11_scratch1;
  __ std(R11_tmp1, -16, R1_SP);
  Register R12_tmp2 = R12_scratch2;
  __ std(R12_tmp2, -24, R1_SP);

  /* ==== Check whether marking is active ==== */
  // Even though gc status was checked in 'ShenandoahBarrierSetAssembler::gen_pre_barrier_stub',
  // another check is required as a safepoint might have been reached in the meantime (JDK-8140588).
  __ lbz(R12_tmp2, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread);

  __ andi_(R12_tmp2, R12_tmp2, ShenandoahHeap::MARKING);
  __ beq(CCR0, skip_barrier);

  /* ==== Add previous value directly to thread-local SATB mark queue ==== */
  // Check queue's capacity.  Jump to runtime if no free slot is available.
  // The index counts down towards zero; zero means the buffer is full.
  __ ld(R12_tmp2, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
  __ cmpdi(CCR0, R12_tmp2, 0);
  __ beq(CCR0, runtime);

  // Capacity suffices.  Decrement the queue's size by one slot (size of one oop).
  __ addi(R12_tmp2, R12_tmp2, -wordSize);
  __ std(R12_tmp2, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);

  // Enqueue the previous value and skip the runtime invocation.
  __ ld(R11_tmp1, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);
  __ stdx(R0_pre_val, R11_tmp1, R12_tmp2);
  __ b(skip_barrier);

  __ bind(runtime);

  /* ==== Invoke runtime to commit SATB mark queue to gc and allocate a new buffer ==== */
  // Save to-be-preserved registers.
  const int nbytes_save = (MacroAssembler::num_volatile_regs + caller_stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save);
  __ save_LR_CR(R11_tmp1);
  __ push_frame_reg_args(nbytes_save, R11_tmp1);

  // Invoke runtime.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), R0_pre_val, R16_thread);

  // Restore to-be-preserved registers.
  __ pop_frame();
  __ restore_LR_CR(R11_tmp1);
  __ restore_volatile_gprs(R1_SP, -nbytes_save);

  __ bind(skip_barrier);

  // Restore spilled registers.
  __ ld(R11_tmp1, -16, R1_SP);
  __ ld(R12_tmp2, -24, R1_SP);

  __ blr();
  __ block_comment("} generate_c1_pre_barrier_runtime_stub (shenandoahgc)");
}
1007 
// Shared C1 runtime stub for the load reference barrier.  Expects the object in
// 'R3_ARG1' and the load address in the caller's stack slot at -8(R1_SP) (stored
// by 'gen_load_reference_barrier_stub').  The healed reference is returned in 'R3_RET'.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler *sasm,
                                                                                    DecoratorSet decorators) {
  __ block_comment("generate_c1_load_reference_barrier_runtime_stub (shenandoahgc) {");

  // Argument passing via the stack.
  const int caller_stack_slots = 1;

  // Save to-be-preserved registers.
  // 'R3_ARG1' (== 'R3_RET') is excluded so that the runtime result survives the restore below.
  const int nbytes_save = (MacroAssembler::num_volatile_regs - 1 // 'R3_ARG1' is skipped
                           + caller_stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save, true, false);

  // Load arguments from stack.
  // No load required, as assured by assertions in 'ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub'.
  Register R3_obj = R3_ARG1;
  Register R4_load_addr = R4_ARG2;
  __ ld(R4_load_addr, -8, R1_SP);

  Register R11_tmp = R11_scratch1;

  /* ==== Invoke runtime ==== */
  // Select the runtime entry point matching the access strength, native-ness and oop encoding.
  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);

  address jrt_address = nullptr;

  if (is_strong) {
    if (is_native) {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    } else {
      if (UseCompressedOops) {
        jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
      } else {
        jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
      }
    }
  } else if (is_weak) {
    assert(!is_native, "weak load reference barrier must not be called off-heap");
    if (UseCompressedOops) {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "reference type must be phantom");
    assert(is_native, "phantom load reference barrier must be called off-heap");
    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }
  assert(jrt_address != nullptr, "load reference barrier runtime routine cannot be found");

  __ save_LR_CR(R11_tmp);
  __ push_frame_reg_args(nbytes_save, R11_tmp);

  // Invoke runtime.  Arguments are already stored in the corresponding registers.
  __ call_VM_leaf(jrt_address, R3_obj, R4_load_addr);

  // Restore to-be-preserved registers.
  __ pop_frame();
  __ restore_LR_CR(R11_tmp);
  __ restore_volatile_gprs(R1_SP, -nbytes_save, true, false); // Skip 'R3_RET' register.

  __ blr();
  __ block_comment("} generate_c1_load_reference_barrier_runtime_stub (shenandoahgc)");
}
1074 
1075 #undef __
1076 
1077 #endif // COMPILER1