/*
 * Copyright (c) 2018, 2024, Red Hat, Inc. All rights reserved.
 * Copyright (c) 2012, 2024 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/shared/gcArguments.hpp"
#include "gc/shared/gc_globals.hpp"
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "interpreter/interpreter.hpp"
#include "macroAssembler_ppc.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/globalDefinitions.hpp"
#include "vm_version_ppc.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

void ShenandoahBarrierSetAssembler::satb_write_barrier(MacroAssembler *masm,
                                                       Register base, RegisterOrConstant ind_or_offs,
                                                       Register tmp1, Register tmp2, Register tmp3,
                                                       MacroAssembler::PreservationLevel preservation_level) {
  if (ShenandoahSATBBarrier) {
    __ block_comment("satb_write_barrier (shenandoahgc) {");
    satb_write_barrier_impl(masm, 0, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level);
    __ block_comment("} satb_write_barrier (shenandoahgc)");
  }
}

void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler *masm, DecoratorSet decorators,
                                                           Register base, RegisterOrConstant ind_or_offs,
                                                           Register dst,
                                                           Register tmp1, Register tmp2,
                                                           MacroAssembler::PreservationLevel preservation_level) {
  if (ShenandoahLoadRefBarrier) {
    __ block_comment("load_reference_barrier (shenandoahgc) {");
    load_reference_barrier_impl(masm, decorators, base, ind_or_offs, dst, tmp1, tmp2, preservation_level);
    __ block_comment("} load_reference_barrier (shenandoahgc)");
  }
}

void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler *masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count,
                                                       Register preserve1, Register preserve2) {
  __ block_comment("arraycopy_prologue (shenandoahgc) {");

  Register R11_tmp = R11_scratch1;

  assert_different_registers(src, dst, count, R11_tmp, noreg);
  if (preserve1 != noreg) {
    // Technically not required, but a clash here likely indicates an error.
    assert_different_registers(preserve1, preserve2);
  }

  /* ==== Check whether barrier is required (optimizations) ==== */
  // Fast path: Component type of array is not a reference type.
  if (!is_reference_type(type)) {
    return;
  }

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  // Fast path: No barrier required if every barrier type is either disabled or would not record
  // any useful information.
  if ((!ShenandoahSATBBarrier || dest_uninitialized) && !ShenandoahLoadRefBarrier) {
    return;
  }

  Label skip_prologue;

  // Fast path: Array is of length zero.
  __ cmpdi(CCR0, count, 0);
  __ beq(CCR0, skip_prologue);

  /* ==== Check whether barrier is required (gc state) ==== */
  __ lbz(R11_tmp, in_bytes(ShenandoahThreadLocalData::gc_state_offset()),
         R16_thread);

  // The set of garbage collection states requiring barriers depends on the available barrier types and the
  // type of the reference in question.
  // For instance, SATB barriers may be skipped if it is certain that the overwritten values are not relevant
  // to the garbage collector.
  const int required_states = ShenandoahSATBBarrier && dest_uninitialized
                              ? ShenandoahHeap::HAS_FORWARDED
                              : ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;

  __ andi_(R11_tmp, R11_tmp, required_states);
  __ beq(CCR0, skip_prologue);

  /* ==== Invoke runtime ==== */
  // Save to-be-preserved registers.
  int highest_preserve_register_index = 0;
  {
    if (preserve1 != noreg && preserve1->is_volatile()) {
      __ std(preserve1, -BytesPerWord * ++highest_preserve_register_index, R1_SP);
    }
    if (preserve2 != noreg && preserve2 != preserve1 && preserve2->is_volatile()) {
      __ std(preserve2, -BytesPerWord * ++highest_preserve_register_index, R1_SP);
    }

    __ std(src, -BytesPerWord * ++highest_preserve_register_index, R1_SP);
    __ std(dst, -BytesPerWord * ++highest_preserve_register_index, R1_SP);
    __ std(count, -BytesPerWord * ++highest_preserve_register_index, R1_SP);

    __ save_LR(R11_tmp);
    __ push_frame_reg_args(-BytesPerWord * highest_preserve_register_index,
                           R11_tmp);
  }

  // Invoke runtime.
  address jrt_address = nullptr;
  if (UseCompressedOops) {
    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
  } else {
    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
  }
  assert(jrt_address != nullptr, "jrt routine cannot be found");

  __ call_VM_leaf(jrt_address, src, dst, count);

  // Restore to-be-preserved registers.
  {
    __ pop_frame();
    __ restore_LR(R11_tmp);

    __ ld(count, -BytesPerWord * highest_preserve_register_index--, R1_SP);
    __ ld(dst, -BytesPerWord * highest_preserve_register_index--, R1_SP);
    __ ld(src, -BytesPerWord * highest_preserve_register_index--, R1_SP);

    if (preserve2 != noreg && preserve2 != preserve1 && preserve2->is_volatile()) {
      __ ld(preserve2, -BytesPerWord * highest_preserve_register_index--, R1_SP);
    }
    if (preserve1 != noreg && preserve1->is_volatile()) {
      __ ld(preserve1, -BytesPerWord * highest_preserve_register_index--, R1_SP);
    }
  }

  __ bind(skip_prologue);
  __ block_comment("} arraycopy_prologue (shenandoahgc)");
}
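
// For reference, the fast-path filtering implemented above boils down to the
// following pseudo-code (an illustrative sketch only, not compiled code; the
// flag and gc-state names mirror the ones used above):
//
//   if (!is_reference_type(type)) return;                       // primitive arrays need no barrier
//   if ((!ShenandoahSATBBarrier || dest_uninitialized)
//       && !ShenandoahLoadRefBarrier) return;                   // every barrier disabled or moot
//   if (count == 0) return;                                     // nothing to copy
//   int required_states = (ShenandoahSATBBarrier && dest_uninitialized)
//                         ? HAS_FORWARDED
//                         : HAS_FORWARDED | MARKING;
//   if ((gc_state & required_states) != 0)
//     ShenandoahRuntime::arraycopy_barrier_*(src, dst, count);  // slow path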

// The to-be-enqueued value can either be determined
// - dynamically by passing the reference's address information (load mode) or
// - statically by passing a register that holds the value (preloaded mode)
//   - for performance optimizations in cases where the previous value is known (currently not implemented) and
//   - for incremental-update barriers.
//
// decorators:  The previous value's decorator set.
//              In "load mode", the value must equal '0'.
// base:        Base register of the reference's address (load mode).
//              In "preloaded mode", the register must equal 'noreg'.
// ind_or_offs: Index or offset of the reference's address (load mode).
//              If 'base' equals 'noreg' (preloaded mode), the passed value is ignored.
// pre_val:     Register holding the to-be-stored value (preloaded mode).
//              In "load mode", this register acts as a temporary register and must
//              thus not be 'noreg'.  In "preloaded mode", its contents are preserved.
// tmp1/tmp2:   Temporary registers, one of which must be non-volatile in "preloaded mode".
void ShenandoahBarrierSetAssembler::satb_write_barrier_impl(MacroAssembler *masm, DecoratorSet decorators,
                                                            Register base, RegisterOrConstant ind_or_offs,
                                                            Register pre_val,
                                                            Register tmp1, Register tmp2,
                                                            MacroAssembler::PreservationLevel preservation_level) {
  assert_different_registers(tmp1, tmp2, pre_val, noreg);

  Label skip_barrier;

  /* ==== Determine necessary runtime invocation preservation measures ==== */
  const bool needs_frame           = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
  const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
  const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;

  // Check whether marking is active.
  __ lbz(tmp1, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread);

  __ andi_(tmp1, tmp1, ShenandoahHeap::MARKING);
  __ beq(CCR0, skip_barrier);

  /* ==== Determine the reference's previous value ==== */
  bool preloaded_mode = base == noreg;
  Register pre_val_save = noreg;

  if (preloaded_mode) {
    // The previous value has been passed to the method, so it does not need to be loaded here.
    // In case 'pre_val' is a volatile register, it must be saved across the C-call
    // as callers may depend on its value.
    // Unless the general-purpose registers are saved anyway, one of the temporary registers
    // (i.e., 'tmp1' and 'tmp2') is used to preserve 'pre_val'.
    if (!preserve_gp_registers && pre_val->is_volatile()) {
      pre_val_save = !tmp1->is_volatile() ? tmp1 : tmp2;
      assert(!pre_val_save->is_volatile(), "at least one of the temporary registers must be non-volatile");
    }

    if ((decorators & IS_NOT_NULL) != 0) {
#ifdef ASSERT
      __ cmpdi(CCR0, pre_val, 0);
      __ asm_assert_ne("null oop is not allowed");
#endif // ASSERT
    } else {
      __ cmpdi(CCR0, pre_val, 0);
      __ beq(CCR0, skip_barrier);
    }
  } else {
    // Load from the reference address to determine the reference's current value (before the store is performed).
    // Unlike the value passed in "preloaded mode", it does not need to be preserved.
    assert(decorators == 0, "decorator set must be empty");
    assert(base != noreg, "base must be a register");
    assert(!ind_or_offs.is_register() || ind_or_offs.as_register() != noreg, "ind_or_offs must be a register");
    if (UseCompressedOops) {
      __ lwz(pre_val, ind_or_offs, base);
    } else {
      __ ld(pre_val, ind_or_offs, base);
    }

    __ cmpdi(CCR0, pre_val, 0);
    __ beq(CCR0, skip_barrier);

    if (UseCompressedOops) {
      __ decode_heap_oop_not_null(pre_val);
    }
  }

  /* ==== Try to enqueue the to-be-stored value directly into thread's local SATB mark queue ==== */
  {
    Label runtime;
    Register Rbuffer = tmp1, Rindex = tmp2;

    // Check whether the queue has enough capacity to store another oop.
    // If not, jump to the runtime to commit the buffer and to allocate a new one.
    // (The buffer's index corresponds to the amount of remaining free space.)
    __ ld(Rindex, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
    __ cmpdi(CCR0, Rindex, 0);
    __ beq(CCR0, runtime); // If index == 0 (buffer is full), goto runtime.

    // Capacity suffices.  Decrement the queue's index by the size of one oop.
    // (The buffer is filled downwards, i.e., contrary to the heap's direction of growth.)
    __ addi(Rindex, Rindex, -wordSize);
    __ std(Rindex, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);

    // Enqueue the previous value and skip the invocation of the runtime.
    __ ld(Rbuffer, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);
    __ stdx(pre_val, Rbuffer, Rindex);
    __ b(skip_barrier);

    __ bind(runtime);
  }

  /* ==== Invoke runtime to commit SATB mark queue to gc and allocate a new buffer ==== */
  // Save to-be-preserved registers.
  int nbytes_save = 0;

  if (needs_frame) {
    if (preserve_gp_registers) {
      nbytes_save = (preserve_fp_registers
                     ? MacroAssembler::num_volatile_gp_regs + MacroAssembler::num_volatile_fp_regs
                     : MacroAssembler::num_volatile_gp_regs) * BytesPerWord;
      __ save_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
    }

    __ save_LR(tmp1);
    __ push_frame_reg_args(nbytes_save, tmp2);
  }

  if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) {
    assert(pre_val_save != noreg, "pre_val_save must not be noreg");

    // The 'pre_val' register must be saved manually unless general-purpose registers are preserved anyway.
    __ mr(pre_val_save, pre_val);
  }

  // Invoke runtime.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, R16_thread);

  // Restore to-be-preserved registers.
  if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) {
    __ mr(pre_val, pre_val_save);
  }

  if (needs_frame) {
    __ pop_frame();
    __ restore_LR(tmp1);

    if (preserve_gp_registers) {
      __ restore_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
    }
  }

  __ bind(skip_barrier);
}
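
// The SATB fast path emitted above corresponds to the following sketch
// (illustrative pseudo-C++ only, not compiled code; 'index' and 'buffer' stand
// for the thread-local SATB queue fields accessed via ShenandoahThreadLocalData):
//
//   if ((gc_state & MARKING) == 0 || pre_val == nullptr) return;
//   if (index == 0) {                                            // buffer exhausted
//     ShenandoahRuntime::write_ref_field_pre(pre_val, thread);   // commit + refill
//   } else {
//     index -= wordSize;                                         // queue fills downwards
//     *(oop*)(buffer + index) = pre_val;                         // stdx(pre_val, buffer, index)
//   }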

void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler *masm, Register dst, Register tmp) {
  __ block_comment("resolve_forward_pointer_not_null (shenandoahgc) {");

  Register tmp1 = tmp,
           R0_tmp2 = R0;
  assert_different_registers(dst, tmp1, R0_tmp2, noreg);

  // If the object has been evacuated, the mark word layout is as follows:
  // | forwarding pointer (62-bit) | '11' (2-bit) |

  // The invariant that object pointers have the lowest two bits cleared permits retrieving
  // the forwarding pointer solely by inverting the lowest two bits.
  // This invariant follows directly from HotSpot's minimum object alignment.
  assert(markWord::marked_value <= (unsigned long) MinObjAlignmentInBytes,
         "marked value must not be higher than hotspot's minimal alignment");

  Label done;

  // Load the object's mark word.
  __ ld(tmp1, oopDesc::mark_offset_in_bytes(), dst);

  // Load the bit mask for the lock bits.
  __ li(R0_tmp2, markWord::lock_mask_in_place);

  // Check whether all bits matching the bit mask are set.
  // If that is the case, the object has been evacuated and the upper bits form the forwarding pointer.
  __ andc_(R0_tmp2, R0_tmp2, tmp1);

  assert(markWord::lock_mask_in_place == markWord::marked_value,
         "marked value must equal the value obtained when all lock bits are being set");
  if (VM_Version::has_isel()) {
    __ xori(tmp1, tmp1, markWord::lock_mask_in_place);
    __ isel(dst, CCR0, Assembler::equal, false, tmp1);
  } else {
    __ bne(CCR0, done);
    __ xori(dst, tmp1, markWord::lock_mask_in_place);
  }

  __ bind(done);
  __ block_comment("} resolve_forward_pointer_not_null (shenandoahgc)");
}
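
// Worked example (illustrative numbers only): with the two lock bits set to
// '11', the mark word of an evacuated object whose to-space copy resides at
// 0x00000000deadbe00 reads 0x00000000deadbe03.  As object alignment guarantees
// that the two lowest address bits are zero, flipping the lock bits with a
// single XOR recovers the forwarding pointer without a mask-and-or sequence:
//   0x00000000deadbe03 ^ 0x3 == 0x00000000deadbe00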

// base:        Base register of the reference's address.
// ind_or_offs: Index or offset of the reference's address (load mode).
// dst:         Reference's address.  In case the object has been evacuated, this is the to-space version
//              of that object.
void ShenandoahBarrierSetAssembler::load_reference_barrier_impl(
    MacroAssembler *masm, DecoratorSet decorators,
    Register base, RegisterOrConstant ind_or_offs,
    Register dst,
    Register tmp1, Register tmp2,
    MacroAssembler::PreservationLevel preservation_level) {
  if (ind_or_offs.is_register()) {
    assert_different_registers(tmp1, tmp2, base, ind_or_offs.as_register(), dst, noreg);
  } else {
    assert_different_registers(tmp1, tmp2, base, dst, noreg);
  }

  Label skip_barrier;

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
  bool is_narrow  = UseCompressedOops && !is_native;

  /* ==== Check whether heap is stable ==== */
  __ lbz(tmp2, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread);

  if (is_strong) {
    // For strong references, the heap is considered stable if "has forwarded" is not active.
    __ andi_(tmp1, tmp2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION);
    __ beq(CCR0, skip_barrier);
#ifdef ASSERT
    // "evacuation" -> (implies) "has forwarded".  If we reach this code, "has forwarded" must thus be set.
    __ andi_(tmp1, tmp1, ShenandoahHeap::HAS_FORWARDED);
    __ asm_assert_ne("'has forwarded' is missing");
#endif // ASSERT
  } else {
    // For all non-strong references, the heap is considered stable if none of "has forwarded",
    // "root set processing", and "weak reference processing" is active.
    // The additional phase conditions are in place to avoid the resurrection of weak references (see JDK-8266440).
    Label skip_fastpath;
    __ andi_(tmp1, tmp2, ShenandoahHeap::WEAK_ROOTS);
    __ bne(CCR0, skip_fastpath);

    __ andi_(tmp1, tmp2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION);
    __ beq(CCR0, skip_barrier);
#ifdef ASSERT
    // "evacuation" -> (implies) "has forwarded".  If we reach this code, "has forwarded" must thus be set.
    __ andi_(tmp1, tmp1, ShenandoahHeap::HAS_FORWARDED);
    __ asm_assert_ne("'has forwarded' is missing");
#endif // ASSERT

    __ bind(skip_fastpath);
  }

  /* ==== Check whether region is in collection set ==== */
  if (is_strong) {
    // Shenandoah stores region metadata in a contiguous area of memory in which a single byte corresponds to
    // an entire region of the Shenandoah heap.  At present, only the least significant bit is used;
    // it indicates whether the region is part of the collection set.
    //
    // All regions are of the same size and are always aligned at a power-of-two boundary.
    // Any address can thus be shifted by a fixed number of bits to retrieve the address prefix shared by
    // all objects within that region (region identification bits).
    //
    //  | unused bits | region identification bits | object identification bits |
    //  (Region size depends on a couple of criteria, such as the page size, user-provided arguments, and the
    //   maximum heap size.  The number of object identification bits can thus not be determined at compile time.)
    //
    // -------------------------------------------------------  <--- cs (collection set) base address
    // | lost space due to heap space base address                   -> 'ShenandoahHeap::in_cset_fast_test_addr()'
    // | (region identification bits contain heap base offset)
    // |------------------------------------------------------  <--- cs base address + (heap_base >> region size shift)
    // | collection set proper                                       -> shift: 'region_size_bytes_shift_jint()'
    // |
    // |------------------------------------------------------  <--- cs base address + (heap_base >> region size shift)
    //                                                                               + number of regions
    __ load_const_optimized(tmp2, ShenandoahHeap::in_cset_fast_test_addr(), tmp1);
    __ srdi(tmp1, dst, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ lbzx(tmp2, tmp1, tmp2);
    __ andi_(tmp2, tmp2, 1);
    __ beq(CCR0, skip_barrier);
  }
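
  // In pseudo-C++ (an illustrative sketch only, not compiled code), the
  // membership test emitted above reads:
  //   uint8_t *cset_map = (uint8_t*) ShenandoahHeap::in_cset_fast_test_addr();
  //   bool in_cset = cset_map[(uintptr_t) dst >> region_size_bytes_shift] & 1;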

  /* ==== Invoke runtime ==== */
  // Save to-be-preserved registers.
  int nbytes_save = 0;

  const bool needs_frame           = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
  const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
  const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;

  if (needs_frame) {
    if (preserve_gp_registers) {
      nbytes_save = (preserve_fp_registers
                     ? MacroAssembler::num_volatile_gp_regs + MacroAssembler::num_volatile_fp_regs
                     : MacroAssembler::num_volatile_gp_regs) * BytesPerWord;
      __ save_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
    }

    __ save_LR(tmp1);
    __ push_frame_reg_args(nbytes_save, tmp1);
  }

  // Calculate the reference's absolute address.
  __ add(R4_ARG2, ind_or_offs, base);

  // Invoke runtime.
  address jrt_address = nullptr;

  if (is_strong) {
    if (is_narrow) {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
    } else {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    }
  } else if (is_weak) {
    if (is_narrow) {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }
  assert(jrt_address != nullptr, "jrt routine cannot be found");

  __ call_VM_leaf(jrt_address, dst /* reference */, R4_ARG2 /* reference address */);

  // Restore to-be-preserved registers.
  if (preserve_gp_registers) {
    __ mr(R0, R3_RET);
  } else {
    __ mr_if_needed(dst, R3_RET);
  }

  if (needs_frame) {
    __ pop_frame();
    __ restore_LR(tmp1);

    if (preserve_gp_registers) {
      __ restore_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
      __ mr(dst, R0);
    }
  }

  __ bind(skip_barrier);
}

// base:           Base register of the reference's address.
// ind_or_offs:    Index or offset of the reference's address.
// L_handle_null:  An optional label that will be jumped to if the reference is null.
void ShenandoahBarrierSetAssembler::load_at(
    MacroAssembler *masm, DecoratorSet decorators, BasicType type,
    Register base, RegisterOrConstant ind_or_offs, Register dst,
    Register tmp1, Register tmp2,
    MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null) {
  // Registers must not clash, except for 'base' and 'dst'.
  if (ind_or_offs.is_register()) {
    if (base != noreg) {
      assert_different_registers(tmp1, tmp2, base, ind_or_offs.register_or_noreg(), R0, noreg);
    }
    assert_different_registers(tmp1, tmp2, dst, ind_or_offs.register_or_noreg(), R0, noreg);
  } else {
    if (base != noreg) {
      assert_different_registers(tmp1, tmp2, base, R0, noreg);
    }
    assert_different_registers(tmp1, tmp2, dst, R0, noreg);
  }

  /* ==== Apply load barrier, if required ==== */
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    assert(is_reference_type(type), "need_load_reference_barrier must check whether type is a reference type");

    // If 'dst' clashes with either 'base' or 'ind_or_offs', use an intermediate result register
    // to keep those values alive until the load reference barrier is applied.
    Register intermediate_dst = (dst == base || (ind_or_offs.is_register() && dst == ind_or_offs.as_register()))
                                ? tmp2
                                : dst;

    BarrierSetAssembler::load_at(masm, decorators, type,
                                 base, ind_or_offs,
                                 intermediate_dst,
                                 tmp1, noreg,
                                 preservation_level, L_handle_null);

    load_reference_barrier(masm, decorators,
                           base, ind_or_offs,
                           intermediate_dst,
                           tmp1, R0,
                           preservation_level);

    __ mr_if_needed(dst, intermediate_dst);
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type,
                                 base, ind_or_offs,
                                 dst,
                                 tmp1, tmp2,
                                 preservation_level, L_handle_null);
  }

  /* ==== Apply keep-alive barrier, if required (e.g., to inhibit weak reference resurrection) ==== */
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    if (ShenandoahSATBBarrier) {
      __ block_comment("keep_alive_barrier (shenandoahgc) {");
      satb_write_barrier_impl(masm, 0, noreg, noreg, dst, tmp1, tmp2, preservation_level);
      __ block_comment("} keep_alive_barrier (shenandoahgc)");
    }
  }
}
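
// Conceptually, load_at composes up to three steps (an illustrative sketch
// only, not compiled code; 'satb_enqueue' abbreviates the "preloaded mode"
// SATB barrier above):
//   dst = raw_load(base + ind_or_offs);   // plain BarrierSetAssembler::load_at
//   dst = load_reference_barrier(dst);    // heal dst to the to-space copy, if needed
//   if (need_keep_alive_barrier)          // e.g., Reference.get()
//     satb_enqueue(dst);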

// base:        Base register of the reference's address.
// ind_or_offs: Index or offset of the reference's address.
// val:         To-be-stored value/reference's new value.
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler *masm, DecoratorSet decorators, BasicType type,
                                             Register base, RegisterOrConstant ind_or_offs, Register val,
                                             Register tmp1, Register tmp2, Register tmp3,
                                             MacroAssembler::PreservationLevel preservation_level) {
  if (is_reference_type(type)) {
    if (ShenandoahSATBBarrier) {
      satb_write_barrier(masm, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level);
    }
  }

  BarrierSetAssembler::store_at(masm, decorators, type,
                                base, ind_or_offs,
                                val,
                                tmp1, tmp2, tmp3,
                                preservation_level);
}

void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler *masm,
                                                                  Register dst, Register jni_env, Register obj,
                                                                  Register tmp, Label &slowpath) {
  __ block_comment("try_resolve_jobject_in_native (shenandoahgc) {");

  assert_different_registers(jni_env, obj, tmp);

  Label done;

  // Fast path: Reference is null (JNI tags are zero for null pointers).
  __ cmpdi(CCR0, obj, 0);
  __ beq(CCR0, done);

  // Resolve jobject using standard implementation.
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, dst, jni_env, obj, tmp, slowpath);

  // Check whether heap is stable.
  __ lbz(tmp,
         in_bytes(ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()),
         jni_env);

  __ andi_(tmp, tmp, ShenandoahHeap::EVACUATION | ShenandoahHeap::HAS_FORWARDED);
  __ bne(CCR0, slowpath);

  __ bind(done);
  __ block_comment("} try_resolve_jobject_in_native (shenandoahgc)");
}

// Special Shenandoah CAS implementation that handles false negatives due
// to concurrent evacuation.  That is, the CAS operation is intended to succeed in
// the following scenarios (success criteria):
//  s1) The reference pointer ('base_addr') equals the expected ('expected') pointer.
//  s2) The reference pointer refers to the from-space version of an already-evacuated
//      object, whereas the expected pointer refers to the to-space version of the same object.
// Situations in which the reference pointer refers to the to-space version of an object
// and the expected pointer refers to the from-space version of the same object cannot occur due to
// Shenandoah's strong to-space invariant.  This also implies that the reference stored in 'new_val'
// cannot refer to the from-space version of an already-evacuated object.
//
// To guarantee correct behavior in concurrent environments, two races must be addressed:
//  r1) A concurrent thread may heal the reference pointer (i.e., it is no longer referring to the
//      from-space version but to the to-space version of the object in question).
//      In this case, the CAS operation should succeed.
//  r2) A concurrent thread may mutate the reference (i.e., the reference pointer refers to an entirely different object).
//      In this case, the CAS operation should fail.
//
// By default, the value held in the 'result' register is zero to indicate failure of CAS,
// non-zero to indicate success.  If 'is_cae' is set, the result is the most recently fetched
// value from 'base_addr' rather than a boolean success indicator.
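//
// The steps below implement, in effect, the following retry loop (an
// illustrative sketch only, not compiled code; 'cas' stands for a plain
// compare-and-swap returning the value witnessed at 'base_addr', and
// 'resolve_forwardee' abbreviates the forwarding-pointer resolution above):
//
//   retry:
//     witness = cas(base_addr, expected, new_val);   // step 1
//     if (witness == expected) return success;
//     if (witness == nullptr)  return failure;       // step 2
//     if (resolve_forwardee(witness) != expected)
//       return failure;                              // step 3: different object (race r2)
//     witness2 = cas(base_addr, witness, new_val);   // step 3: CAS against from-space ptr
//     if (witness2 == witness) return success;
//     goto retry;                                    // step 4: pointer was healed (race r1)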
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler *masm, Register base_addr,
                                                Register expected, Register new_val, Register tmp1, Register tmp2,
                                                bool is_cae, Register result) {
  __ block_comment("cmpxchg_oop (shenandoahgc) {");

  assert_different_registers(base_addr, new_val, tmp1, tmp2, result, R0);
  assert_different_registers(base_addr, expected, tmp1, tmp2, result, R0);

  // A potential clash of 'success_flag' and 'tmp' is accounted for.
  Register success_flag  = is_cae ? noreg  : result,
           current_value = is_cae ? result : tmp1,
           tmp           = is_cae ? tmp1   : result,
           initial_value = tmp2;

  Label done, step_four;

  __ bind(step_four);

  /* ==== Step 1 ("Standard" CAS) ==== */
  // Fast path: The values stored in 'expected' and 'base_addr' are equal.
  // Given that 'expected' must refer to the to-space object of an evacuated object (strong to-space invariant),
  // no special processing is required.
  if (UseCompressedOops) {
    __ cmpxchgw(CCR0, current_value, expected, new_val, base_addr, MacroAssembler::MemBarNone,
                false, success_flag, nullptr, true);
  } else {
    __ cmpxchgd(CCR0, current_value, expected, new_val, base_addr, MacroAssembler::MemBarNone,
                false, success_flag, nullptr, true);
  }

  // Skip the rest of the barrier if the CAS operation succeeds immediately.
  // If it does not, the value stored at the address is either the from-space pointer of the
  // referenced object (success criteria s2)) or simply another object.
  __ beq(CCR0, done);

  /* ==== Step 2 (Null check) ==== */
  // The success criteria s2) cannot be matched with a null pointer
  // (null pointers cannot be subject to concurrent evacuation).  The failure of the CAS operation is thus legitimate.
  __ cmpdi(CCR0, current_value, 0);
  __ beq(CCR0, done);

  /* ==== Step 3 (reference pointer refers to from-space version; success criteria s2)) ==== */
  // To check whether the reference pointer refers to the from-space version, the forwarding
  // pointer of the object referred to by the reference is resolved and compared against the expected pointer.
  // If this check succeeds, another CAS operation is issued with the from-space pointer being the expected pointer.
  //
  // Save the potential from-space pointer.
  __ mr(initial_value, current_value);

  // Resolve forward pointer.
  if (UseCompressedOops) { __ decode_heap_oop_not_null(current_value); }
  resolve_forward_pointer_not_null(masm, current_value, tmp);
  if (UseCompressedOops) { __ encode_heap_oop_not_null(current_value); }

  if (!is_cae) {
    // 'success_flag' was overwritten by the call to 'resolve_forward_pointer_not_null'.
    // Load zero into the register for the potential failure case.
    __ li(success_flag, 0);
  }
  __ cmpd(CCR0, current_value, expected);
  __ bne(CCR0, done);

  // Discard fetched value as it might be a reference to the from-space version of an object.
  if (UseCompressedOops) {
    __ cmpxchgw(CCR0, R0, initial_value, new_val, base_addr, MacroAssembler::MemBarNone,
                false, success_flag);
  } else {
    __ cmpxchgd(CCR0, R0, initial_value, new_val, base_addr, MacroAssembler::MemBarNone,
                false, success_flag);
  }

  /* ==== Step 4 (Retry CAS with to-space pointer (success criteria s2) under race r1)) ==== */
  // The reference pointer could have been healed whilst the previous CAS operation was being performed.
  // Another CAS operation must thus be issued with the to-space pointer being the expected pointer.
  // If that CAS operation fails as well, race r2) must have occurred, indicating that
  // the operation failure is legitimate.
  //
  // To keep the code size small and thus improve icache performance, this highly
  // unlikely case should be handled by the smallest possible code.  Instead of emitting a third,
  // explicit CAS operation, the code jumps back and reuses the first CAS operation (step 1)
  // (passed arguments are identical).
  //
  // A failure of the CAS operation in step 1 would imply that the overall CAS operation is supposed
  // to fail.  Jumping back to step 1 requires, however, that step 2 and step 3 are re-executed as well.
  // It is thus important to ensure that a re-execution of those steps does not put program correctness
  // at risk:
  // - Step 2: Either terminates in failure (desired result) or falls through to step 3.
  // - Step 3: Terminates if the comparison between the forwarded, fetched pointer and the expected value
  //           fails.  Unless the reference has been updated in the meanwhile once again, this is
  //           guaranteed to be the case.
  //           In case of a concurrent update, the CAS is simply retried.  This is legitimate
  //           in terms of program correctness (even though it is not desired).
  __ bne(CCR0, step_four);

  __ bind(done);
  __ block_comment("} cmpxchg_oop (shenandoahgc)");
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler *ce, ShenandoahPreBarrierStub *stub) {
  __ block_comment("gen_pre_barrier_stub (shenandoahgc) {");

  ShenandoahBarrierSetC1 *bs = (ShenandoahBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  // GC status has already been verified by 'ShenandoahBarrierSetC1::pre_barrier'.
  // This stub is the slowpath of that function.

  assert(stub->pre_val()->is_register(), "pre_val must be a register");
  Register pre_val = stub->pre_val()->as_register();

  // If 'do_load()' returns false, the to-be-stored value is already available in 'stub->pre_val()'
  // ("preloaded mode" of the store barrier).
  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false);
  }

  // Fast path: Reference is null.
  __ cmpdi(CCR0, pre_val, 0);
  __ bc_far_optimized(Assembler::bcondCRbiIs1_bhintNoHint, __ bi0(CCR0, Assembler::equal), *stub->continuation());

  // Argument passing via the stack.
  __ std(pre_val, -8, R1_SP);

  __ load_const_optimized(R0, bs->pre_barrier_c1_runtime_code_blob()->code_begin());
  __ call_stub(R0);

  __ b(*stub->continuation());
  __ block_comment("} gen_pre_barrier_stub (shenandoahgc)");
}

void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler *ce,
                                                                    ShenandoahLoadReferenceBarrierStub *stub) {
  __ block_comment("gen_load_reference_barrier_stub (shenandoahgc) {");

  ShenandoahBarrierSetC1 *bs = (ShenandoahBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj  = stub->obj()->as_register();
  Register res  = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(addr, res, tmp1, tmp2);

#ifdef ASSERT
  // Ensure that 'res' is 'R3_ARG1' and contains the same value as 'obj' to reduce the number of required
  // copy instructions.
  assert(R3_RET == res, "res must be r3");
  __ cmpd(CCR0, res, obj);
  __ asm_assert_eq("result register must contain the reference stored in obj");
#endif

  DecoratorSet decorators = stub->decorators();

  /* ==== Check whether region is in collection set ==== */
  // GC status (unstable) has already been verified by 'ShenandoahBarrierSetC1::load_reference_barrier_impl'.
  // This stub is the slowpath of that function.

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);

  if (is_strong) {
    // Check whether object is in collection set.
    __ load_const_optimized(tmp2, ShenandoahHeap::in_cset_fast_test_addr(), tmp1);
    __ srdi(tmp1, obj, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ lbzx(tmp2, tmp1, tmp2);

    __ andi_(tmp2, tmp2, 1);
    __ bc_far_optimized(Assembler::bcondCRbiIs1_bhintNoHint, __ bi0(CCR0, Assembler::equal), *stub->continuation());
  }

  address blob_addr = nullptr;

  if (is_strong) {
    if (is_native) {
      blob_addr = bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin();
    } else {
      blob_addr = bs->load_reference_barrier_strong_rt_code_blob()->code_begin();
    }
  } else if (is_weak) {
    blob_addr = bs->load_reference_barrier_weak_rt_code_blob()->code_begin();
  } else {
    assert(is_phantom, "only remaining strength");
    blob_addr = bs->load_reference_barrier_phantom_rt_code_blob()->code_begin();
  }

  assert(blob_addr != nullptr, "code blob cannot be found");

  // Argument passing via the stack.  'obj' is passed implicitly (as asserted above).
  __ std(addr, -8, R1_SP);

  __ load_const_optimized(tmp1, blob_addr, tmp2);
  __ call_stub(tmp1);

  // 'res' is 'R3_RET'.  The result is thus already in the correct register.

  __ b(*stub->continuation());
  __ block_comment("} gen_load_reference_barrier_stub (shenandoahgc)");
}

#undef __

#define __ sasm->

void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler *sasm) {
  __ block_comment("generate_c1_pre_barrier_runtime_stub (shenandoahgc) {");

  Label runtime, skip_barrier;
  BarrierSet *bs = BarrierSet::barrier_set();

  // Argument passing via the stack.
  const int caller_stack_slots = 3;

  Register R0_pre_val = R0;
  __ ld(R0, -8, R1_SP);
  Register R11_tmp1 = R11_scratch1;
  __ std(R11_tmp1, -16, R1_SP);
  Register R12_tmp2 = R12_scratch2;
  __ std(R12_tmp2, -24, R1_SP);

  /* ==== Check whether marking is active ==== */
  // Even though gc status was checked in 'ShenandoahBarrierSetAssembler::gen_pre_barrier_stub',
  // another check is required as a safepoint might have been reached in the meantime (JDK-8140588).
  __ lbz(R12_tmp2, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread);

  __ andi_(R12_tmp2, R12_tmp2, ShenandoahHeap::MARKING);
  __ beq(CCR0, skip_barrier);

  /* ==== Add previous value directly to thread-local SATB mark queue ==== */
  // Check queue's capacity.  Jump to runtime if no free slot is available.
  __ ld(R12_tmp2, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
  __ cmpdi(CCR0, R12_tmp2, 0);
  __ beq(CCR0, runtime);

  // Capacity suffices.  Decrement the queue's index by one slot (size of one oop).
  __ addi(R12_tmp2, R12_tmp2, -wordSize);
  __ std(R12_tmp2, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);

  // Enqueue the previous value and skip the runtime invocation.
  __ ld(R11_tmp1, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);
  __ stdx(R0_pre_val, R11_tmp1, R12_tmp2);
  __ b(skip_barrier);

  __ bind(runtime);

  /* ==== Invoke runtime to commit SATB mark queue to gc and allocate a new buffer ==== */
  // Save to-be-preserved registers.
  const int nbytes_save = (MacroAssembler::num_volatile_regs + caller_stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save);
  __ save_LR(R11_tmp1);
  __ push_frame_reg_args(nbytes_save, R11_tmp1);

  // Invoke runtime.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), R0_pre_val, R16_thread);

  // Restore to-be-preserved registers.
  __ pop_frame();
  __ restore_LR(R11_tmp1);
  __ restore_volatile_gprs(R1_SP, -nbytes_save);

  __ bind(skip_barrier);

  // Restore spilled registers.
  __ ld(R11_tmp1, -16, R1_SP);
  __ ld(R12_tmp2, -24, R1_SP);

  __ blr();
  __ block_comment("} generate_c1_pre_barrier_runtime_stub (shenandoahgc)");
}

void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler *sasm,
                                                                                    DecoratorSet decorators) {
  __ block_comment("generate_c1_load_reference_barrier_runtime_stub (shenandoahgc) {");

  // Argument passing via the stack.
  const int caller_stack_slots = 1;

  // Save to-be-preserved registers.
  const int nbytes_save = (MacroAssembler::num_volatile_regs - 1 // 'R3_ARG1' is skipped
                           + caller_stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save, true, false);

  // Load arguments from stack.
  // No load required, as ensured by assertions in 'ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub'.
  Register R3_obj = R3_ARG1;
  Register R4_load_addr = R4_ARG2;
  __ ld(R4_load_addr, -8, R1_SP);

  Register R11_tmp = R11_scratch1;

  /* ==== Invoke runtime ==== */
  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);

  address jrt_address = nullptr;

  if (is_strong) {
    if (is_native) {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    } else {
      if (UseCompressedOops) {
        jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
      } else {
        jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
      }
    }
  } else if (is_weak) {
    assert(!is_native, "weak load reference barrier must not be called off-heap");
    if (UseCompressedOops) {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "reference type must be phantom");
    assert(is_native, "phantom load reference barrier must be called off-heap");
    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }
  assert(jrt_address != nullptr, "load reference barrier runtime routine cannot be found");

  __ save_LR(R11_tmp);
  __ push_frame_reg_args(nbytes_save, R11_tmp);

  // Invoke runtime.  Arguments are already stored in the corresponding registers.
  __ call_VM_leaf(jrt_address, R3_obj, R4_load_addr);

  // Restore to-be-preserved registers.
  __ pop_frame();
  __ restore_LR(R11_tmp);
  __ restore_volatile_gprs(R1_SP, -nbytes_save, true, false); // Skip 'R3_RET' register.

  __ blr();
  __ block_comment("} generate_c1_load_reference_barrier_runtime_stub (shenandoahgc)");
}

#undef __

#endif // COMPILER1