1 /*
  2  * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
  4  * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
  5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  6  *
  7  * This code is free software; you can redistribute it and/or modify it
  8  * under the terms of the GNU General Public License version 2 only, as
  9  * published by the Free Software Foundation.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  *
 25  */
 26 
 27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
 28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
 29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
 30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
 31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
 32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
 33 #include "gc/shenandoah/shenandoahRuntime.hpp"
 34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
 35 #include "interpreter/interp_masm.hpp"
 36 #include "interpreter/interpreter.hpp"
 37 #include "runtime/javaThread.hpp"
 38 #include "runtime/sharedRuntime.hpp"
 39 #ifdef COMPILER1
 40 #include "c1/c1_LIRAssembler.hpp"
 41 #include "c1/c1_MacroAssembler.hpp"
 42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
 43 #endif
 44 #ifdef COMPILER2
 45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
 46 #include "opto/output.hpp"
 47 #endif
 48 
 49 #define __ masm->
 50 
 51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
 52                                                        Register src, Register dst, Register count, RegSet saved_regs) {
 53   if (is_oop) {
 54     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
 55     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
 56 
 57       Label done;
 58 
 59       // Avoid calling runtime if count == 0
 60       __ beqz(count, done);
 61 
 62       // Is GC active?
 63       Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 64       assert_different_registers(src, dst, count, t0);
 65 
 66       assert(!saved_regs.contains(t0), "Sanity: about to clobber t0");
 67 
 68       __ lbu(t0, gc_state);
 69       if (ShenandoahSATBBarrier && dest_uninitialized) {
 70         __ test_bit(t0, t0, ShenandoahHeap::HAS_FORWARDED_BITPOS);
 71         __ beqz(t0, done);
 72       } else {
 73         __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
 74         __ beqz(t0, done);
 75       }
 76 
 77       __ push_call_clobbered_registers();
 78       // If arguments are not in proper places, shuffle them.
 79       // Doing this via the stack is the most straight-forward way to avoid
 80       // accidentally smashing any register.
 81       if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
 82         __ push_reg(RegSet::of(src), sp);
 83         __ push_reg(RegSet::of(dst), sp);
 84         __ push_reg(RegSet::of(count), sp);
 85         __ pop_reg(RegSet::of(c_rarg2), sp);
 86         __ pop_reg(RegSet::of(c_rarg1), sp);
 87         __ pop_reg(RegSet::of(c_rarg0), sp);
 88       }
 89       address target = nullptr;
 90       if (UseCompressedOops) {
 91         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
 92       } else {
 93         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
 94       }
 95       __ call_VM_leaf(target, 3);
 96       __ pop_call_clobbered_registers();
 97       __ bind(done);
 98     }
 99   }
100 }
101 
102 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
103                                                        Register start, Register count, Register tmp) {
104   if (ShenandoahCardBarrier && is_oop) {
105     gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
106   }
107 }
108 
109 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
110                                                  Register obj,
111                                                  Register pre_val,
112                                                  Register thread,
113                                                  Register tmp1,
114                                                  Register tmp2) {
115   assert(ShenandoahSATBBarrier, "Should be checked by caller");
116   assert(thread == xthread, "must be");
117 
118   Label done;
119   Label runtime;
120 
121   assert_different_registers(obj, pre_val, tmp1, tmp2);
122   assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
123 
124   Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
125   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
126 
127   // Is marking active?
128   Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
129   __ lbu(t1, gc_state);
130   __ test_bit(t1, t1, ShenandoahHeap::MARKING_BITPOS);
131   __ beqz(t1, done);
132 
133   // Do we need to load the previous value?
134   if (obj != noreg) {
135     if (UseCompressedOops) {
136       __ lwu(pre_val, Address(obj, 0));
137       __ decode_heap_oop(pre_val);
138     } else {
139       __ ld(pre_val, Address(obj, 0));
140     }
141   }
142 
143   // Is the previous value null?
144   __ beqz(pre_val, done);
145 
146   // Can we store original value in the thread's buffer?
147   // Is index == 0?
148   // (The index field is typed as size_t.)
149   __ ld(tmp1, index);                  // tmp := *index_adr
150   __ beqz(tmp1, runtime);              // tmp == 0? If yes, goto runtime
151 
152   __ subi(tmp1, tmp1, wordSize);       // tmp := tmp - wordSize
153   __ sd(tmp1, index);                  // *index_adr := tmp
154   __ ld(tmp2, buffer);
155   __ add(tmp1, tmp1, tmp2);            // tmp := tmp + *buffer_adr
156 
157   // Record the previous value
158   __ sd(pre_val, Address(tmp1, 0));
159   __ j(done);
160 
161   // Slow-path call.
162   __ bind(runtime);
163   __ enter();
164   __ push_call_clobbered_registers();
165   if (c_rarg0 != pre_val) {
166     __ mv(c_rarg0, pre_val);
167   }
168   // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
169   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
170   __ pop_call_clobbered_registers();
171   __ leave();
172 
173   __ bind(done);
174 }
175 
// Emits the load-reference barrier (LRB) for an oop that has already been
// loaded into dst from load_addr.
//
// The generated code first checks the thread-local gc_state and leaves dst
// untouched when no barrier work is pending. Otherwise the object is passed
// in x10 and the load address in x11 to the ShenandoahRuntime entry matching
// the access strength (strong accesses skip the call when the object is not
// in the collection set), and the value left in x10 is moved to dst.
//
// t0 and t1 are used as scratch; dst and load_addr.base() must not be t1.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
                                                           Register dst,
                                                           Address load_addr,
                                                           DecoratorSet decorators) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");
  assert(dst != t1 && load_addr.base() != t1, "need t1");
  assert_different_registers(load_addr.base(), t0, t1);

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
  // Narrow runtime entries are only selected for non-native accesses.
  bool is_narrow  = UseCompressedOops && !is_native;

  Label heap_stable, not_cset;
  Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ lbu(t1, gc_state);

  // Check for heap stability
  if (is_strong) {
    // Strong accesses only care about forwarded objects.
    __ test_bit(t1, t1, ShenandoahHeap::HAS_FORWARDED_BITPOS);
    __ beqz(t1, heap_stable);
  } else {
    // Non-strong accesses proceed when either WEAK_ROOTS or HAS_FORWARDED is set.
    Label lrb;
    __ test_bit(t0, t1, ShenandoahHeap::WEAK_ROOTS_BITPOS);
    __ bnez(t0, lrb);
    __ test_bit(t0, t1, ShenandoahHeap::HAS_FORWARDED_BITPOS);
    __ beqz(t0, heap_stable);
    __ bind(lrb);
  }

  // use x11 for load address
  // If the object currently lives in x11, park it in t1 first so that x11
  // can be overwritten with the load address below.
  Register result_dst = dst;
  if (dst == x11) {
    __ mv(t1, dst);
    dst = t1;
  }

  // Save x10 and x11, unless it is an output register
  RegSet saved_regs = RegSet::of(x10, x11) - result_dst;
  __ push_reg(saved_regs, sp);
  __ la(x11, load_addr);
  __ mv(x10, dst);

  // Test for in-cset
  if (is_strong) {
    // Index the byte table at in_cset_fast_test_addr() by the object's
    // address shifted right by the region size shift; bit 0 of that byte
    // decides whether the slow path is taken.
    __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr());
    __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ add(t1, t1, t0);
    __ lbu(t1, Address(t1));
    __ test_bit(t0, t1, 0);
    __ beqz(t0, not_cset);
  }

  // Slow-path call
  __ enter();
  __ push_call_clobbered_registers();
  address target = nullptr;
  if (is_strong) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    }
  } else if (is_weak) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }
  // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
  __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
  // Carry the returned value in t0 across the register restore, then put it
  // back into x10.
  __ mv(t0, x10);
  __ pop_call_clobbered_registers();
  __ mv(x10, t0);
  __ leave();

  // Both paths arrive here with the resulting object in x10.
  __ bind(not_cset);
  __ mv(result_dst, x10);
  __ pop_reg(saved_regs, sp);

  __ bind(heap_stable);
}
264 
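//
// Arguments:
//
// Inputs:
//   src:        oop location to load from; its base register is overwritten
//               when it is also used as dst
//
// Output:
//   dst:        oop loaded from src location
//
// Kill:
//   t0, t1      (scratch registers of the load-reference and keep-alive barriers)
//   tmp1, tmp2  (scratch registers of the keep-alive barrier, when it is emitted)
//
// Alias:
//   dst may be the same register as src.base(). In that case the load goes
//   through x28 (or x29 if src.base() is x28), which is saved on the stack and
//   restored afterwards, so that the src location stays intact for the barrier.
//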
280 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm,
281                                             DecoratorSet decorators,
282                                             BasicType type,
283                                             Register dst,
284                                             Address src,
285                                             Register tmp1,
286                                             Register tmp2) {
287   // 1: non-reference load, no additional barrier is needed
288   if (!is_reference_type(type)) {
289     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
290     return;
291   }
292 
293   // 2: load a reference from src location and apply LRB if needed
294   if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
295     Register result_dst = dst;
296 
297     // Preserve src location for LRB
298     RegSet saved_regs;
299     if (dst == src.base()) {
300       dst = (src.base() == x28) ? x29 : x28;
301       saved_regs = RegSet::of(dst);
302       __ push_reg(saved_regs, sp);
303     }
304     assert_different_registers(dst, src.base());
305 
306     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
307 
308     load_reference_barrier(masm, dst, src, decorators);
309 
310     if (dst != result_dst) {
311       __ mv(result_dst, dst);
312       dst = result_dst;
313     }
314 
315     if (saved_regs.bits() != 0) {
316       __ pop_reg(saved_regs, sp);
317     }
318   } else {
319     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
320   }
321 
322   // 3: apply keep-alive barrier if needed
323   if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
324     satb_barrier(masm /* masm */,
325                  noreg /* obj */,
326                  dst /* pre_val */,
327                  xthread /* thread */,
328                  tmp1 /* tmp1 */,
329                  tmp2 /* tmp2 */);
330   }
331 }
332 
333 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
334   assert(ShenandoahCardBarrier, "Should have been checked by caller");
335 
336   __ srli(obj, obj, CardTable::card_shift());
337 
338   assert(CardTable::dirty_card_val() == 0, "must be");
339 
340   Address curr_ct_holder_addr(xthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
341   __ ld(t1, curr_ct_holder_addr);
342   __ add(t1, obj, t1);
343 
344   if (UseCondCardMark) {
345     Label L_already_dirty;
346     __ lbu(t0, Address(t1));
347     __ beqz(t0, L_already_dirty);
348     __ sb(zr, Address(t1));
349     __ bind(L_already_dirty);
350   } else {
351     __ sb(zr, Address(t1));
352   }
353 }
354 
355 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
356                                              Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
357   // 1: non-reference types require no barriers
358   if (!is_reference_type(type)) {
359     BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
360     return;
361   }
362 
363   // Flatten object address right away for simplicity: likely needed by barriers
364   if (dst.offset() == 0) {
365     if (dst.base() != tmp3) {
366       __ mv(tmp3, dst.base());
367     }
368   } else {
369     __ la(tmp3, dst);
370   }
371 
372   // 2: pre-barrier: SATB needs the previous value
373   if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
374     satb_barrier(masm,
375                  tmp3 /* obj */,
376                  tmp2 /* pre_val */,
377                  xthread /* thread */,
378                  tmp1 /* tmp */,
379                  t0 /* tmp2 */);
380   }
381 
382   // Store!
383   BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
384 
385   // 3: post-barrier: card barrier needs store address
386   bool storing_non_null = (val != noreg);
387   if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
388     card_barrier(masm, tmp3);
389   }
390 }
391 
392 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
393                                                                   Register obj, Register tmp, Label& slowpath) {
394   Label done;
395   // Resolve jobject
396   BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
397 
398   // Check for null.
399   __ beqz(obj, done);
400 
401   assert(obj != t1, "need t1");
402   Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
403   __ lbu(t1, gc_state);
404 
405   // Check for heap in evacuation phase
406   __ test_bit(t0, t1, ShenandoahHeap::EVACUATION_BITPOS);
407   __ bnez(t0, slowpath);
408 
409   __ bind(done);
410 }
411 
412 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler *masm, Register weak_handle,
413                                                                     Register obj, Register tmp, Label& slow_path) {
414   assert_different_registers(weak_handle, tmp, noreg);
415   assert_different_registers(obj, tmp, noreg);
416 
417 
418   Label done;
419 
420   // Peek weak handle using the standard implementation.
421   BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, tmp, slow_path);
422 
423   // Check if the reference is null, and if it is, take the fast path.
424   __ beqz(obj, done);
425 
426   Address gc_state(xthread, ShenandoahThreadLocalData::gc_state_offset());
427   __ lbu(tmp, gc_state);
428 
429   // Check if the heap is under weak-reference/roots processing, in
430   // which case we need to take the slow path.
431   __ test_bit(tmp, tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS);
432   __ bnez(tmp, slow_path);
433   __ bind(done);
434 }
435 
// Emits the card-marking post barrier for an oop array range.
//
//   start:  address of the first element written (clobbered)
//   count:  number of elements written (clobbered)
//   tmp:    scratch register
//
// The generated code stores zero into every card table byte from the card of
// the first element through the card of the last element, inclusive. A zero
// count emits no stores.
void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register start, Register count, Register tmp) {
  assert(ShenandoahCardBarrier, "Did you mean to enable ShenandoahCardBarrier?");

  Label L_loop, L_done;
  // 'end' reuses the count register: count is consumed when end is computed.
  const Register end = count;

  // Zero count? Nothing to do.
  __ beqz(count, L_done);

  // end = start + (count << LogBytesPerHeapOop), then step back one element
  // so that end is the address of the last element (inclusive bound).
  __ shadd(end, count, start, tmp, LogBytesPerHeapOop);
  __ subi(end, end, BytesPerHeapOop);
  // Convert both addresses to card indices.
  __ srli(start, start, CardTable::card_shift());
  __ srli(end, end, CardTable::card_shift());

  // count = last card index - first card index, i.e. the number of cards to
  // mark minus one (used as a descending offset in the loop below).
  __ sub(count, end, start);

  // start = address of the first card's byte in the thread's card table.
  Address curr_ct_holder_addr(xthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
  __ ld(tmp, curr_ct_holder_addr);
  __ add(start, start, tmp);

  // Mark cards from the last one down to the first one; the loop runs while
  // the offset is still >= 0, so offset 0 (the first card) is included.
  __ bind(L_loop);
  __ add(tmp, start, count);
  __ sb(zr, Address(tmp));
  __ subi(count, count, 1);
  __ bgez(count, L_loop);
  __ bind(L_done);
}
467 
468 #undef __
469 
470 #ifdef COMPILER1
471 
472 #define __ ce->masm()->
473 
474 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
475   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
476   // At this point we know that marking is in progress.
477   // If do_load() is true then we have to emit the
478   // load of the previous value; otherwise it has already
479   // been loaded into _pre_val.
480   __ bind(*stub->entry());
481 
482   assert(stub->pre_val()->is_register(), "Precondition.");
483 
484   Register pre_val_reg = stub->pre_val()->as_register();
485 
486   if (stub->do_load()) {
487     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */);
488   }
489   __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);
490   ce->store_parameter(stub->pre_val()->as_register(), 0);
491   __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
492   __ j(*stub->continuation());
493 }
494 
495 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce,
496                                                                     ShenandoahLoadReferenceBarrierStub* stub) {
497   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
498   __ bind(*stub->entry());
499 
500   DecoratorSet decorators = stub->decorators();
501   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
502   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
503   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
504   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
505 
506   Register obj = stub->obj()->as_register();
507   Register res = stub->result()->as_register();
508   Register addr = stub->addr()->as_pointer_register();
509   Register tmp1 = stub->tmp1()->as_register();
510   Register tmp2 = stub->tmp2()->as_register();
511 
512   assert(res == x10, "result must arrive in x10");
513   assert_different_registers(tmp1, tmp2, t0);
514 
515   if (res != obj) {
516     __ mv(res, obj);
517   }
518 
519   if (is_strong) {
520     // Check for object in cset.
521     __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
522     __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
523     __ add(tmp2, tmp2, tmp1);
524     __ lbu(tmp2, Address(tmp2));
525     __ beqz(tmp2, *stub->continuation(), /* is_far */ true);
526   }
527 
528   ce->store_parameter(res, 0);
529   ce->store_parameter(addr, 1);
530 
531   if (is_strong) {
532     if (is_native) {
533       __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
534     } else {
535       __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
536     }
537   } else if (is_weak) {
538     __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
539   } else {
540     assert(is_phantom, "only remaining strength");
541     __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
542   }
543 
544   __ j(*stub->continuation());
545 }
546 
547 #undef __
548 
549 #define __ sasm->
550 
551 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
552   __ prologue("shenandoah_pre_barrier", false);
553 
554   // arg0 : previous value of memory
555 
556   BarrierSet* bs = BarrierSet::barrier_set();
557 
558   const Register pre_val = x10;
559   const Register thread = xthread;
560   const Register tmp = t0;
561 
562   Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
563   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
564 
565   Label done;
566   Label runtime;
567 
568   // Is marking still active?
569   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
570   __ lb(tmp, gc_state);
571   __ test_bit(tmp, tmp, ShenandoahHeap::MARKING_BITPOS);
572   __ beqz(tmp, done);
573 
574   // Can we store original value in the thread's buffer?
575   __ ld(tmp, queue_index);
576   __ beqz(tmp, runtime);
577 
578   __ subi(tmp, tmp, wordSize);
579   __ sd(tmp, queue_index);
580   __ ld(t1, buffer);
581   __ add(tmp, tmp, t1);
582   __ load_parameter(0, t1);
583   __ sd(t1, Address(tmp, 0));
584   __ j(done);
585 
586   __ bind(runtime);
587   __ push_call_clobbered_registers();
588   __ load_parameter(0, pre_val);
589   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
590   __ pop_call_clobbered_registers();
591   __ bind(done);
592 
593   __ epilogue();
594 }
595 
596 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm,
597                                                                                     DecoratorSet decorators) {
598   __ prologue("shenandoah_load_reference_barrier", false);
599   // arg0 : object to be resolved
600 
601   __ push_call_clobbered_registers();
602   __ load_parameter(0, x10);
603   __ load_parameter(1, x11);
604 
605   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
606   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
607   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
608   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
609   address target  = nullptr;
610   if (is_strong) {
611     if (is_native) {
612       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
613     } else {
614       if (UseCompressedOops) {
615         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
616       } else {
617         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
618       }
619     }
620   } else if (is_weak) {
621     assert(!is_native, "weak must not be called off-heap");
622     if (UseCompressedOops) {
623       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
624     } else {
625       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
626     }
627   } else {
628     assert(is_phantom, "only remaining strength");
629     assert(is_native, "phantom must only be called off-heap");
630     target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
631   }
632   __ rt_call(target);
633   __ mv(t0, x10);
634   __ pop_call_clobbered_registers();
635   __ mv(x10, t0);
636 
637   __ epilogue();
638 }
639 
640 #undef __
641 
642 #endif // COMPILER1
643 
644 #ifdef COMPILER2
645 
646 #undef __
647 #define __ masm->
648 
649 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow) {
650   // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
651   if (is_narrow) {
652     __ lwu(dst, src);
653   } else {
654     __ ld(dst, src);
655   }
656 
657   ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);
658 }
659 
660 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
661     Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3) {
662 
663   ShenandoahBarrierStubC2::store_pre(masm, node, dst, tmp1, tmp2, tmp3, dst_narrow);
664 
665   // Do the actual store
666   if (dst_narrow) {
667     if (!src_narrow) {
668       // Need to encode into tmp, because we cannot clobber src.
669       assert(tmp1 != noreg, "need temp register");
670       if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
671         __ encode_heap_oop(tmp1, src);
672       } else {
673         __ encode_heap_oop_not_null(tmp1, src);
674       }
675       src = tmp1;
676     }
677     __ sw(src, dst);
678   } else {
679     __ sd(src, dst);
680   }
681 
682   ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);
683 }
684 
685 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
686     Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool is_acquire) {
687   const Assembler::Aqrl acquire = is_acquire ? Assembler::aq : Assembler::relaxed;
688   const Assembler::Aqrl release = Assembler::rl;
689   const Assembler::operand_size size = narrow ? Assembler::uint32 : Assembler::int64;
690 
691   ShenandoahBarrierStubC2::load_store_pre(masm, node, Address(addr), tmp1, tmp2, tmp3, narrow);
692 
693   // CAS!
694   __ cmpxchg(addr, oldval, newval, size, acquire, release, /* result */ res, !exchange /* result_as_bool */);
695 
696   ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
697 }
698 
699 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
700     Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
701   const bool is_narrow = node->bottom_type()->isa_narrowoop();
702 
703   ShenandoahBarrierStubC2::load_store_pre(masm, node, Address(addr, 0), tmp1, tmp2, tmp3, is_narrow);
704 
705   if (is_narrow) {
706     if (is_acquire) {
707       __ atomic_xchgalwu(preval, newval, addr);
708     } else {
709       __ atomic_xchgwu(preval, newval, addr);
710     }
711   } else {
712     if (is_acquire) {
713       __ atomic_xchgal(preval, newval, addr);
714     } else {
715       __ atomic_xchg(preval, newval, addr);
716     }
717   }
718 
719   ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
720 }
721 
722 #undef __
723 #define __ masm.
724 
725 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
726   assert(CardTable::dirty_card_val() == 0, "must be");
727   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
728 
729   // tmp1 = card table base (holder)
730   Address curr_ct_holder_addr(xthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
731   __ ld(tmp1, curr_ct_holder_addr);
732 
733   // tmp1 = effective address
734   __ la(tmp2, address);
735 
736   // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
737   __ srli(tmp2, tmp2, CardTable::card_shift());
738   __ add(tmp2, tmp2, tmp1);
739 
740   if (UseCondCardMark) {
741     Label L_already_dirty;
742     __ lbu(tmp1, Address(tmp2));
743     __ beqz(tmp1, L_already_dirty);
744     __ sb(zr, Address(tmp2));
745     __ bind(L_already_dirty);
746   } else {
747     __ sb(zr, Address(tmp2));
748   }
749 }
750 
751 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
752   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
753 
754   Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
755   __ lbu(tmp, gc_state_fast);
756   __ beqz(tmp, *continuation());
757   __ j(*entry());
758 
759   // This is were the slowpath stub will return to or the code above will
760   // jump to if the checks are false
761   __ bind(*continuation());
762 }
763 
764 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
765   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
766   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
767 
768   __ bind(*entry());
769 
770   // If we need to load ourselves, do it here.
771   if (_do_load) {
772     if (_narrow) {
773       __ lwu(_obj, _addr);
774     } else {
775       __ ld(_obj, _addr);
776     }
777   }
778 
779   // If the object is null, there is no point in applying barriers.
780   maybe_far_jump_if_zero(masm, _obj);
781 
782   // We need to make sure that loads done by callers survive across slow-path calls.
783   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
784   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
785   if (!_do_load || needs_both_barriers) {
786     preserve(_obj);
787   }
788 
789   // Go for barriers. Barriers can return straight to continuation, as long
790   // as another barrier is not needed and we can reach the fastpath.
791   if (needs_both_barriers) {
792     keepalive(masm, nullptr);
793     lrb(masm);
794   } else if (_needs_keep_alive_barrier) {
795     keepalive(masm, continuation());
796   } else if (_needs_load_ref_barrier) {
797     lrb(masm);
798   } else {
799     ShouldNotReachHere();
800   }
801 }
802 
803 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
804   Label L_short_jump;
805   __ bnez(reg, L_short_jump);
806   __ j(*continuation());
807   __ bind(L_short_jump);
808 }
809 
// Emits the keep-alive barrier for _obj: the object is pushed into the
// thread-local SATB queue, or handed to the runtime when the queue index is
// zero.
//
//   L_done:  label to jump to once the barrier is finished, or nullptr to
//            fall through to the code emitted after this function.
//
// _tmp1 and _tmp2 are used as scratch.
void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
  Address index(xthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(xthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
  // L_through is the fall-through exit; it is only bound when L_done is null.
  Label L_through, L_slowpath;

  // If another barrier is enabled as well, do a runtime check for a specific barrier.
  if (_needs_load_ref_barrier) {
    assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
    // Skip the keep-alive part when the byte selected for MARKING is zero.
    Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
    __ lbu(_tmp1, gc_state_fast);
    __ beqz(_tmp1, L_through);
  }

  // Fast-path: put object into buffer.
  // If buffer is already full, go slow.
  // The index counts down in bytes: claim a slot by subtracting one word and
  // publishing the new index.
  __ ld(_tmp1, index);
  __ beqz(_tmp1, L_slowpath);
  __ subi(_tmp1, _tmp1, wordSize);
  __ sd(_tmp1, index);
  __ ld(_tmp2, buffer);

  // Store the object in queue.
  // If object is narrow, we need to decode it before inserting.
  // _tmp1 = address of the claimed slot (buffer + index).
  __ add(_tmp1, _tmp1, _tmp2);
  if (_narrow) {
    __ decode_heap_oop_not_null(_tmp2, _obj);
    __ sd(_tmp2, Address(_tmp1));
  } else {
    __ sd(_obj, Address(_tmp1));
  }

  // Fast-path exits here.
  if (L_done != nullptr) {
    __ j(*L_done);
  } else {
    __ j(L_through);
  }

  // Slow-path: call runtime to handle.
  __ bind(L_slowpath);

  {
    // Live registers are saved for the extent of this scope.
    SaveLiveRegisters slr(&masm, this);

    // Go to runtime and handle the rest there.
    __ mv(c_rarg0, _obj);
    __ la(ra, RuntimeAddress(keepalive_runtime_entry_addr()));
    __ jalr(ra);
  }
  // Slow-path exit: jump to the caller's label, or bind the fall-through
  // label so that both paths continue with the code emitted next.
  if (L_done != nullptr) {
    __ j(*L_done);
  } else {
    __ bind(L_through);
  }
}
865 
866 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
867   Label L_slow;
868 
869   // If another barrier is enabled as well, do a runtime check for a specific barrier.
870   if (_needs_keep_alive_barrier) {
871     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
872     Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
873     __ lbu(_tmp1, gc_state_fast);
874     maybe_far_jump_if_zero(masm, _tmp1);
875   }
876 
877   // If weak references are being processed, weak/phantom loads need to go slow,
878   // regardless of their cset status.
879   if (_needs_load_ref_weak_barrier) {
880     Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
881     __ lbu(_tmp1, gc_state_fast);
882     __ bnez(_tmp1, L_slow);
883   }
884 
885   // Cset-check. Fall-through to slow if in collection set.
886   if (_narrow) {
887     __ decode_heap_oop_not_null(_tmp2, _obj);
888   } else {
889     __ mv(_tmp2, _obj);
890   }
891 
892   __ mv(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
893   __ srli(_tmp2, _tmp2, ShenandoahHeapRegion::region_size_bytes_shift_jint());
894   __ add(_tmp1, _tmp1, _tmp2);
895   __ lbu(_tmp1, Address(_tmp1, 0));
896   maybe_far_jump_if_zero(masm, _tmp1);
897 
898   // Slow path
899   __ bind(L_slow);
900 
901   // Obj is the result, need to temporarily stop preserving it.
902   bool is_obj_preserved = is_preserved(_obj);
903   if (is_obj_preserved) {
904     dont_preserve(_obj);
905   }
906   {
907     SaveLiveRegisters slr(&masm, this);
908 
909     // Shuffle in the arguments. The end result should be:
910     //   c_rarg0 <- obj
911     //   c_rarg1 <- lea(addr)
912     if (c_rarg0 == _obj) {
913       __ la(c_rarg1, _addr);
914     } else if (c_rarg1 == _obj) {
915       // Set up arguments in reverse, and then flip them
916       __ la(c_rarg0, _addr);
917       // flip them
918       __ mv(_tmp1, c_rarg0);
919       __ mv(c_rarg0, c_rarg1);
920       __ mv(c_rarg1, _tmp1);
921     } else {
922       assert_different_registers(c_rarg1, _obj);
923       __ la(c_rarg1, _addr);
924       __ mv(c_rarg0, _obj);
925     }
926 
927     // Go to runtime and handle the rest there.
928     __ la(ra, RuntimeAddress(lrb_runtime_entry_addr()));
929     __ jalr(ra);
930 
931     // Save the result where needed. Narrow entries return narrowOop (32 bits)
932     // we need to zero the upper 32 bits of x10.
933     if (_narrow) {
934       __ zext_w(_obj, x10);
935     } else {
936       __ mv(_obj, x10);
937     }
938   }
939   if (is_obj_preserved) {
940     preserve(_obj);
941   }
942 
943   __ j(*continuation());
944 }
945 
946 int ShenandoahBarrierStubC2::available_gp_registers() {
947   Unimplemented(); // Not used
948   return 0;
949 }
950 
951 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
952   Unimplemented(); // Not used
953   return true;
954 }
955 
956 void ShenandoahBarrierStubC2::post_init() {
957   // Do nothing.
958 }
959 
960 #endif // COMPILER2