1 /*
   2  * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
   4  * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahForwarding.hpp"
  32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interp_masm.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_LIRAssembler.hpp"
  42 #include "c1/c1_MacroAssembler.hpp"
  43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  44 #endif
  45 #ifdef COMPILER2
  46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  47 #include "opto/output.hpp"
  48 #endif
  49 
  50 #define __ masm->
  51 
  52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
  53                                                        Register src, Register dst, Register count, RegSet saved_regs) {
  54   if (is_oop) {
  55     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  56     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  57 
  58       Label done;
  59 
  60       // Avoid calling runtime if count == 0
  61       __ beqz(count, done);
  62 
  63       // Is GC active?
  64       Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  65       assert_different_registers(src, dst, count, t0);
  66 
  67       assert(!saved_regs.contains(t0), "Sanity: about to clobber t0");
  68 
  69       __ lbu(t0, gc_state);
  70       if (ShenandoahSATBBarrier && dest_uninitialized) {
  71         __ test_bit(t0, t0, ShenandoahHeap::HAS_FORWARDED_BITPOS);
  72         __ beqz(t0, done);
  73       } else {
  74         __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
  75         __ beqz(t0, done);
  76       }
  77 
  78       __ push_call_clobbered_registers();
  79       // If arguments are not in proper places, shuffle them.
  80       // Doing this via the stack is the most straight-forward way to avoid
  81       // accidentally smashing any register.
  82       if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
  83         __ push_reg(RegSet::of(src), sp);
  84         __ push_reg(RegSet::of(dst), sp);
  85         __ push_reg(RegSet::of(count), sp);
  86         __ pop_reg(RegSet::of(c_rarg2), sp);
  87         __ pop_reg(RegSet::of(c_rarg1), sp);
  88         __ pop_reg(RegSet::of(c_rarg0), sp);
  89       }
  90       address target = nullptr;
  91       if (UseCompressedOops) {
  92         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
  93       } else {
  94         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
  95       }
  96       __ call_VM_leaf(target, 3);
  97       __ pop_call_clobbered_registers();
  98       __ bind(done);
  99     }
 100   }
 101 }
 102 
 103 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
 104                                                        Register start, Register count, Register tmp) {
 105   if (ShenandoahCardBarrier && is_oop) {
 106     gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
 107   }
 108 }
 109 
 110 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
 111                                                  Register obj,
 112                                                  Register pre_val,
 113                                                  Register thread,
 114                                                  Register tmp1,
 115                                                  Register tmp2) {
 116   assert(ShenandoahSATBBarrier, "Should be checked by caller");
 117   assert(thread == xthread, "must be");
 118 
 119   Label done;
 120   Label runtime;
 121 
 122   assert_different_registers(obj, pre_val, tmp1, tmp2);
 123   assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
 124 
 125   Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 126   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 127 
 128   // Is marking active?
 129   Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 130   __ lbu(t1, gc_state);
 131   __ test_bit(t1, t1, ShenandoahHeap::MARKING_BITPOS);
 132   __ beqz(t1, done);
 133 
 134   // Do we need to load the previous value?
 135   if (obj != noreg) {
 136     if (UseCompressedOops) {
 137       __ lwu(pre_val, Address(obj, 0));
 138       __ decode_heap_oop(pre_val);
 139     } else {
 140       __ ld(pre_val, Address(obj, 0));
 141     }
 142   }
 143 
 144   // Is the previous value null?
 145   __ beqz(pre_val, done);
 146 
 147   // Can we store original value in the thread's buffer?
 148   // Is index == 0?
 149   // (The index field is typed as size_t.)
 150   __ ld(tmp1, index);                  // tmp := *index_adr
 151   __ beqz(tmp1, runtime);              // tmp == 0? If yes, goto runtime
 152 
 153   __ subi(tmp1, tmp1, wordSize);       // tmp := tmp - wordSize
 154   __ sd(tmp1, index);                  // *index_adr := tmp
 155   __ ld(tmp2, buffer);
 156   __ add(tmp1, tmp1, tmp2);            // tmp := tmp + *buffer_adr
 157 
 158   // Record the previous value
 159   __ sd(pre_val, Address(tmp1, 0));
 160   __ j(done);
 161 
 162   // Slow-path call.
 163   __ bind(runtime);
 164   __ enter();
 165   __ push_call_clobbered_registers();
 166   if (c_rarg0 != pre_val) {
 167     __ mv(c_rarg0, pre_val);
 168   }
 169   // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
 170   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
 171   __ pop_call_clobbered_registers();
 172   __ leave();
 173 
 174   __ bind(done);
 175 }
 176 
 177 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
 178   assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
 179 
 180   Label is_null;
 181   __ beqz(dst, is_null);
 182   resolve_forward_pointer_not_null(masm, dst, tmp);
 183   __ bind(is_null);
 184 }
 185 
 186 // IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitly
 187 // passed in.
 188 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
 189   assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
 190   // The below loads the mark word, checks if the lowest two bits are
 191   // set, and if so, clear the lowest two bits and copy the result
 192   // to dst. Otherwise it leaves dst alone.
 193   // Implementing this is surprisingly awkward. I do it here by:
 194   // - Inverting the mark word
 195   // - Test lowest two bits == 0
 196   // - If so, set the lowest two bits
 197   // - Invert the result back, and copy to dst
 198   RegSet saved_regs = RegSet::of(t2);
 199   bool borrow_reg = (tmp == noreg);
 200   if (borrow_reg) {
 201     // No free registers available. Make one useful.
 202     tmp = t0;
 203     if (tmp == dst) {
 204       tmp = t1;
 205     }
 206     saved_regs += RegSet::of(tmp);
 207   }
 208 
 209   assert_different_registers(tmp, dst, t2);
 210   __ push_reg(saved_regs, sp);
 211 
 212   Label done;
 213   __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
 214   __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1
 215   __ andi(t2, tmp, markWord::lock_mask_in_place);
 216   __ bnez(t2, done);
 217   __ ori(tmp, tmp, markWord::marked_value);
 218   __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1
 219   __ bind(done);
 220 
 221   __ pop_reg(saved_regs, sp);
 222 }
 223 
 224 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
 225                                                            Register dst,
 226                                                            Address load_addr,
 227                                                            DecoratorSet decorators) {
 228   assert(ShenandoahLoadRefBarrier, "Should be enabled");
 229   assert(dst != t1 && load_addr.base() != t1, "need t1");
 230   assert_different_registers(load_addr.base(), t0, t1);
 231 
 232   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 233   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 234   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 235   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 236   bool is_narrow  = UseCompressedOops && !is_native;
 237 
 238   Label heap_stable, not_cset;
 239   Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 240   __ lbu(t1, gc_state);
 241 
 242   // Check for heap stability
 243   if (is_strong) {
 244     __ test_bit(t1, t1, ShenandoahHeap::HAS_FORWARDED_BITPOS);
 245     __ beqz(t1, heap_stable);
 246   } else {
 247     Label lrb;
 248     __ test_bit(t0, t1, ShenandoahHeap::WEAK_ROOTS_BITPOS);
 249     __ bnez(t0, lrb);
 250     __ test_bit(t0, t1, ShenandoahHeap::HAS_FORWARDED_BITPOS);
 251     __ beqz(t0, heap_stable);
 252     __ bind(lrb);
 253   }
 254 
 255   // use x11 for load address
 256   Register result_dst = dst;
 257   if (dst == x11) {
 258     __ mv(t1, dst);
 259     dst = t1;
 260   }
 261 
 262   // Save x10 and x11, unless it is an output register
 263   RegSet saved_regs = RegSet::of(x10, x11) - result_dst;
 264   __ push_reg(saved_regs, sp);
 265   __ la(x11, load_addr);
 266   __ mv(x10, dst);
 267 
 268   // Test for in-cset
 269   if (is_strong) {
 270     __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr());
 271     __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 272     __ add(t1, t1, t0);
 273     __ lbu(t1, Address(t1));
 274     __ test_bit(t0, t1, 0);
 275     __ beqz(t0, not_cset);
 276   }
 277 
 278   // Slow-path call
 279   __ enter();
 280   __ push_call_clobbered_registers();
 281   address target = nullptr;
 282   if (is_strong) {
 283     if (is_narrow) {
 284       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
 285     } else {
 286       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
 287     }
 288   } else if (is_weak) {
 289     if (is_narrow) {
 290       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
 291     } else {
 292       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
 293     }
 294   } else {
 295     assert(is_phantom, "only remaining strength");
 296     assert(!is_narrow, "phantom access cannot be narrow");
 297     target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
 298   }
 299   // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
 300   __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
 301   __ mv(t0, x10);
 302   __ pop_call_clobbered_registers();
 303   __ mv(x10, t0);
 304   __ leave();
 305 
 306   __ bind(not_cset);
 307   __ mv(result_dst, x10);
 308   __ pop_reg(saved_regs, sp);
 309 
 310   __ bind(heap_stable);
 311 }
 312 
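//
// Arguments:
//
// Inputs:
//   src:        oop location to load from; its base register is overwritten
//               only when it is also dst
//
// Output:
//   dst:        oop loaded from src location
//
// Kill:
//   t0, t1 (scratch registers used by the load-reference and keep-alive
//   barriers); tmp1 and tmp2 may be clobbered as well
//
// Alias:
//   dst == src.base(): x28 (or x29 when src.base() is x28) is saved on the
//   stack, used as a temporary output register so that the src location stays
//   intact for the load-reference barrier, and restored afterwards
//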
 328 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm,
 329                                             DecoratorSet decorators,
 330                                             BasicType type,
 331                                             Register dst,
 332                                             Address src,
 333                                             Register tmp1,
 334                                             Register tmp2) {
 335   // 1: non-reference load, no additional barrier is needed
 336   if (!is_reference_type(type)) {
 337     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
 338     return;
 339   }
 340 
 341   // 2: load a reference from src location and apply LRB if needed
 342   if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
 343     Register result_dst = dst;
 344 
 345     // Preserve src location for LRB
 346     RegSet saved_regs;
 347     if (dst == src.base()) {
 348       dst = (src.base() == x28) ? x29 : x28;
 349       saved_regs = RegSet::of(dst);
 350       __ push_reg(saved_regs, sp);
 351     }
 352     assert_different_registers(dst, src.base());
 353 
 354     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
 355 
 356     load_reference_barrier(masm, dst, src, decorators);
 357 
 358     if (dst != result_dst) {
 359       __ mv(result_dst, dst);
 360       dst = result_dst;
 361     }
 362 
 363     if (saved_regs.bits() != 0) {
 364       __ pop_reg(saved_regs, sp);
 365     }
 366   } else {
 367     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
 368   }
 369 
 370   // 3: apply keep-alive barrier if needed
 371   if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
 372     satb_barrier(masm /* masm */,
 373                  noreg /* obj */,
 374                  dst /* pre_val */,
 375                  xthread /* thread */,
 376                  tmp1 /* tmp1 */,
 377                  tmp2 /* tmp2 */);
 378   }
 379 }
 380 
 381 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
 382   assert(ShenandoahCardBarrier, "Should have been checked by caller");
 383 
 384   __ srli(obj, obj, CardTable::card_shift());
 385 
 386   assert(CardTable::dirty_card_val() == 0, "must be");
 387 
 388   Address curr_ct_holder_addr(xthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 389   __ ld(t1, curr_ct_holder_addr);
 390   __ add(t1, obj, t1);
 391 
 392   if (UseCondCardMark) {
 393     Label L_already_dirty;
 394     __ lbu(t0, Address(t1));
 395     __ beqz(t0, L_already_dirty);
 396     __ sb(zr, Address(t1));
 397     __ bind(L_already_dirty);
 398   } else {
 399     __ sb(zr, Address(t1));
 400   }
 401 }
 402 
 403 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 404                                              Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
 405   // 1: non-reference types require no barriers
 406   if (!is_reference_type(type)) {
 407     BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
 408     return;
 409   }
 410 
 411   // Flatten object address right away for simplicity: likely needed by barriers
 412   if (dst.offset() == 0) {
 413     if (dst.base() != tmp3) {
 414       __ mv(tmp3, dst.base());
 415     }
 416   } else {
 417     __ la(tmp3, dst);
 418   }
 419 
 420   // 2: pre-barrier: SATB needs the previous value
 421   if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
 422     satb_barrier(masm,
 423                  tmp3 /* obj */,
 424                  tmp2 /* pre_val */,
 425                  xthread /* thread */,
 426                  tmp1 /* tmp */,
 427                  t0 /* tmp2 */);
 428   }
 429 
 430   // Store!
 431   BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
 432 
 433   // 3: post-barrier: card barrier needs store address
 434   bool storing_non_null = (val != noreg);
 435   if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
 436     card_barrier(masm, tmp3);
 437   }
 438 }
 439 
 440 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
 441                                                                   Register obj, Register tmp, Label& slowpath) {
 442   Label done;
 443   // Resolve jobject
 444   BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
 445 
 446   // Check for null.
 447   __ beqz(obj, done);
 448 
 449   assert(obj != t1, "need t1");
 450   Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
 451   __ lbu(t1, gc_state);
 452 
 453   // Check for heap in evacuation phase
 454   __ test_bit(t0, t1, ShenandoahHeap::EVACUATION_BITPOS);
 455   __ bnez(t0, slowpath);
 456 
 457   __ bind(done);
 458 }
 459 
 460 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler *masm, Register weak_handle,
 461                                                                     Register obj, Register tmp, Label& slow_path) {
 462   assert_different_registers(weak_handle, tmp, noreg);
 463   assert_different_registers(obj, tmp, noreg);
 464 
 465 
 466   Label done;
 467 
 468   // Peek weak handle using the standard implementation.
 469   BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, tmp, slow_path);
 470 
 471   // Check if the reference is null, and if it is, take the fast path.
 472   __ beqz(obj, done);
 473 
 474   Address gc_state(xthread, ShenandoahThreadLocalData::gc_state_offset());
 475   __ lbu(tmp, gc_state);
 476 
 477   // Check if the heap is under weak-reference/roots processing, in
 478   // which case we need to take the slow path.
 479   __ test_bit(tmp, tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS);
 480   __ bnez(tmp, slow_path);
 481   __ bind(done);
 482 }
 483 
 484 // Special Shenandoah CAS implementation that handles false negatives due
 485 // to concurrent evacuation.  The service is more complex than a
 486 // traditional CAS operation because the CAS operation is intended to
 487 // succeed if the reference at addr exactly matches expected or if the
 488 // reference at addr holds a pointer to a from-space object that has
 489 // been relocated to the location named by expected.  There are two
 490 // races that must be addressed:
 491 //  a) A parallel thread may mutate the contents of addr so that it points
 492 //     to a different object.  In this case, the CAS operation should fail.
 493 //  b) A parallel thread may heal the contents of addr, replacing a
 494 //     from-space pointer held in addr with the to-space pointer
 495 //     representing the new location of the object.
 496 // Upon entry to cmpxchg_oop, it is assured that new_val equals null
 497 // or it refers to an object that is not being evacuated out of
 498 // from-space, or it refers to the to-space version of an object that
 499 // is being evacuated out of from-space.
 500 //
 501 // By default the value held in the result register following execution
 502 // of the generated code sequence is 0 to indicate failure of CAS,
 503 // non-zero to indicate success. If is_cae, the result is the value most
 504 // recently fetched from addr rather than a boolean success indicator.
 505 //
 506 // Clobbers t0, t1
 507 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
 508                                                 Register addr,
 509                                                 Register expected,
 510                                                 Register new_val,
 511                                                 Assembler::Aqrl acquire,
 512                                                 Assembler::Aqrl release,
 513                                                 bool is_cae,
 514                                                 Register result) {
 515   bool is_narrow = UseCompressedOops;
 516   Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64;
 517 
 518   assert_different_registers(addr, expected, t0, t1);
 519   assert_different_registers(addr, new_val, t0, t1);
 520 
 521   Label retry, success, fail, done;
 522 
 523   __ bind(retry);
 524 
 525   // Step1: Try to CAS.
 526   __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1);
 527 
 528   // If success, then we are done.
 529   __ beq(expected, t1, success);
 530 
 531   // Step2: CAS failed, check the forwarded pointer.
 532   __ mv(t0, t1);
 533 
 534   if (is_narrow) {
 535     __ decode_heap_oop(t0, t0);
 536   }
 537   resolve_forward_pointer(masm, t0);
 538 
 539   __ encode_heap_oop(t0, t0);
 540 
 541   // Report failure when the forwarded oop was not expected.
 542   __ bne(t0, expected, fail);
 543 
 544   // Step 3: CAS again using the forwarded oop.
 545   __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0);
 546 
 547   // Retry when failed.
 548   __ bne(t0, t1, retry);
 549 
 550   __ bind(success);
 551   if (is_cae) {
 552     __ mv(result, expected);
 553   } else {
 554     __ mv(result, 1);
 555   }
 556   __ j(done);
 557 
 558   __ bind(fail);
 559   if (is_cae) {
 560     __ mv(result, t0);
 561   } else {
 562     __ mv(result, zr);
 563   }
 564 
 565   __ bind(done);
 566 }
 567 
 568 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
 569                                                                      Register start, Register count, Register tmp) {
 570   assert(ShenandoahCardBarrier, "Did you mean to enable ShenandoahCardBarrier?");
 571 
 572   Label L_loop, L_done;
 573   const Register end = count;
 574 
 575   // Zero count? Nothing to do.
 576   __ beqz(count, L_done);
 577 
 578   // end = start + count << LogBytesPerHeapOop
 579   // last element address to make inclusive
 580   __ shadd(end, count, start, tmp, LogBytesPerHeapOop);
 581   __ subi(end, end, BytesPerHeapOop);
 582   __ srli(start, start, CardTable::card_shift());
 583   __ srli(end, end, CardTable::card_shift());
 584 
 585   // number of bytes to copy
 586   __ sub(count, end, start);
 587 
 588   Address curr_ct_holder_addr(xthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 589   __ ld(tmp, curr_ct_holder_addr);
 590   __ add(start, start, tmp);
 591 
 592   __ bind(L_loop);
 593   __ add(tmp, start, count);
 594   __ sb(zr, Address(tmp));
 595   __ subi(count, count, 1);
 596   __ bgez(count, L_loop);
 597   __ bind(L_done);
 598 }
 599 
 600 #undef __
 601 
 602 #ifdef COMPILER1
 603 
 604 #define __ ce->masm()->
 605 
 606 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
 607   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 608   // At this point we know that marking is in progress.
 609   // If do_load() is true then we have to emit the
 610   // load of the previous value; otherwise it has already
 611   // been loaded into _pre_val.
 612   __ bind(*stub->entry());
 613 
 614   assert(stub->pre_val()->is_register(), "Precondition.");
 615 
 616   Register pre_val_reg = stub->pre_val()->as_register();
 617 
 618   if (stub->do_load()) {
 619     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */);
 620   }
 621   __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);
 622   ce->store_parameter(stub->pre_val()->as_register(), 0);
 623   __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
 624   __ j(*stub->continuation());
 625 }
 626 
 627 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce,
 628                                                                     ShenandoahLoadReferenceBarrierStub* stub) {
 629   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 630   __ bind(*stub->entry());
 631 
 632   DecoratorSet decorators = stub->decorators();
 633   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 634   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 635   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 636   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 637 
 638   Register obj = stub->obj()->as_register();
 639   Register res = stub->result()->as_register();
 640   Register addr = stub->addr()->as_pointer_register();
 641   Register tmp1 = stub->tmp1()->as_register();
 642   Register tmp2 = stub->tmp2()->as_register();
 643 
 644   assert(res == x10, "result must arrive in x10");
 645   assert_different_registers(tmp1, tmp2, t0);
 646 
 647   if (res != obj) {
 648     __ mv(res, obj);
 649   }
 650 
 651   if (is_strong) {
 652     // Check for object in cset.
 653     __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
 654     __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 655     __ add(tmp2, tmp2, tmp1);
 656     __ lbu(tmp2, Address(tmp2));
 657     __ beqz(tmp2, *stub->continuation(), true /* is_far */);
 658   }
 659 
 660   ce->store_parameter(res, 0);
 661   ce->store_parameter(addr, 1);
 662 
 663   if (is_strong) {
 664     if (is_native) {
 665       __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
 666     } else {
 667       __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
 668     }
 669   } else if (is_weak) {
 670     __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
 671   } else {
 672     assert(is_phantom, "only remaining strength");
 673     __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
 674   }
 675 
 676   __ j(*stub->continuation());
 677 }
 678 
 679 #undef __
 680 
 681 #define __ sasm->
 682 
 683 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
 684   __ prologue("shenandoah_pre_barrier", false);
 685 
 686   // arg0 : previous value of memory
 687 
 688   BarrierSet* bs = BarrierSet::barrier_set();
 689 
 690   const Register pre_val = x10;
 691   const Register thread = xthread;
 692   const Register tmp = t0;
 693 
 694   Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 695   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 696 
 697   Label done;
 698   Label runtime;
 699 
 700   // Is marking still active?
 701   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 702   __ lb(tmp, gc_state);
 703   __ test_bit(tmp, tmp, ShenandoahHeap::MARKING_BITPOS);
 704   __ beqz(tmp, done);
 705 
 706   // Can we store original value in the thread's buffer?
 707   __ ld(tmp, queue_index);
 708   __ beqz(tmp, runtime);
 709 
 710   __ subi(tmp, tmp, wordSize);
 711   __ sd(tmp, queue_index);
 712   __ ld(t1, buffer);
 713   __ add(tmp, tmp, t1);
 714   __ load_parameter(0, t1);
 715   __ sd(t1, Address(tmp, 0));
 716   __ j(done);
 717 
 718   __ bind(runtime);
 719   __ push_call_clobbered_registers();
 720   __ load_parameter(0, pre_val);
 721   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
 722   __ pop_call_clobbered_registers();
 723   __ bind(done);
 724 
 725   __ epilogue();
 726 }
 727 
 728 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm,
 729                                                                                     DecoratorSet decorators) {
 730   __ prologue("shenandoah_load_reference_barrier", false);
 731   // arg0 : object to be resolved
 732 
 733   __ push_call_clobbered_registers();
 734   __ load_parameter(0, x10);
 735   __ load_parameter(1, x11);
 736 
 737   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 738   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 739   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 740   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 741   address target  = nullptr;
 742   if (is_strong) {
 743     if (is_native) {
 744       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
 745     } else {
 746       if (UseCompressedOops) {
 747         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
 748       } else {
 749         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
 750       }
 751     }
 752   } else if (is_weak) {
 753     assert(!is_native, "weak must not be called off-heap");
 754     if (UseCompressedOops) {
 755       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
 756     } else {
 757       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
 758     }
 759   } else {
 760     assert(is_phantom, "only remaining strength");
 761     assert(is_native, "phantom must only be called off-heap");
 762     target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
 763   }
 764   __ rt_call(target);
 765   __ mv(t0, x10);
 766   __ pop_call_clobbered_registers();
 767   __ mv(x10, t0);
 768 
 769   __ epilogue();
 770 }
 771 
 772 #undef __
 773 
 774 #endif // COMPILER1
 775 
 776 #ifdef COMPILER2
 777 
 778 #undef __
 779 #define __ masm->
 780 
 781 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow) {
 782   // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
 783   if (is_narrow) {
 784     __ lwu(dst, src);
 785   } else {
 786     __ ld(dst, src);
 787   }
 788 
 789   ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);
 790 }
 791 
 792 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
 793     Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3) {
 794 
 795   ShenandoahBarrierStubC2::store_pre(masm, node, tmp1, dst, tmp2, tmp3, dst_narrow);
 796 
 797   // Do the actual store
 798   if (dst_narrow) {
 799     if (!src_narrow) {
 800       // Need to encode into tmp, because we cannot clobber src.
 801       assert(tmp1 != noreg, "need temp register");
 802       if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
 803         __ encode_heap_oop(tmp1, src);
 804       } else {
 805         __ encode_heap_oop_not_null(tmp1, src);
 806       }
 807       src = tmp1;
 808     }
 809     __ sw(src, dst);
 810   } else {
 811     __ sd(src, dst);
 812   }
 813 
 814   ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);
 815 }
 816 
 817 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
 818     Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool is_acquire) {
 819   const Assembler::Aqrl acquire = is_acquire ? Assembler::aq : Assembler::relaxed;
 820   const Assembler::Aqrl release = Assembler::rl;
 821   const Assembler::operand_size size = narrow ? Assembler::uint32 : Assembler::int64;
 822 
 823   ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, Address(addr), tmp2, tmp3, narrow);
 824 
 825   // CAS!
 826   __ cmpxchg(addr, oldval, newval, size, acquire, release, /* result */ res, !exchange /* result_as_bool */);
 827 
 828   ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
 829 }
 830 
 831 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
 832     Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
 833   const bool is_narrow = node->bottom_type()->isa_narrowoop();
 834 
 835   ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, Address(addr, 0), tmp2, tmp3, is_narrow);
 836 
 837   if (is_narrow) {
 838     if (is_acquire) {
 839       __ atomic_xchgalwu(preval, newval, addr);
 840     } else {
 841       __ atomic_xchgwu(preval, newval, addr);
 842     }
 843   } else {
 844     if (is_acquire) {
 845       __ atomic_xchgal(preval, newval, addr);
 846     } else {
 847       __ atomic_xchg(preval, newval, addr);
 848     }
 849   }
 850 
 851   ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
 852 }
 853 
 854 #undef __
 855 #define __ masm.
 856 
 857 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
 858   assert(CardTable::dirty_card_val() == 0, "must be");
 859   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 860 
 861   // tmp1 = card table base (holder)
 862   Address curr_ct_holder_addr(xthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 863   __ ld(tmp1, curr_ct_holder_addr);
 864 
 865   // tmp1 = effective address
 866   __ la(tmp2, address);
 867 
 868   // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
 869   __ srli(tmp2, tmp2, CardTable::card_shift());
 870   __ add(tmp2, tmp2, tmp1);
 871 
 872   if (UseCondCardMark) {
 873     Label L_already_dirty;
 874     __ lbu(tmp1, Address(tmp2));
 875     __ beqz(tmp1, L_already_dirty);
 876     __ sb(zr, Address(tmp2));
 877     __ bind(L_already_dirty);
 878   } else {
 879     __ sb(zr, Address(tmp2));
 880   }
 881 }
 882 
 883 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
 884   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 885 
 886   Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
 887   __ lbu(tmp, gc_state_fast);
 888   __ beqz(tmp, *continuation());
 889   __ j(*entry());
 890 
 891   // This is were the slowpath stub will return to or the code above will
 892   // jump to if the checks are false
 893   __ bind(*continuation());
 894 }
 895 
 896 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
 897   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 898   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
 899 
 900   __ bind(*entry());
 901 
 902   // If we need to load ourselves, do it here.
 903   if (_do_load) {
 904     if (_narrow) {
 905       __ lwu(_obj, _addr);
 906     } else {
 907       __ ld(_obj, _addr);
 908     }
 909   }
 910 
 911   // If the object is null, there is no point in applying barriers.
 912   maybe_far_jump_if_zero(masm, _obj);
 913 
 914   // We need to make sure that loads done by callers survive across slow-path calls.
 915   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
 916   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
 917   if (!_do_load || needs_both_barriers) {
 918     preserve(_obj);
 919   }
 920 
 921   // Go for barriers. Barriers can return straight to continuation, as long
 922   // as another barrier is not needed and we can reach the fastpath.
 923   if (needs_both_barriers) {
 924     keepalive(masm, nullptr);
 925     lrb(masm);
 926   } else if (_needs_keep_alive_barrier) {
 927     keepalive(masm, continuation());
 928   } else if (_needs_load_ref_barrier) {
 929     lrb(masm);
 930   } else {
 931     ShouldNotReachHere();
 932   }
 933 }
 934 
 935 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
 936   Label L_short_jump;
 937   __ bnez(reg, L_short_jump);
 938   __ j(*continuation());
 939   __ bind(L_short_jump);
 940 }
 941 
 942 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
 943   Address index(xthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 944   Address buffer(xthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 945   Label L_through, L_slowpath;
 946 
 947   // If another barrier is enabled as well, do a runtime check for a specific barrier.
 948   if (_needs_load_ref_barrier) {
 949     assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
 950     Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
 951     __ lbu(_tmp1, gc_state_fast);
 952     __ beqz(_tmp1, L_through);
 953   }
 954 
 955   // Fast-path: put object into buffer.
 956   // If buffer is already full, go slow.
 957   __ ld(_tmp1, index);
 958   __ beqz(_tmp1, L_slowpath);
 959   __ subi(_tmp1, _tmp1, wordSize);
 960   __ sd(_tmp1, index);
 961   __ ld(_tmp2, buffer);
 962 
 963   // Store the object in queue.
 964   // If object is narrow, we need to decode it before inserting.
 965   __ add(_tmp1, _tmp1, _tmp2);
 966   if (_narrow) {
 967     __ decode_heap_oop_not_null(_tmp2, _obj);
 968     __ sd(_tmp2, Address(_tmp1));
 969   } else {
 970     __ sd(_obj, Address(_tmp1));
 971   }
 972 
 973   // Fast-path exits here.
 974   if (L_done != nullptr) {
 975     __ j(*L_done);
 976   } else {
 977     __ j(L_through);
 978   }
 979 
 980   // Slow-path: call runtime to handle.
 981   __ bind(L_slowpath);
 982 
 983   {
 984     SaveLiveRegisters slr(&masm, this);
 985 
 986     // Go to runtime and handle the rest there.
 987     __ mv(c_rarg0, _obj);
 988     __ rt_call(keepalive_runtime_entry_addr());
 989   }
 990   if (L_done != nullptr) {
 991     __ j(*L_done);
 992   } else {
 993     __ bind(L_through);
 994   }
 995 }
 996 
 997 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
 998   Label L_slow;
 999 
1000   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1001   if (_needs_keep_alive_barrier) {
1002     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1003     Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1004     __ lbu(_tmp1, gc_state_fast);
1005     maybe_far_jump_if_zero(masm, _tmp1);
1006   }
1007 
1008   // If weak references are being processed, weak/phantom loads need to go slow,
1009   // regardless of their cset status.
1010   if (_needs_load_ref_weak_barrier) {
1011     Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1012     __ lbu(_tmp1, gc_state_fast);
1013     __ bnez(_tmp1, L_slow);
1014   }
1015 
1016   // Cset-check. Fall-through to slow if in collection set.
1017   if (_narrow) {
1018     __ decode_heap_oop_not_null(_tmp2, _obj);
1019   } else {
1020     __ mv(_tmp2, _obj);
1021   }
1022 
1023   __ mv(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1024   __ srli(_tmp2, _tmp2, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1025   __ add(_tmp1, _tmp1, _tmp2);
1026   __ lbu(_tmp1, Address(_tmp1, 0));
1027   maybe_far_jump_if_zero(masm, _tmp1);
1028 
1029   // Slow path
1030   __ bind(L_slow);
1031 
1032   // Obj is the result, need to temporarily stop preserving it.
1033   bool is_obj_preserved = is_preserved(_obj);
1034   if (is_obj_preserved) {
1035     dont_preserve(_obj);
1036   }
1037   {
1038     SaveLiveRegisters slr(&masm, this);
1039 
1040     // Shuffle in the arguments. The end result should be:
1041     //   c_rarg0 <- obj
1042     //   c_rarg1 <- lea(addr)
1043     if (c_rarg0 == _obj) {
1044       __ la(c_rarg1, _addr);
1045     } else if (c_rarg1 == _obj) {
1046       // Set up arguments in reverse, and then flip them
1047       __ la(c_rarg0, _addr);
1048       // flip them
1049       __ mv(_tmp1, c_rarg0);
1050       __ mv(c_rarg0, c_rarg1);
1051       __ mv(c_rarg1, _tmp1);
1052     } else {
1053       assert_different_registers(c_rarg1, _obj);
1054       __ la(c_rarg1, _addr);
1055       __ mv(c_rarg0, _obj);
1056     }
1057 
1058     // Go to runtime and handle the rest there.
1059     __ rt_call(lrb_runtime_entry_addr());
1060 
1061     // Save the result where needed. Narrow entries return narrowOop (32 bits)
1062     // we need to zero the upper 32 bits of x10.
1063     if (_narrow) {
1064       __ zext_w(_obj, x10);
1065     } else {
1066       __ mv(_obj, x10);
1067     }
1068   }
1069   if (is_obj_preserved) {
1070     preserve(_obj);
1071   }
1072 
1073   __ j(*continuation());
1074 }
1075 
1076 int ShenandoahBarrierStubC2::available_gp_registers() {
1077   Unimplemented(); // Not used
1078   return 0;
1079 }
1080 
1081 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1082   Unimplemented(); // Not used
1083   return true;
1084 }
1085 
1086 void ShenandoahBarrierStubC2::post_init() {
1087   // Do nothing.
1088 }
1089 
1090 #endif // COMPILER2