1 /*
   2  * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
   4  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  33 #include "gc/shenandoah/shenandoahNMethod.inline.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "nativeInst_x86.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "utilities/macros.hpp"
  41 #ifdef COMPILER1
  42 #include "c1/c1_LIRAssembler.hpp"
  43 #include "c1/c1_MacroAssembler.hpp"
  44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  45 #endif
  46 #ifdef COMPILER2
  47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  48 #endif
  49 
  50 #define __ masm->
  51 
  52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
  53                                                        Register src, Register dst, Register count) {
  54 
  55   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  56 
  57   if (is_reference_type(type)) {
  58     if (ShenandoahCardBarrier) {
  59       bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
  60       bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
  61       bool obj_int = (type == T_OBJECT) && UseCompressedOops;
  62 
  63       // We need to save the original element count because the array copy stub
  64       // will destroy the value and we need it for the card marking barrier.
  65       if (!checkcast) {
  66         if (!obj_int) {
  67           // Save count for barrier
  68           __ movptr(r11, count);
  69         } else if (disjoint) {
  70           // Save dst in r11 in the disjoint case
  71           __ movq(r11, dst);
  72         }
  73       }
  74     }
  75 
  76     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  77       Register thread = r15_thread;
  78       assert_different_registers(src, dst, count, thread);
  79 
  80       Label L_done;
  81       // Short-circuit if count == 0.
  82       __ testptr(count, count);
  83       __ jcc(Assembler::zero, L_done);
  84 
  85       // Avoid runtime call when not active.
  86       Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  87       int flags;
  88       if (ShenandoahSATBBarrier && dest_uninitialized) {
  89         flags = ShenandoahHeap::HAS_FORWARDED;
  90       } else {
  91         flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
  92       }
  93       __ testb(gc_state, flags);
  94       __ jcc(Assembler::zero, L_done);
  95 
  96       __ push_call_clobbered_registers(/* save_fpu = */ false);
  97       // If arguments are not in proper places, shuffle them.
  98       // Doing this via the stack is the most straight-forward way to avoid
  99       // accidentally smashing any register.
 100       if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
 101         __ push(src);
 102         __ push(dst);
 103         __ push(count);
 104         __ pop(c_rarg2);
 105         __ pop(c_rarg1);
 106         __ pop(c_rarg0);
 107       }
 108       address target = nullptr;
 109       if (UseCompressedOops) {
 110         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
 111       } else {
 112         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
 113       }
 114       __ call_VM_leaf(target, 3);
 115 
 116       __ pop_call_clobbered_registers(/* restore_fpu = */ false);
 117 
 118       __ bind(L_done);
 119     }
 120   }
 121 
 122 }
 123 
 124 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 125                                                        Register src, Register dst, Register count) {
 126 
 127   if (ShenandoahCardBarrier && is_reference_type(type)) {
 128     bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
 129     bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
 130     bool obj_int = (type == T_OBJECT) && UseCompressedOops;
 131     Register tmp = rax;
 132 
 133     if (!checkcast) {
 134       if (!obj_int) {
 135         // Save count for barrier
 136         count = r11;
 137       } else if (disjoint) {
 138         // Use the saved dst in the disjoint case
 139         dst = r11;
 140       }
 141     } else {
 142       tmp = rscratch1;
 143     }
 144     gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp);
 145   }
 146 }
 147 
 148 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
 149                                                  Register obj,
 150                                                  Register pre_val,
 151                                                  Register tmp) {
 152   assert(ShenandoahSATBBarrier, "Should be checked by caller");
 153   const Register thread = r15_thread;
 154 
 155   Label done;
 156   Label runtime;
 157 
 158   assert(pre_val != noreg, "check this code");
 159 
 160   if (obj != noreg) {
 161     assert_different_registers(obj, pre_val, tmp);
 162     assert(pre_val != rax, "check this code");
 163   }
 164 
 165   Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 166   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 167 
 168   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 169   __ testb(gc_state, ShenandoahHeap::MARKING);
 170   __ jcc(Assembler::zero, done);
 171 
 172   // Do we need to load the previous value?
 173   if (obj != noreg) {
 174     if (UseCompressedOops) {
 175       __ movl(pre_val, Address(obj, 0));
 176       __ decode_heap_oop(pre_val);
 177     } else {
 178       __ movq(pre_val, Address(obj, 0));
 179     }
 180   }
 181 
 182   // Is the previous value null?
 183   __ cmpptr(pre_val, NULL_WORD);
 184   __ jcc(Assembler::equal, done);
 185 
 186   // Can we store original value in the thread's buffer?
 187   // Is index == 0?
 188   // (The index field is typed as size_t.)
 189 
 190   __ movptr(tmp, index);                   // tmp := *index_adr
 191   __ cmpptr(tmp, 0);                       // tmp == 0?
 192   __ jcc(Assembler::equal, runtime);       // If yes, goto runtime
 193 
 194   __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
 195   __ movptr(index, tmp);                   // *index_adr := tmp
 196   __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
 197 
 198   // Record the previous value
 199   __ movptr(Address(tmp, 0), pre_val);
 200   __ jmp(done);
 201 
 202   __ bind(runtime);
 203 
 204   // Slow-path call.
 205   // Some paths can be reached from the c2i adapter with live fp arguments in registers.
 206   __ enter();
 207   __ push_call_clobbered_registers(/* save_fpu = */ true);
 208 
 209   assert(thread != c_rarg0, "smashed arg");
 210   if (c_rarg0 != pre_val) {
 211     __ mov(c_rarg0, pre_val);
 212   }
 213 
 214   // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
 215   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
 216 
 217   __ pop_call_clobbered_registers(/* restore_fpu = */ true);
 218   __ leave();
 219 
 220   __ bind(done);
 221 }
 222 
 223 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, DecoratorSet decorators) {
 224   assert(ShenandoahLoadRefBarrier, "Should be enabled");
 225 
 226   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 227   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 228   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 229   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 230   bool is_narrow  = UseCompressedOops && !is_native;
 231 
 232   Label heap_stable, not_cset;
 233 
 234   __ block_comment("load_reference_barrier { ");
 235 
 236   // Check if GC is active
 237   Register thread = r15_thread;
 238 
 239   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 240   int flags = ShenandoahHeap::HAS_FORWARDED;
 241   if (!is_strong) {
 242     flags |= ShenandoahHeap::WEAK_ROOTS;
 243   }
 244   __ testb(gc_state, flags);
 245   __ jcc(Assembler::zero, heap_stable);
 246 
 247   Register tmp1 = noreg, tmp2 = noreg;
 248   if (is_strong) {
 249     // Test for object in cset
 250     // Allocate temporary registers
 251     for (int i = 0; i < Register::available_gp_registers(); i++) {
 252       Register r = as_Register(i);
 253       if (r != rsp && r != rbp && r != rcx && r != dst && r != src.base() && r != src.index() ) {
 254         if (tmp1 == noreg) {
 255           tmp1 = r;
 256         } else {
 257           tmp2 = r;
 258           break;
 259         }
 260       }
 261     }
 262     assert(tmp1 != noreg, "tmp1 allocated");
 263     assert(tmp2 != noreg, "tmp2 allocated");
 264     assert_different_registers(tmp1, tmp2, src.base(), src.index());
 265     assert_different_registers(tmp1, tmp2, dst);
 266 
 267     __ push(tmp1);
 268     __ push(tmp2);
 269 
 270     // Optimized cset-test
 271     __ movptr(tmp1, dst);
 272     if (AOTCodeCache::is_on_for_dump()) {
 273       assert_different_registers(tmp1, tmp2, rcx);
 274       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
 275       __ push(rcx);
 276       __ movb(rcx, Address(tmp2));
 277       __ shrptr(tmp1);
 278       __ pop(rcx);
 279       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
 280       __ movptr(tmp2, Address(tmp2));
 281     } else {
 282       __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 283       __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
 284     }
 285     __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
 286     __ testbool(tmp1);
 287     __ jcc(Assembler::zero, not_cset);
 288   }
 289 
 290   // Slow-path call.
 291   // Save registers that can be clobbered by call.
 292   // Some paths can be reached from the c2i adapter with live fp arguments in registers.
 293   __ enter();
 294   if (dst != rax) {
 295     __ push(rax);
 296   }
 297   __ push_call_clobbered_registers_except(rax, /* save_fpu = */ true);
 298 
 299   // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
 300   if (dst == c_rarg1) {
 301     __ lea(c_rarg0, src);
 302     __ xchgptr(c_rarg1, c_rarg0);
 303   } else {
 304     __ lea(c_rarg1, src);
 305     __ movptr(c_rarg0, dst);
 306   }
 307 
 308   address target = nullptr;
 309   if (is_strong) {
 310     if (is_narrow) {
 311       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
 312     } else {
 313       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
 314     }
 315   } else if (is_weak) {
 316     if (is_narrow) {
 317       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
 318     } else {
 319       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
 320     }
 321   } else {
 322     assert(is_phantom, "only remaining strength");
 323     assert(!is_narrow, "phantom access cannot be narrow");
 324     target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
 325   }
 326 
 327   // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
 328   __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
 329   __ pop_call_clobbered_registers_except(rax, /* restore_fpu = */ true);
 330   if (dst != rax) {
 331     __ movptr(dst, rax);
 332     __ pop(rax);
 333   }
 334   __ leave();
 335 
 336   __ bind(not_cset);
 337 
 338   if  (is_strong) {
 339     __ pop(tmp2);
 340     __ pop(tmp1);
 341   }
 342 
 343   __ bind(heap_stable);
 344 
 345   __ block_comment("} load_reference_barrier");
 346 }
 347 
 348 //
 349 // Arguments:
 350 //
 351 // Inputs:
 352 //   src:        oop location, might be clobbered
 353 //   tmp1:       scratch register, might not be valid.
 354 //
 355 // Output:
 356 //   dst:        oop loaded from src location
 357 //
 358 // Kill:
 359 //   tmp1 (if it is valid)
 360 //
 361 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 362              Register dst, Address src, Register tmp1) {
 363   // 1: non-reference load, no additional barrier is needed
 364   if (!is_reference_type(type)) {
 365     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
 366     return;
 367   }
 368 
 369   assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");
 370 
 371   // 2: load a reference from src location and apply LRB if needed
 372   if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
 373     Register result_dst = dst;
 374     bool use_tmp1_for_dst = false;
 375 
 376     // Preserve src location for LRB
 377     if (dst == src.base() || dst == src.index()) {
 378     // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
 379       if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
 380         dst = tmp1;
 381         use_tmp1_for_dst = true;
 382       } else {
 383         dst = rdi;
 384         __ push(dst);
 385       }
 386       assert_different_registers(dst, src.base(), src.index());
 387     }
 388 
 389     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
 390 
 391     load_reference_barrier(masm, dst, src, decorators);
 392 
 393     // Move loaded oop to final destination
 394     if (dst != result_dst) {
 395       __ movptr(result_dst, dst);
 396 
 397       if (!use_tmp1_for_dst) {
 398         __ pop(dst);
 399       }
 400 
 401       dst = result_dst;
 402     }
 403   } else {
 404     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
 405   }
 406 
 407   // 3: apply keep-alive barrier if needed
 408   if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
 409     satb_barrier(masm /* masm */,
 410                  noreg /* obj */,
 411                  dst /* pre_val */,
 412                  tmp1 /* tmp */);
 413   }
 414 }
 415 
 416 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
 417   assert(ShenandoahCardBarrier, "Should have been checked by caller");
 418 
 419   // Does a store check for the oop in register obj. The content of
 420   // register obj is destroyed afterwards.
 421   __ shrptr(obj, CardTable::card_shift());
 422 
 423   // We'll use this register as the TLS base address and also later on
 424   // to hold the byte_map_base.
 425   Register thread = r15_thread;
 426   Register tmp = rscratch1;
 427 
 428   Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 429   __ movptr(tmp, curr_ct_holder_addr);
 430   Address card_addr(tmp, obj, Address::times_1);
 431 
 432   int dirty = CardTable::dirty_card_val();
 433   if (UseCondCardMark) {
 434     Label L_already_dirty;
 435     __ cmpb(card_addr, dirty);
 436     __ jccb(Assembler::equal, L_already_dirty);
 437     __ movb(card_addr, dirty);
 438     __ bind(L_already_dirty);
 439   } else {
 440     __ movb(card_addr, dirty);
 441   }
 442 }
 443 
 444 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 445               Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
 446 
 447   // 1: non-reference types require no barriers
 448   if (!is_reference_type(type)) {
 449     BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
 450     return;
 451   }
 452 
 453   // Flatten object address right away for simplicity: likely needed by barriers
 454   assert_different_registers(val, tmp1, tmp2, tmp3, r15_thread);
 455   if (dst.index() == noreg && dst.disp() == 0) {
 456     if (dst.base() != tmp1) {
 457       __ movptr(tmp1, dst.base());
 458     }
 459   } else {
 460     __ lea(tmp1, dst);
 461   }
 462 
 463   // 2: pre-barrier: SATB needs the previous value
 464   if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
 465     satb_barrier(masm,
 466                  tmp1 /* obj */,
 467                  tmp2 /* pre_val */,
 468                  tmp3 /* tmp */);
 469   }
 470 
 471   // Store!
 472   BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
 473 
 474   // 3: post-barrier: card barrier needs store address
 475   bool storing_non_null = (val != noreg);
 476   if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
 477     card_barrier(masm, tmp1);
 478   }
 479 }
 480 
 481 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
 482                                                                   Register obj, Register tmp, Label& slowpath) {
 483   Label done;
 484   // Resolve jobject
 485   BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
 486 
 487   // Check for null.
 488   __ testptr(obj, obj);
 489   __ jcc(Assembler::zero, done);
 490 
 491   Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
 492   __ testb(gc_state, ShenandoahHeap::EVACUATION);
 493   __ jccb(Assembler::notZero, slowpath);
 494   __ bind(done);
 495 }
 496 
 497 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj, Label& slowpath) {
 498   Label done;
 499 
 500   // Peek weak handle using the standard implementation.
 501   BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, slowpath);
 502 
 503   // Check if the reference is null, and if it is, take the fast path.
 504   __ testptr(obj, obj);
 505   __ jcc(Assembler::zero, done);
 506 
 507   Address gc_state(r15_thread, ShenandoahThreadLocalData::gc_state_offset());
 508 
 509   // Check if the heap is under weak-reference/roots processing, in
 510   // which case we need to take the slow path.
 511   __ testb(gc_state, ShenandoahHeap::WEAK_ROOTS);
 512   __ jcc(Assembler::notZero, slowpath);
 513   __ bind(done);
 514 }
 515 
// BLOCK_COMMENT(str) forwards str to block_comment() of the current assembler
// (whatever `__` expands to); in PRODUCT builds it expands to nothing.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

// BIND(label) binds the label and then issues BLOCK_COMMENT with the label's
// name followed by ':'. It is meant to be used as `__ BIND(label);`.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Address scale factor for indexing oop elements: times_4 with
// UseCompressedOops, times_8 otherwise.
#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
 525 
 526 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
 527                                                                      Register addr, Register count,
 528                                                                      Register tmp) {
 529   assert(ShenandoahCardBarrier, "Should have been checked by caller");
 530 
 531   Label L_loop, L_done;
 532   const Register end = count;
 533   assert_different_registers(addr, end);
 534 
 535   // Zero count? Nothing to do.
 536   __ testl(count, count);
 537   __ jccb(Assembler::zero, L_done);
 538 
 539   const Register thread = r15_thread;
 540   Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 541   __ movptr(tmp, curr_ct_holder_addr);
 542 
 543   __ leaq(end, Address(addr, count, TIMES_OOP, 0));  // end == addr+count*oop_size
 544   __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
 545   __ shrptr(addr, CardTable::card_shift());
 546   __ shrptr(end, CardTable::card_shift());
 547   __ subptr(end, addr); // end --> cards count
 548 
 549   __ addptr(addr, tmp);
 550 
 551   __ BIND(L_loop);
 552   __ movb(Address(addr, count, Address::times_1), 0);
 553   __ decrement(count);
 554   __ jccb(Assembler::greaterEqual, L_loop);
 555 
 556   __ BIND(L_done);
 557 }
 558 
 559 #undef __
 560 
 561 #ifdef COMPILER1
 562 
 563 #define __ ce->masm()->
 564 
 565 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
 566   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 567   // At this point we know that marking is in progress.
 568   // If do_load() is true then we have to emit the
 569   // load of the previous value; otherwise it has already
 570   // been loaded into _pre_val.
 571 
 572   __ bind(*stub->entry());
 573   assert(stub->pre_val()->is_register(), "Precondition.");
 574 
 575   Register pre_val_reg = stub->pre_val()->as_register();
 576 
 577   if (stub->do_load()) {
 578     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
 579   }
 580 
 581   __ cmpptr(pre_val_reg, NULL_WORD);
 582   __ jcc(Assembler::equal, *stub->continuation());
 583   ce->store_parameter(stub->pre_val()->as_register(), 0);
 584   __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
 585   __ jmp(*stub->continuation());
 586 
 587 }
 588 
 589 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
 590   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 591   __ bind(*stub->entry());
 592 
 593   DecoratorSet decorators = stub->decorators();
 594   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 595   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 596   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 597   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 598 
 599   Register obj = stub->obj()->as_register();
 600   Register res = stub->result()->as_register();
 601   Register addr = stub->addr()->as_pointer_register();
 602   Register tmp1 = stub->tmp1()->as_register();
 603   Register tmp2 = stub->tmp2()->as_register();
 604   assert_different_registers(obj, res, addr, tmp1, tmp2);
 605 
 606   Label slow_path;
 607 
 608   assert(res == rax, "result must arrive in rax");
 609 
 610   if (res != obj) {
 611     __ mov(res, obj);
 612   }
 613 
 614   if (is_strong) {
 615     // Check for object being in the collection set.
 616     __ mov(tmp1, res);
 617     if (AOTCodeCache::is_on_for_dump()) {
 618       __ push(rcx);
 619       __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
 620       __ movl(rcx, Address(rcx));
 621       if (tmp1 != rcx) {
 622         __ mov(tmp1, res);
 623         __ shrptr(tmp1);
 624         __ pop(rcx);
 625       } else {
 626         assert_different_registers(tmp2, rcx);
 627         __ mov(tmp2, res);
 628         __ shrptr(tmp2);
 629         __ pop(rcx);
 630         __ movptr(tmp1, tmp2);
 631       }
 632       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
 633       __ movptr(tmp2, Address(tmp2));
 634     } else {
 635       __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 636       __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
 637     }
 638     __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
 639     __ testbool(tmp2);
 640     __ jcc(Assembler::zero, *stub->continuation());
 641   }
 642 
 643   __ bind(slow_path);
 644   ce->store_parameter(res, 0);
 645   ce->store_parameter(addr, 1);
 646   if (is_strong) {
 647     if (is_native) {
 648       __ call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
 649     } else {
 650       __ call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
 651     }
 652   } else if (is_weak) {
 653     __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
 654   } else {
 655     assert(is_phantom, "only remaining strength");
 656     __ call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
 657   }
 658   __ jmp(*stub->continuation());
 659 }
 660 
 661 #undef __
 662 
 663 #define __ sasm->
 664 
 665 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
 666   __ prologue("shenandoah_pre_barrier", false);
 667   // arg0 : previous value of memory
 668 
 669   __ push(rax);
 670   __ push(rdx);
 671 
 672   const Register pre_val = rax;
 673   const Register thread = r15_thread;
 674   const Register tmp = rdx;
 675 
 676   Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 677   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 678 
 679   Label done;
 680   Label runtime;
 681 
 682   // Is SATB still active?
 683   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 684   __ testb(gc_state, ShenandoahHeap::MARKING);
 685   __ jcc(Assembler::zero, done);
 686 
 687   // Can we store original value in the thread's buffer?
 688 
 689   __ movptr(tmp, queue_index);
 690   __ testptr(tmp, tmp);
 691   __ jcc(Assembler::zero, runtime);
 692   __ subptr(tmp, wordSize);
 693   __ movptr(queue_index, tmp);
 694   __ addptr(tmp, buffer);
 695 
 696   // prev_val (rax)
 697   __ load_parameter(0, pre_val);
 698   __ movptr(Address(tmp, 0), pre_val);
 699   __ jmp(done);
 700 
 701   __ bind(runtime);
 702 
 703   __ save_live_registers_no_oop_map(true);
 704 
 705   // load the pre-value
 706   __ load_parameter(0, rcx);
 707   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), rcx);
 708 
 709   __ restore_live_registers(true);
 710 
 711   __ bind(done);
 712 
 713   __ pop(rdx);
 714   __ pop(rax);
 715 
 716   __ epilogue();
 717 }
 718 
 719 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
 720   __ prologue("shenandoah_load_reference_barrier", false);
 721   // arg0 : object to be resolved
 722 
 723   __ save_live_registers_no_oop_map(true);
 724 
 725   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 726   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 727   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 728   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 729 
 730   __ load_parameter(0, c_rarg0);
 731   __ load_parameter(1, c_rarg1);
 732   if (is_strong) {
 733     if (is_native) {
 734       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
 735     } else {
 736       if (UseCompressedOops) {
 737         __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), c_rarg0, c_rarg1);
 738       } else {
 739         __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
 740       }
 741     }
 742   } else if (is_weak) {
 743     assert(!is_native, "weak must not be called off-heap");
 744     if (UseCompressedOops) {
 745       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
 746     } else {
 747       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
 748     }
 749   } else {
 750     assert(is_phantom, "only remaining strength");
 751     assert(is_native, "phantom must only be called off-heap");
 752     __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
 753   }
 754 
 755   __ restore_live_registers_except_rax(true);
 756 
 757   __ epilogue();
 758 }
 759 
 760 #undef __
 761 
 762 #endif // COMPILER1
 763 
 764 #ifdef COMPILER2
 765 
 766 #undef __
 767 #define __ masm->
 768 
 769 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow) {
 770   // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
 771   if (narrow) {
 772     __ movl(dst, src);
 773   } else {
 774     __ movq(dst, src);
 775   }
 776 
 777   ShenandoahBarrierStubC2::load_post(masm, node, dst, src, noreg, noreg, narrow);
 778 }
 779 
 780 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
 781                                              Address dst, bool dst_narrow,
 782                                              Register src, bool src_narrow,
 783                                              Register tmp) {
 784 
 785   ShenandoahBarrierStubC2::store_pre(masm, node, dst, tmp, noreg, noreg, dst_narrow);
 786 
 787   // Need to encode into tmp, because we cannot clobber src.
 788   if (dst_narrow && !src_narrow) {
 789     __ movq(tmp, src);
 790     if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
 791       __ encode_heap_oop(tmp);
 792     } else {
 793       __ encode_heap_oop_not_null(tmp);
 794     }
 795     src = tmp;
 796   }
 797 
 798   // Do the actual store
 799   if (dst_narrow) {
 800     __ movl(dst, src);
 801   } else {
 802     __ movq(dst, src);
 803   }
 804 
 805   ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp, noreg);
 806 }
 807 
 808 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm,
 809                                                        Register res, Address addr,
 810                                                        Register oldval, Register newval, Register tmp,
 811                                                        bool narrow) {
 812 
 813   assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
 814 
 815   // Oldval and newval can be in the same register, but all other registers should be
 816   // distinct for extra safety, as we shuffle register values around.
 817   assert_different_registers(oldval, tmp, addr.base(), addr.index());
 818   assert_different_registers(newval, tmp, addr.base(), addr.index());
 819 
 820   ShenandoahBarrierStubC2::load_store_pre(masm, node, addr, tmp, noreg, noreg, narrow);
 821 
 822   // CAS!
 823   __ lock();
 824   if (narrow) {
 825     __ cmpxchgl(newval, addr);
 826   } else {
 827     __ cmpxchgptr(newval, addr);
 828   }
 829 
 830   // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
 831   if (res != noreg) {
 832     __ setcc(Assembler::equal, res);
 833   }
 834 
 835   ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
 836 }
 837 
 838 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp, bool narrow) {
 839   assert_different_registers(newval, tmp, addr.base(), addr.index());
 840 
 841   ShenandoahBarrierStubC2::load_store_pre(masm, node, addr, tmp, noreg, noreg, narrow);
 842 
 843   if (narrow) {
 844     __ xchgl(newval, addr);
 845   } else {
 846     __ xchgq(newval, addr);
 847   }
 848 
 849   ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
 850 }
 851 
 852 #undef __
 853 #define __ masm.
 854 
 855 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address addr, Register tmp1, Register tmp2) {
 856   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 857 
 858   __ lea(tmp1, addr);
 859   __ shrptr(tmp1, CardTable::card_shift());
 860   __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
 861   Address card_address(tmp1, 0);
 862 
 863   assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
 864   Label L_done;
 865   if (UseCondCardMark) {
 866     __ cmpb(card_address, 0);
 867     __ jccb(Assembler::equal, L_done);
 868   }
 869   if (UseCompressedOops && CompressedOops::base() == nullptr) {
 870     __ movb(card_address, r12);
 871   } else {
 872     __ movb(card_address, 0);
 873   }
 874   __ bind(L_done);
 875 }
 876 
 877 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
 878   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 879 
 880   // Emit the unconditional branch in the first version of the method.
 881   // Let the rest of runtime figure out how to manage it.
 882   __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(test_state));
 883   __ jmp(*entry(), /* maybe_short = */ false);
 884 
 885   __ bind(*continuation());
 886 }
 887 
 888 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
 889   NativeInstruction* ni = nativeInstruction_at(pc);
 890   assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
 891   NativeJump* jmp = nativeJump_at(pc);
 892   return jmp->jump_destination();
 893 }
 894 
 895 static void insert_5_byte_nop(address pc) {
 896   *(pc + 0) = 0x0F;
 897   *(pc + 1) = 0x1F;
 898   *(pc + 2) = 0x44;
 899   *(pc + 3) = 0x00;
 900   *(pc + 4) = 0x00;
 901   ICache::invalidate_range(pc, 5);
 902 }
 903 
 904 static bool is_5_byte_nop(address pc) {
 905   if (*(pc + 0) != 0x0F) return false;
 906   if (*(pc + 1) != 0x1F) return false;
 907   if (*(pc + 2) != 0x44) return false;
 908   if (*(pc + 3) != 0x00) return false;
 909   if (*(pc + 4) != 0x00) return false;
 910   return true;
 911 }
 912 
 913 static void check_at(bool cond, address pc, const char* msg) {
 914   assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
 915          msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
 916 }
 917 
 918 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
 919   NativeInstruction* ni = nativeInstruction_at(pc);
 920   return ni->is_jump();
 921 }
 922 
 923 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
 924   NativeInstruction* ni = nativeInstruction_at(pc);
 925   if (ni->is_jump()) {
 926     insert_5_byte_nop(pc);
 927   } else {
 928     check_at(is_5_byte_nop(pc), pc, "Should already be nop");
 929   }
 930 }
 931 
 932 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
 933   NativeInstruction* ni = nativeInstruction_at(pc);
 934   if (is_5_byte_nop(pc)) {
 935     NativeJump::insert(pc, stub_addr);
 936   } else {
 937     check_at(ni->is_jump(), pc, "Should already be jump");
 938     check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
 939   }
 940 }
 941 
 942 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
 943   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 944   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
 945 
 946   // On x86, there is a significant penalty with unaligned branch target, for example
 947   // when the target instruction straggles the fetch line. It makes (performance) sense
 948   // to spend some code size to align the target better.
 949   __ align(16);
 950   __ bind(*entry());
 951 
 952   // If we need to load ourselves, do it here.
 953   if (_do_load) {
 954     if (_narrow) {
 955       __ movl(_obj, _addr);
 956     } else {
 957       __ movq(_obj, _addr);
 958     }
 959   }
 960 
 961   // If the object is null, there is no point in applying barriers.
 962   maybe_far_jump_if_zero(masm, _obj);
 963 
 964   // We need to make sure that loads done by callers survive across slow-path calls.
 965   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
 966   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
 967   if (!_do_load || needs_both_barriers) {
 968     preserve(_obj);
 969   }
 970 
 971   // Go for barriers. Barriers can return straight to continuation, as long
 972   // as another barrier is not needed.
 973   if (needs_both_barriers) {
 974     keepalive(masm, nullptr);
 975     lrb(masm);
 976   } else if (_needs_keep_alive_barrier) {
 977     keepalive(masm, continuation());
 978   } else if (_needs_load_ref_barrier) {
 979     lrb(masm);
 980   } else {
 981     ShouldNotReachHere();
 982   }
 983 }
 984 
 985 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
 986   Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 987   Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 988 
 989   Label L_through, L_pop_and_slow;
 990 
 991   // If another barrier is enabled as well, do a check for a specific barrier.
 992   if (_needs_load_ref_barrier) {
 993     assert(L_done == nullptr, "Should be");
 994     // Emit the unconditional branch in the first version of the method.
 995     // Let the rest of runtime figure out how to manage it.
 996     // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
 997     char state_to_check = ShenandoahHeap::MARKING;
 998     Label L_over;
 999     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1000     __ jmp(L_over, /* maybe_short = */ false);
1001     __ jmp(L_through);
1002     __ bind(L_over);
1003   }
1004 
1005   // Need temp to work, allocate one now.
1006   bool tmp_live;
1007   Register tmp = select_temp_register(tmp_live);
1008   if (tmp_live) {
1009     __ push(tmp);
1010   }
1011 
1012   // Fast-path: put object into buffer.
1013   // If buffer is already full, go slow.
1014   __ movptr(tmp, index);
1015   __ subptr(tmp, wordSize);
1016   __ jccb(Assembler::below, L_pop_and_slow);
1017   __ movptr(index, tmp);
1018   __ addptr(tmp, buffer);
1019 
1020   // Store the object in queue.
1021   // If object is narrow, we need to decode it before inserting.
1022   // We can skip the re-encoding if we know that object is not preserved.
1023   if (_narrow) {
1024     __ decode_heap_oop_not_null(_obj);
1025   }
1026   __ movptr(Address(tmp, 0), _obj);
1027   if (_narrow && is_preserved(_obj)) {
1028     __ encode_heap_oop_not_null(_obj);
1029   }
1030 
1031   // Fast-path exits here.
1032   if (tmp_live) {
1033     __ pop(tmp);
1034   }
1035 
1036   if (L_done != nullptr) {
1037     __ jmp(*L_done);
1038   } else {
1039     __ jmp(L_through);
1040   }
1041 
1042   // Slow-path: call runtime to handle.
1043   // Need to pop tmp immediately for stack to remain aligned.
1044   __ bind(L_pop_and_slow);
1045   if (tmp_live) {
1046     __ pop(tmp);
1047   }
1048   {
1049     SaveLiveRegisters slr(&masm, this);
1050 
1051     // Shuffle in the arguments. The end result should be:
1052     //   c_rarg0 <-- obj
1053     if (c_rarg0 != _obj) {
1054       __ mov(c_rarg0, _obj);
1055     }
1056 
1057     // Go to runtime and handle the rest there.
1058     // Use rax as scratch, as it will be saved if live.
1059     __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1060   }
1061   if (L_done != nullptr) {
1062     __ jmp(*L_done);
1063   } else {
1064     __ bind(L_through);
1065   }
1066 }
1067 
1068 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1069   Label L_pop_and_slow, L_slow;
1070 
1071   // If weak references are being processed, weak/phantom loads need to go slow,
1072   // regardless of their cset status.
1073   if (_needs_load_ref_weak_barrier) {
1074     char state_to_check = ShenandoahHeap::WEAK_ROOTS;
1075     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1076     __ jmp(L_slow, /* maybe_short = */ false);
1077   }
1078 
1079   if (_needs_keep_alive_barrier) {
1080     // Emit the unconditional branch in the first version of the method.
1081     // Let the rest of runtime figure out how to manage it.
1082     // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
1083     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1084     Label L_over;
1085     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1086     __ jmp(L_over, /* maybe_short = */ false);
1087     __ jmp(*continuation());
1088     __ bind(L_over);
1089   }
1090 
1091   bool is_aot = AOTCodeCache::is_on_for_dump();
1092 
1093   // Need temp to work, allocate one now.
1094   bool tmp_live;
1095   Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1096   if (tmp_live) {
1097     __ push(tmp);
1098   }
1099 
1100   // Compute the cset bitmap index
1101   if (_narrow) {
1102     __ decode_heap_oop_not_null(tmp, _obj);
1103   } else {
1104     __ movptr(tmp, _obj);
1105   }
1106 
1107   Address cset_addr_arg;
1108   intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
1109   if (!is_aot && cset_addr < INT32_MAX) {
1110     // Cset bitmap is at easily encodeable address. Just use it as displacement.
1111     __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1112     cset_addr_arg = Address(tmp, checked_cast<int>(cset_addr));
1113   } else {
1114     bool tmp2_live;
1115     Register tmp2 = select_temp_register(tmp2_live, /* skip_reg1 = */ tmp, /* skip_reg2 = */ is_aot ? rcx : noreg);
1116     if (tmp2_live) {
1117       __ push(tmp2);
1118     }
1119     if (is_aot) {
1120       // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1121       assert_different_registers(tmp, tmp2, rcx);
1122       __ push(rcx);
1123       __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1124       __ movl(rcx, Address(rcx));
1125       __ shrptr(tmp);
1126       __ pop(rcx);
1127       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1128       __ addptr(tmp, Address(tmp2));
1129     } else {
1130       // Cset bitmap is far away. Add its address fully.
1131       __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1132       __ movptr(tmp2, cset_addr);
1133       __ addptr(tmp, tmp2);
1134     }
1135     if (tmp2_live) {
1136       __ pop(tmp2);
1137     }
1138     cset_addr_arg = Address(tmp, 0);
1139   }
1140 
1141   // Cset-check. Fall-through to slow if in collection set.
1142   __ cmpb(cset_addr_arg, 0);
1143   if (tmp_live) {
1144     __ jccb(Assembler::notEqual, L_pop_and_slow);
1145     __ pop(tmp);
1146     __ jmp(*continuation());
1147   } else {
1148     // Nothing else to do, jump back
1149     __ jcc(Assembler::equal, *continuation());
1150   }
1151 
1152   // Slow path
1153   __ bind(L_pop_and_slow);
1154   // Need to pop tmp immediately for stack to remain aligned.
1155   if (tmp_live) {
1156     __ pop(tmp);
1157   }
1158   __ bind(L_slow);
1159 
1160   // Obj is the result, need to temporarily stop preserving it.
1161   bool is_obj_preserved = is_preserved(_obj);
1162   if (is_obj_preserved) {
1163     dont_preserve(_obj);
1164   }
1165   {
1166     SaveLiveRegisters slr(&masm, this);
1167 
1168     assert_different_registers(rax, c_rarg0, c_rarg1);
1169 
1170     // Shuffle in the arguments. The end result should be:
1171     //   c_rarg0 <-- obj
1172     //   c_rarg1 <-- lea(addr)
1173     if (_obj == c_rarg0) {
1174       __ lea(c_rarg1, _addr);
1175     } else if (_obj == c_rarg1) {
1176       // Set up arguments in reverse, and then flip them
1177       __ lea(c_rarg0, _addr);
1178       __ xchgptr(c_rarg0, c_rarg1);
1179     } else {
1180       assert_different_registers(_obj, c_rarg0, c_rarg1);
1181       __ lea(c_rarg1, _addr);
1182       __ movptr(c_rarg0, _obj);
1183     }
1184 
1185     // Go to runtime and handle the rest there.
1186     // Use rax as scratch, as it will be clobbered by result anyway.
1187     __ call(RuntimeAddress(lrb_runtime_entry_addr()), rax);
1188 
1189     // Save the result where needed.
1190     if (_narrow) {
1191       __ movl(_obj, rax);
1192     } else if (_obj != rax) {
1193       __ movptr(_obj, rax);
1194     }
1195   }
1196   if (is_obj_preserved) {
1197     preserve(_obj);
1198   }
1199 
1200   __ jmp(*continuation());
1201 }
1202 
1203 int ShenandoahBarrierStubC2::available_gp_registers() {
1204   return Register::available_gp_registers();
1205 }
1206 
1207 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1208   return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1209 }
1210 
1211 void ShenandoahBarrierStubC2::post_init() {
1212   // Do nothing.
1213 }
1214 
1215 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
1216   if (_narrow) {
1217     __ testl(reg, reg);
1218   } else {
1219     __ testq(reg, reg);
1220   }
1221   __ jcc(Assembler::zero, *continuation());
1222 }
1223 
1224 #endif // COMPILER2