1 /*
   2  * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
   4  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahForwarding.hpp"
  32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "nativeInst_x86.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "utilities/macros.hpp"
  41 #ifdef COMPILER1
  42 #include "c1/c1_LIRAssembler.hpp"
  43 #include "c1/c1_MacroAssembler.hpp"
  44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  45 #endif
  46 #ifdef COMPILER2
  47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  48 #endif
  49 
  50 #define __ masm->
  51 
  52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
  53                                                        Register src, Register dst, Register count) {
  54 
  55   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  56 
  57   if (is_reference_type(type)) {
  58     if (ShenandoahCardBarrier) {
  59       bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
  60       bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
  61       bool obj_int = (type == T_OBJECT) && UseCompressedOops;
  62 
  63       // We need to save the original element count because the array copy stub
  64       // will destroy the value and we need it for the card marking barrier.
  65       if (!checkcast) {
  66         if (!obj_int) {
  67           // Save count for barrier
  68           __ movptr(r11, count);
  69         } else if (disjoint) {
  70           // Save dst in r11 in the disjoint case
  71           __ movq(r11, dst);
  72         }
  73       }
  74     }
  75 
  76     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  77       Register thread = r15_thread;
  78       assert_different_registers(src, dst, count, thread);
  79 
  80       Label L_done;
  81       // Short-circuit if count == 0.
  82       __ testptr(count, count);
  83       __ jcc(Assembler::zero, L_done);
  84 
  85       // Avoid runtime call when not active.
  86       Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  87       int flags;
  88       if (ShenandoahSATBBarrier && dest_uninitialized) {
  89         flags = ShenandoahHeap::HAS_FORWARDED;
  90       } else {
  91         flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
  92       }
  93       __ testb(gc_state, flags);
  94       __ jcc(Assembler::zero, L_done);
  95 
  96       __ push_call_clobbered_registers(/* save_fpu = */ false);
  97       // If arguments are not in proper places, shuffle them.
  98       // Doing this via the stack is the most straight-forward way to avoid
  99       // accidentally smashing any register.
 100       if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
 101         __ push(src);
 102         __ push(dst);
 103         __ push(count);
 104         __ pop(c_rarg2);
 105         __ pop(c_rarg1);
 106         __ pop(c_rarg0);
 107       }
 108       address target = nullptr;
 109       if (UseCompressedOops) {
 110         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
 111       } else {
 112         target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
 113       }
 114       __ call_VM_leaf(target, 3);
 115 
 116       __ pop_call_clobbered_registers(/* restore_fpu = */ false);
 117 
 118       __ bind(L_done);
 119     }
 120   }
 121 
 122 }
 123 
 124 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 125                                                        Register src, Register dst, Register count) {
 126 
 127   if (ShenandoahCardBarrier && is_reference_type(type)) {
 128     bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
 129     bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
 130     bool obj_int = (type == T_OBJECT) && UseCompressedOops;
 131     Register tmp = rax;
 132 
 133     if (!checkcast) {
 134       if (!obj_int) {
 135         // Save count for barrier
 136         count = r11;
 137       } else if (disjoint) {
 138         // Use the saved dst in the disjoint case
 139         dst = r11;
 140       }
 141     } else {
 142       tmp = rscratch1;
 143     }
 144     gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp);
 145   }
 146 }
 147 
 148 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
 149                                                  Register obj,
 150                                                  Register pre_val,
 151                                                  Register tmp) {
 152   assert(ShenandoahSATBBarrier, "Should be checked by caller");
 153   const Register thread = r15_thread;
 154 
 155   Label done;
 156   Label runtime;
 157 
 158   assert(pre_val != noreg, "check this code");
 159 
 160   if (obj != noreg) {
 161     assert_different_registers(obj, pre_val, tmp);
 162     assert(pre_val != rax, "check this code");
 163   }
 164 
 165   Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 166   Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 167 
 168   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 169   __ testb(gc_state, ShenandoahHeap::MARKING);
 170   __ jcc(Assembler::zero, done);
 171 
 172   // Do we need to load the previous value?
 173   if (obj != noreg) {
 174     if (UseCompressedOops) {
 175       __ movl(pre_val, Address(obj, 0));
 176       __ decode_heap_oop(pre_val);
 177     } else {
 178       __ movq(pre_val, Address(obj, 0));
 179     }
 180   }
 181 
 182   // Is the previous value null?
 183   __ cmpptr(pre_val, NULL_WORD);
 184   __ jcc(Assembler::equal, done);
 185 
 186   // Can we store original value in the thread's buffer?
 187   // Is index == 0?
 188   // (The index field is typed as size_t.)
 189 
 190   __ movptr(tmp, index);                   // tmp := *index_adr
 191   __ cmpptr(tmp, 0);                       // tmp == 0?
 192   __ jcc(Assembler::equal, runtime);       // If yes, goto runtime
 193 
 194   __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
 195   __ movptr(index, tmp);                   // *index_adr := tmp
 196   __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
 197 
 198   // Record the previous value
 199   __ movptr(Address(tmp, 0), pre_val);
 200   __ jmp(done);
 201 
 202   __ bind(runtime);
 203 
 204   // Slow-path call.
 205   // Some paths can be reached from the c2i adapter with live fp arguments in registers.
 206   __ enter();
 207   __ push_call_clobbered_registers(/* save_fpu = */ true);
 208 
 209   assert(thread != c_rarg0, "smashed arg");
 210   if (c_rarg0 != pre_val) {
 211     __ mov(c_rarg0, pre_val);
 212   }
 213 
 214   // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
 215   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
 216 
 217   __ pop_call_clobbered_registers(/* restore_fpu = */ true);
 218   __ leave();
 219 
 220   __ bind(done);
 221 }
 222 
 223 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, DecoratorSet decorators) {
 224   assert(ShenandoahLoadRefBarrier, "Should be enabled");
 225 
 226   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 227   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 228   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 229   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 230   bool is_narrow  = UseCompressedOops && !is_native;
 231 
 232   Label heap_stable, not_cset;
 233 
 234   __ block_comment("load_reference_barrier { ");
 235 
 236   // Check if GC is active
 237   Register thread = r15_thread;
 238 
 239   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 240   int flags = ShenandoahHeap::HAS_FORWARDED;
 241   if (!is_strong) {
 242     flags |= ShenandoahHeap::WEAK_ROOTS;
 243   }
 244   __ testb(gc_state, flags);
 245   __ jcc(Assembler::zero, heap_stable);
 246 
 247   Register tmp1 = noreg, tmp2 = noreg;
 248   if (is_strong) {
 249     // Test for object in cset
 250     // Allocate temporary registers
 251     for (int i = 0; i < Register::available_gp_registers(); i++) {
 252       Register r = as_Register(i);
 253       if (r != rsp && r != rbp && r != rcx && r != dst && r != src.base() && r != src.index() ) {
 254         if (tmp1 == noreg) {
 255           tmp1 = r;
 256         } else {
 257           tmp2 = r;
 258           break;
 259         }
 260       }
 261     }
 262     assert(tmp1 != noreg, "tmp1 allocated");
 263     assert(tmp2 != noreg, "tmp2 allocated");
 264     assert_different_registers(tmp1, tmp2, src.base(), src.index());
 265     assert_different_registers(tmp1, tmp2, dst);
 266 
 267     __ push(tmp1);
 268     __ push(tmp2);
 269 
 270     // Optimized cset-test
 271     __ movptr(tmp1, dst);
 272     if (AOTCodeCache::is_on_for_dump()) {
 273       assert_different_registers(tmp1, tmp2, rcx);
 274       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
 275       __ push(rcx);
 276       __ movb(rcx, Address(tmp2));
 277       __ shrptr(tmp1);
 278       __ pop(rcx);
 279       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
 280       __ movptr(tmp2, Address(tmp2));
 281     } else {
 282       __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 283       __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
 284     }
 285     __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
 286     __ testbool(tmp1);
 287     __ jcc(Assembler::zero, not_cset);
 288   }
 289 
 290   // Slow-path call.
 291   // Save registers that can be clobbered by call.
 292   // Some paths can be reached from the c2i adapter with live fp arguments in registers.
 293   __ enter();
 294   if (dst != rax) {
 295     __ push(rax);
 296   }
 297   __ push_call_clobbered_registers_except(rax, /* save_fpu = */ true);
 298 
 299   // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
 300   if (dst == c_rarg1) {
 301     __ lea(c_rarg0, src);
 302     __ xchgptr(c_rarg1, c_rarg0);
 303   } else {
 304     __ lea(c_rarg1, src);
 305     __ movptr(c_rarg0, dst);
 306   }
 307 
 308   address target = nullptr;
 309   if (is_strong) {
 310     if (is_narrow) {
 311       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
 312     } else {
 313       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
 314     }
 315   } else if (is_weak) {
 316     if (is_narrow) {
 317       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
 318     } else {
 319       target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
 320     }
 321   } else {
 322     assert(is_phantom, "only remaining strength");
 323     assert(!is_narrow, "phantom access cannot be narrow");
 324     target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
 325   }
 326 
 327   // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
 328   __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
 329   __ pop_call_clobbered_registers_except(rax, /* restore_fpu = */ true);
 330   if (dst != rax) {
 331     __ movptr(dst, rax);
 332     __ pop(rax);
 333   }
 334   __ leave();
 335 
 336   __ bind(not_cset);
 337 
 338   if  (is_strong) {
 339     __ pop(tmp2);
 340     __ pop(tmp1);
 341   }
 342 
 343   __ bind(heap_stable);
 344 
 345   __ block_comment("} load_reference_barrier");
 346 }
 347 
 348 //
 349 // Arguments:
 350 //
 351 // Inputs:
 352 //   src:        oop location, might be clobbered
 353 //   tmp1:       scratch register, might not be valid.
 354 //
 355 // Output:
 356 //   dst:        oop loaded from src location
 357 //
 358 // Kill:
 359 //   tmp1 (if it is valid)
 360 //
 361 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 362              Register dst, Address src, Register tmp1) {
 363   // 1: non-reference load, no additional barrier is needed
 364   if (!is_reference_type(type)) {
 365     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
 366     return;
 367   }
 368 
 369   assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");
 370 
 371   // 2: load a reference from src location and apply LRB if needed
 372   if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
 373     Register result_dst = dst;
 374     bool use_tmp1_for_dst = false;
 375 
 376     // Preserve src location for LRB
 377     if (dst == src.base() || dst == src.index()) {
 378     // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
 379       if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
 380         dst = tmp1;
 381         use_tmp1_for_dst = true;
 382       } else {
 383         dst = rdi;
 384         __ push(dst);
 385       }
 386       assert_different_registers(dst, src.base(), src.index());
 387     }
 388 
 389     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
 390 
 391     load_reference_barrier(masm, dst, src, decorators);
 392 
 393     // Move loaded oop to final destination
 394     if (dst != result_dst) {
 395       __ movptr(result_dst, dst);
 396 
 397       if (!use_tmp1_for_dst) {
 398         __ pop(dst);
 399       }
 400 
 401       dst = result_dst;
 402     }
 403   } else {
 404     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
 405   }
 406 
 407   // 3: apply keep-alive barrier if needed
 408   if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
 409     satb_barrier(masm /* masm */,
 410                  noreg /* obj */,
 411                  dst /* pre_val */,
 412                  tmp1 /* tmp */);
 413   }
 414 }
 415 
 416 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
 417   assert(ShenandoahCardBarrier, "Should have been checked by caller");
 418 
 419   // Does a store check for the oop in register obj. The content of
 420   // register obj is destroyed afterwards.
 421   __ shrptr(obj, CardTable::card_shift());
 422 
 423   // We'll use this register as the TLS base address and also later on
 424   // to hold the byte_map_base.
 425   Register thread = r15_thread;
 426   Register tmp = rscratch1;
 427 
 428   Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 429   __ movptr(tmp, curr_ct_holder_addr);
 430   Address card_addr(tmp, obj, Address::times_1);
 431 
 432   int dirty = CardTable::dirty_card_val();
 433   if (UseCondCardMark) {
 434     Label L_already_dirty;
 435     __ cmpb(card_addr, dirty);
 436     __ jccb(Assembler::equal, L_already_dirty);
 437     __ movb(card_addr, dirty);
 438     __ bind(L_already_dirty);
 439   } else {
 440     __ movb(card_addr, dirty);
 441   }
 442 }
 443 
 444 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 445               Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
 446 
 447   // 1: non-reference types require no barriers
 448   if (!is_reference_type(type)) {
 449     BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
 450     return;
 451   }
 452 
 453   // Flatten object address right away for simplicity: likely needed by barriers
 454   assert_different_registers(val, tmp1, tmp2, tmp3, r15_thread);
 455   if (dst.index() == noreg && dst.disp() == 0) {
 456     if (dst.base() != tmp1) {
 457       __ movptr(tmp1, dst.base());
 458     }
 459   } else {
 460     __ lea(tmp1, dst);
 461   }
 462 
 463   // 2: pre-barrier: SATB needs the previous value
 464   if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
 465     satb_barrier(masm,
 466                  tmp1 /* obj */,
 467                  tmp2 /* pre_val */,
 468                  tmp3 /* tmp */);
 469   }
 470 
 471   // Store!
 472   BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
 473 
 474   // 3: post-barrier: card barrier needs store address
 475   bool storing_non_null = (val != noreg);
 476   if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
 477     card_barrier(masm, tmp1);
 478   }
 479 }
 480 
 481 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
 482                                                                   Register obj, Register tmp, Label& slowpath) {
 483   Label done;
 484   // Resolve jobject
 485   BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
 486 
 487   // Check for null.
 488   __ testptr(obj, obj);
 489   __ jcc(Assembler::zero, done);
 490 
 491   Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
 492   __ testb(gc_state, ShenandoahHeap::EVACUATION);
 493   __ jccb(Assembler::notZero, slowpath);
 494   __ bind(done);
 495 }
 496 
 497 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj, Label& slowpath) {
 498   Label done;
 499 
 500   // Peek weak handle using the standard implementation.
 501   BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, slowpath);
 502 
 503   // Check if the reference is null, and if it is, take the fast path.
 504   __ testptr(obj, obj);
 505   __ jcc(Assembler::zero, done);
 506 
 507   Address gc_state(r15_thread, ShenandoahThreadLocalData::gc_state_offset());
 508 
 509   // Check if the heap is under weak-reference/roots processing, in
 510   // which case we need to take the slow path.
 511   __ testb(gc_state, ShenandoahHeap::WEAK_ROOTS);
 512   __ jcc(Assembler::notZero, slowpath);
 513   __ bind(done);
 514 }
 515 
 516 // Special Shenandoah CAS implementation that handles false negatives
 517 // due to concurrent evacuation.
 518 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
 519                                                 Register res, Address addr, Register oldval, Register newval,
 520                                                 bool exchange, Register tmp1, Register tmp2) {
 521   assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
 522   assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
 523   assert_different_registers(oldval, tmp1, tmp2);
 524   assert_different_registers(newval, tmp1, tmp2);
 525 
 526   Label L_success, L_failure;
 527 
 528   // Remember oldval for retry logic below
 529   if (UseCompressedOops) {
 530     __ movl(tmp1, oldval);
 531   } else {
 532     __ movptr(tmp1, oldval);
 533   }
 534 
 535   // Step 1. Fast-path.
 536   //
 537   // Try to CAS with given arguments. If successful, then we are done.
 538 
 539   if (UseCompressedOops) {
 540     __ lock();
 541     __ cmpxchgl(newval, addr);
 542   } else {
 543     __ lock();
 544     __ cmpxchgptr(newval, addr);
 545   }
 546   __ jcc(Assembler::equal, L_success);
 547 
 548   // Step 2. CAS had failed. This may be a false negative.
 549   //
 550   // The trouble comes when we compare the to-space pointer with the from-space
 551   // pointer to the same object. To resolve this, it will suffice to resolve
 552   // the value from memory -- this will give both to-space pointers.
 553   // If they mismatch, then it was a legitimate failure.
 554   //
 555   // Before reaching to resolve sequence, see if we can avoid the whole shebang
 556   // with filters.
 557 
 558   // Filter: when offending in-memory value is null, the failure is definitely legitimate
 559   __ testptr(oldval, oldval);
 560   __ jcc(Assembler::zero, L_failure);
 561 
 562   // Filter: when heap is stable, the failure is definitely legitimate
 563   const Register thread = r15_thread;
 564   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 565   __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
 566   __ jcc(Assembler::zero, L_failure);
 567 
 568   if (UseCompressedOops) {
 569     __ movl(tmp2, oldval);
 570     __ decode_heap_oop(tmp2);
 571   } else {
 572     __ movptr(tmp2, oldval);
 573   }
 574 
 575   // Decode offending in-memory value.
 576   // Test if-forwarded
 577   __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
 578   __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
 579   __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded
 580 
 581   // Load and mask forwarding pointer
 582   __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
 583   __ shrptr(tmp2, 2);
 584   __ shlptr(tmp2, 2);
 585 
 586   if (UseCompressedOops) {
 587     __ decode_heap_oop(tmp1); // decode for comparison
 588   }
 589 
 590   // Now we have the forwarded offender in tmp2.
 591   // Compare and if they don't match, we have legitimate failure
 592   __ cmpptr(tmp1, tmp2);
 593   __ jcc(Assembler::notEqual, L_failure);
 594 
 595   // Step 3. Need to fix the memory ptr before continuing.
 596   //
 597   // At this point, we have from-space oldval in the register, and its to-space
 598   // address is in tmp2. Let's try to update it into memory. We don't care if it
 599   // succeeds or not. If it does, then the retrying CAS would see it and succeed.
 600   // If this fixup fails, this means somebody else beat us to it, and necessarily
 601   // with to-space ptr store. We still have to do the retry, because the GC might
 602   // have updated the reference for us.
 603 
 604   if (UseCompressedOops) {
 605     __ encode_heap_oop(tmp2); // previously decoded at step 2.
 606   }
 607 
 608   if (UseCompressedOops) {
 609     __ lock();
 610     __ cmpxchgl(tmp2, addr);
 611   } else {
 612     __ lock();
 613     __ cmpxchgptr(tmp2, addr);
 614   }
 615 
 616   // Step 4. Try to CAS again.
 617   //
 618   // This is guaranteed not to have false negatives, because oldval is definitely
 619   // to-space, and memory pointer is to-space as well. Nothing is able to store
 620   // from-space ptr into memory anymore. Make sure oldval is restored, after being
 621   // garbled during retries.
 622   //
 623   if (UseCompressedOops) {
 624     __ movl(oldval, tmp2);
 625   } else {
 626     __ movptr(oldval, tmp2);
 627   }
 628 
 629   if (UseCompressedOops) {
 630     __ lock();
 631     __ cmpxchgl(newval, addr);
 632   } else {
 633     __ lock();
 634     __ cmpxchgptr(newval, addr);
 635   }
 636   if (!exchange) {
 637     __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
 638   }
 639 
 640   // Step 5. If we need a boolean result out of CAS, set the flag appropriately.
 641   // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
 642   // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.
 643 
 644   if (exchange) {
 645     __ bind(L_failure);
 646     __ bind(L_success);
 647   } else {
 648     assert(res != noreg, "need result register");
 649 
 650     Label exit;
 651     __ bind(L_failure);
 652     __ xorptr(res, res);
 653     __ jmpb(exit);
 654 
 655     __ bind(L_success);
 656     __ movptr(res, 1);
 657     __ bind(exit);
 658   }
 659 }
 660 
 661 #ifdef PRODUCT
 662 #define BLOCK_COMMENT(str) /* nothing */
 663 #else
 664 #define BLOCK_COMMENT(str) __ block_comment(str)
 665 #endif
 666 
 667 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 668 
 669 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
 670 
 671 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
 672                                                                      Register addr, Register count,
 673                                                                      Register tmp) {
 674   assert(ShenandoahCardBarrier, "Should have been checked by caller");
 675 
 676   Label L_loop, L_done;
 677   const Register end = count;
 678   assert_different_registers(addr, end);
 679 
 680   // Zero count? Nothing to do.
 681   __ testl(count, count);
 682   __ jccb(Assembler::zero, L_done);
 683 
 684   const Register thread = r15_thread;
 685   Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 686   __ movptr(tmp, curr_ct_holder_addr);
 687 
 688   __ leaq(end, Address(addr, count, TIMES_OOP, 0));  // end == addr+count*oop_size
 689   __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
 690   __ shrptr(addr, CardTable::card_shift());
 691   __ shrptr(end, CardTable::card_shift());
 692   __ subptr(end, addr); // end --> cards count
 693 
 694   __ addptr(addr, tmp);
 695 
 696   __ BIND(L_loop);
 697   __ movb(Address(addr, count, Address::times_1), 0);
 698   __ decrement(count);
 699   __ jccb(Assembler::greaterEqual, L_loop);
 700 
 701   __ BIND(L_done);
 702 }
 703 
 704 #undef __
 705 
 706 #ifdef COMPILER1
 707 
 708 #define __ ce->masm()->
 709 
 710 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
 711   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 712   // At this point we know that marking is in progress.
 713   // If do_load() is true then we have to emit the
 714   // load of the previous value; otherwise it has already
 715   // been loaded into _pre_val.
 716 
 717   __ bind(*stub->entry());
 718   assert(stub->pre_val()->is_register(), "Precondition.");
 719 
 720   Register pre_val_reg = stub->pre_val()->as_register();
 721 
 722   if (stub->do_load()) {
 723     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
 724   }
 725 
 726   __ cmpptr(pre_val_reg, NULL_WORD);
 727   __ jcc(Assembler::equal, *stub->continuation());
 728   ce->store_parameter(stub->pre_val()->as_register(), 0);
 729   __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
 730   __ jmp(*stub->continuation());
 731 
 732 }
 733 
 734 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
 735   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 736   __ bind(*stub->entry());
 737 
 738   DecoratorSet decorators = stub->decorators();
 739   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 740   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 741   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 742   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 743 
 744   Register obj = stub->obj()->as_register();
 745   Register res = stub->result()->as_register();
 746   Register addr = stub->addr()->as_pointer_register();
 747   Register tmp1 = stub->tmp1()->as_register();
 748   Register tmp2 = stub->tmp2()->as_register();
 749   assert_different_registers(obj, res, addr, tmp1, tmp2);
 750 
 751   Label slow_path;
 752 
 753   assert(res == rax, "result must arrive in rax");
 754 
 755   if (res != obj) {
 756     __ mov(res, obj);
 757   }
 758 
 759   if (is_strong) {
 760     // Check for object being in the collection set.
 761     __ mov(tmp1, res);
 762     if (AOTCodeCache::is_on_for_dump()) {
 763       __ push(rcx);
 764       __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
 765       __ movl(rcx, Address(rcx));
 766       if (tmp1 != rcx) {
 767         __ mov(tmp1, res);
 768         __ shrptr(tmp1);
 769         __ pop(rcx);
 770       } else {
 771         assert_different_registers(tmp2, rcx);
 772         __ mov(tmp2, res);
 773         __ shrptr(tmp2);
 774         __ pop(rcx);
 775         __ movptr(tmp1, tmp2);
 776       }
 777       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
 778       __ movptr(tmp2, Address(tmp2));
 779     } else {
 780       __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 781       __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
 782     }
 783     __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
 784     __ testbool(tmp2);
 785     __ jcc(Assembler::zero, *stub->continuation());
 786   }
 787 
 788   __ bind(slow_path);
 789   ce->store_parameter(res, 0);
 790   ce->store_parameter(addr, 1);
 791   if (is_strong) {
 792     if (is_native) {
 793       __ call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
 794     } else {
 795       __ call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
 796     }
 797   } else if (is_weak) {
 798     __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
 799   } else {
 800     assert(is_phantom, "only remaining strength");
 801     __ call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
 802   }
 803   __ jmp(*stub->continuation());
 804 }
 805 
 806 #undef __
 807 
 808 #define __ sasm->
 809 
// Emits the C1 runtime stub for the Shenandoah pre-write barrier.
//
// The stub receives one stack parameter (arg0): the previous value of the
// memory location. If the thread-local gc_state has the MARKING bit set, that
// value is stored into the thread's SATB mark queue buffer. If the queue index
// is zero, ShenandoahRuntime::write_barrier_pre is called instead.
// rax and rdx are used as scratch and are saved/restored around the stub body.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  // Free up two scratch registers; both are popped again at "done".
  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = r15_thread;
  const Register tmp = rdx;

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?

  // A zero index takes the runtime path. Otherwise the index is moved down by
  // one word, written back, and added to the buffer base to form the slot address.
  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  // Registers are saved before and restored after the leaf call below.
  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), rcx);

  __ restore_live_registers(true);

  __ bind(done);

  // Restore the scratch registers in reverse push order.
  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}
 863 
 864 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
 865   __ prologue("shenandoah_load_reference_barrier", false);
 866   // arg0 : object to be resolved
 867 
 868   __ save_live_registers_no_oop_map(true);
 869 
 870   bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
 871   bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
 872   bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
 873   bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
 874 
 875   __ load_parameter(0, c_rarg0);
 876   __ load_parameter(1, c_rarg1);
 877   if (is_strong) {
 878     if (is_native) {
 879       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
 880     } else {
 881       if (UseCompressedOops) {
 882         __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), c_rarg0, c_rarg1);
 883       } else {
 884         __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
 885       }
 886     }
 887   } else if (is_weak) {
 888     assert(!is_native, "weak must not be called off-heap");
 889     if (UseCompressedOops) {
 890       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
 891     } else {
 892       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
 893     }
 894   } else {
 895     assert(is_phantom, "only remaining strength");
 896     assert(is_native, "phantom must only be called off-heap");
 897     __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
 898   }
 899 
 900   __ restore_live_registers_except_rax(true);
 901 
 902   __ epilogue();
 903 }
 904 
 905 #undef __
 906 
 907 #endif // COMPILER1
 908 
 909 #ifdef COMPILER2
 910 
 911 #undef __
 912 #define __ masm->
 913 
 914 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow) {
 915   // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
 916   if (narrow) {
 917     __ movl(dst, src);
 918   } else {
 919     __ movq(dst, src);
 920   }
 921 
 922   ShenandoahBarrierStubC2::load_post(masm, node, dst, src, noreg, noreg, narrow);
 923 }
 924 
 925 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
 926                                              Address dst, bool dst_narrow,
 927                                              Register src, bool src_narrow,
 928                                              Register tmp) {
 929 
 930   ShenandoahBarrierStubC2::store_pre(masm, node, tmp, dst, noreg, noreg, dst_narrow);
 931 
 932   // Need to encode into tmp, because we cannot clobber src.
 933   if (dst_narrow && !src_narrow) {
 934     __ movq(tmp, src);
 935     if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
 936       __ encode_heap_oop(tmp);
 937     } else {
 938       __ encode_heap_oop_not_null(tmp);
 939     }
 940     src = tmp;
 941   }
 942 
 943   // Do the actual store
 944   if (dst_narrow) {
 945     __ movl(dst, src);
 946   } else {
 947     __ movq(dst, src);
 948   }
 949 
 950   ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp, noreg);
 951 }
 952 
 953 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm,
 954                                                        Register res, Address addr,
 955                                                        Register oldval, Register newval, Register tmp,
 956                                                        bool narrow) {
 957 
 958   assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
 959 
 960   // Oldval and newval can be in the same register, but all other registers should be
 961   // distinct for extra safety, as we shuffle register values around.
 962   assert_different_registers(oldval, tmp, addr.base(), addr.index());
 963   assert_different_registers(newval, tmp, addr.base(), addr.index());
 964 
 965   ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
 966 
 967   // CAS!
 968   __ lock();
 969   if (narrow) {
 970     __ cmpxchgl(newval, addr);
 971   } else {
 972     __ cmpxchgptr(newval, addr);
 973   }
 974 
 975   // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
 976   if (res != noreg) {
 977     __ setcc(Assembler::equal, res);
 978   }
 979 
 980   ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
 981 }
 982 
 983 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp, bool narrow) {
 984   assert_different_registers(newval, tmp, addr.base(), addr.index());
 985 
 986   ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
 987 
 988   if (narrow) {
 989     __ xchgl(newval, addr);
 990   } else {
 991     __ xchgq(newval, addr);
 992   }
 993 
 994   ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
 995 }
 996 
 997 #undef __
 998 #define __ masm.
 999 
1000 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address addr, Register tmp1, Register tmp2) {
1001   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1002 
1003   __ lea(tmp1, addr);
1004   __ shrptr(tmp1, CardTable::card_shift());
1005   __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1006   Address card_address(tmp1, 0);
1007 
1008   assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1009   Label L_done;
1010   if (UseCondCardMark) {
1011     __ cmpb(card_address, 0);
1012     __ jccb(Assembler::equal, L_done);
1013   }
1014   if (UseCompressedOops && CompressedOops::base() == nullptr) {
1015     __ movb(card_address, r12);
1016   } else {
1017     __ movb(card_address, 0);
1018   }
1019   __ bind(L_done);
1020 }
1021 
1022 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
1023   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1024 
1025   // Emit the unconditional branch in the first version of the method.
1026   // Let the rest of runtime figure out how to manage it.
1027   __ relocate(barrier_Relocation::spec(), ShenandoahThreadLocalData::gc_state_to_fast_array_index(test_state));
1028   __ jmp(*entry(), /* maybe_short = */ false);
1029 
1030 #ifdef ASSERT
1031   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1032   __ cmpb(gc_state_fast, 0);
1033   __ jccb(Assembler::zero, *continuation());
1034   __ hlt(); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
1035 #endif
1036   // TODO: When barriers are consistently turned off at the end of the cycle, assert that barrier is NOP-ed.
1037 
1038   __ bind(*continuation());
1039 }
1040 
1041 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
1042   NativeInstruction* ni = nativeInstruction_at(pc);
1043   assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
1044   NativeJump* jmp = nativeJump_at(pc);
1045   return jmp->jump_destination();
1046 }
1047 
1048 void insert_5_byte_nop(address pc) {
1049   *(pc + 0) = 0x0F;
1050   *(pc + 1) = 0x1F;
1051   *(pc + 2) = 0x44;
1052   *(pc + 3) = 0x00;
1053   *(pc + 4) = 0x00;
1054   ICache::invalidate_range(pc, 5);
1055 }
1056 
1057 bool is_5_byte_nop(address pc) {
1058   if (*(pc + 0) != 0x0F) return false;
1059   if (*(pc + 1) != 0x1F) return false;
1060   if (*(pc + 2) != 0x44) return false;
1061   if (*(pc + 3) != 0x00) return false;
1062   if (*(pc + 4) != 0x00) return false;
1063   return true;
1064 }
1065 
1066 void check_at(bool cond, address pc, const char* msg) {
1067   assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
1068          msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
1069 }
1070 
1071 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
1072   NativeInstruction* ni = nativeInstruction_at(pc);
1073   return ni->is_jump();
1074 }
1075 
1076 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
1077   NativeInstruction* ni = nativeInstruction_at(pc);
1078   if (ni->is_jump()) {
1079     insert_5_byte_nop(pc);
1080   } else {
1081     check_at(is_5_byte_nop(pc), pc, "Should already be nop");
1082   }
1083 }
1084 
1085 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
1086   NativeInstruction* ni = nativeInstruction_at(pc);
1087   if (is_5_byte_nop(pc)) {
1088     NativeJump::insert(pc, stub_addr);
1089   } else {
1090     check_at(ni->is_jump(), pc, "Should already be jump");
1091     check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
1092   }
1093 }
1094 
1095 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1096   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1097   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1098 
1099   // On x86, there is a significant penalty with unaligned branch target, for example
1100   // when the target instruction straggles the fetch line. It makes (performance) sense
1101   // to spend some code size to align the target better.
1102   __ align(16);
1103   __ bind(*entry());
1104 
1105   // If we need to load ourselves, do it here.
1106   if (_do_load) {
1107     if (_narrow) {
1108       __ movl(_obj, _addr);
1109     } else {
1110       __ movq(_obj, _addr);
1111     }
1112   }
1113 
1114   // If the object is null, there is no point in applying barriers.
1115   maybe_far_jump_if_zero(masm, _obj, continuation());
1116 
1117   // We need to make sure that loads done by callers survive across slow-path calls.
1118   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1119   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1120   if (!_do_load || needs_both_barriers) {
1121     preserve(_obj);
1122   }
1123 
1124   // Go for barriers. Barriers can return straight to continuation, as long
1125   // as another barrier is not needed.
1126   if (needs_both_barriers) {
1127     keepalive(masm, nullptr);
1128     lrb(masm);
1129   } else if (_needs_keep_alive_barrier) {
1130     keepalive(masm, continuation());
1131   } else if (_needs_load_ref_barrier) {
1132     lrb(masm);
1133   } else {
1134     ShouldNotReachHere();
1135   }
1136 }
1137 
1138 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1139   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1140   Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1141   Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1142 
1143   Label L_through, L_pop_and_slow;
1144 
1145   // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1146   // Therefore, alas we need a separate check here. TODO: Figure this out.
1147   __ cmpb(gc_state_fast, 0);
1148   if (L_done != nullptr) {
1149     __ jcc(Assembler::equal, *L_done);
1150   } else {
1151     __ jcc(Assembler::equal, L_through);
1152   }
1153 
1154   // Need temp to work, allocate one now.
1155   bool tmp_live;
1156   Register tmp = select_temp_register(tmp_live);
1157   if (tmp_live) {
1158     __ push(tmp);
1159   }
1160 
1161   // Fast-path: put object into buffer.
1162   // If buffer is already full, go slow.
1163   __ movptr(tmp, index);
1164   __ subptr(tmp, wordSize);
1165   __ jccb(Assembler::below, L_pop_and_slow);
1166   __ movptr(index, tmp);
1167   __ addptr(tmp, buffer);
1168 
1169   // Store the object in queue.
1170   // If object is narrow, we need to decode it before inserting.
1171   // We can skip the re-encoding if we know that object is not preserved.
1172   if (_narrow) {
1173     __ decode_heap_oop_not_null(_obj);
1174   }
1175   __ movptr(Address(tmp, 0), _obj);
1176   if (_narrow && is_preserved(_obj)) {
1177     __ encode_heap_oop_not_null(_obj);
1178   }
1179 
1180   // Fast-path exits here.
1181   if (tmp_live) {
1182     __ pop(tmp);
1183   }
1184 
1185   if (L_done != nullptr) {
1186     __ jmp(*L_done);
1187   } else {
1188     __ jmp(L_through);
1189   }
1190 
1191   // Slow-path: call runtime to handle.
1192   // Need to pop tmp immediately for stack to remain aligned.
1193   __ bind(L_pop_and_slow);
1194   if (tmp_live) {
1195     __ pop(tmp);
1196   }
1197   {
1198     SaveLiveRegisters slr(&masm, this);
1199 
1200     // Shuffle in the arguments. The end result should be:
1201     //   c_rarg0 <-- obj
1202     if (c_rarg0 != _obj) {
1203       __ mov(c_rarg0, _obj);
1204     }
1205 
1206     // Go to runtime and handle the rest there.
1207     // Use rax as scratch, as it will be saved if live.
1208     __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1209   }
1210   if (L_done != nullptr) {
1211     __ jmp(*L_done);
1212   } else {
1213     __ bind(L_through);
1214   }
1215 }
1216 
// Emits the load-reference-barrier (LRB) part of the stub.
//
// Emitted sequence:
//   1. Re-check the thread-local fast gc-state byte for HAS_FORWARDED (combined
//      with WEAK_ROOTS for weak barriers); if it is zero, jump to continuation().
//   2. For weak barriers, go straight to the runtime call when the WEAK_ROOTS
//      byte is nonzero.
//   3. Otherwise shift the (decoded) object address right by the region size
//      shift and test the matching byte of the collection-set table; a zero
//      byte returns to continuation().
//   4. Slow path: call lrb_runtime_entry_addr() with c_rarg0 = obj and
//      c_rarg1 = effective address of _addr, then move the result from rax
//      into _obj and jump to continuation().
void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
  Label L_pop_and_slow, L_slow;

  // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
  // Therefore, alas we need a separate check here. TODO: Figure this out.
  char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
  Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
  __ cmpb(gc_state_fast, 0);
  __ jcc(Assembler::equal, *continuation());

  // If weak references are being processed, weak/phantom loads need to go slow,
  // regardless of their cset status.
  if (_needs_load_ref_weak_barrier) {
    // Note: this local intentionally shadows the outer gc_state_fast and refers
    // to the WEAK_ROOTS-only slot.
    Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
    __ cmpb(gc_state_fast, 0);
    // L_slow is bound after the tmp pop below, so no temp is on the stack on this path.
    __ jccb(Assembler::notEqual, L_slow);
  }

  bool is_aot = AOTCodeCache::is_on_for_dump();

  // Need temp to work, allocate one now.
  // In AOT mode rcx is excluded from selection: it is used for the shift count below.
  bool tmp_live;
  Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
  if (tmp_live) {
    __ push(tmp);
  }

  // Compute the cset bitmap index
  if (_narrow) {
    __ decode_heap_oop_not_null(tmp, _obj);
  } else {
    __ movptr(tmp, _obj);
  }

  // Address operand of the cset byte to test; filled in by one of the paths below.
  Address cset_addr_arg;
  intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
  if (!is_aot && cset_addr < INT32_MAX) {
    // Cset bitmap is at easily encodeable address. Just use it as displacement.
    __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    cset_addr_arg = Address(tmp, checked_cast<int>(cset_addr));
  } else {
    // A second temp is needed to materialize the table base; it is released
    // again before the cset byte is tested.
    bool tmp2_live;
    Register tmp2 = select_temp_register(tmp2_live, /* skip_reg1 = */ tmp, /* skip_reg2 = */ is_aot ? rcx : noreg);
    if (tmp2_live) {
      __ push(tmp2);
    }
    if (is_aot) {
      // Generating AOT code, pull the cset bitmap and region shift from AOT table.
      assert_different_registers(tmp, tmp2, rcx);
      // rcx is saved around the shift, loaded with the grain shift value, and restored.
      // NOTE(review): shrptr(tmp) without an immediate presumably shifts by cl -- confirm.
      __ push(rcx);
      __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
      __ movl(rcx, Address(rcx));
      __ shrptr(tmp);
      __ pop(rcx);
      __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
      __ addptr(tmp, Address(tmp2));
    } else {
      // Cset bitmap is far away. Add its address fully.
      __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
      __ movptr(tmp2, cset_addr);
      __ addptr(tmp, tmp2);
    }
    if (tmp2_live) {
      __ pop(tmp2);
    }
    cset_addr_arg = Address(tmp, 0);
  }

  // Cset-check. Fall-through to slow if in collection set.
  __ cmpb(cset_addr_arg, 0);
  if (tmp_live) {
    // tmp must be popped on both outcomes, so the not-in-cset exit pops it
    // here and the in-cset path pops it at L_pop_and_slow.
    __ jccb(Assembler::notEqual, L_pop_and_slow);
    __ pop(tmp);
    __ jmp(*continuation());
  } else {
    // Nothing else to do, jump back
    __ jcc(Assembler::equal, *continuation());
  }

  // Slow path
  __ bind(L_pop_and_slow);
  // Need to pop tmp immediately for stack to remain aligned.
  if (tmp_live) {
    __ pop(tmp);
  }
  __ bind(L_slow);

  // Obj is the result, need to temporarily stop preserving it.
  bool is_obj_preserved = is_preserved(_obj);
  if (is_obj_preserved) {
    dont_preserve(_obj);
  }
  {
    SaveLiveRegisters slr(&masm, this);

    assert_different_registers(rax, c_rarg0, c_rarg1);

    // Shuffle in the arguments. The end result should be:
    //   c_rarg0 <-- obj
    //   c_rarg1 <-- lea(addr)
    if (_obj == c_rarg0) {
      __ lea(c_rarg1, _addr);
    } else if (_obj == c_rarg1) {
      // Set up arguments in reverse, and then flip them
      __ lea(c_rarg0, _addr);
      __ xchgptr(c_rarg0, c_rarg1);
    } else {
      assert_different_registers(_obj, c_rarg0, c_rarg1);
      __ lea(c_rarg1, _addr);
      __ movptr(c_rarg0, _obj);
    }

    // Go to runtime and handle the rest there.
    // Use rax as scratch, as it will be clobbered by result anyway.
    __ call(RuntimeAddress(lrb_runtime_entry_addr()), rax);

    // Save the result where needed.
    // Narrow results are copied with a 32-bit move; wide results only need a
    // move when _obj is not already rax.
    if (_narrow) {
      __ movl(_obj, rax);
    } else if (_obj != rax) {
      __ movptr(_obj, rax);
    }
  }
  // Re-establish preservation of _obj if it was in effect on entry.
  if (is_obj_preserved) {
    preserve(_obj);
  }

  __ jmp(*continuation());
}
1346 
// Returns the number of general-purpose registers as reported by
// Register::available_gp_registers().
int ShenandoahBarrierStubC2::available_gp_registers() {
  return Register::available_gp_registers();
}
1350 
1351 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1352   return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1353 }
1354 
// Post-initialization hook for the stub. This implementation has no work to do.
void ShenandoahBarrierStubC2::post_init() {
  // Do nothing.
}
1358 
1359 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) {
1360   if (_narrow) {
1361     __ testl(reg, reg);
1362   } else {
1363     __ testq(reg, reg);
1364   }
1365   __ jcc(Assembler::zero, *L_target);
1366 }
1367 
1368 #endif // COMPILER2