< prev index next >

src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp

Print this page
@@ -39,43 +39,46 @@
  #ifdef COMPILER1
  #include "c1/c1_LIRAssembler.hpp"
  #include "c1/c1_MacroAssembler.hpp"
  #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  #endif
+ #ifdef COMPILER2
+ #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
+ #endif
  
  #define __ masm->
  
  // Emits the arraycopy pre-barrier. For oop arrays, when either the SATB
  // barrier (and the destination is initialized) or the load-reference
  // barrier is enabled, calls into the runtime to process the source range
  // before the copy. Skips the call entirely when count == 0 or the GC state
  // shows neither marking nor forwarded objects.
  void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                                         Register src, Register dst, Register count, RegSet saved_regs) {
    if (is_oop) {
      bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
      if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  
-       Label done;
+       Label L_done;
  
        // Avoid calling runtime if count == 0
-       __ cbz(count, done);
+       __ cbz(count, L_done);
  
        // Is GC active?
        Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
        __ ldrb(rscratch1, gc_state);
        if (ShenandoahSATBBarrier && dest_uninitialized) {
          // Uninitialized destination: only the LRB matters, test forwarded bit only.
-         __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
+         __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_done);
        } else {
          __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
          __ tst(rscratch1, rscratch2);
-         __ br(Assembler::EQ, done);
+         __ br(Assembler::EQ, L_done);
        }
  
        __ push(saved_regs, sp);
        if (UseCompressedOops) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
        }
        __ pop(saved_regs, sp);
-       __ bind(done);
+       __ bind(L_done);
      }
    }
  }
  
  void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,

@@ -99,50 +102,50 @@
    // directly to skip generating the check by
    // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
  
    assert(thread == rthread, "must be");
  
-   Label done;
-   Label runtime;
+   Label L_done;
+   Label L_runtime;
  
    assert_different_registers(obj, pre_val, tmp1, tmp2);
    assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
  
    Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
    Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
  
    // Is marking active?
    Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(tmp1, gc_state);
-   __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, done);
+   __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
  
    // Do we need to load the previous value?
    if (obj != noreg) {
      __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
    }
  
    // Is the previous value null?
-   __ cbz(pre_val, done);
+   __ cbz(pre_val, L_done);
  
    // Can we store original value in the thread's buffer?
    // Is index == 0?
    // (The index field is typed as size_t.)
  
    __ ldr(tmp1, index);                      // tmp := *index_adr
-   __ cbz(tmp1, runtime);                    // tmp == 0?
+   __ cbz(tmp1, L_runtime);                    // tmp == 0?
                                          // If yes, goto runtime
  
    __ sub(tmp1, tmp1, wordSize);             // tmp := tmp - wordSize
    __ str(tmp1, index);                      // *index_adr := tmp
    __ ldr(tmp2, buffer);
    __ add(tmp1, tmp1, tmp2);                 // tmp := tmp + *buffer_adr
  
    // Record the previous value
    __ str(pre_val, Address(tmp1, 0));
-   __ b(done);
+   __ b(L_done);
  
-   __ bind(runtime);
+   __ bind(L_runtime);
    // save the live input values
    RegSet saved = RegSet::of(pre_val);
    if (tosca_live) saved += RegSet::of(r0);
    if (obj != noreg) saved += RegSet::of(obj);
  

@@ -167,19 +170,19 @@
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
    }
  
    __ pop(saved, sp);
  
-   __ bind(done);
+   __ bind(L_done);
  }
  
  // Null-tolerant wrapper around resolve_forward_pointer_not_null():
  // leaves dst unchanged when it is null, otherwise resolves the
  // forwarding pointer in place.
  void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
    assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
-   Label is_null;
-   __ cbz(dst, is_null);
+   Label L_is_null;
+   __ cbz(dst, L_is_null);
    resolve_forward_pointer_not_null(masm, dst, tmp);
-   __ bind(is_null);
+   __ bind(L_is_null);
  }
  
  // IMPORTANT: This must preserve all registers, even rscratch1 and rscratch2, except those explicitly
  // passed in.
  void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {

@@ -203,18 +206,18 @@
      __ push(RegSet::of(tmp), sp);
    }
  
    assert_different_registers(tmp, dst);
  
-   Label done;
+   Label L_done;
    __ ldr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
    __ eon(tmp, tmp, zr);
    __ ands(zr, tmp, markWord::lock_mask_in_place);
-   __ br(Assembler::NE, done);
+   __ br(Assembler::NE, L_done);
    __ orr(tmp, tmp, markWord::marked_value);
    __ eon(dst, tmp, zr);
-   __ bind(done);
+   __ bind(L_done);
  
    if (borrow_reg) {
      __ pop(RegSet::of(tmp), sp);
    }
  }

@@ -228,23 +231,23 @@
    bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
    bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
    bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
    bool is_narrow  = UseCompressedOops && !is_native;
  
-   Label heap_stable, not_cset;
+   Label L_heap_stable, L_not_cset;
    __ enter(/*strip_ret_addr*/true);
    Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(rscratch2, gc_state);
  
    // Check for heap stability
    if (is_strong) {
-     __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
+     __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_heap_stable);
    } else {
-     Label lrb;
-     __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, lrb);
-     __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
-     __ bind(lrb);
+     Label L_lrb;
+     __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_lrb);
+     __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_heap_stable);
+     __ bind(L_lrb);
    }
  
    // use r1 for load address
    Register result_dst = dst;
    if (dst == r1) {

@@ -261,11 +264,11 @@
    // Test for in-cset
    if (is_strong) {
      __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
      __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
      __ ldrb(rscratch2, Address(rscratch2, rscratch1));
-     __ tbz(rscratch2, 0, not_cset);
+     __ tbz(rscratch2, 0, L_not_cset);
    }
  
    __ push_call_clobbered_registers();
    if (is_strong) {
      if (is_narrow) {

@@ -288,16 +291,16 @@
    __ blr(lr);
    __ mov(rscratch1, r0);
    __ pop_call_clobbered_registers();
    __ mov(r0, rscratch1);
  
-   __ bind(not_cset);
+   __ bind(L_not_cset);
  
    __ mov(result_dst, r0);
    __ pop(to_save, sp);
  
-   __ bind(heap_stable);
+   __ bind(L_heap_stable);
    __ leave();
  }
  
  //
  // Arguments:

@@ -421,27 +424,27 @@
      card_barrier(masm, tmp3);
    }
  }
  
  // Resolves a jobject from native code without a thread transition.
  // Delegates the basic resolution to the shared BarrierSetAssembler, then
  // branches to the caller-supplied slowpath if the heap is in the
  // evacuation phase (the resolved oop may be from-space). Null obj is fine.
  void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
-                                                                   Register obj, Register tmp, Label& slowpath) {
-   Label done;
+                                                                   Register obj, Register tmp, Label& L_slowpath) {
+   Label L_done;
    // Resolve jobject
-   BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
+   BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, L_slowpath);
  
    // Check for null.
-   __ cbz(obj, done);
+   __ cbz(obj, L_done);
  
    assert(obj != rscratch2, "need rscratch2");
    // gc_state is addressed relative to jni_env because rthread may not be valid here.
    Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
    __ lea(rscratch2, gc_state);
    __ ldrb(rscratch2, Address(rscratch2));
  
    // Check for heap in evacuation phase
-   __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, slowpath);
+   __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, L_slowpath);
  
-   __ bind(done);
+   __ bind(L_done);
  }
  
  // Special Shenandoah CAS implementation that handles false negatives due
  // to concurrent evacuation.  The service is more complex than a
  // traditional CAS operation because the CAS operation is intended to

@@ -478,11 +481,11 @@
    Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword;
  
    assert_different_registers(addr, expected, tmp1, tmp2);
    assert_different_registers(addr, new_val,  tmp1, tmp2);
  
-   Label step4, done;
+   Label L_step4, L_done;
  
    // There are two ways to reach this label.  Initial entry into the
    // cmpxchg_oop code expansion starts at step1 (which is equivalent
    // to label step4).  Additionally, in the rare case that four steps
    // are required to perform the requested operation, the fourth step

@@ -493,11 +496,11 @@
    //
    // The comments that immediately follow the step4 label apply only
    // to the case in which control reaches this label by branch from
    // step 3.
  
-   __ bind (step4);
+   __ bind (L_step4);
  
    // Step 4. CAS has failed because the value most recently fetched
    // from addr is no longer the from-space pointer held in tmp2.  If a
    // different thread replaced the in-memory value with its equivalent
    // to-space pointer, then CAS may still be able to succeed.  The

@@ -522,11 +525,11 @@
    // following branches to done to report failure of CAS.  If both
    // expected and tmp2 equal null, the following branches to done to
    // report success of CAS.  There's no need for a special test of
    // expected equal to null.
  
-   __ br(Assembler::EQ, done);
+   __ br(Assembler::EQ, L_done);
    // if CAS failed, fall through to step 2
  
    // Step 2. CAS has failed because the value held at addr does not
    // match expected.  This may be a false negative because the value fetched
    // from addr (now held in tmp2) may be a from-space pointer to the

@@ -557,11 +560,11 @@
    // in memory) equal to null.
    __ cmp(tmp1, expected);
  
    // If not, then the failure was legitimate and we're done.
    // Branching to done with NE condition denotes failure.
-   __ br(Assembler::NE, done);
+   __ br(Assembler::NE, L_done);
  
    // Fall through to step 3.  No need for step3 label.
  
    // Step 3.  We've confirmed that the value originally held in memory
    // (now held in tmp2) pointed to from-space version of original

@@ -578,20 +581,20 @@
    // EQ flag set iff success.  tmp2 holds value fetched, tmp1 (rscratch1) clobbered.
  
    // If fetched value did not equal the new expected, this could
    // still be a false negative because some other thread may have
    // newly overwritten the memory value with its to-space equivalent.
-   __ br(Assembler::NE, step4);
+   __ br(Assembler::NE, L_step4);
  
    if (is_cae) {
      // We're falling through to done to indicate success.  Success
      // with is_cae is denoted by returning the value of expected as
      // result.
      __ mov(tmp2, expected);
    }
  
-   __ bind(done);
+   __ bind(L_done);
    // At entry to done, the Z (EQ) flag is on iff if the CAS
    // operation was successful.  Additionally, if is_cae, tmp2 holds
    // the value most recently fetched from addr. In this case, success
    // is denoted by tmp2 matching expected.
  

@@ -600,10 +603,498 @@
    } else {
      __ cset(result, Assembler::EQ);
    }
  }
  
+ #ifdef COMPILER2
+ // Emits the inline fast-path GC-state check for a C2 barrier stub: loads the
+ // per-thread gc_state byte and, if any bit of 'test_state' is set, branches
+ // to the stub's entry; otherwise falls through to the stub's continuation
+ // label, which is bound here. 'rscratch' is clobbered.
+ void ShenandoahBarrierSetAssembler::gc_state_check_c2(MacroAssembler* masm, Register rscratch, const unsigned char test_state, BarrierStubC2* slow_stub) {
+   if (ShenandoahGCStateCheckRemove) {
+     // Unrealistic: remove all barrier fastpath checks.
+   } else if (ShenandoahGCStateCheckHotpatch) {
+     // In the ideal world, we would hot-patch the branch to slow stub with a single
+     // (unconditional) jump or nop, based on our current GC state.
+     // FIXME: we may need more than one nop. to discuss.
+     __ nop();
+   } else {
+ #ifdef ASSERT
+     // Only MARKING, HAS_FORWARDED and WEAK_ROOTS may be requested here.
+     const unsigned char allowed = (unsigned char)(ShenandoahHeap::MARKING | ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::WEAK_ROOTS);
+     const unsigned char only_valid_flags = test_state & (unsigned char) ~allowed;
+     assert(test_state > 0x0, "Invalid test_state asked: %x", test_state);
+     assert(only_valid_flags == 0x0, "Invalid test_state asked: %x", test_state);
+ #endif
+ 
+     Label L_short_branch;
+ 
+     // True iff exactly one bit of test_state is set.
+     bool one_bit = (test_state & (test_state - 1)) == 0;
+     // test_state with the WEAK_ROOTS bit cleared.
+     char no_weak_set = (test_state & (~ShenandoahHeap::WEAK_ROOTS));
+ 
+     Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+     __ ldrb(rscratch, gcs_addr);
+ 
+     // if only one bit is required then we can always use tbz
+     if (one_bit) {
+       // NOTE(review): __builtin_ctz is gcc/clang-specific; HotSpot convention
+       // is count_trailing_zeros() for toolchain portability -- confirm.
+       int bit = __builtin_ctz((unsigned)test_state);
+       __ tbz(rscratch, bit, *slow_stub->continuation());
+     } else if (no_weak_set == test_state) {
+       // WEAK_ROOTS not requested: a single 'ands' against the mask suffices.
+       __ ands(rscratch, rscratch, test_state);
+       __ cbz(rscratch, *slow_stub->continuation());
+     } else {
+       // One single 'ands' isn't possible because weak is set, making the
+       // immediate pattern invalid. One single tbz/tbnz doesn't work because we
+       // have 2 or more bits set.
+       //
+       // We'll tackle this by breaking the problem in two parts. First we only
+       // check for weak_roots and then we check for the other flags using
+       // 'ands' without the weak bit set.
+       __ tbnz(rscratch, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_short_branch);
+ 
+       // We cleared the weak bit earlier on
+       __ ands(rscratch, rscratch, no_weak_set);
+       __ cbz(rscratch, *slow_stub->continuation());
+     }
+ 
+     __ bind(L_short_branch);
+     __ b(*slow_stub->entry());
+ 
+     // This is where the stub will return to, and where the code above jumps
+     // when the checks are false.
+     __ bind(*slow_stub->continuation());
+   }
+ }
+ 
+ /**
+  * The logic implemented here relies on certain flags being on specific
+  * positions of the GCState. Also note that all pointer values in register are
+  * guaranteed to be 'to-space' addresses. The algorithm is as follows. If the
+  * CAS succeeds:
+  *    - 'res' will be set to 1.
+  *    - We need to check the SATB flag (index 1 of GCState). If the flag is
+  *      active we need to store 'oldval' in the buffer.
+  *    - We wrote 'newval' to 'addr', therefore we need to mark the corresponding
+  *      card in the card table for 'addr' as dirty.
+  * If the CAS failed:
+  *    - 'res' will be set to 0.
+  *    - If the GCState FORWARDING bit (index 0 of GCState) is set we'll need to
+  *      retry the CAS, because the failure may be because the value in 'addr' is
+  *      the (outdated) 'from-space' version of 'expected'. The retry is done in a
+  *      stub. If the retry succeeds, the CAS-success steps described above apply
+  *      as well.
+  *    - If the FORWARDING bit is clear there is nothing else to do.
+  */
+ void ShenandoahBarrierSetAssembler::cae_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr, Register oldval, Register newval, bool exchange, bool maybe_null, bool narrow, bool acquire, bool release, bool weak) {
+   Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;
+ 
+   // Issue cmpxchg first, res will have the failure witness if CAS fails
+   __ cmpxchg(addr, oldval, newval, op_size, acquire, release, weak, exchange ? res : rscratch2);
+ 
+   // First CAS attempt. If successful, then we are done.
+   // EQ flag set iff success.
+   __ cset(exchange ? rscratch2 : res, Assembler::EQ);
+ 
+   if (!ShenandoahSkipBarriers && (ShenandoahCASBarrierStubC2::needs_barrier(node) || ShenandoahStoreBarrierStubC2::needs_card_barrier(node))) {
+     Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ 
+     if (ShenandoahCASBarrierStubC2::needs_barrier(node)) {
+       ShenandoahCASBarrierStubC2* stub = ShenandoahCASBarrierStubC2::create(node, addr, oldval, newval, res, narrow, exchange, maybe_null, acquire, release, weak);
+ 
+       // Build the mask of GC-state bits that require the slow-path stub.
+       char check = 0;
+       check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node)    ? ShenandoahHeap::MARKING : 0;
+       check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node)      ? ShenandoahHeap::HAS_FORWARDED : 0;
+       gc_state_check_c2(masm, rscratch1, check, stub);
+     }
+ 
+     if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
+       if (exchange) {
+         // CAE mode: derive the success flag by comparing the witness to oldval.
+         __ cmp(res, oldval);
+         __ cset(rscratch2, Assembler::EQ);
+       }
+       card_barrier_c2(node, masm, addr, exchange ? rscratch2 : res);
+     }
+   }
+ }
+ 
+ // Emits an atomic exchange of an oop (GetAndSetP/N) plus the required
+ // Shenandoah barriers: the previous value in 'preval' gets a load barrier
+ // (SATB keep-alive and/or LRB via slow stub), and the store of 'newval'
+ // gets a card-table barrier when needed.
+ void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval, Register newval, Register addr, bool acquire) {
+   if (node->bottom_type()->isa_narrowoop()) {
+     // 32-bit exchange for compressed oops; 'al' variant adds acquire semantics.
+     if (acquire) {
+       __ atomic_xchgalw(preval, newval, addr);
+     } else {
+       __ atomic_xchgw(preval, newval, addr);
+     }
+   } else {
+     if (acquire) {
+       __ atomic_xchgal(preval, newval, addr);
+     } else {
+       __ atomic_xchg(preval, newval, addr);
+     }
+   }
+ 
+   if (!ShenandoahSkipBarriers && (ShenandoahLoadBarrierStubC2::needs_barrier(node) || ShenandoahStoreBarrierStubC2::needs_card_barrier(node))) {
+     Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ 
+     if (ShenandoahLoadBarrierStubC2::needs_barrier(node)) {
+       ShenandoahLoadBarrierStubC2* const stub = ShenandoahLoadBarrierStubC2::create(node, preval, addr);
+ 
+       // Build the mask of GC-state bits that require the slow-path stub.
+       char check = 0;
+       check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node)    ? ShenandoahHeap::MARKING : 0;
+       check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node)      ? ShenandoahHeap::HAS_FORWARDED : 0;
+       check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
+       gc_state_check_c2(masm, rscratch1, check, stub);
+     }
+ 
+     if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
+       // Unconditional card mark: the exchange always wrote newval.
+       card_barrier_c2(node, masm, addr, noreg);
+     }
+   }
+ }
+ 
+ // Emits an oop store for C2 with the Shenandoah pre-barriers (SATB
+ // keep-alive via slow stub, card mark) followed by the store itself.
+ // 'dst_narrow'/'src_narrow' describe compressed-oop encoding of the slot
+ // and the value; 'is_volatile' selects store-release (stlr/stlrw).
+ void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
+                                              Register addr, bool dst_narrow,
+                                              Register src, bool src_narrow,
+                                              bool is_volatile) {
+ 
+   // Emit barrier if needed
+   if (!ShenandoahSkipBarriers && ShenandoahStoreBarrierStubC2::needs_barrier(node)) {
+     Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ 
+     if (ShenandoahStoreBarrierStubC2::needs_keep_alive_barrier(node)) {
+       ShenandoahStoreBarrierStubC2* const stub = ShenandoahStoreBarrierStubC2::create(node, addr, dst_narrow);
+ 
+       // SATB only matters while marking is active.
+       gc_state_check_c2(masm, rscratch1, ShenandoahHeap::MARKING, stub);
+     }
+ 
+     if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
+       card_barrier_c2(node, masm, addr, noreg);
+     }
+   }
+ 
+   // Do the actual store
+   if (dst_narrow) {
+     if (!src_narrow) {
+       // Need to encode into rscratch, because we cannot clobber src.
+       // TODO: Maybe there is a matcher way to test that src is unused after this?
+       __ mov(rscratch1, src);
+       if (ShenandoahStoreBarrierStubC2::src_not_null(node)) {
+         __ encode_heap_oop_not_null(rscratch1);
+       } else {
+         __ encode_heap_oop(rscratch1);
+       }
+       src = rscratch1;
+     }
+ 
+     if (is_volatile) {
+       __ stlrw(src, addr);
+     } else {
+       __ strw(src, addr);
+     }
+   } else {
+     if (is_volatile) {
+       __ stlr(src, addr);
+     } else {
+       __ str(src, addr);
+     }
+   }
+ }
+ 
+ // Emits an oop load for C2 (ldar/ldr, 32-bit for narrow oops) followed by
+ // the Shenandoah load barrier: a GC-state fast-path check that branches to
+ // a ShenandoahLoadBarrierStubC2 when marking/forwarding/weak-roots bits
+ // require SATB keep-alive or LRB handling of the loaded value.
+ void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm,
+                                             Register dst, Register addr, bool acquire) {
+   if (node->bottom_type()->isa_narrowoop()) {
+     if (acquire) {
+       __ ldarw(dst, addr);
+     } else {
+       __ ldrw(dst, addr);
+     }
+   } else {
+     if (acquire) {
+       __ ldar(dst, addr);
+     } else {
+       __ ldr(dst, addr);
+     }
+   }
+ 
+   if (!ShenandoahSkipBarriers && ShenandoahLoadBarrierStubC2::needs_barrier(node)) {
+     Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ 
+     ShenandoahLoadBarrierStubC2* const stub = ShenandoahLoadBarrierStubC2::create(node, dst, addr);
+     // The stub must keep addr intact; dst is its output and may be clobbered.
+     stub->preserve(addr);
+     stub->dont_preserve(dst);
+ 
+     // Build the mask of GC-state bits that require the slow-path stub.
+     char check = 0;
+     check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node)    ? ShenandoahHeap::MARKING : 0;
+     check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node)      ? ShenandoahHeap::HAS_FORWARDED : 0;
+     check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
+     gc_state_check_c2(masm, rscratch1, check, stub);
+   }
+ }
+ 
+ // Emits the card-table post-barrier for C2: marks the card covering 'addr'
+ // dirty (dirty == 0, so a strb of zr). If 'cond' is a register, the mark is
+ // skipped when cond == 0 (i.e. only performed after a successful CAS).
+ // Clobbers rscratch1 and rscratch2.
+ void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register cond) {
+   if ((node->barrier_data() & ShenandoahBitCardMark) == 0) {
+     return;
+   }
+ 
+   assert(CardTable::dirty_card_val() == 0, "must be");
+   Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+   Label L_skip;
+ 
+   if (cond != noreg) {
+     __ cbz(cond, L_skip);
+   }
+ 
+   // rscratch2 = addr >> CardTable::card_shift()
+   __ lsr(rscratch2, addr, CardTable::card_shift());
+ 
+   // rscratch1 = card table base (holder)
+   Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
+   __ ldr(rscratch1, curr_ct_holder_addr);
+ 
+   // rscratch2 = &card_table[card_index]
+   __ add(rscratch2, rscratch1, rscratch2);
+ 
+   if (UseCondCardMark) {
+     // Only write if the card is not already dirty, to avoid cache-line
+     // contention; a zero card byte means dirty, so cbz skips the store.
+     Label L_already_dirty;
+     __ ldrb(rscratch1, Address(rscratch2));
+     __ cbz(rscratch1, L_already_dirty);
+     __ strb(zr, Address(rscratch2));
+     __ bind(L_already_dirty);
+   } else {
+     __ strb(zr, Address(rscratch2));
+   }
+   __ bind(L_skip);
+ }
+ #undef __
+ #define __ masm.
+ 
+ // Slow path of the C2 store barrier: loads the previous value from the
+ // store address and enqueues it in the SATB buffer (reusing _addr_reg as a
+ // third scratch after saving it), then returns to the continuation.
+ void ShenandoahStoreBarrierStubC2::emit_code(MacroAssembler& masm) {
+   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+   __ bind(*entry());
+ 
+   Label L_done;
+ 
+   // We'll use "_addr_reg" register as third scratch register
+   assert(_addr_reg != noreg, "should be");
+   RegSet saved = RegSet::of(_addr_reg);
+   Register rscratch3 = _addr_reg;
+   __ push(saved, sp);
+ 
+   // Do we need to load the previous value?
+   // NOTE(review): the assert above guarantees _addr_reg != noreg, so the
+   // else branch below is unreachable -- dead code or an over-strong assert;
+   // confirm intent.
+   if (_addr_reg != noreg) {
+     __ load_heap_oop(rscratch3, Address(rscratch3, 0), noreg, noreg, AS_RAW);
+     // FIXME: We can merge this on the load above
+     __ cbz(rscratch3, L_done);
+   } else {
+     if (_dst_narrow) {
+       __ decode_heap_oop(rscratch3, &L_done);
+     } else {
+       __ cbz(rscratch3, L_done);
+     }
+   }
+ 
+   // Enqueue the previous value in the thread-local SATB buffer (or runtime).
+   satb(&masm, this, rscratch1, rscratch2, rscratch3, &L_done);
+ 
+   __ bind(L_done);
+   __ pop(saved, sp);
+   __ b(*continuation());
+ }
+ 
+ // Slow path of the C2 load barrier. Decodes _dst if narrow, applies the
+ // SATB keep-alive barrier (when marking) and then the load-reference
+ // barrier (when forwarded objects may exist, or always for non-strong
+ // references), re-encodes if narrow, and returns to the continuation.
+ void ShenandoahLoadBarrierStubC2::emit_code(MacroAssembler& masm) {
+   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+ 
+   __ bind(*entry());
+ 
+   Label L_lrb;
+ 
+   if (_narrow) {
+     if (_maybe_null) {
+       // decode_heap_oop branches to L_lrb on null, skipping the SATB part.
+       __ decode_heap_oop(_dst, &L_lrb);
+     } else {
+       __ decode_heap_oop_not_null(_dst);
+     }
+   } else {
+     __ cbz(_dst, L_lrb);
+   }
+ 
+   { // SATB
+     // Re-check marking here: the inline check may have branched in for a
+     // different GC-state bit.
+     Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+     __ ldrb(rscratch1, gcs_addr);
+     __ tbz(rscratch1, ShenandoahHeap::MARKING_BITPOS, L_lrb);
+ 
+     preserve(_dst);
+     satb(&masm, this, rscratch1, rscratch2, _dst, &L_lrb);
+   }
+ 
+   __ bind(L_lrb); { // LRB
+     Label L_lrb_end;
+ 
+     if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
+       // Strong refs only need the LRB while forwarded objects may exist.
+       Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+       __ ldrb(rscratch1, gcs_addr);
+       __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_lrb_end);
+     }
+ 
+     dont_preserve(_dst);
+     lrb(&masm, this, _dst, _addr_reg, &L_lrb_end, _narrow);
+ 
+     __ bind(L_lrb_end);
+   }
+ 
+   if (_narrow) {
+     if (_maybe_null) {
+       __ encode_heap_oop(_dst);
+     } else {
+       __ encode_heap_oop_not_null(_dst);
+     }
+   }
+ 
+   __ b(*continuation());
+ }
+ 
+ // Slow path of the C2 CAS barrier. If the inline CAS failed, runs the LRB
+ // on the failure witness and retries the CAS once; on a successful CAS
+ // (initial or retried), enqueues the expected (previous) value in the SATB
+ // buffer while marking is active.
+ void ShenandoahCASBarrierStubC2::emit_code(MacroAssembler& masm) {
+   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+ 
+   __ bind(*entry());
+ 
+   // Non-strong references should always go to runtime. We do not expect
+   // CASes over non-strong locations.
+   assert((_node->barrier_data() & ShenandoahBitStrong) != 0, "Only strong references for CASes");
+ 
+   Label L_final;
+   Label L_succeded;
+ 
+   // check if first CAS succeeded; if it did we just need to write to SATB
+   __ cbnz(_cae ? rscratch2 : _result, L_succeded);
+ 
+ 
+   // LRB + CAS Retry
+           // First CAS attempt did not succeed. Execute LRB on 'addr' and retry CAS.
+           if (!_cae) {
+             __ mov(_result, rscratch2);
+           }
+ 
+           // [Compressed] failure witness is in _result. Decode it and check
+           // if it is in collection set.
+           if (_narrow) {
+             __ decode_heap_oop(_result);
+           }
+ 
+           lrb(&masm, this, _result, _addr_reg, &L_final, _narrow);
+ 
+           __ bind(L_final);
+ 
+           // Retry the CAS now that the witness has been healed to to-space.
+           Assembler::operand_size size = _narrow ? Assembler::word : Assembler::xword;
+           __ cmpxchg(_addr_reg, _expected, _new_val, size, _acquire, _release, _weak, _result);
+ 
+           if (!_cae) {
+             __ cset(_result, Assembler::EQ);
+           }
+           // If the retry did not succeed skip SATB
+           __ br(Assembler::NE, *continuation());
+ 
+ 
+ 
+ 
+     // SATB
+     __ bind(L_succeded);
+               Label short_branch;
+               Label L_done;
+ 
+               // SATB enqueue is only needed while marking is active.
+               Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+               __ ldrb(rscratch1, gcs_addr);
+               __ tbnz(rscratch1, ShenandoahHeap::MARKING_BITPOS, short_branch);
+               __ b(*continuation());
+               __ bind(short_branch);
+ 
+               // We'll use "_addr_reg" register as third scratch register
+               assert(_addr_reg != noreg, "should be");
+               RegSet saved = RegSet::of(_addr_reg);
+               Register rscratch3 = _addr_reg;
+               __ push(saved, sp);
+ 
+               if (_narrow) {
+                 __ decode_heap_oop(rscratch3, _expected, &L_done);
+               } else {
+                 __ mov(rscratch3, _expected);
+                 __ cbz(rscratch3, L_done);
+               }
+ 
+               // Enqueue the previous (expected) value in the SATB buffer.
+               satb(&masm, this, rscratch1, rscratch2, rscratch3, &L_done);
+ 
+               __ bind(L_done);
+               __ pop(saved, sp);
+ 
+     __ b(*continuation());
+ }
+ 
+ // Shared SATB enqueue helper for C2 barrier stubs: stores the pre-value
+ // (in scratch3) into the thread-local SATB buffer, or calls
+ // ShenandoahRuntime::write_barrier_pre when the buffer is full.
+ // scratch1/scratch2 are clobbered; live registers are saved around the call.
+ void ShenandoahBarrierStubC2::satb(MacroAssembler* masm, ShenandoahBarrierStubC2* stub, Register scratch1, Register scratch2, Register scratch3, Label* L_done) {
+   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
+   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
+   Label L_runtime;
+ 
+   // If buffer is full, call into runtime.
+   masm->ldr(scratch1, index);
+   masm->cbz(scratch1, L_runtime);
+ 
+   // The buffer is not full, store value into it.
+   // The index counts down: decrement, store back, then write at buffer+index.
+   masm->sub(scratch1, scratch1, wordSize);
+   masm->str(scratch1, index);
+   masm->ldr(scratch2, buffer);
+   masm->str(scratch3, Address(scratch2, scratch1));
+   masm->b(*L_done);
+ 
+   // Runtime call
+   masm->bind(L_runtime);
+   {
+     SaveLiveRegisters save_registers(masm, stub);
+     masm->mov(c_rarg0, scratch3);
+     masm->mov(scratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre));
+     masm->blr(scratch1);
+   }
+ }
+ 
+ // Shared load-reference-barrier helper for C2 barrier stubs. For strong
+ // references, skips the runtime call when 'obj' is not in the collection
+ // set (cset fast-test table lookup); weak/phantom references always call
+ // the matching ShenandoahRuntime LRB entry. The healed oop is left in 'obj'.
+ void ShenandoahBarrierStubC2::lrb(MacroAssembler* masm, ShenandoahBarrierStubC2* stub, Register obj, Register addr, Label* L_done, bool narrow) {
+   // Weak/phantom loads always need to go to runtime, otherwise check for
+   // object in cset.
+   if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
+     masm->mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
+     masm->lsr(rscratch1, obj, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+     masm->ldrb(rscratch2, Address(rscratch2, rscratch1));
+     masm->cbz(rscratch2, *L_done);
+   }
+ 
+   {
+     SaveLiveRegisters save_registers(masm, stub);
+     assert(obj != addr, "sanity address and obj can't be the same.");
+     assert(c_rarg0 != addr, "need to be separate registers, otherwise we override data.");
+     assert(c_rarg1 != obj, "sanity");
+ 
+     masm->mov(c_rarg0, obj);
+     masm->mov(c_rarg1, addr);
+ 
+     // Select the runtime entry by reference strength and oop width.
+     if (narrow) {
+       if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
+         masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow));
+       } else if ((_node->barrier_data() & ShenandoahBitWeak) != 0) {
+         masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
+       } else if ((_node->barrier_data() & ShenandoahBitPhantom) != 0) {
+         masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow));
+       }
+     } else {
+       if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
+         masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
+       } else if ((_node->barrier_data() & ShenandoahBitWeak) != 0) {
+         masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+       } else if ((_node->barrier_data() & ShenandoahBitPhantom) != 0) {
+         masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom));
+       }
+     }
+     masm->blr(rscratch1);
+     masm->mov(obj, r0);
+   }
+ }
+ 
+ #undef __
+ #define __ masm->
+ #endif // COMPILER2
+ 
  void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                       Register start, Register count, Register scratch) {
    assert(ShenandoahCardBarrier, "Should have been checked by caller");
  
    Label L_loop, L_done;

@@ -724,36 +1215,36 @@
    const Register tmp = rscratch1;
  
    Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
    Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
  
-   Label done;
-   Label runtime;
+   Label L_done;
+   Label L_runtime;
  
    // Is marking still active?
    Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(tmp, gc_state);
-   __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, done);
+   __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, L_done);
  
    // Can we store original value in the thread's buffer?
    __ ldr(tmp, queue_index);
-   __ cbz(tmp, runtime);
+   __ cbz(tmp, L_runtime);
  
    __ sub(tmp, tmp, wordSize);
    __ str(tmp, queue_index);
    __ ldr(rscratch2, buffer);
    __ add(tmp, tmp, rscratch2);
    __ load_parameter(0, rscratch2);
    __ str(rscratch2, Address(tmp, 0));
-   __ b(done);
+   __ b(L_done);
  
-   __ bind(runtime);
+   __ bind(L_runtime);
    __ push_call_clobbered_registers();
    __ load_parameter(0, pre_val);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
    __ pop_call_clobbered_registers();
-   __ bind(done);
+   __ bind(L_done);
  
    __ epilogue();
  }
  
  void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
< prev index next >