diff a/run-dacapo.sh b/run-dacapo.sh --- /dev/null +++ b/run-dacapo.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +# Run with e.g. ./run-dacapo.sh fop -n 400 + +set -euo pipefail + +# Look around for release JDK image +J_HP= +if [ -d build/linux-x86_64-server-release/images/jdk/ ]; then + J_HP=build/linux-x86_64-server-release/images/jdk/bin/java +elif [ -d build/linux-aarch64-server-release/images/jdk/ ]; then + J_HP=build/linux-aarch64-server-release/images/jdk/bin/java +else + echo "Cannot find JDK" >&2 + exit 1 +fi + +J_ML= +if [ -d jdk-mainline/ ]; then + J_ML=jdk-mainline/bin/java +fi + +DACAPO=dacapo +if [ ! -d "$DACAPO" ]; then + echo "Download Dacapo to $DACAPO" >&2 + exit 1 +fi +W="-jar $DACAPO/dacapo-23.11-MR2-chopin.jar $*" + +OPTS="-XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions" + +# Only C2, only COH +OPTS="$OPTS -XX:-TieredCompilation -XX:+UseCompactObjectHeaders -XX:+UseCompressedOops" + +# Heap config +OPTS="$OPTS -Xmx10g -Xms10g -XX:+UseTransparentHugePages -XX:+AlwaysPreTouch" + +# GC config +OPTS="$OPTS -XX:+UseShenandoahGC" + +# Mitigate code cache effects +OPTS="$OPTS -XX:ReservedCodeCacheSize=256M" + +OPTS_PASSIVE_NONE="$OPTS -XX:ShenandoahGCMode=passive" +OPTS_PASSIVE_ALL="$OPTS_PASSIVE_NONE -XX:+ShenandoahLoadRefBarrier -XX:+ShenandoahSATBBarrier -XX:+ShenandoahCASBarrier -XX:+ShenandoahCloneBarrier" +# Run the given java command line plus $W three times and print the third-from-last field of each "completed warmup"/"PASSED" output line; $P and $W stay unquoted on purpose so they split into separate arguments +run_with() { + local P="$*" + for I in 1 2 3; do + echo -n " run $I: " + $P $W 2>&1 | awk '/completed warmup|PASSED/ { printf "%s ", $(NF-2)} END { print "" }' + done +} + +echo +echo ------ +echo "$*" + +echo +echo "Hotpatching: Concurrent" +run_with $J_HP $OPTS + +#echo +#echo "Hotpatching: Passive, No barriers" +#run_with $J_HP $OPTS_PASSIVE_NONE + +#echo +#echo "Hotpatching: Passive, All barriers" +#run_with $J_HP $OPTS_PASSIVE_ALL + +if [ -n "$J_ML" ]; then + echo + echo "Mainline: Concurrent" + run_with $J_ML $OPTS + +# echo +# echo "Mainline: Passive, No barriers" +# run_with $J_ML $OPTS_PASSIVE_NONE + +# echo +# echo "Mainline: Passive, 
All barriers" +# run_with $J_ML $OPTS_PASSIVE_ALL +fi + + diff a/run-helloworld.sh b/run-helloworld.sh --- /dev/null +++ b/run-helloworld.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +# Run with e.g. ./run-helloworld.sh + +set -euo pipefail + +# Look around for release JDK image +J_HP= +if [ -d build/linux-x86_64-server-release/images/jdk/ ]; then + J_HP=build/linux-x86_64-server-release/images/jdk/bin/java +elif [ -d build/linux-aarch64-server-release/images/jdk/ ]; then + J_HP=build/linux-aarch64-server-release/images/jdk/bin/java +else + echo "Cannot find JDK" + exit 1 +fi + +J_ML= +if [ -d jdk-mainline/ ]; then + J_ML=jdk-mainline/bin/java +fi + +OPTS="-XX:+UseShenandoahGC -Xmx8g -Xms8g -XX:+AlwaysPreTouch -XX:+UseTransparentHugePages -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:ShenandoahGCMode=passive -XX:+UnlockExperimentalVMOptions" + +OPTS_ALL="$OPTS -XX:+ShenandoahLoadRefBarrier -XX:+ShenandoahSATBBarrier -XX:+ShenandoahCASBarrier -XX:+ShenandoahCloneBarrier" + +echo +echo ------ +echo Hello World footprint: + +cat > Hello.java <&1 | grep "Tier4" + done +} + +echo +echo ------ +echo $* + +if [ "x" != "x$J_ML" ]; then + echo + echo "Mainline: Concurrent" + run_with $J_ML $OPTS + + echo + echo "Mainline: Passive, No barriers" + run_with $J_ML $OPTS_PASSIVE_NONE + + echo + echo "Mainline: Passive, All barriers" + run_with $J_ML $OPTS_PASSIVE_ALL +fi + +echo +echo "HP: Concurrent" +run_with $J_HP $OPTS + +echo +echo "HP: Passive, No barriers" +run_with $J_HP $OPTS_PASSIVE_NONE + +echo +echo "HP: Passive, All barriers" +run_with $J_HP $OPTS_PASSIVE_ALL diff a/run-renaissance.sh b/run-renaissance.sh --- /dev/null +++ b/run-renaissance.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Run with e.g. 
./run-renaissance.sh fj-kmeans -r 10 + +set -euo pipefail + +# Look around for release JDK image +J_HP= +if [ -d build/linux-x86_64-server-release/images/jdk/ ]; then + J_HP=build/linux-x86_64-server-release/images/jdk/bin/java +elif [ -d build/linux-aarch64-server-release/images/jdk/ ]; then + J_HP=build/linux-aarch64-server-release/images/jdk/bin/java +else + echo "Cannot find JDK" >&2 + exit 1 +fi + +J_ML= +if [ -d jdk-mainline/ ]; then + J_ML=jdk-mainline/bin/java +fi + +RNS=renaissance-gpl-0.16.1.jar +if [ ! -r "$RNS" ]; then + echo "Download Renaissance to $RNS" >&2 + exit 1 +fi +W="-jar $RNS $*" + + +OPTS="-XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions" + +# Only C2, only COH +OPTS="$OPTS -XX:-TieredCompilation -XX:+UseCompactObjectHeaders" + +# Heap config +OPTS="$OPTS -Xmx10g -Xms10g -XX:+UseTransparentHugePages -XX:+AlwaysPreTouch" + +# GC config +OPTS="$OPTS -XX:+UseShenandoahGC" + +# Mitigate code cache effects +OPTS="$OPTS -XX:ReservedCodeCacheSize=256M" + +OPTS_PASSIVE_NONE="$OPTS -XX:ShenandoahGCMode=passive" +OPTS_PASSIVE_ALL="$OPTS_PASSIVE_NONE -XX:+ShenandoahLoadRefBarrier -XX:+ShenandoahSATBBarrier -XX:+ShenandoahCASBarrier -XX:+ShenandoahCloneBarrier" +# Run the given java command line plus $W three times and print, per "iteration ... completed" output line, the integer part of the third-from-last field with "(" stripped; $P and $W stay unquoted on purpose so they split into separate arguments +run_with() { + local P="$*" + for I in 1 2 3; do + echo -n " run $I: " + $P $W 2>&1 | awk '/iteration (.*) completed/ { s = $(NF-2); gsub(/\(/, "", s); printf("%s ", int(s)); } END { print "" }' + done +} + +echo +echo ------ +echo "$*" + +echo +echo "Hotpatching: Concurrent" +run_with $J_HP $OPTS + +#echo +#echo "Hotpatching: Passive, No barriers" +#run_with $J_HP $OPTS_PASSIVE_NONE + +#echo +#echo "Hotpatching: Passive, All barriers" +#run_with $J_HP $OPTS_PASSIVE_ALL + +if [ -n "$J_ML" ]; then + echo + echo "Mainline: Concurrent" + run_with $J_ML $OPTS + +# echo +# echo "Mainline: Passive, No barriers" +# run_with $J_ML $OPTS_PASSIVE_NONE + +# echo +# echo "Mainline: Passive, All barriers" +# run_with $J_ML $OPTS_PASSIVE_ALL +fi + + diff 
a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -972,31 +972,89 @@ } void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) { Assembler::InlineSkippedInstructionsCounter skip_counter(&masm); PhaseOutput* const output = Compile::current()->output(); - Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state))); - // We piggyback on scratch_emit_size mode to compute the slowpath stub size. - // We'll use that information to decide whether we need a far jump to the - // stub entry point or not. In scratch_emit_size mode we don't bind entry() - // because otherwise it will be rebound when we later emit the instructions - // for real. - if (_needs_far_jump) { - __ ldrb(tmp, gc_state_fast); - __ cbz(tmp, *continuation()); - __ b(output->in_scratch_emit_size() ? *continuation() : *entry()); + // Emit the unconditional branch in the first version of the method. + // Let the rest of runtime figure out how to manage it. + if (output->in_scratch_emit_size()) { + // We piggyback on scratch_emit_size mode to compute the slowpath stub size. + // We'll use that information to decide whether we need a far jump to the + // stub entry point or not. In scratch_emit_size mode we don't bind entry() + // because otherwise it will be rebound when we later emit the instructions + // for real. + __ nop(); } else { - __ ldrb(tmp, gc_state_fast); - __ cbnz(tmp, output->in_scratch_emit_size() ? 
*continuation() : *entry()); + __ relocate(barrier_Relocation::spec()); + __ b(*entry()); + +// #ifdef ASSERT +// Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state))); +// __ ldrb(rscratch1, gc_state_fast); +// __ cbz(rscratch1, *continuation()); +// __ hlt(0); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE +// #endif } // This is were the slowpath stub will return to or the code above will // jump to if the checks are false __ bind(*continuation()); } +address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) { + NativeInstruction* ni = nativeInstruction_at(pc); + assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump"); + NativeJump* jmp = nativeJump_at(pc); + return jmp->jump_destination(); +} + +static void insert_nop(address pc) { + // Overwrite the instruction at pc with the AArch64 NOP (0xD503201F, i.e. bytes + // 1F 20 03 D5) using one 32-bit store, like NativeJump::insert() does for the + // branch; byte-wise stores would leave a half-written instruction in between. + *(volatile uint32_t*)pc = 0xD503201F; + ICache::invalidate_range(pc, 4); +} + +static bool is_nop(address pc) { + // Returns true if the 4-byte instruction at pc is the AArch64 NOP. + // The whole instruction word is compared at once against 0xD503201F + // (bytes 1F 20 03 D5 in memory order), matching the single-word store + // that insert_nop() uses to write it. + return *(volatile uint32_t*)pc == 0xD503201F; +} + +static void check_at(bool cond, address pc, const char* msg) { + assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x", + msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3)); +} + +bool ShenandoahBarrierSetAssembler::is_active(address pc) { + NativeInstruction* ni = nativeInstruction_at(pc); + return ni->is_jump(); +} + +void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) { + NativeInstruction* ni = nativeInstruction_at(pc); + if (ni->is_jump()) { + insert_nop(pc); + } else { + check_at(is_nop(pc), pc, "Should already be nop"); + } +} + +void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) { + NativeInstruction* ni = nativeInstruction_at(pc); + if (is_nop(pc)) { + NativeJump::insert(pc, stub_addr); + } else { + check_at(ni->is_jump(), pc, "Should 
already be jump"); + check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address"); + } +} + void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) { Assembler::InlineSkippedInstructionsCounter skip_counter(&masm); assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?"); PhaseOutput* const output = Compile::current()->output(); @@ -1017,11 +1075,11 @@ __ ldr(_obj, _addr); } } // If the object is null, there is no point in applying barriers. - maybe_far_jump_if_zero(masm, _obj); + maybe_far_jump_if_zero(masm, _obj, continuation()); // We need to make sure that loads done by callers survive across slow-path calls. // For self-loads, we need to care about the case when both KA and LRB are enabled (rare). bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier; if (!_do_load || needs_both_barriers) { @@ -1051,31 +1109,32 @@ } else { ShouldNotReachHere(); } } -void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) { +void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) { if (_needs_far_jump) { Label L_short_jump; __ cbnz(reg, L_short_jump); - __ b(*continuation()); + __ b(*L_target); __ bind(L_short_jump); } else { - __ cbz(reg, *continuation()); + __ cbz(reg, *L_target); } } void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) { Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING))); Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); Label L_through, L_slowpath; - // If another barrier is enabled as well, do a runtime check for a specific barrier. 
- if (_needs_load_ref_barrier) { - assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true"); - __ ldrb(_tmp1, gcstate); + // Hotpatched GC checks only care about idle/non-idle state, so we need to check again here. + __ ldrb(_tmp1, gcstate); + if (L_done != nullptr) { + maybe_far_jump_if_zero(masm, _tmp1, L_done); + } else { __ cbz(_tmp1, L_through); } // Fast-path: put object into buffer. // If buffer is already full, go slow. @@ -1122,17 +1181,15 @@ } void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) { Label L_slow; - // If another barrier is enabled as well, do a runtime check for a specific barrier. - if (_needs_keep_alive_barrier) { - char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0); - Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check))); - __ ldrb(_tmp1, gc_state_fast); - maybe_far_jump_if_zero(masm, _tmp1); - } + // Hotpatched GC checks only care about idle/non-idle state, so we need to check again here. + char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0); + Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check))); + __ ldrb(_tmp1, gc_state_fast); + maybe_far_jump_if_zero(masm, _tmp1, continuation()); // If weak references are being processed, weak/phantom loads need to go slow, // regardless of their cset status. 
if (_needs_load_ref_weak_barrier) { Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS))); @@ -1163,11 +1220,11 @@ __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address())); __ ldr(_tmp1, Address(_tmp1)); __ add(_tmp1, _tmp1, _tmp2); } __ ldrb(_tmp1, Address(_tmp1, 0)); - maybe_far_jump_if_zero(masm, _tmp1); + maybe_far_jump_if_zero(masm, _tmp1, continuation()); // Slow path __ bind(L_slow); // Obj is the result, need to temporarily stop preserving it. diff a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -85,10 +85,16 @@ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); #endif #ifdef COMPILER2 + // Barrier hotpatching + static address parse_stub_address(address pc); + static bool is_active(address pc); + static void patch_branch_to_nop(address pc); + static void patch_nop_to_branch(address pc, address stub_addr); + // Entry points from Matcher void load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address addr, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire); void store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow, Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3, bool is_volatile); void compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr, Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool weak, bool acquire); diff a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp 
b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp --- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp @@ -206,10 +206,29 @@ //-------------------------------------------------------------------------------- void NativeJump::verify() { ; } +// Overwrite the instruction at code_pos with an unconditional branch (B) to entry. +void NativeJump::insert(address code_pos, address entry) { + // Displacement is relative to the jump instruction PC + intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos); + + // The jump immediate is 26 bits and it will at execution time be scaled by 4 + int64_t imm26 = disp >> 2; + + // The farthest that we can jump is +/- 128MiB + guarantee(Assembler::is_simm(imm26, 26), "maximum offset is 128MiB, you are asking for " INT64_FORMAT, imm26); + + // Patch with opcode | offset. The offset is masked to its 26-bit field because a + // negative (backward) offset is sign-extended and would otherwise clobber the opcode bits. + *((uint32_t*)code_pos) = 0x14000000u | ((uint32_t)imm26 & 0x03FFFFFFu); + + // Tell hardware to invalidate icache line containing code_pos + ICache::invalidate_range(code_pos, instruction_size); +} + address NativeJump::jump_destination() const { address dest = MacroAssembler::target_addr_for_insn(instruction_address()); // We use jump to self as the unresolved address which the inline // cache code (and relocs) know about diff a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp --- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp @@ -1229,11 +1229,11 @@ __ ld(_obj, _addr.disp(), _addr.base()); } } // If the object is null, there is no point in applying barriers. - maybe_far_jump_if_zero(masm, _obj); + maybe_far_jump_if_zero(masm, _obj, continuation()); // We need to make sure that loads done by callers survive across slow-path calls. // For self-loads, we need to care about the case when both KA and LRB are enabled (rare). 
bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier; if (!_do_load || needs_both_barriers) { @@ -1252,27 +1252,28 @@ } else { ShouldNotReachHere(); } } -void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) { +void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) { __ cmpdi(CR0, reg, 0); - // Branch to continuation if equal - __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *continuation()); + // Branch to target if equal + __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *L_target); } void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) { const int gcstate_offset = in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)); const int index_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()); const int buffer_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()); Label L_through, L_slowpath; - // If another barrier is enabled as well, do a runtime check for a specific barrier. - if (_needs_load_ref_barrier) { - assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true"); - __ lbz(_tmp1, gcstate_offset, R16_thread); - __ cmpdi(CR0, _tmp1, 0); + // Hotpatched GC checks only care about idle/non-idle state, so we need to check again here. + __ lbz(_tmp1, gcstate_offset, R16_thread); + __ cmpdi(CR0, _tmp1, 0); + if (L_done != nullptr) { + maybe_far_jump_if_zero(masm, _tmp1, L_done); + } else { __ beq(CR0, L_through); } // Fast-path: put object into buffer. // If buffer is already full, go slow. @@ -1318,16 +1319,14 @@ } void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) { Label L_slow; - // If another barrier is enabled as well, do a runtime check for a specific barrier. 
- if (_needs_keep_alive_barrier) { - char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0); - __ lbz(_tmp1, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)), R16_thread); - maybe_far_jump_if_zero(masm, _tmp1); - } + // Hotpatched GC checks only care about idle/non-idle state, so we need to check again here. + char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0); + __ lbz(_tmp1, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)), R16_thread); + maybe_far_jump_if_zero(masm, _tmp1, continuation()); // If weak references are being processed, weak/phantom loads need to go slow, // regardless of their cset status. if (_needs_load_ref_weak_barrier) { __ lbz(_tmp1, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)), R16_thread); @@ -1342,11 +1341,11 @@ __ srdi(_tmp2, decoded, ShenandoahHeapRegion::region_size_bytes_shift_jint()); } else { __ srdi(_tmp2, _obj, ShenandoahHeapRegion::region_size_bytes_shift_jint()); } __ lbzx(_tmp2, _tmp2, _tmp1); - maybe_far_jump_if_zero(masm, _tmp2); + maybe_far_jump_if_zero(masm, _tmp2, continuation()); // Slow path __ bind(L_slow); // Obj is the result, need to temporarily stop preserving it. 
diff a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp --- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp @@ -131,10 +131,15 @@ virtual void try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj, Register tmp, Label& slow_path); #ifdef COMPILER2 + // Barrier hotpatching + static address parse_stub_address(address pc) { Unimplemented(); } + static void patch_branch_to_nop(address pc) { Unimplemented(); } + static void patch_nop_to_branch(address pc, address stub_addr) { Unimplemented(); } + // Entry points from Matcher void load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Register addr, int disp, Register tmp1, Register tmp2, bool narrow, bool acquire); void store_c2(const MachNode* node, MacroAssembler* masm, Register dst, int disp, bool dst_narrow, Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3); diff a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp @@ -907,11 +907,11 @@ __ ld(_obj, _addr); } } // If the object is null, there is no point in applying barriers. - maybe_far_jump_if_zero(masm, _obj); + maybe_far_jump_if_zero(masm, _obj, continuation()); // We need to make sure that loads done by callers survive across slow-path calls. // For self-loads, we need to care about the case when both KA and LRB are enabled (rare). 
bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier; if (!_do_load || needs_both_barriers) { @@ -930,27 +930,28 @@ } else { ShouldNotReachHere(); } } -void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) { +void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) { Label L_short_jump; __ bnez(reg, L_short_jump); - __ j(*continuation()); + __ j(*L_target); __ bind(L_short_jump); } void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) { Address index(xthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); Address buffer(xthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); Label L_through, L_slowpath; - // If another barrier is enabled as well, do a runtime check for a specific barrier. - if (_needs_load_ref_barrier) { - assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true"); - Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING))); - __ lbu(_tmp1, gc_state_fast); + // Hotpatched GC checks only care about idle/non-idle state, so we need to check again here. + Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING))); + __ lbu(_tmp1, gc_state_fast); + if (L_done != nullptr) { + maybe_far_jump_if_zero(masm, _tmp1, L_done); + } else { __ beqz(_tmp1, L_through); } // Fast-path: put object into buffer. // If buffer is already full, go slow. @@ -996,17 +997,15 @@ } void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) { Label L_slow; - // If another barrier is enabled as well, do a runtime check for a specific barrier. - if (_needs_keep_alive_barrier) { - char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? 
ShenandoahHeap::WEAK_ROOTS : 0); - Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check))); - __ lbu(_tmp1, gc_state_fast); - maybe_far_jump_if_zero(masm, _tmp1); - } + // Hotpatched GC checks only care about idle/non-idle state, so we need to check again here. + char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0); + Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check))); + __ lbu(_tmp1, gc_state_fast); + maybe_far_jump_if_zero(masm, _tmp1, continuation()); // If weak references are being processed, weak/phantom loads need to go slow, // regardless of their cset status. if (_needs_load_ref_weak_barrier) { Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS))); @@ -1023,11 +1022,11 @@ __ mv(_tmp1, ShenandoahHeap::in_cset_fast_test_addr()); __ srli(_tmp2, _tmp2, ShenandoahHeapRegion::region_size_bytes_shift_jint()); __ add(_tmp1, _tmp1, _tmp2); __ lbu(_tmp1, Address(_tmp1, 0)); - maybe_far_jump_if_zero(masm, _tmp1); + maybe_far_jump_if_zero(masm, _tmp1, continuation()); // Slow path __ bind(L_slow); // Obj is the result, need to temporarily stop preserving it. 
diff a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp @@ -90,10 +90,15 @@ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); #endif #ifdef COMPILER2 + // Barrier hotpatching + static address parse_stub_address(address pc) { Unimplemented(); } + static void patch_branch_to_nop(address pc) { Unimplemented(); } + static void patch_nop_to_branch(address pc, address stub_addr) { Unimplemented(); } + // Entry points from Matcher void load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address addr, Register tmp1, Register tmp2, bool is_narrow); void store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow, Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3); void compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr, Register oldval, diff a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp @@ -32,10 +32,11 @@ #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegion.hpp" #include "gc/shenandoah/shenandoahRuntime.hpp" #include "gc/shenandoah/shenandoahThreadLocalData.hpp" #include "interpreter/interpreter.hpp" +#include "nativeInst_x86.hpp" #include "runtime/javaThread.hpp" #include "runtime/sharedRuntime.hpp" #include "utilities/macros.hpp" #ifdef COMPILER1 #include "c1/c1_LIRAssembler.hpp" @@ -1019,16 +1020,80 
@@ } void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) { Assembler::InlineSkippedInstructionsCounter skip_counter(&masm); + // Emit the unconditional branch in the first version of the method. + // Let the rest of runtime figure out how to manage it. + __ relocate(barrier_Relocation::spec()); + __ jmp(*entry(), /* maybe_short = */ false); + +#ifdef ASSERT Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state))); __ cmpb(gc_state_fast, 0); - __ jcc(Assembler::notEqual, *entry()); + __ jccb(Assembler::zero, *continuation()); + __ hlt(); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE +#endif + // TODO: When barriers are consistently turned off at the end of the cycle, assert that barrier is NOP-ed. + __ bind(*continuation()); } +address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) { + NativeInstruction* ni = nativeInstruction_at(pc); + assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump"); + NativeJump* jmp = nativeJump_at(pc); + return jmp->jump_destination(); +} + +void insert_5_byte_nop(address pc) { + *(pc + 0) = 0x0F; + *(pc + 1) = 0x1F; + *(pc + 2) = 0x44; + *(pc + 3) = 0x00; + *(pc + 4) = 0x00; + ICache::invalidate_range(pc, 5); +} + +bool is_5_byte_nop(address pc) { + if (*(pc + 0) != 0x0F) return false; + if (*(pc + 1) != 0x1F) return false; + if (*(pc + 2) != 0x44) return false; + if (*(pc + 3) != 0x00) return false; + if (*(pc + 4) != 0x00) return false; + return true; +} + +void check_at(bool cond, address pc, const char* msg) { + assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x", + msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4)); +} + +bool ShenandoahBarrierSetAssembler::is_active(address pc) { + NativeInstruction* ni = nativeInstruction_at(pc); + return ni->is_jump(); +} + +void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) { + 
NativeInstruction* ni = nativeInstruction_at(pc); + if (ni->is_jump()) { + insert_5_byte_nop(pc); + } else { + check_at(is_5_byte_nop(pc), pc, "Should already be nop"); + } +} + +void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) { + NativeInstruction* ni = nativeInstruction_at(pc); + if (is_5_byte_nop(pc)) { + NativeJump::insert(pc, stub_addr); + } else { + check_at(ni->is_jump(), pc, "Should already be jump"); + check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address"); + } +} + void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) { Assembler::InlineSkippedInstructionsCounter skip_counter(&masm); assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?"); // On x86, there is a significant penalty with unaligned branch target, for example @@ -1045,11 +1110,11 @@ __ movq(_obj, _addr); } } // If the object is null, there is no point in applying barriers. - maybe_far_jump_if_zero(masm, _obj); + maybe_far_jump_if_zero(masm, _obj, continuation()); // We need to make sure that loads done by callers survive across slow-path calls. // For self-loads, we need to care about the case when both KA and LRB are enabled (rare). bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier; if (!_do_load || needs_both_barriers) { @@ -1075,14 +1140,15 @@ Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); Label L_through, L_pop_and_slow; - // If another barrier is enabled as well, do a runtime check for a specific barrier. - if (_needs_load_ref_barrier) { - assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true"); - __ cmpb(gc_state_fast, 0); + // Hotpatched GC checks only care about idle/non-idle state, so we need to check again here. 
+ __ cmpb(gc_state_fast, 0); + if (L_done != nullptr) { + __ jcc(Assembler::equal, *L_done); + } else { __ jcc(Assembler::equal, L_through); } // Need temp to work, allocate one now. bool tmp_live; @@ -1148,17 +1214,15 @@ } void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) { Label L_pop_and_slow, L_slow; - // If another barrier is enabled as well, do a runtime check for a specific barrier. - if (_needs_keep_alive_barrier) { - char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0); - Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check))); - __ cmpb(gc_state_fast, 0); - __ jcc(Assembler::equal, *continuation()); - } + // Hotpatched GC checks only care about idle/non-idle state, so we need to check again here. + char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0); + Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check))); + __ cmpb(gc_state_fast, 0); + __ jcc(Assembler::equal, *continuation()); // If weak references are being processed, weak/phantom loads need to go slow, // regardless of their cset status. if (_needs_load_ref_weak_barrier) { Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS))); @@ -1288,15 +1352,15 @@ void ShenandoahBarrierStubC2::post_init() { // Do nothing. 
} -void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) { +void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) { if (_narrow) { __ testl(reg, reg); } else { __ testq(reg, reg); } - __ jcc(Assembler::zero, *continuation()); + __ jcc(Assembler::zero, *L_target); } #endif // COMPILER2 diff a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp @@ -81,10 +81,16 @@ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); #endif #ifdef COMPILER2 + // Barrier hotpatching + static address parse_stub_address(address pc); + static bool is_active(address pc); + static void patch_branch_to_nop(address pc); + static void patch_nop_to_branch(address pc, address stub_addr); + // Entry points from Matcher void load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow); void store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow, Register src, bool src_narrow, Register tmp); void compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Address addr, Register oldval, Register newval, diff a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -168,11 +168,11 @@ int available_gp_registers(); bool is_live_register(Register reg); bool is_special_register(Register reg); Register select_temp_register(bool& selected_live, Register skip_reg1 = noreg, Register 
skip_reg2 = noreg); - void maybe_far_jump_if_zero(MacroAssembler& masm, Register reg); + void maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target); void enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp); void keepalive(MacroAssembler& masm, Label* L_done); void lrb(MacroAssembler& masm); diff a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -157,10 +157,25 @@ FLAG_SET_DEFAULT(UseCountedLoopSafepoints, true); if (FLAG_IS_DEFAULT(LoopStripMiningIter)) { FLAG_SET_DEFAULT(LoopStripMiningIter, 1000); } } + + if (NMethodRelocation) { + warning("NMethod relocation is not supported with hotpatching yet"); + FLAG_SET_DEFAULT(NMethodRelocation, false); + } + + if (HotCodeHeap) { + warning("Hot code heap is not supported with hotpatching yet"); + FLAG_SET_DEFAULT(HotCodeHeap, false); + } + + if (HotCodeHeapSize) { + warning("Hot code heap is not supported with hotpatching yet"); + FLAG_SET_DEFAULT(HotCodeHeapSize, 0); + } #endif // COMPILER2 // Record more information about previous cycles for improved debugging pleasure if (FLAG_IS_DEFAULT(LogEventsBufferEntries)) { FLAG_SET_DEFAULT(LogEventsBufferEntries, 250); diff a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp @@ -173,10 +173,13 @@ ShenandoahKeepAliveClosure oops; StackWatermarkSet::finish_processing(JavaThread::cast(thread), &oops, StackWatermarkKind::gc); } else if (_heap->is_concurrent_weak_root_in_progress() && _heap->is_evacuation_in_progress()) { ShenandoahContextEvacuateUpdateRootsClosure oops; StackWatermarkSet::finish_processing(JavaThread::cast(thread), &oops, 
StackWatermarkKind::gc); + } else { + ShenandoahNoOpClosure oops; + StackWatermarkSet::finish_processing(JavaThread::cast(thread), &oops, StackWatermarkKind::gc); } } } void ShenandoahBarrierSet::write_ref_array(HeapWord* start, size_t count) { diff a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSetNMethod.cpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSetNMethod.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSetNMethod.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSetNMethod.cpp @@ -70,8 +70,8 @@ // CodeCache unloading support nm->mark_as_maybe_on_stack(); // Disarm - ShenandoahNMethod::disarm_nmethod(nm); + ShenandoahNMethod::disarm_nmethod_unlocked(nm); return true; } diff a/src/hotspot/share/gc/shenandoah/shenandoahClosures.hpp b/src/hotspot/share/gc/shenandoah/shenandoahClosures.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahClosures.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahClosures.hpp @@ -239,10 +239,16 @@ // // ========= Utilities // +class ShenandoahNoOpClosure : public OopClosure { +public: + inline void do_oop(oop* p) { } + inline void do_oop(narrowOop* p) { } +}; + #ifdef ASSERT class ShenandoahAssertNotForwardedClosure : public OopClosure { private: template inline void do_oop_work(T* p); diff a/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp @@ -52,10 +52,21 @@ assert_locked_or_safepoint(CodeCache_lock); _nmethod_table->unregister_nmethod(nm); } void ShenandoahCodeRoots::arm_nmethods() { + char gc_state = ShenandoahHeap::heap()->gc_state(); + log_info(gc)("Arming nmethods with GC state: %d [%s%s%s%s%s%s%s]", + gc_state, + ((gc_state & ShenandoahHeap::HAS_FORWARDED) > 0) ? "HAS_FORWARDED " : "", + ((gc_state & ShenandoahHeap::MARKING) > 0) ? "MARKING " : "", + ((gc_state & ShenandoahHeap::EVACUATION) > 0) ?
"EVACUATION " : "", + ((gc_state & ShenandoahHeap::UPDATE_REFS) > 0) ? "UPDATE_REFS " : "", + ((gc_state & ShenandoahHeap::WEAK_ROOTS) > 0) ? "WEAK_ROOTS " : "", + ((gc_state & ShenandoahHeap::YOUNG_MARKING) > 0) ? "YOUNG_MARKING " : "", + ((gc_state & ShenandoahHeap::OLD_MARKING) > 0) ? "OLD_MARKING " : "" + ); BarrierSet::barrier_set()->barrier_set_nmethod()->arm_all_nmethods(); } class ShenandoahDisarmNMethodClosure : public NMethodClosure { public: @@ -70,25 +81,54 @@ ShenandoahConcurrentNMethodIterator _iterator; public: ShenandoahDisarmNMethodsTask() : WorkerTask("Shenandoah Disarm NMethods"), - _iterator(ShenandoahCodeRoots::table()) { - assert(SafepointSynchronize::is_at_safepoint(), "Only at a safepoint"); - } + _iterator(ShenandoahCodeRoots::table()) {} virtual void work(uint worker_id) { - ShenandoahParallelWorkerSession worker_session(worker_id); _iterator.nmethods_do(&_cl); } }; void ShenandoahCodeRoots::disarm_nmethods() { ShenandoahDisarmNMethodsTask task; ShenandoahHeap::heap()->workers()->run_task(&task); } +#ifdef ASSERT +class ShenandoahCheckNMethodClosure : public NMethodClosure { + bool const _armed; +public: + ShenandoahCheckNMethodClosure(bool armed) : _armed(armed) {} + virtual void do_nmethod(nmethod* nm) { + ShenandoahNMethod::assert_barriers(nm, _armed); + } +}; + +class ShenandoahCheckNMethodsTask : public WorkerTask { +private: + ShenandoahCheckNMethodClosure _cl; + ShenandoahConcurrentNMethodIterator _iterator; + +public: + ShenandoahCheckNMethodsTask() : + WorkerTask("Shenandoah Check NMethods"), + _cl(!ShenandoahHeap::heap()->is_idle()), + _iterator(ShenandoahCodeRoots::table()) {} + + virtual void work(uint worker_id) { + _iterator.nmethods_do(&_cl); + } +}; + +void ShenandoahCodeRoots::check_barriers() { + ShenandoahCheckNMethodsTask task; + ShenandoahHeap::heap()->workers()->run_task(&task); +} +#endif + class ShenandoahNMethodUnlinkClosure : public NMethodClosure { private: bool _unloading_occurred; ShenandoahHeap* const _heap;
BarrierSetNMethod* const _bs; diff a/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.hpp @@ -67,10 +67,12 @@ // Concurrent nmethod unloading support static void unlink(WorkerThreads* workers, bool unloading_occurred); static void purge(); static void arm_nmethods(); static void disarm_nmethods(); + static void check_barriers() NOT_DEBUG_RETURN; + static int disarmed_value() { return _disarmed_value; } static int* disarmed_value_address() { return &_disarmed_value; } private: static ShenandoahNMethodTable* _nmethod_table; diff a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp @@ -255,13 +255,12 @@ return false; } // In normal cycle, final-update-refs would verify at the end of the cycle. // In abbreviated cycle, we need to verify separately. - if (ShenandoahVerify) { - vmop_entry_final_verify(); - } + // TODO: This now also puts the barriers down at the end of the cycle. Refine. + vmop_entry_final_verify(); } // We defer generation resizing actions until after cset regions have been recycled. We do this even following an // abbreviated cycle. if (heap->mode()->is_generational()) { @@ -695,10 +694,13 @@ assert(_generation->is_bitmap_clear(), "need clear marking bitmap"); assert(!_generation->is_mark_complete(), "should not be complete"); assert(!heap->has_forwarded_objects(), "No forwarded objects on this path"); + // First pause in cycle, check that barriers were not left enabled.
+ ShenandoahCodeRoots::check_barriers(); + if (heap->mode()->is_generational()) { if (_generation->is_global()) { heap->old_generation()->cancel_gc(); } @@ -1040,11 +1042,11 @@ void do_nmethod(nmethod* n) { ShenandoahNMethod* data = ShenandoahNMethod::gc_data(n); ShenandoahNMethodLocker locker(data->lock()); data->oops_do(&_cl, /* fix_relocations = */ true); - ShenandoahNMethod::disarm_nmethod(n); + ShenandoahNMethod::disarm_nmethod_unlocked(n); } }; class ShenandoahConcurrentRootsEvacUpdateTask : public WorkerTask { private: @@ -1227,10 +1229,16 @@ } heap->rebuild_free_set(true /*concurrent*/); _generation->heuristics()->start_idle_span(); + { + // Final pause: update GC barriers to idle state. + ShenandoahCodeRoots::arm_nmethods(); + ShenandoahStackWatermark::change_epoch_id(); + } + { ShenandoahTimingsTracker timing(ShenandoahPhaseTimings::final_update_refs_propagate_gc_state); heap->propagate_gc_state_to_all_threads(); } } @@ -1256,13 +1264,18 @@ heap->concurrent_final_roots(); return true; } void ShenandoahConcurrentGC::op_verify_final() { - assert(ShenandoahVerify, "Should have been checked before"); ShenandoahHeap* const heap = ShenandoahHeap::heap(); - heap->verifier()->verify_after_gc(_generation); + if (ShenandoahVerify) { + heap->verifier()->verify_after_gc(_generation); + } + + // Final pause, arm the nmethods to put barriers down. + ShenandoahCodeRoots::arm_nmethods(); + ShenandoahStackWatermark::change_epoch_id(); } void ShenandoahConcurrentGC::op_cleanup_complete() { ShenandoahWorkerScope scope(ShenandoahHeap::heap()->workers(), ShenandoahWorkerPolicy::calc_workers_for_conc_cleanup(), @@ -1273,10 +1286,16 @@ void ShenandoahConcurrentGC::op_reset_after_collect() { ShenandoahWorkerScope scope(ShenandoahHeap::heap()->workers(), ShenandoahWorkerPolicy::calc_workers_for_conc_reset(), "reset after collection."); + // Final concurrent phase: complete disabling all barriers. 
+ ShenandoahCodeRoots::disarm_nmethods(); + + // Check that barriers were not left enabled. + ShenandoahCodeRoots::check_barriers(); + ShenandoahHeap* const heap = ShenandoahHeap::heap(); if (heap->mode()->is_generational()) { // If we are in the midst of an old gc bootstrap or an old marking, we want to leave the mark bit map of // the young generation intact. In particular, reference processing in the old generation may potentially // need the reachability of a young generation referent of a Reference object in the old generation. diff a/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp @@ -34,10 +34,11 @@ #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahMetrics.hpp" #include "gc/shenandoah/shenandoahMonitoringSupport.hpp" #include "gc/shenandoah/shenandoahOldGeneration.hpp" #include "gc/shenandoah/shenandoahRootProcessor.inline.hpp" +#include "gc/shenandoah/shenandoahStackWatermark.hpp" #include "gc/shenandoah/shenandoahSTWMark.hpp" #include "gc/shenandoah/shenandoahUtils.hpp" #include "gc/shenandoah/shenandoahVerifier.hpp" #include "gc/shenandoah/shenandoahVMOperations.hpp" #include "gc/shenandoah/shenandoahWorkerPolicy.hpp" @@ -298,12 +299,12 @@ op_update_refs(); op_update_roots(); assert(!heap->cancelled_gc(), "STW reference update can not OOM"); } - // Disarm nmethods that armed in concurrent cycle. - // In above case, update roots should disarm them + // Leaving degenerated GC, we need to flip barriers back to idle. 
+ ShenandoahCodeRoots::arm_nmethods(); ShenandoahCodeRoots::disarm_nmethods(); op_cleanup_complete(); if (heap->mode()->is_generational()) { diff a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp @@ -50,10 +50,11 @@ #include "gc/shenandoah/shenandoahMetrics.hpp" #include "gc/shenandoah/shenandoahMonitoringSupport.hpp" #include "gc/shenandoah/shenandoahPhaseTimings.hpp" #include "gc/shenandoah/shenandoahReferenceProcessor.hpp" #include "gc/shenandoah/shenandoahRootProcessor.inline.hpp" +#include "gc/shenandoah/shenandoahStackWatermark.hpp" #include "gc/shenandoah/shenandoahSTWMark.hpp" #include "gc/shenandoah/shenandoahUtils.hpp" #include "gc/shenandoah/shenandoahVerifier.hpp" #include "gc/shenandoah/shenandoahVMOperations.hpp" #include "gc/shenandoah/shenandoahWorkerPolicy.hpp" @@ -126,10 +127,14 @@ // Regardless if progress was made, we record that we completed a "successful" full GC. _generation->heuristics()->record_success_full(); heap->shenandoah_policy()->record_success_full(); + // Leaving full GC, we need to flip barriers back to idle. + ShenandoahCodeRoots::arm_nmethods(); + ShenandoahCodeRoots::disarm_nmethods(); + { ShenandoahTimingsTracker timing(ShenandoahPhaseTimings::full_gc_propagate_gc_state); heap->propagate_gc_state_to_all_threads(); } } diff a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp @@ -22,19 +22,20 @@ * questions. 
* */ +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" #include "gc/shenandoah/shenandoahClosures.inline.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahNMethod.inline.hpp" #include "memory/resourceArea.hpp" #include "runtime/continuation.hpp" #include "runtime/safepointVerifiers.hpp" -ShenandoahNMethod::ShenandoahNMethod(nmethod* nm, GrowableArray& oops, bool non_immediate_oops) : - _nm(nm), _oops(nullptr), _oops_count(0), _unregistered(false), _lock(), _ic_lock() { +ShenandoahNMethod::ShenandoahNMethod(nmethod* nm, GrowableArray& oops, bool non_immediate_oops, GrowableArray& barriers) : + _nm(nm), _oops(nullptr), _oops_count(0), _barriers(nullptr), _barriers_count(0), _unregistered(false), _lock(), _ic_lock() { if (!oops.is_empty()) { _oops_count = oops.length(); _oops = NEW_C_HEAP_ARRAY(oop*, _oops_count, mtGC); for (int c = 0; c < _oops_count; c++) { @@ -42,24 +43,36 @@ } } _has_non_immed_oops = non_immediate_oops; assert_same_oops(); + + if (!barriers.is_empty()) { + _barriers_count = barriers.length(); + _barriers = NEW_C_HEAP_ARRAY(ShenandoahNMethodBarrier, _barriers_count, mtGC); + for (int c = 0; c < _barriers_count; c++) { + _barriers[c] = barriers.at(c); + } + } } ShenandoahNMethod::~ShenandoahNMethod() { if (_oops != nullptr) { FREE_C_HEAP_ARRAY(_oops); } + if (_barriers != nullptr) { + FREE_C_HEAP_ARRAY(_barriers); + } } void ShenandoahNMethod::update() { ResourceMark rm; bool non_immediate_oops = false; GrowableArray oops; + GrowableArray barriers; - detect_reloc_oops(nm(), oops, non_immediate_oops); + parse(nm(), oops, non_immediate_oops, barriers); if (oops.length() != _oops_count) { if (_oops != nullptr) { FREE_C_HEAP_ARRAY(_oops); _oops = nullptr; } @@ -76,48 +89,66 @@ _has_non_immed_oops = non_immediate_oops; assert_same_oops(); } -void ShenandoahNMethod::detect_reloc_oops(nmethod* nm, GrowableArray& oops, bool& has_non_immed_oops) { +void ShenandoahNMethod::parse(nmethod* nm, GrowableArray& 
oops, bool& has_non_immed_oops, GrowableArray& barriers) { has_non_immed_oops = false; - // Find all oops relocations RelocIterator iter(nm); while (iter.next()) { - if (iter.type() != relocInfo::oop_type) { - // Not an oop - continue; - } - - oop_Relocation* r = iter.oop_reloc(); - if (!r->oop_is_immediate()) { - // Non-immediate oop found - has_non_immed_oops = true; - continue; - } - - oop value = r->oop_value(); - if (value != nullptr) { - oop* addr = r->oop_addr(); - shenandoah_assert_correct(addr, value); - shenandoah_assert_not_in_cset_except(addr, value, ShenandoahHeap::heap()->cancelled_gc()); - shenandoah_assert_not_forwarded(addr, value); - // Non-null immediate oop found. null oops can safely be - // ignored since the method will be re-registered if they - // are later patched to be non-null. - oops.push(addr); + switch (iter.type()) { + case relocInfo::oop_type: { + oop_Relocation* r = iter.oop_reloc(); + if (!r->oop_is_immediate()) { + // Non-immediate oop found + has_non_immed_oops = true; + break; + } + + oop value = r->oop_value(); + if (value != nullptr) { + oop* addr = r->oop_addr(); + shenandoah_assert_correct(addr, value); + shenandoah_assert_not_in_cset_except(addr, value, ShenandoahHeap::heap()->cancelled_gc()); + shenandoah_assert_not_forwarded(addr, value); + // Non-null immediate oop found. null oops can safely be + // ignored since the method will be re-registered if they + // are later patched to be non-null. + oops.push(addr); + } + break; + } +#ifdef COMPILER2 + case relocInfo::barrier_type: { + barrier_Relocation* r = iter.barrier_reloc(); + + ShenandoahNMethodBarrier b; + b._pc = r->addr(); + // TODO: Parsing the stub address from generated code is kludgy. It also does not work + // with nmethod relocation, that can copy the nmethod body with barriers already nop-ped out. 
+ b._stub_addr = ShenandoahBarrierSetAssembler::parse_stub_address(b._pc); + // TODO next step: Figure out which GC state we care about at this fastpath check: + // b._gc_state_fast_bit = r->format(); + barriers.push(b); + break; + } +#endif + default: + // We do not care about other relocations. + break; } } } ShenandoahNMethod* ShenandoahNMethod::for_nmethod(nmethod* nm) { ResourceMark rm; bool non_immediate_oops = false; GrowableArray oops; + GrowableArray barriers; - detect_reloc_oops(nm, oops, non_immediate_oops); - return new ShenandoahNMethod(nm, oops, non_immediate_oops); + parse(nm, oops, non_immediate_oops, barriers); + return new ShenandoahNMethod(nm, oops, non_immediate_oops, barriers); } void ShenandoahNMethod::heal_nmethod(nmethod* nm) { ShenandoahNMethod* data = gc_data(nm); assert(data != nullptr, "Sanity"); @@ -135,11 +166,38 @@ // In this case, concurrent root phase is skipped and degenerated GC should be // followed, where nmethods are disarmed. } } +void ShenandoahNMethod::update_barriers() { +#ifdef COMPILER2 + ShenandoahHeap* heap = ShenandoahHeap::heap(); + + for (int c = 0; c < _barriers_count; c++) { + address pc = _barriers[c]._pc; + address stub_addr = _barriers[c]._stub_addr; + if (heap->is_idle()) { + ShenandoahBarrierSetAssembler::patch_branch_to_nop(pc); + } else { + ShenandoahBarrierSetAssembler::patch_nop_to_branch(pc, stub_addr); + } + } +#endif +} + #ifdef ASSERT +void ShenandoahNMethod::assert_barriers(nmethod* nm, bool expected) { +#ifdef COMPILER2 + ShenandoahNMethod* snm = gc_data(nm); + for (int c = 0; c < snm->_barriers_count; c++) { + address pc = snm->_barriers[c]._pc; + bool actual = ShenandoahBarrierSetAssembler::is_active(pc); + assert(expected == actual, "armed expected: %s, actual: %s", BOOL_TO_STR(expected), BOOL_TO_STR(actual)); + } +#endif +} + void ShenandoahNMethod::assert_correct() { ShenandoahHeap* heap = ShenandoahHeap::heap(); for (int c = 0; c < _oops_count; c++) { oop *loc = _oops[c];
assert(_nm->code_contains((address) loc) || _nm->oops_contains(loc), "nmethod should contain the oop*"); @@ -203,12 +261,13 @@ debug_stream.print_cr("recorded oops: %d", _oops_count); for (int i = 0; i < _oops_count; i++) { debug_stream.print_cr("-> " PTR_FORMAT, p2i(_oops[i])); } GrowableArray check; + GrowableArray barriers; bool non_immed; - detect_reloc_oops(nm(), check, non_immed); + parse(nm(), check, non_immed, barriers); debug_stream.print_cr("check oops: %d", check.length()); for (int i = 0; i < check.length(); i++) { debug_stream.print_cr("-> " PTR_FORMAT, p2i(check.at(i))); } fatal("Must match #detected: %d, #recorded: %d, #total: %d, begin: " PTR_FORMAT ", end: " PTR_FORMAT "\n%s", @@ -240,19 +299,21 @@ assert(nm == data->nm(), "Must be same nmethod"); // Prevent updating a nmethod while concurrent iteration is in progress. wait_until_concurrent_iteration_done(); ShenandoahNMethodLocker data_locker(data->lock()); data->update(); + data->update_barriers(); } else { // For a new nmethod, we can safely append it to the list, because // concurrent iteration will not touch it. 
data = ShenandoahNMethod::for_nmethod(nm); assert(data != nullptr, "Sanity"); ShenandoahNMethod::attach_gc_data(nm, data); ShenandoahLocker locker(&_lock); log_register_nmethod(nm); append(data); + data->update_barriers(); } // Disarm new nmethod ShenandoahNMethod::disarm_nmethod(nm); } diff a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.hpp b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.hpp @@ -35,53 +35,64 @@ // Use ShenandoahReentrantLock as ShenandoahNMethodLock typedef ShenandoahReentrantLock ShenandoahNMethodLock; typedef ShenandoahLocker ShenandoahNMethodLocker; +struct ShenandoahNMethodBarrier { + address _pc; + address _stub_addr; +}; + // ShenandoahNMethod tuple records the internal locations of oop slots within reclocation stream in // the nmethod. This allows us to quickly scan the oops without doing the nmethod-internal scans, // that sometimes involves parsing the machine code. Note it does not record the oops themselves, // because it would then require handling these tuples as the new class of roots. 
class ShenandoahNMethod : public CHeapObj { private: nmethod* const _nm; oop** _oops; int _oops_count; + ShenandoahNMethodBarrier* _barriers; + int _barriers_count; bool _has_non_immed_oops; bool _unregistered; ShenandoahNMethodLock _lock; ShenandoahNMethodLock _ic_lock; public: - ShenandoahNMethod(nmethod *nm, GrowableArray& oops, bool has_non_immed_oops); + ShenandoahNMethod(nmethod *nm, GrowableArray& oops, bool has_non_immed_oops, GrowableArray& barriers); ~ShenandoahNMethod(); inline nmethod* nm() const; inline ShenandoahNMethodLock* lock(); inline ShenandoahNMethodLock* ic_lock(); inline void oops_do(OopClosure* oops, bool fix_relocations = false); // Update oops when the nmethod is re-registered void update(); + void update_barriers(); + inline bool is_unregistered() const; static ShenandoahNMethod* for_nmethod(nmethod* nm); static inline ShenandoahNMethodLock* lock_for_nmethod(nmethod* nm); static inline ShenandoahNMethodLock* ic_lock_for_nmethod(nmethod* nm); static void heal_nmethod(nmethod* nm); static inline void heal_nmethod_metadata(ShenandoahNMethod* nmethod_data); static inline void disarm_nmethod(nmethod* nm); + static inline void disarm_nmethod_unlocked(nmethod* nm); static inline ShenandoahNMethod* gc_data(nmethod* nm); static inline void attach_gc_data(nmethod* nm, ShenandoahNMethod* gc_data); + static void assert_barriers(nmethod* nm, bool armed) NOT_DEBUG_RETURN; void assert_correct() NOT_DEBUG_RETURN; void assert_same_oops() NOT_DEBUG_RETURN; private: - static void detect_reloc_oops(nmethod* nm, GrowableArray& oops, bool& _has_non_immed_oops); + static void parse(nmethod* nm, GrowableArray& oops, bool& _has_non_immed_oops, GrowableArray& barriers); }; class ShenandoahNMethodTable; // ShenandoahNMethodList holds registered nmethod data. The list is reference counted. 
diff a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.inline.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.inline.hpp @@ -69,12 +69,27 @@ ShenandoahEvacuateUpdateMetadataClosure cl; nmethod_data->oops_do(&cl, true /*fix relocation*/); } void ShenandoahNMethod::disarm_nmethod(nmethod* nm) { + ShenandoahNMethod* data = gc_data(nm); + assert(data != nullptr, "Sanity"); + ShenandoahNMethodLock* lock = data->lock(); + assert(lock != nullptr, "Must be"); + ShenandoahNMethodLocker locker(lock); + + disarm_nmethod_unlocked(nm); +} + +void ShenandoahNMethod::disarm_nmethod_unlocked(nmethod* nm) { + ShenandoahNMethod* data = gc_data(nm); + assert(data != nullptr, "Sanity"); + assert(data->lock()->owned_by_self(), "Must hold the lock"); + BarrierSetNMethod* const bs = BarrierSet::barrier_set()->barrier_set_nmethod(); if (bs->is_armed(nm)) { + data->update_barriers(); bs->disarm(nm); } } ShenandoahNMethod* ShenandoahNMethod::gc_data(nmethod* nm) { diff a/src/hotspot/share/gc/shenandoah/shenandoahOldGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahOldGC.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahOldGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahOldGC.cpp @@ -134,10 +134,17 @@ // return from here with weak roots in progress. This is not a valid gc state // for any young collections (or allocation failures) that interrupt the old // collection. heap->concurrent_final_roots(); + // Arm the nmethods to possibly flip the barriers to idle. + vmop_entry_final_verify(); + + // Now we are back at concurrent phase, process nmethods to fix their barriers. + // TODO: Is this really safe to do when we overlap with young GC? + ShenandoahCodeRoots::disarm_nmethods(); + // After concurrent old marking finishes, we reclaim immediate garbage. 
Further, we may also want to expand OLD in order // to make room for anticipated promotions and/or for mixed evacuations. Mixed evacuations are especially likely to // follow the end of OLD marking. heap->rebuild_free_set_within_phase(); heap->free_set()->log_status_under_lock(); diff a/src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.cpp @@ -59,10 +59,11 @@ ShenandoahStackWatermark::ShenandoahStackWatermark(JavaThread* jt) : StackWatermark(jt, StackWatermarkKind::gc, _epoch_id), _heap(ShenandoahHeap::heap()), _stats(), + _no_op_cl(), _keep_alive_cl(), _evac_update_oop_cl(), _nm_cl() {} OopClosure* ShenandoahStackWatermark::closure_from_context(void* context) { @@ -71,48 +72,43 @@ _heap->is_concurrent_mark_in_progress(), "Only these two phases"); assert(Thread::current()->is_Worker_thread(), "Unexpected thread passing in context: " PTR_FORMAT, p2i(context)); return reinterpret_cast(context); } else { - if (_heap->is_concurrent_weak_root_in_progress()) { - assert(_heap->is_evacuation_in_progress(), "Nothing to evacuate"); + if (_heap->is_concurrent_weak_root_in_progress() && _heap->is_evacuation_in_progress()) { return &_evac_update_oop_cl; } else if (_heap->is_concurrent_mark_in_progress()) { return &_keep_alive_cl; } else { - ShouldNotReachHere(); - return nullptr; + return &_no_op_cl; } } } void ShenandoahStackWatermark::start_processing_impl(void* context) { NoSafepointVerifier nsv; - ShenandoahHeap* const heap = ShenandoahHeap::heap(); - // Process the non-frame part of the thread - if (heap->is_concurrent_weak_root_in_progress()) { - assert(heap->is_evacuation_in_progress(), "Should not be armed"); + ShenandoahHeap* const heap = ShenandoahHeap::heap(); + if (_heap->is_concurrent_weak_root_in_progress() && heap->is_evacuation_in_progress()) { // Retire the 
TLABs, which will force threads to reacquire their TLABs. // This is needed for two reasons. Strong one: new allocations would be with new freeset, // which would be outside the collection set, so no cset writes would happen there. // Weaker one: new allocations would happen past update watermark, and so less work would // be needed for reference updates (would update the large filler instead). retire_tlab(); - - _jt->oops_do_no_frames(closure_from_context(context), &_nm_cl); } else if (heap->is_concurrent_mark_in_progress()) { // We need to reset all TLABs because they might be below the TAMS, and we need to mark // the objects in them. Do not let mutators allocate any new objects in their current TLABs. // It is also a good place to resize the TLAB sizes for future allocations. retire_tlab(); - - _jt->oops_do_no_frames(closure_from_context(context), &_nm_cl); } else { - ShouldNotReachHere(); + // Can be here for updating barriers. No TLAB retirement is needed. } + // Process the non-frame part of the thread + _jt->oops_do_no_frames(closure_from_context(context), &_nm_cl); + // Publishes the processing start to concurrent threads StackWatermark::start_processing_impl(context); } void ShenandoahStackWatermark::retire_tlab() { @@ -128,10 +124,7 @@ void ShenandoahStackWatermark::process(const frame& fr, RegisterMap& register_map, void* context) { OopClosure* oops = closure_from_context(context); assert(oops != nullptr, "Should not get to here"); ShenandoahHeap* const heap = ShenandoahHeap::heap(); - assert((heap->is_concurrent_weak_root_in_progress() && heap->is_evacuation_in_progress()) || - heap->is_concurrent_mark_in_progress(), - "Only these two phases"); fr.oops_do(oops, &_nm_cl, &register_map, DerivedPointerIterationMode::_directly); } diff a/src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.hpp b/src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.hpp +++
b/src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.hpp @@ -52,10 +52,11 @@ static uint32_t _epoch_id; ShenandoahHeap* const _heap; ThreadLocalAllocStats _stats; // Closures + ShenandoahNoOpClosure _no_op_cl; ShenandoahKeepAliveClosure _keep_alive_cl; ShenandoahEvacuateUpdateRootsClosure _evac_update_oop_cl; ShenandoahOnStackNMethodClosure _nm_cl; public: ShenandoahStackWatermark(JavaThread* jt); diff a/src/hotspot/share/prims/whitebox.cpp b/src/hotspot/share/prims/whitebox.cpp --- a/src/hotspot/share/prims/whitebox.cpp +++ b/src/hotspot/share/prims/whitebox.cpp @@ -1634,10 +1634,15 @@ WB_ENTRY(void, WB_RelocateNMethodFromMethod(JNIEnv* env, jobject o, jobject method, jint blob_type)) ResourceMark rm(THREAD); jmethodID jmid = reflected_method_to_jmid(thread, env, method); CHECK_JNI_EXCEPTION(env); + + if (!NMethodRelocation) { + return; + } + methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* code = mh->code(); if (code != nullptr) { MutexLocker ml_Compile_lock(Compile_lock); CompiledICLocker ic_locker(code); @@ -1653,10 +1658,14 @@ if (address == nullptr) { return; } + if (!NMethodRelocation) { + return; + } + MutexLocker ml_Compile_lock(Compile_lock); MutexLocker ml_CompiledIC_lock(CompiledIC_lock, Mutex::_no_safepoint_check_flag); MutexLocker ml_CodeCache_lock(CodeCache_lock, Mutex::_no_safepoint_check_flag); // Verify that nmethod address is still valid diff a/test/hotspot/jtreg/compiler/hotcode/HotCodeCollectorMoveFunction.java b/test/hotspot/jtreg/compiler/hotcode/HotCodeCollectorMoveFunction.java --- a/test/hotspot/jtreg/compiler/hotcode/HotCodeCollectorMoveFunction.java +++ b/test/hotspot/jtreg/compiler/hotcode/HotCodeCollectorMoveFunction.java @@ -22,10 +22,11 @@ * */ /* * @test + * @ignore * @library /test/lib / * @build jdk.test.whitebox.WhiteBox * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox * @run main/othervm -Xbootclasspath/a:. 
-Xbatch -XX:-TieredCompilation -XX:+SegmentedCodeCache -XX:+UnlockExperimentalVMOptions -XX:+HotCodeHeap * -XX:+NMethodRelocation -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:HotCodeIntervalSeconds=0 -XX:HotCodeCallLevel=0 diff a/test/hotspot/jtreg/compiler/hotcode/StressHotCodeCollector.java b/test/hotspot/jtreg/compiler/hotcode/StressHotCodeCollector.java --- a/test/hotspot/jtreg/compiler/hotcode/StressHotCodeCollector.java +++ b/test/hotspot/jtreg/compiler/hotcode/StressHotCodeCollector.java @@ -23,10 +23,11 @@ * */ /* * @test + * @ignore * @key randomness * @library /test/lib / * @build jdk.test.whitebox.WhiteBox * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox * @run main/othervm -Xbootclasspath/a:. -Xcomp -XX:-TieredCompilation -XX:+UnlockExperimentalVMOptions -XX:+HotCodeHeap -XX:+NMethodRelocation diff a/test/hotspot/jtreg/compiler/whitebox/DeoptimizeRelocatedNMethod.java b/test/hotspot/jtreg/compiler/whitebox/DeoptimizeRelocatedNMethod.java --- a/test/hotspot/jtreg/compiler/whitebox/DeoptimizeRelocatedNMethod.java +++ b/test/hotspot/jtreg/compiler/whitebox/DeoptimizeRelocatedNMethod.java @@ -22,10 +22,11 @@ * */ /* * @test + * @ignore * @bug 8316694 * @library /test/lib / * @modules java.base/jdk.internal.misc java.management * @requires vm.opt.DeoptimizeALot != true * @requires vm.flavor == "server" & (vm.opt.TieredStopAtLevel == null | vm.opt.TieredStopAtLevel == 4) diff a/test/hotspot/jtreg/compiler/whitebox/RelocateNMethod.java b/test/hotspot/jtreg/compiler/whitebox/RelocateNMethod.java --- a/test/hotspot/jtreg/compiler/whitebox/RelocateNMethod.java +++ b/test/hotspot/jtreg/compiler/whitebox/RelocateNMethod.java @@ -22,10 +22,11 @@ * */ /* * @test + * @ignore * @bug 8316694 * @summary test that nmethod::relocate() correctly creates a new nmethod * @library /test/lib / * @modules java.base/jdk.internal.misc java.management * @requires vm.opt.DeoptimizeALot != true diff 
a/test/hotspot/jtreg/compiler/whitebox/RelocateNMethodMultiplePaths.java b/test/hotspot/jtreg/compiler/whitebox/RelocateNMethodMultiplePaths.java --- a/test/hotspot/jtreg/compiler/whitebox/RelocateNMethodMultiplePaths.java +++ b/test/hotspot/jtreg/compiler/whitebox/RelocateNMethodMultiplePaths.java @@ -22,10 +22,11 @@ * */ /* * @test id=C1 + * @ignore * @bug 8316694 * @requires vm.debug == true * @summary test that relocated nmethod is correctly deoptimized * @library /test/lib / * @modules java.base/jdk.internal.misc java.management @@ -37,10 +38,11 @@ * compiler.whitebox.RelocateNMethodMultiplePaths */ /* * @test id=C2 + * @ignore * @bug 8316694 * @requires vm.debug == true * @summary test that relocated nmethod is correctly deoptimized * @library /test/lib / * @modules java.base/jdk.internal.misc java.management diff a/test/hotspot/jtreg/serviceability/jvmti/NMethodRelocation/NMethodRelocationTest.java b/test/hotspot/jtreg/serviceability/jvmti/NMethodRelocation/NMethodRelocationTest.java --- a/test/hotspot/jtreg/serviceability/jvmti/NMethodRelocation/NMethodRelocationTest.java +++ b/test/hotspot/jtreg/serviceability/jvmti/NMethodRelocation/NMethodRelocationTest.java @@ -21,10 +21,11 @@ * questions. */ /* * @test + * @ignore * @bug 8316694 * @summary Verify that nmethod relocation posts the correct JVMTI events * @requires vm.jvmti & * vm.gc != "Epsilon" & * vm.flavor == "server" &