< prev index next >

src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp

Print this page

  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"

  33 #include "gc/shenandoah/shenandoahRuntime.hpp"
  34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  35 #include "interpreter/interp_masm.hpp"
  36 #include "interpreter/interpreter.hpp"

  37 #include "runtime/javaThread.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_LIRAssembler.hpp"
  41 #include "c1/c1_MacroAssembler.hpp"
  42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  43 #endif
  44 #ifdef COMPILER2
  45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  46 #include "opto/output.hpp"
  47 #endif
  48 
  49 #define __ masm->
  50 
  51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
  52                                                        Register src, Register dst, Register count, RegSet saved_regs) {
  53   if (is_oop) {
  54     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  55     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  56 

 749   // tmp2 = effective address
 750   __ lea(tmp2, address);
 751 
 752   // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
 753   __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
 754 
 755   if (UseCondCardMark) {
 756     Label L_already_dirty;
 757     __ ldrb(tmp1, Address(tmp2));
 758     __ cbz(tmp1, L_already_dirty);
 759     __ strb(zr, Address(tmp2));
 760     __ bind(L_already_dirty);
 761   } else {
 762     __ strb(zr, Address(tmp2));
 763   }
 764 }
 765 
 766 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
 767   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 768   PhaseOutput* const output = Compile::current()->output();
 769   Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
 770 
 771   // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
 772   // We'll use that information to decide whether we need a far jump to the
 773   // stub entry point or not. In scratch_emit_size mode we don't bind entry()
 774   // because otherwise it will be rebound when we later emit the instructions
 775   // for real.
 776   if (_needs_far_jump) {
 777     __ ldrb(tmp, gc_state_fast);
 778     __ cbz(tmp, *continuation());
 779     __ b(output->in_scratch_emit_size() ? *continuation() : *entry());
 780   } else {
 781     __ ldrb(tmp, gc_state_fast);
 782     __ cbnz(tmp, output->in_scratch_emit_size() ? *continuation() : *entry());
 783   }
 784 
 785   // This is were the slowpath stub will return to or the code above will
 786   // jump to if the checks are false
 787   __ bind(*continuation());
 788 }
 789 


















































 790 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
 791   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 792   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
 793   PhaseOutput* const output = Compile::current()->output();
 794 
 795   // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
 796   // We'll use that information to decide whether we need a far jump to the
 797   // stub entry point or not. In scratch_emit_size mode we don't bind entry()
 798   // because otherwise it will be rebound when we later emit the instructions
 799   // for real.
 800   if (!output->in_scratch_emit_size()) {
 801     __ bind(*entry());
 802   }
 803 
 804   // If we need to load ourselves, do it here.
 805   if (_do_load) {
 806     if (_narrow) {
 807       __ ldrw(_obj, _addr);
 808     } else {
 809       __ ldr(_obj, _addr);

 840     keepalive(masm, continuation());
 841   } else if (_needs_load_ref_barrier) {
 842     lrb(masm);
 843   } else {
 844     ShouldNotReachHere();
 845   }
 846 }
 847 
 848 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
 849   if (_needs_far_jump) {
 850     Label L_short_jump;
 851     __ cbnz(reg, L_short_jump);
 852     __ b(*continuation());
 853     __ bind(L_short_jump);
 854   } else {
 855     __ cbz(reg, *continuation());
 856   }
 857 }
 858 
 859 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
 860   Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
 861   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 862   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 863   Label L_through, L_slowpath;
 864 
 865   // If another barrier is enabled as well, do a runtime check for a specific barrier.
 866   if (_needs_load_ref_barrier) {
 867     assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
 868     __ ldrb(_tmp1, gcstate);
 869     __ cbz(_tmp1, L_through);







 870   }
 871 
 872   // Fast-path: put object into buffer.
 873   // If buffer is already full, go slow.
 874   __ ldr(_tmp1, index);
 875   __ cbz(_tmp1, L_slowpath);
 876   __ sub(_tmp1, _tmp1, wordSize);
 877   __ str(_tmp1, index);
 878   __ ldr(_tmp2, buffer);
 879 
 880   // Store the object in queue.
 881   // If object is narrow, we need to decode it before inserting.
 882   if (_narrow) {
 883     __ add(_tmp2, _tmp2, _tmp1);
 884     __ decode_heap_oop_not_null(_tmp1, _obj);
 885     __ str(_tmp1, Address(_tmp2));
 886   } else {
 887     // Buffer is 64-bit address, must be in base register.
 888     __ str(_obj, Address(_tmp2, _tmp1));
 889   }

 899   __ bind(L_slowpath);
 900 
 901   {
 902     SaveLiveRegisters slr(&masm, this);
 903 
 904     // Go to runtime and handle the rest there.
 905     __ mov(c_rarg0, _obj);
 906     __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
 907     __ blr(lr);
 908   }
 909   if (L_done != nullptr) {
 910     __ b(*L_done);
 911   } else {
 912     __ bind(L_through);
 913   }
 914 }
 915 
 916 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
 917   Label L_slow;
 918 
 919   // If another barrier is enabled as well, do a runtime check for a specific barrier.
 920   if (_needs_keep_alive_barrier) {
 921     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
 922     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
 923     __ ldrb(_tmp1, gc_state_fast);
 924     maybe_far_jump_if_zero(masm, _tmp1);
 925   }
 926 
 927   // If weak references are being processed, weak/phantom loads need to go slow,
 928   // regardless of their cset status.
 929   if (_needs_load_ref_weak_barrier) {
 930     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
 931     __ ldrb(_tmp1, gc_state_fast);
 932     __ cbnz(_tmp1, L_slow);












 933   }
 934 
 935   // Cset-check. Fall-through to slow if in collection set.
 936   bool is_aot = AOTCodeCache::is_on_for_dump();
 937   if (!is_aot) {
 938     __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
 939     if (_narrow) {
 940       __ decode_heap_oop_not_null(_tmp2, _obj);
 941       __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 942     } else {
 943       __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 944     }
 945   } else {
 946     // Generating AOT code, pull the cset bitmap and region shift from AOT table.
 947     if (_narrow) {
 948       __ decode_heap_oop_not_null(_tmp1, _obj);
 949     } else {
 950       __ mov(_tmp1, _obj);
 951     }
 952     __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));

  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  33 #include "gc/shenandoah/shenandoahNMethod.inline.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interp_masm.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "nativeInst_aarch64.hpp"
  39 #include "runtime/javaThread.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #ifdef COMPILER1
  42 #include "c1/c1_LIRAssembler.hpp"
  43 #include "c1/c1_MacroAssembler.hpp"
  44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  45 #endif
  46 #ifdef COMPILER2
  47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  48 #include "opto/output.hpp"
  49 #endif
  50 
  51 #define __ masm->
  52 
  53 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
  54                                                        Register src, Register dst, Register count, RegSet saved_regs) {
  55   if (is_oop) {
  56     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  57     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  58 

 751   // tmp2 = effective address
 752   __ lea(tmp2, address);
 753 
 754   // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
 755   __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
 756 
 757   if (UseCondCardMark) {
 758     Label L_already_dirty;
 759     __ ldrb(tmp1, Address(tmp2));
 760     __ cbz(tmp1, L_already_dirty);
 761     __ strb(zr, Address(tmp2));
 762     __ bind(L_already_dirty);
 763   } else {
 764     __ strb(zr, Address(tmp2));
 765   }
 766 }
 767 
 768 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
 769   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 770   PhaseOutput* const output = Compile::current()->output();

 771 
 772   // Emit the unconditional branch in the first version of the method.
 773   // Let the rest of runtime figure out how to manage it.
 774   if (output->in_scratch_emit_size()) {
 775     // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
 776     // We'll use that information to decide whether we need a far jump to the
 777     // stub entry point or not. In scratch_emit_size mode we don't bind entry()
 778     // because otherwise it will be rebound when we later emit the instructions
 779     // for real.
 780     __ nop();
 781   } else {
 782     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(test_state));
 783     __ b(*entry());
 784   }
 785 
 786   // This is were the slowpath stub will return to or the code above will
 787   // jump to if the checks are false
 788   __ bind(*continuation());
 789 }
 790 
 791 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
 792   NativeInstruction* ni = nativeInstruction_at(pc);
 793   assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
 794   NativeJump* jmp = nativeJump_at(pc);
 795   return jmp->jump_destination();
 796 }
 797 
 798 static bool is_nop(address pc) {
 799   if (*(pc + 0) != 0x1F) return false;
 800   if (*(pc + 1) != 0x20) return false;
 801   if (*(pc + 2) != 0x03) return false;
 802   if (*(pc + 3) != 0xD5) return false;
 803   return true;
 804 }
 805 
 806 static void insert_nop(address pc) {
 807   *reinterpret_cast<int32_t*>(pc) = 0xD503201F;
 808   assert(is_nop(pc), "Should be");
 809   ICache::invalidate_range(pc, 4);
 810 }
 811 
 812 static void check_at(bool cond, address pc, const char* msg) {
 813   assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x",
 814          msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3));
 815 }
 816 
 817 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
 818   NativeInstruction* ni = nativeInstruction_at(pc);
 819   return ni->is_jump();
 820 }
 821 
 822 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
 823   NativeInstruction* ni = nativeInstruction_at(pc);
 824   if (ni->is_jump()) {
 825     insert_nop(pc);
 826   } else {
 827     check_at(is_nop(pc), pc, "Should already be nop");
 828   }
 829 }
 830 
 831 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
 832   NativeInstruction* ni = nativeInstruction_at(pc);
 833   if (is_nop(pc)) {
 834     NativeJump::insert(pc, stub_addr);
 835   } else {
 836     check_at(ni->is_jump(), pc, "Should already be jump");
 837     check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
 838   }
 839 }
 840 
 841 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
 842   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 843   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
 844   PhaseOutput* const output = Compile::current()->output();
 845 
 846   // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
 847   // We'll use that information to decide whether we need a far jump to the
 848   // stub entry point or not. In scratch_emit_size mode we don't bind entry()
 849   // because otherwise it will be rebound when we later emit the instructions
 850   // for real.
 851   if (!output->in_scratch_emit_size()) {
 852     __ bind(*entry());
 853   }
 854 
 855   // If we need to load ourselves, do it here.
 856   if (_do_load) {
 857     if (_narrow) {
 858       __ ldrw(_obj, _addr);
 859     } else {
 860       __ ldr(_obj, _addr);

 891     keepalive(masm, continuation());
 892   } else if (_needs_load_ref_barrier) {
 893     lrb(masm);
 894   } else {
 895     ShouldNotReachHere();
 896   }
 897 }
 898 
 899 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
 900   if (_needs_far_jump) {
 901     Label L_short_jump;
 902     __ cbnz(reg, L_short_jump);
 903     __ b(*continuation());
 904     __ bind(L_short_jump);
 905   } else {
 906     __ cbz(reg, *continuation());
 907   }
 908 }
 909 
 910 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {

 911   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 912   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 913   Label L_through, L_slowpath;
 914 
 915   // If another barrier is enabled as well, do a check for a specific barrier.
 916   if (_needs_load_ref_barrier) {
 917     assert(L_done == nullptr, "Should be");
 918     // Emit the unconditional branch in the first version of the method.
 919     // Let the rest of runtime figure out how to manage it.
 920     // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
 921     char state_to_check = ShenandoahHeap::MARKING;
 922     Label L_over;
 923     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
 924     __ b(L_over);
 925     __ b(L_through);
 926     __ bind(L_over);
 927   }
 928 
 929   // Fast-path: put object into buffer.
 930   // If buffer is already full, go slow.
 931   __ ldr(_tmp1, index);
 932   __ cbz(_tmp1, L_slowpath);
 933   __ sub(_tmp1, _tmp1, wordSize);
 934   __ str(_tmp1, index);
 935   __ ldr(_tmp2, buffer);
 936 
 937   // Store the object in queue.
 938   // If object is narrow, we need to decode it before inserting.
 939   if (_narrow) {
 940     __ add(_tmp2, _tmp2, _tmp1);
 941     __ decode_heap_oop_not_null(_tmp1, _obj);
 942     __ str(_tmp1, Address(_tmp2));
 943   } else {
 944     // Buffer is 64-bit address, must be in base register.
 945     __ str(_obj, Address(_tmp2, _tmp1));
 946   }

 956   __ bind(L_slowpath);
 957 
 958   {
 959     SaveLiveRegisters slr(&masm, this);
 960 
 961     // Go to runtime and handle the rest there.
 962     __ mov(c_rarg0, _obj);
 963     __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
 964     __ blr(lr);
 965   }
 966   if (L_done != nullptr) {
 967     __ b(*L_done);
 968   } else {
 969     __ bind(L_through);
 970   }
 971 }
 972 
 973 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
 974   Label L_slow;
 975 








 976   // If weak references are being processed, weak/phantom loads need to go slow,
 977   // regardless of their cset status.
 978   if (_needs_load_ref_weak_barrier) {
 979     char state_to_check = ShenandoahHeap::WEAK_ROOTS;
 980     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
 981     __ b(L_slow);
 982   }
 983 
 984   if (_needs_keep_alive_barrier) {
 985     // Emit the unconditional branch in the first version of the method.
 986     // Let the rest of runtime figure out how to manage it.
 987     // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
 988     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
 989     Label L_over;
 990     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
 991     __ b(L_over);
 992     __ b(*continuation());
 993     __ bind(L_over);
 994   }
 995 
 996   // Cset-check. Fall-through to slow if in collection set.
 997   bool is_aot = AOTCodeCache::is_on_for_dump();
 998   if (!is_aot) {
 999     __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1000     if (_narrow) {
1001       __ decode_heap_oop_not_null(_tmp2, _obj);
1002       __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1003     } else {
1004       __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1005     }
1006   } else {
1007     // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1008     if (_narrow) {
1009       __ decode_heap_oop_not_null(_tmp1, _obj);
1010     } else {
1011       __ mov(_tmp1, _obj);
1012     }
1013     __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
< prev index next >