< prev index next >

src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp

Print this page

 686   }
 687 
 688   // If we could not find a non-live register, select the live fallback:
 689   if (tmp == noreg) {
 690     tmp = fallback_live;
 691     selected_live = true;
 692   } else {
 693     selected_live = false;
 694   }
 695 
 696   assert(tmp != noreg, "successfully selected");
 697   assert_different_registers(tmp, reg1);
 698   assert_different_registers(tmp, addr.base());
 699   assert_different_registers(tmp, addr.index());
 700   return tmp;
 701 }
 702 
 703 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
 704   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 705 
 706   int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(test_state);
 707   Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
 708   __ ldrb(rscratch1, gc_state_fast);
 709   if (_test_and_branch_reachable) {
 710     __ tbnz(rscratch1, bit_to_check, _test_and_branch_reachable_entry);
 711   } else {
 712     __ tbz(rscratch1, bit_to_check, *continuation());
 713     __ b(*entry());





































































 714   }
 715   // This is were the slowpath stub will return to or the code above will
 716   // jump to if the checks are false
 717   __ bind(*continuation());
 718 }
 719 
 720 bool needs_acquiring_load_exclusive(const MachNode *n) {
 721   assert(n->is_CAS(true), "expecting a compare and swap");
 722   if (n->is_CAS(false)) {
 723     assert(n->has_trailing_membar(), "expected trailing membar");
 724   } else {
 725     return n->has_trailing_membar();
 726   }
 727 
 728   // so we can just return true here
 729   return true;
 730 }
 731 
 732 #undef __
 733 #define __ masm->
 734 
 735 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
 736     Register oldval, Register newval, bool exchange, bool narrow, bool weak) {
 737   bool acquire = needs_acquiring_load_exclusive(node);

 860   } else {
 861     if (acquire) {
 862       __ ldar(dst, src.base());
 863     } else {
 864       __ ldr(dst, src);
 865     }
 866   }
 867 
 868   // Post-barrier: LRB / KA / weak-root processing.
 869   if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
 870     ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, dst, src, narrow, /* do_load: */ false, __ offset());
 871     char check = 0;
 872     check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node)    ? ShenandoahHeap::MARKING : 0;
 873     check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node)      ? ShenandoahHeap::HAS_FORWARDED : 0;
 874     check |= ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
 875     stub->enter_if_gc_state(*masm, check);
 876   }
 877 }
 878 
 879 void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Address address) {
 880   if (!ShenandoahBarrierStubC2::needs_card_barrier(node)) {
 881     return;
 882   }
 883 
 884   assert(CardTable::dirty_card_val() == 0, "must be");
 885   Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
 886 
 887   // rscratch1 = card table base (holder)
 888   Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 889   __ ldr(rscratch1, curr_ct_holder_addr);
 890 
 891   // rscratch2 = addr
 892   __ lea(rscratch2, address);
 893 
 894   // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
 895   __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
 896 
 897   if (UseCondCardMark) {
 898     Label L_already_dirty;
 899     __ ldrb(rscratch1, Address(rscratch2));
 900     __ cbz(rscratch1, L_already_dirty);

 927   PhaseOutput* const output = Compile::current()->output();
 928   if (output->in_scratch_emit_size()) {
 929     return;
 930   }
 931 
 932   // Assume that each trampoline is one single instruction and that the stubs
 933   // will follow immediatelly after the _code section. Therefore, we are
 934   // checking if the distance between the fastpath branch and the
 935   // trampoline/entry of the current Stub is less than 32K.
 936   const int code_size = output->buffer_sizing_data()->_code;
 937   const int trampoline_offset = trampoline_stubs_count() * NativeInstruction::instruction_size;
 938   _test_and_branch_reachable = aarch64_test_and_branch_reachable(_fastpath_branch_offset, code_size + trampoline_offset);
 939   if (_test_and_branch_reachable) {
 940     inc_trampoline_stubs_count();
 941   }
 942 }
 943 
 944 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
 945   // If we reach here with _skip_trampoline set it means that earlier we
 946   // emitted a trampoline to this stub and now we need to emit the actual stub.
 947   if (_skip_trampoline) {
 948     emit_code_actual(masm);
 949   } else {
 950     _skip_trampoline = true;
 951 
 952     // The fastpath executes two branch instructions to reach this stub, let's
 953     // just emit the stub here and not add a third one.
 954     if (!_test_and_branch_reachable) {
 955       // By registering the stub again, after setting _skip_trampoline, we'll
 956       // effectivelly cause the stub to be emitted the next time ::emit_code is
 957       // called.
 958       ShenandoahBarrierStubC2::register_stub(this);
 959       return;
 960     }
 961 
 962     // This is entry point when coming from fastpath, IFF it's able to reach here
 963     // with a test and branch instruction, otherwise the entry is
 964     // ShenandoahBarrierStubC2::entry();
 965     const int target_offset = __ offset();
 966     __ bind(_test_and_branch_reachable_entry);
 967 

1059   if (selected_live) {
1060     pop_save_register(masm, _obj);
1061   }
1062 
1063   // Go back to fast path
1064   __ b(*continuation());
1065 }
1066 
1067 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1, Register tmp2) {
1068   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1069   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1070   Label L_runtime;
1071   Label L_done;
1072 
1073   // The node doesn't even need keepalive barrier, just don't check anything else
1074   if (!_needs_keep_alive_barrier) {
1075     return ;
1076   }
1077 
1078   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1079   if (_needs_load_ref_barrier) {

1080     Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1081     __ ldrb(tmp1, gcs_addr);
1082     __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
1083   }
1084 
1085   // If buffer is full, call into runtime.
1086   __ ldr(tmp1, index);
1087   __ cbz(tmp1, L_runtime);
1088 
1089   // The buffer is not full, store value into it.
1090   __ sub(tmp1, tmp1, wordSize);
1091   __ str(tmp1, index);
1092   __ ldr(tmp2, buffer);
1093   __ str(obj, Address(tmp2, tmp1));
1094   __ b(L_done);
1095 
1096   // Runtime call
1097   __ bind(L_runtime);
1098 
1099   preserve(obj);

1109 
1110     // Restore the clobbered registers.
1111     if (clobbered_c_rarg0) {
1112       pop_save_register(masm, c_rarg0);
1113     }
1114   }
1115 
1116   __ bind(L_done);
1117 }
1118 
1119 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp) {
1120   Label L_done;
1121 
1122   // The node doesn't even need LRB barrier, just don't check anything else
1123   if (!_needs_load_ref_barrier) {
1124     return ;
1125   }
1126 
1127   if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
1128     // If another barrier is enabled as well, do a runtime check for a specific barrier.
1129     if (_needs_keep_alive_barrier) {

1130       char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1131       int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
1132       Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
1133       __ ldrb(tmp, gc_state_fast);
1134       __ tbz(tmp, bit_to_check, L_done);
1135     }
1136 
1137     // Weak/phantom loads always need to go to runtime. For strong refs we
1138     // check if the object in cset, if they are not, then we are done with LRB.
1139     assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
1140     __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
1141     __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1142     __ ldrb(tmp, Address(tmp, 0));
1143     __ cbz(tmp, L_done);
1144   }
1145 
1146   dont_preserve(obj);
1147   {
1148     // Shuffle in the arguments. The end result should be:
1149     //   c_rarg0 <-- obj

 686   }
 687 
 688   // If we could not find a non-live register, select the live fallback:
 689   if (tmp == noreg) {
 690     tmp = fallback_live;
 691     selected_live = true;
 692   } else {
 693     selected_live = false;
 694   }
 695 
 696   assert(tmp != noreg, "successfully selected");
 697   assert_different_registers(tmp, reg1);
 698   assert_different_registers(tmp, addr.base());
 699   assert_different_registers(tmp, addr.index());
 700   return tmp;
 701 }
 702 
// Emit the fastpath gc-state check that decides whether to enter this stub.
// test_state is a mask of ShenandoahHeap gc-state bits (callers OR together
// MARKING / HAS_FORWARDED / WEAK_ROOTS); the default mode tests the single
// fast-state bit derived from that mask via gc_state_to_fast_bit().
// Clobbers rscratch1 (default mode, and debug-only code in hotpatch mode).
void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
  // Keep these instructions out of the "real code" accounting.
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  if (ShenandoahGCStateCheckRemove) {
    // Unrealistic: remove all barrier fastpath checks.
  } else if (ShenandoahGCStateCheckHotpatch) {
    // Emit the unconditional branch in the first version of the method.
    // Let the rest of runtime figure out how to manage it.
    // The relocation marks this site so the runtime can later patch the
    // branch to a NOP and back (see patch_branch_to_nop / patch_nop_to_branch);
    // the relocation must immediately precede the branch instruction.
    __ relocate(barrier_Relocation::spec());
    __ b(*entry());

#ifdef ASSERT
    // Debug-only self-check: if execution falls through a NOP-ed branch while
    // the gc state is non-zero, the barrier was wrongly disabled -- halt.
    Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
    __ ldrb(rscratch1, gc_state_fast);
    __ cbz(rscratch1, *continuation());
    __ hlt(0); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
#endif
    __ bind(*continuation());
  } else {
    // Default mode: load the thread-local fast gc-state byte and test the
    // bit corresponding to test_state.
    int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(test_state);
    Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
    __ ldrb(rscratch1, gc_state_fast);
    if (_test_and_branch_reachable) {
      // Stub entry is within test-and-branch range: one tbnz suffices.
      __ tbnz(rscratch1, bit_to_check, _test_and_branch_reachable_entry);
    } else {
      // Out of tbnz range: skip over an unconditional jump to the stub.
      __ tbz(rscratch1, bit_to_check, *continuation());
      __ b(*entry());
    }

    // This is where the slowpath stub will return to or the code above will
    // jump to if the checks are false
    __ bind(*continuation());
  }
}
 737 
 738 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
 739   NativeInstruction* ni = nativeInstruction_at(pc);
 740   assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
 741   NativeJump* jmp = nativeJump_at(pc);
 742   return jmp->jump_destination();
 743 }
 744 
 745 void insert_nop(address pc) {
 746   *(pc + 0) = 0x1F;
 747   *(pc + 1) = 0x20;
 748   *(pc + 2) = 0x03;
 749   *(pc + 3) = 0xD5;
 750   ICache::invalidate_range(pc, 4);
 751 }
 752 
 753 bool is_nop(address pc) {
 754   if (*(pc + 0) != 0x1F) return false;
 755   if (*(pc + 1) != 0x20) return false;
 756   if (*(pc + 2) != 0x03) return false;
 757   if (*(pc + 3) != 0xD5) return false;
 758   return true;
 759 }
 760 
// Assert helper for patch sites: if cond is false, fail with msg, the PC,
// and a hex dump of the bytes at pc.
// NOTE(review): the dump prints five bytes (*(pc+0) .. *(pc+4)) although an
// AArch64 instruction is four bytes -- presumably one extra byte of context;
// confirm this is intentional.
void check_at(bool cond, address pc, const char* msg) {
  assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
         msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
}
 765 
 766 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
 767   NativeInstruction* ni = nativeInstruction_at(pc);
 768   if (ni->is_jump()) {
 769     insert_nop(pc);
 770   } else {
 771     check_at(is_nop(pc), pc, "Should already be nop");
 772   }
 773 }
 774 
 775 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
 776   NativeInstruction* ni = nativeInstruction_at(pc);
 777   if (is_nop(pc)) {
 778     NativeJump::insert(pc, stub_addr);
 779   } else {
 780     check_at(ni->is_jump(), pc, "Should already be jump");
 781     check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
 782   }



 783 }
 784 
 785 bool needs_acquiring_load_exclusive(const MachNode *n) {
 786   assert(n->is_CAS(true), "expecting a compare and swap");
 787   if (n->is_CAS(false)) {
 788     assert(n->has_trailing_membar(), "expected trailing membar");
 789   } else {
 790     return n->has_trailing_membar();
 791   }
 792 
 793   // so we can just return true here
 794   return true;
 795 }
 796 
 797 #undef __
 798 #define __ masm->
 799 
 800 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
 801     Register oldval, Register newval, bool exchange, bool narrow, bool weak) {
 802   bool acquire = needs_acquiring_load_exclusive(node);

 925   } else {
 926     if (acquire) {
 927       __ ldar(dst, src.base());
 928     } else {
 929       __ ldr(dst, src);
 930     }
 931   }
 932 
 933   // Post-barrier: LRB / KA / weak-root processing.
 934   if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
 935     ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, dst, src, narrow, /* do_load: */ false, __ offset());
 936     char check = 0;
 937     check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node)    ? ShenandoahHeap::MARKING : 0;
 938     check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node)      ? ShenandoahHeap::HAS_FORWARDED : 0;
 939     check |= ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
 940     stub->enter_if_gc_state(*masm, check);
 941   }
 942 }
 943 
 944 void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Address address) {
 945   if (ShenandoahSkipBarriers || (node->barrier_data() & ShenandoahBitCardMark) == 0) {
 946     return;
 947   }
 948 
 949   assert(CardTable::dirty_card_val() == 0, "must be");
 950   Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
 951 
 952   // rscratch1 = card table base (holder)
 953   Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 954   __ ldr(rscratch1, curr_ct_holder_addr);
 955 
 956   // rscratch2 = addr
 957   __ lea(rscratch2, address);
 958 
 959   // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
 960   __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
 961 
 962   if (UseCondCardMark) {
 963     Label L_already_dirty;
 964     __ ldrb(rscratch1, Address(rscratch2));
 965     __ cbz(rscratch1, L_already_dirty);

 992   PhaseOutput* const output = Compile::current()->output();
 993   if (output->in_scratch_emit_size()) {
 994     return;
 995   }
 996 
 997   // Assume that each trampoline is one single instruction and that the stubs
 998   // will follow immediatelly after the _code section. Therefore, we are
 999   // checking if the distance between the fastpath branch and the
1000   // trampoline/entry of the current Stub is less than 32K.
1001   const int code_size = output->buffer_sizing_data()->_code;
1002   const int trampoline_offset = trampoline_stubs_count() * NativeInstruction::instruction_size;
1003   _test_and_branch_reachable = aarch64_test_and_branch_reachable(_fastpath_branch_offset, code_size + trampoline_offset);
1004   if (_test_and_branch_reachable) {
1005     inc_trampoline_stubs_count();
1006   }
1007 }
1008 
1009 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1010   // If we reach here with _skip_trampoline set it means that earlier we
1011   // emitted a trampoline to this stub and now we need to emit the actual stub.
1012   if (ShenandoahGCStateCheckHotpatch || _skip_trampoline) {
1013     emit_code_actual(masm);
1014   } else {
1015     _skip_trampoline = true;
1016 
1017     // The fastpath executes two branch instructions to reach this stub, let's
1018     // just emit the stub here and not add a third one.
1019     if (!_test_and_branch_reachable) {
1020       // By registering the stub again, after setting _skip_trampoline, we'll
1021       // effectivelly cause the stub to be emitted the next time ::emit_code is
1022       // called.
1023       ShenandoahBarrierStubC2::register_stub(this);
1024       return;
1025     }
1026 
1027     // This is entry point when coming from fastpath, IFF it's able to reach here
1028     // with a test and branch instruction, otherwise the entry is
1029     // ShenandoahBarrierStubC2::entry();
1030     const int target_offset = __ offset();
1031     __ bind(_test_and_branch_reachable_entry);
1032 

1124   if (selected_live) {
1125     pop_save_register(masm, _obj);
1126   }
1127 
1128   // Go back to fast path
1129   __ b(*continuation());
1130 }
1131 
1132 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1, Register tmp2) {
1133   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1134   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1135   Label L_runtime;
1136   Label L_done;
1137 
1138   // The node doesn't even need keepalive barrier, just don't check anything else
1139   if (!_needs_keep_alive_barrier) {
1140     return ;
1141   }
1142 
1143   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1144   // Hotpatched GC checks only care about idle/non-idle state, so needs a check anyhow.
1145   if (_needs_load_ref_barrier || ShenandoahGCStateCheckHotpatch) {
1146     Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1147     __ ldrb(tmp1, gcs_addr);
1148     __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
1149   }
1150 
1151   // If buffer is full, call into runtime.
1152   __ ldr(tmp1, index);
1153   __ cbz(tmp1, L_runtime);
1154 
1155   // The buffer is not full, store value into it.
1156   __ sub(tmp1, tmp1, wordSize);
1157   __ str(tmp1, index);
1158   __ ldr(tmp2, buffer);
1159   __ str(obj, Address(tmp2, tmp1));
1160   __ b(L_done);
1161 
1162   // Runtime call
1163   __ bind(L_runtime);
1164 
1165   preserve(obj);

1175 
1176     // Restore the clobbered registers.
1177     if (clobbered_c_rarg0) {
1178       pop_save_register(masm, c_rarg0);
1179     }
1180   }
1181 
1182   __ bind(L_done);
1183 }
1184 
1185 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp) {
1186   Label L_done;
1187 
1188   // The node doesn't even need LRB barrier, just don't check anything else
1189   if (!_needs_load_ref_barrier) {
1190     return ;
1191   }
1192 
1193   if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
1194     // If another barrier is enabled as well, do a runtime check for a specific barrier.
1195     // Hotpatched GC checks only care about idle/non-idle state, so needs a check anyhow.
1196     if (_needs_keep_alive_barrier || ShenandoahGCStateCheckHotpatch) {
1197       char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1198       int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
1199       Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
1200       __ ldrb(tmp, gc_state_fast);
1201       __ tbz(tmp, bit_to_check, L_done);
1202     }
1203 
1204     // Weak/phantom loads always need to go to runtime. For strong refs we
1205     // check if the object in cset, if they are not, then we are done with LRB.
1206     assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
1207     __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
1208     __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1209     __ ldrb(tmp, Address(tmp, 0));
1210     __ cbz(tmp, L_done);
1211   }
1212 
1213   dont_preserve(obj);
1214   {
1215     // Shuffle in the arguments. The end result should be:
1216     //   c_rarg0 <-- obj
< prev index next >