< prev index next >

src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp

Print this page

 957   // tmp2 = effective address
 958   __ lea(tmp2, address);
 959 
 960   // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
 961   __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
 962 
 963   if (UseCondCardMark) {
 964     Label L_already_dirty;
 965     __ ldrb(tmp1, Address(tmp2));
 966     __ cbz(tmp1, L_already_dirty);
 967     __ strb(zr, Address(tmp2));
 968     __ bind(L_already_dirty);
 969   } else {
 970     __ strb(zr, Address(tmp2));
 971   }
 972 }
 973 
 974 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
       // Emits the inline check that guards the stub: loads the gc-state byte
       // selected by test_state (addressed off rthread) into tmp and branches
       // to the stub entry when it is non-zero. A zero byte ends up at
       // continuation(), which is bound at the end of this method.
 975   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 976   PhaseOutput* const output = Compile::current()->output();
 977   Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
 978 
 979   // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
 980   // We'll use that information to decide whether we need a far jump to the
 981   // stub entry point or not. In scratch_emit_size mode entry() is not bound
 982   // (see emit_code), so the branches below target continuation() instead of
 983   // entry() in that mode.
 984   if (_needs_far_jump) {
         // Far variant: a zero byte skips to continuation(), anything else takes
         // the unconditional branch to the stub entry.
 985     __ ldrb(tmp, gc_state_fast);
 986     __ cbz(tmp, *continuation());
 987     __ b(output->in_scratch_emit_size() ? *continuation() : *entry());
 988   } else {
         // Near variant: a single compare-and-branch straight to the stub entry.
 989     __ ldrb(tmp, gc_state_fast);
 990     __ cbnz(tmp, output->in_scratch_emit_size() ? *continuation() : *entry());







 991   }
 992 
 993   // This is where the slowpath stub will return to, or where the code above
 994   // will jump to if the checks are false.
 995   __ bind(*continuation());
 996 }
 997 




















































 998 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
       // Emits the out-of-line stub body: binds entry(), optionally loads the
       // object, skips everything for a null object, and then emits the
       // keep-alive and/or load-reference barrier code as requested by
       // _needs_keep_alive_barrier / _needs_load_ref_barrier.
 999   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1000   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1001   PhaseOutput* const output = Compile::current()->output();
1002 
1003   // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
1004   // We'll use that information to decide whether we need a far jump to the
1005   // stub entry point or not. In scratch_emit_size mode we don't bind entry()
1006   // because otherwise it will be rebound when we later emit the instructions
1007   // for real.
1008   if (!output->in_scratch_emit_size()) {
1009     __ bind(*entry());
1010   }
1011 
1012   // If we need to load ourselves, do it here.
       // Narrow values are loaded with the 32-bit ldrw, full-width ones with ldr.
1013   if (_do_load) {
1014     if (_narrow) {
1015       __ ldrw(_obj, _addr);
1016     } else {
1017       __ ldr(_obj, _addr);
1018     }
1019   }
1020 
1021   // If the object is null, there is no point in applying barriers.
1022   maybe_far_jump_if_zero(masm, _obj);
1023 
1024   // We need to make sure that loads done by callers survive across slow-path calls.
1025   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1026   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1027   if (!_do_load || needs_both_barriers) {
1028     preserve(_obj);
1029   }
1030 
1031   // Go for barriers. Barriers can return straight to continuation, as long
1032   // as another barrier is not needed and we can reach the fastpath.
1033   if (needs_both_barriers) {
1034     // The Load match rule in the .ad file may have legitimized the load
1035     // address using a TEMP register and in that case we need to explicitly
1036     // preserve them here, because the RA does not consider TEMP as live-in,
1037     // and the KA runtime call may clobber them and cause a crash on the
1038     // subsequent LRB stub.
1039     if (_addr.base() != noreg) {
1040       preserve(_addr.base());
1041     }
1042     if (_addr.index() != noreg) {
1043       preserve(_addr.index());
1044     }
         // A null L_done makes keepalive() fall through into the LRB code that
         // follows instead of branching away when it is done.
1045     keepalive(masm, nullptr);
1046     lrb(masm);
1047   } else if (_needs_keep_alive_barrier) {
1048     keepalive(masm, continuation());
1049   } else if (_needs_load_ref_barrier) {
1050     lrb(masm);
1051   } else {
1052     ShouldNotReachHere();
1053   }
1054 }
1055 
1056 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
       // Emits "if (reg == 0) goto continuation()". With _needs_far_jump set, the
       // test is inverted to hop over an unconditional b to continuation() -
       // presumably because continuation() may be out of cbz range (TODO confirm).

1057   if (_needs_far_jump) {
1058     Label L_short_jump;
1059     __ cbnz(reg, L_short_jump);
1060     __ b(*continuation());
1061     __ bind(L_short_jump);
1062   } else {
1063     __ cbz(reg, *continuation());
1064   }
1065 }
1066 
1067 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
       // Emits the keep-alive barrier: tries to store _obj into the thread's
       // SATB mark queue buffer inline, and calls the keep-alive runtime entry
       // when the queue index is zero. If L_done is non-null the runtime path
       // ends with a branch to it; otherwise L_through is bound at the very end.
1068   Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1069   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1070   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1071   Label L_through, L_slowpath;
1072 
1073   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1074   if (_needs_load_ref_barrier) {
1075     assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
1076     __ ldrb(_tmp1, gcstate);


1077     __ cbz(_tmp1, L_through);
1078   }
1079 
1080   // Fast-path: put object into buffer.
1081   // If buffer is already full, go slow.
       // The index is decremented by wordSize and written back before the store.
1082   __ ldr(_tmp1, index);
1083   __ cbz(_tmp1, L_slowpath);
1084   __ sub(_tmp1, _tmp1, wordSize);
1085   __ str(_tmp1, index);
1086   __ ldr(_tmp2, buffer);
1087 
1088   // Store the object in queue.
1089   // If object is narrow, we need to decode it before inserting.
1090   if (_narrow) {
         // _tmp1 is reused for the decoded oop, so fold the index into _tmp2 first.
1091     __ add(_tmp2, _tmp2, _tmp1);
1092     __ decode_heap_oop_not_null(_tmp1, _obj);
1093     __ str(_tmp1, Address(_tmp2));
1094   } else {
1095     // Buffer is 64-bit address, must be in base register.
1096     __ str(_obj, Address(_tmp2, _tmp1));

1107   __ bind(L_slowpath);
1108 
1109   {
         // Live registers are saved for the duration of this scope.
1110     SaveLiveRegisters slr(&masm, this);
1111 
1112     // Go to runtime and handle the rest there.
1113     __ mov(c_rarg0, _obj);
1114     __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
1115     __ blr(lr);
1116   }
1117   if (L_done != nullptr) {
1118     __ b(*L_done);
1119   } else {
1120     __ bind(L_through);
1121   }
1122 }
1123 
1124 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1125   Label L_slow;
1126 
1127   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1128   if (_needs_keep_alive_barrier) {
1129     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1130     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1131     __ ldrb(_tmp1, gc_state_fast);
1132     maybe_far_jump_if_zero(masm, _tmp1);
1133   }
1134 
1135   // If weak references are being processed, weak/phantom loads need to go slow,
1136   // regardless of their cset status.
1137   if (_needs_load_ref_weak_barrier) {
1138     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1139     __ ldrb(_tmp1, gc_state_fast);
1140     __ cbnz(_tmp1, L_slow);
1141   }
1142 
1143   // Cset-check. Fall-through to slow if in collection set.
1144   bool is_aot = AOTCodeCache::is_on_for_dump();
1145   if (!is_aot) {
1146     __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1147     if (_narrow) {
1148       __ decode_heap_oop_not_null(_tmp2, _obj);
1149       __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1150     } else {
1151       __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1152     }
1153   } else {
1154     // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1155     if (_narrow) {
1156       __ decode_heap_oop_not_null(_tmp1, _obj);
1157     } else {
1158       __ mov(_tmp1, _obj);
1159     }
1160     __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1161     __ ldrw(_tmp2, Address(_tmp2));
1162     __ lsrv(_tmp2, _tmp1, _tmp2);
1163     __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1164     __ ldr(_tmp1, Address(_tmp1));
1165     __ add(_tmp1, _tmp1, _tmp2);
1166   }
1167   __ ldrb(_tmp1, Address(_tmp1, 0));
1168   maybe_far_jump_if_zero(masm, _tmp1);
1169 
1170   // Slow path
1171   __ bind(L_slow);
1172 
1173   // Obj is the result, need to temporarily stop preserving it.
1174   bool is_obj_preserved = is_preserved(_obj);
1175   if (is_obj_preserved) {
1176     dont_preserve(_obj);
1177   }
1178   {
1179     SaveLiveRegisters slr(&masm, this);
1180 
1181     // Shuffle in the arguments. The end result should be:
1182     //   c_rarg0 <-- obj
1183     //   c_rarg1 <-- lea(addr)
1184     if (c_rarg0 == _obj) {
1185       __ lea(c_rarg1, _addr);
1186     } else if (c_rarg1 == _obj) {
1187       // Set up arguments in reverse, and then flip them
1188       __ lea(c_rarg0, _addr);

 957   // tmp2 = effective address
 958   __ lea(tmp2, address);
 959 
 960   // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
 961   __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
 962 
 963   if (UseCondCardMark) {
 964     Label L_already_dirty;
 965     __ ldrb(tmp1, Address(tmp2));
 966     __ cbz(tmp1, L_already_dirty);
 967     __ strb(zr, Address(tmp2));
 968     __ bind(L_already_dirty);
 969   } else {
 970     __ strb(zr, Address(tmp2));
 971   }
 972 }
 973 
 974 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
       // Emits the barrier entry as a single instruction: an unconditional branch
       // to the stub entry, tagged with a barrier relocation that carries the
       // fast-array index for test_state. No gc-state load is emitted here and
       // tmp is not used by this version.
 975   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 976   PhaseOutput* const output = Compile::current()->output();

 977 
 978   // Emit the unconditional branch in the first version of the method.
 979   // Let the rest of runtime figure out how to manage it.
 980   if (output->in_scratch_emit_size()) {
 981     // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
 982     // In that mode entry() is not bound (see emit_code), because otherwise it
 983     // would be rebound when we later emit the instructions for real. So we
 984     // must not branch to entry() here; a nop is emitted in place of the
 985     // branch instead.
 986     __ nop();
 987   } else {
         // NOTE(review): the patch_* helpers in this file switch such a site
         // between jump and nop - presumably located via this relocation; confirm.
 988     __ relocate(barrier_Relocation::spec(), ShenandoahThreadLocalData::gc_state_to_fast_array_index(test_state));
 989     __ b(*entry());
 990 
 991 // #ifdef ASSERT
 992 //     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
 993 //     __ ldrb(rscratch1, gc_state_fast);
 994 //     __ cbz(rscratch1, *continuation());
 995 //     __ hlt(0); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
 996 // #endif
 997   }
 998 
 999   // This is where the slowpath stub will return to, or where the code above
1000   // will jump to if the checks are false.
1001   __ bind(*continuation());
1002 }
1003 
1004 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
       // Returns the destination of the jump instruction located at pc.
       // Asserts that the instruction at pc is a jump.
1005   NativeInstruction* ni = nativeInstruction_at(pc);
1006   assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
1007   NativeJump* jmp = nativeJump_at(pc);
1008   return jmp->jump_destination();
1009 }
1010 
1011 void insert_nop(address pc) {
       // Overwrites the 4-byte instruction at pc with the AArch64 NOP encoding
       // (0xD503201F, i.e. bytes 1F 20 03 D5 in memory) and invalidates the
       // instruction cache for that word.
       //
       // The word is written with a single 32-bit store rather than four byte
       // stores, so that a thread fetching from pc never sees a half-patched
       // instruction. Assumes pc is 4-byte aligned, as instruction addresses are.
1012   *reinterpret_cast<uint32_t*>(pc) = 0xD503201F;
1016   ICache::invalidate_range(pc, 4);
1017 }
1018 
1019 bool is_nop(address pc) {
       // Returns true when the four bytes at pc are 1F 20 03 D5, the same byte
       // sequence that insert_nop() writes.
1020   if (*(pc + 0) != 0x1F) return false;
1021   if (*(pc + 1) != 0x20) return false;
1022   if (*(pc + 2) != 0x03) return false;
1023   if (*(pc + 3) != 0xD5) return false;
1024   return true;
1025 }
1026 
1027 void check_at(bool cond, address pc, const char* msg) {
       // Asserts cond; on failure the message includes msg, the address pc and a
       // hex dump of the instruction at pc. Only the four bytes of that one
       // instruction are dumped, so nothing past it is read.
1028   assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x",
1029          msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3));
1030 }
1031 
1032 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
       // Reports whether the instruction at pc is currently a jump.
1033   NativeInstruction* ni = nativeInstruction_at(pc);
1034   return ni->is_jump();
1035 }
1036 
1037 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
       // Replaces a jump at pc with a nop. If pc does not hold a jump, it is
       // expected to hold a nop already, which is checked via check_at().
1038   NativeInstruction* ni = nativeInstruction_at(pc);
1039   if (ni->is_jump()) {
1040     insert_nop(pc);
1041   } else {
1042     check_at(is_nop(pc), pc, "Should already be nop");
1043   }
1044 }
1045 
1046 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
       // Replaces a nop at pc with a jump to stub_addr. If pc does not hold a
       // nop, it is expected to already hold a jump to that same stub_addr;
       // both expectations are checked via check_at().
1047   NativeInstruction* ni = nativeInstruction_at(pc);
1048   if (is_nop(pc)) {
1049     NativeJump::insert(pc, stub_addr);
1050   } else {
1051     check_at(ni->is_jump(), pc, "Should already be jump");
1052     check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
1053   }
1054 }
1055 
1056 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
       // Emits the out-of-line stub body: binds entry(), optionally loads the
       // object, skips everything for a null object, and then emits the
       // keep-alive and/or load-reference barrier code as requested by
       // _needs_keep_alive_barrier / _needs_load_ref_barrier.
1057   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1058   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1059   PhaseOutput* const output = Compile::current()->output();
1060 
1061   // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
1062   // We'll use that information to decide whether we need a far jump to the
1063   // stub entry point or not. In scratch_emit_size mode we don't bind entry()
1064   // because otherwise it will be rebound when we later emit the instructions
1065   // for real.
1066   if (!output->in_scratch_emit_size()) {
1067     __ bind(*entry());
1068   }
1069 
1070   // If we need to load ourselves, do it here.
       // Narrow values are loaded with the 32-bit ldrw, full-width ones with ldr.
1071   if (_do_load) {
1072     if (_narrow) {
1073       __ ldrw(_obj, _addr);
1074     } else {
1075       __ ldr(_obj, _addr);
1076     }
1077   }
1078 
1079   // If the object is null, there is no point in applying barriers.
1080   maybe_far_jump_if_zero(masm, _obj, continuation());
1081 
1082   // We need to make sure that loads done by callers survive across slow-path calls.
1083   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1084   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1085   if (!_do_load || needs_both_barriers) {
1086     preserve(_obj);
1087   }
1088 
1089   // Go for barriers. Barriers can return straight to continuation, as long
1090   // as another barrier is not needed and we can reach the fastpath.
1091   if (needs_both_barriers) {
1092     // The Load match rule in the .ad file may have legitimized the load
1093     // address using a TEMP register and in that case we need to explicitly
1094     // preserve them here, because the RA does not consider TEMP as live-in,
1095     // and the KA runtime call may clobber them and cause a crash on the
1096     // subsequent LRB stub.
1097     if (_addr.base() != noreg) {
1098       preserve(_addr.base());
1099     }
1100     if (_addr.index() != noreg) {
1101       preserve(_addr.index());
1102     }
         // A null L_done makes keepalive() fall through into the LRB code that
         // follows instead of branching away when it is done.
1103     keepalive(masm, nullptr);
1104     lrb(masm);
1105   } else if (_needs_keep_alive_barrier) {
1106     keepalive(masm, continuation());
1107   } else if (_needs_load_ref_barrier) {
1108     lrb(masm);
1109   } else {
1110     ShouldNotReachHere();
1111   }
1112 }
1113 
1114 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) {
       // Emits "if (reg == 0) goto *L_target". L_target is asserted to be
       // continuation(). With _needs_far_jump set, the test is inverted to hop
       // over an unconditional b to the target - presumably because the target
       // may be out of cbz range (TODO confirm).
1115   assert(L_target == continuation(), "Should be");
1116   if (_needs_far_jump) {
1117     Label L_short_jump;
1118     __ cbnz(reg, L_short_jump);
1119     __ b(*L_target);
1120     __ bind(L_short_jump);
1121   } else {
1122     __ cbz(reg, *L_target);
1123   }
1124 }
1125 
1126 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
       // Emits the keep-alive barrier: re-checks the MARKING gc-state byte, tries
       // to store _obj into the thread's SATB mark queue buffer inline, and calls
       // the keep-alive runtime entry when the queue index is zero. If L_done is
       // non-null the runtime path ends with a branch to it; otherwise L_through
       // is bound at the very end.
1127   Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1128   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1129   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1130   Label L_through, L_slowpath;
1131 
1132   // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1133   // Therefore, alas we need a separate check here. TODO: Figure this out.
       // A zero state byte leaves via L_done when one was given, else via L_through.
1134   __ ldrb(_tmp1, gcstate);
1135   if (L_done != nullptr) {
1136     maybe_far_jump_if_zero(masm, _tmp1, L_done);
1137   } else {
1138     __ cbz(_tmp1, L_through);
1139   }
1140 
1141   // Fast-path: put object into buffer.
1142   // If buffer is already full, go slow.
       // The index is decremented by wordSize and written back before the store.
1143   __ ldr(_tmp1, index);
1144   __ cbz(_tmp1, L_slowpath);
1145   __ sub(_tmp1, _tmp1, wordSize);
1146   __ str(_tmp1, index);
1147   __ ldr(_tmp2, buffer);
1148 
1149   // Store the object in queue.
1150   // If object is narrow, we need to decode it before inserting.
1151   if (_narrow) {
         // _tmp1 is reused for the decoded oop, so fold the index into _tmp2 first.
1152     __ add(_tmp2, _tmp2, _tmp1);
1153     __ decode_heap_oop_not_null(_tmp1, _obj);
1154     __ str(_tmp1, Address(_tmp2));
1155   } else {
1156     // Buffer is 64-bit address, must be in base register.
1157     __ str(_obj, Address(_tmp2, _tmp1));

1168   __ bind(L_slowpath);
1169 
1170   {
         // Live registers are saved for the duration of this scope.
1171     SaveLiveRegisters slr(&masm, this);
1172 
1173     // Go to runtime and handle the rest there.
1174     __ mov(c_rarg0, _obj);
1175     __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
1176     __ blr(lr);
1177   }
1178   if (L_done != nullptr) {
1179     __ b(*L_done);
1180   } else {
1181     __ bind(L_through);
1182   }
1183 }
1184 
1185 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1186   Label L_slow;
1187 
1188   // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1189   // Therefore, alas we need a separate check here. TODO: Figure this out.
1190   char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1191   Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1192   __ ldrb(_tmp1, gc_state_fast);
1193   maybe_far_jump_if_zero(masm, _tmp1, continuation());

1194 
1195   // If weak references are being processed, weak/phantom loads need to go slow,
1196   // regardless of their cset status.
1197   if (_needs_load_ref_weak_barrier) {
1198     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1199     __ ldrb(_tmp1, gc_state_fast);
1200     __ cbnz(_tmp1, L_slow);
1201   }
1202 
1203   // Cset-check. Fall-through to slow if in collection set.
1204   bool is_aot = AOTCodeCache::is_on_for_dump();
1205   if (!is_aot) {
1206     __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1207     if (_narrow) {
1208       __ decode_heap_oop_not_null(_tmp2, _obj);
1209       __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1210     } else {
1211       __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1212     }
1213   } else {
1214     // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1215     if (_narrow) {
1216       __ decode_heap_oop_not_null(_tmp1, _obj);
1217     } else {
1218       __ mov(_tmp1, _obj);
1219     }
1220     __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1221     __ ldrw(_tmp2, Address(_tmp2));
1222     __ lsrv(_tmp2, _tmp1, _tmp2);
1223     __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1224     __ ldr(_tmp1, Address(_tmp1));
1225     __ add(_tmp1, _tmp1, _tmp2);
1226   }
1227   __ ldrb(_tmp1, Address(_tmp1, 0));
1228   maybe_far_jump_if_zero(masm, _tmp1, continuation());
1229 
1230   // Slow path
1231   __ bind(L_slow);
1232 
1233   // Obj is the result, need to temporarily stop preserving it.
1234   bool is_obj_preserved = is_preserved(_obj);
1235   if (is_obj_preserved) {
1236     dont_preserve(_obj);
1237   }
1238   {
1239     SaveLiveRegisters slr(&masm, this);
1240 
1241     // Shuffle in the arguments. The end result should be:
1242     //   c_rarg0 <-- obj
1243     //   c_rarg1 <-- lea(addr)
1244     if (c_rarg0 == _obj) {
1245       __ lea(c_rarg1, _addr);
1246     } else if (c_rarg1 == _obj) {
1247       // Set up arguments in reverse, and then flip them
1248       __ lea(c_rarg0, _addr);
< prev index next >