957 // tmp2 = effective address
958 __ lea(tmp2, address);
959
960 // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
961 __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
962
963 if (UseCondCardMark) {
964 Label L_already_dirty;
965 __ ldrb(tmp1, Address(tmp2));
966 __ cbz(tmp1, L_already_dirty);
967 __ strb(zr, Address(tmp2));
968 __ bind(L_already_dirty);
969 } else {
970 __ strb(zr, Address(tmp2));
971 }
972 }
973
974 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
975 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
976 PhaseOutput* const output = Compile::current()->output();
977 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
978
979 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
980 // We'll use that information to decide whether we need a far jump to the
981 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
982 // because otherwise it will be rebound when we later emit the instructions
983 // for real.
984 if (_needs_far_jump) {
985 __ ldrb(tmp, gc_state_fast);
986 __ cbz(tmp, *continuation());
987 __ b(output->in_scratch_emit_size() ? *continuation() : *entry());
988 } else {
989 __ ldrb(tmp, gc_state_fast);
990 __ cbnz(tmp, output->in_scratch_emit_size() ? *continuation() : *entry());
991 }
992
993 // This is were the slowpath stub will return to or the code above will
994 // jump to if the checks are false
995 __ bind(*continuation());
996 }
997
998 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
999 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1000 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1001 PhaseOutput* const output = Compile::current()->output();
1002
1003 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
1004 // We'll use that information to decide whether we need a far jump to the
1005 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
1006 // because otherwise it will be rebound when we later emit the instructions
1007 // for real.
1008 if (!output->in_scratch_emit_size()) {
1009 __ bind(*entry());
1010 }
1011
1012 // If we need to load ourselves, do it here.
1013 if (_do_load) {
1014 if (_narrow) {
1015 __ ldrw(_obj, _addr);
1016 } else {
1017 __ ldr(_obj, _addr);
1018 }
1019 }
1020
1021 // If the object is null, there is no point in applying barriers.
1022 maybe_far_jump_if_zero(masm, _obj);
1023
1024 // We need to make sure that loads done by callers survive across slow-path calls.
1025 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1026 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1027 if (!_do_load || needs_both_barriers) {
1028 preserve(_obj);
1029 }
1030
1031 // Go for barriers. Barriers can return straight to continuation, as long
1032 // as another barrier is not needed and we can reach the fastpath.
1033 if (needs_both_barriers) {
1034 // The Load match rule in the .ad file may have legitimized the load
1035 // address using a TEMP register and in that case we need to explicitly
1036 // preserve them here, because the RA does not consider TEMP as live-in,
1037 // and the KA runtime call may clobber them and cause a crash on the
1038 // subsequent LRB stub.
1039 if (_addr.base() != noreg) {
1040 preserve(_addr.base());
1041 }
1042 if (_addr.index() != noreg) {
1043 preserve(_addr.index());
1044 }
1045 keepalive(masm, nullptr);
1046 lrb(masm);
1047 } else if (_needs_keep_alive_barrier) {
1048 keepalive(masm, continuation());
1049 } else if (_needs_load_ref_barrier) {
1050 lrb(masm);
1051 } else {
1052 ShouldNotReachHere();
1053 }
1054 }
1055
1056 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
1057 if (_needs_far_jump) {
1058 Label L_short_jump;
1059 __ cbnz(reg, L_short_jump);
1060 __ b(*continuation());
1061 __ bind(L_short_jump);
1062 } else {
1063 __ cbz(reg, *continuation());
1064 }
1065 }
1066
// Emits the keep-alive part of the barrier stub: _obj is stored into the
// thread-local SATB mark queue buffer, and the runtime is called instead when
// the queue index reads zero (buffer full).
//
//   masm   - assembler receiving the instructions
//   L_done - label branched to after the runtime call; when null, L_through
//            is bound at the end instead and execution falls through to
//            whatever is emitted next
//
// _tmp1 and _tmp2 are used as scratch registers.
1067 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
// Thread-relative (rthread) locations: the fast gc-state slot for MARKING,
// and the SATB queue index and buffer fields.
1068 Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1069 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1070 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1071 Label L_through, L_slowpath;
1072
1073 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1074 if (_needs_load_ref_barrier) {
1075 assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
// Skip the keep-alive work when the MARKING state byte reads zero.
1076 __ ldrb(_tmp1, gcstate);
1077 __ cbz(_tmp1, L_through);
1078 }
1079
1080 // Fast-path: put object into buffer.
1081 // If buffer is already full, go slow.
1082 __ ldr(_tmp1, index);
1083 __ cbz(_tmp1, L_slowpath);
// Move the index down by one word and write it back; the new value is the
// byte offset of the slot that receives the object.
1084 __ sub(_tmp1, _tmp1, wordSize);
1085 __ str(_tmp1, index);
1086 __ ldr(_tmp2, buffer);
1087
1088 // Store the object in queue.
1089 // If object is narrow, we need to decode it before inserting.
1090 if (_narrow) {
// Compute the slot address first: _tmp1 is reused for the decoded oop.
1091 __ add(_tmp2, _tmp2, _tmp1);
1092 __ decode_heap_oop_not_null(_tmp1, _obj);
1093 __ str(_tmp1, Address(_tmp2));
1094 } else {
1095 // Buffer is 64-bit address, must be in base register.
1096 __ str(_obj, Address(_tmp2, _tmp1));
1107 __ bind(L_slowpath);
1108
1109 {
// NOTE(review): presumably saves the live registers here and restores them
// when slr goes out of scope -- confirm against SaveLiveRegisters.
1110 SaveLiveRegisters slr(&masm, this);
1111
1112 // Go to runtime and handle the rest there.
// The object is the single argument; the call target is materialized in lr.
1113 __ mov(c_rarg0, _obj);
1114 __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
1115 __ blr(lr);
1116 }
// With a caller-supplied label, branch there; otherwise bind L_through here.
1117 if (L_done != nullptr) {
1118 __ b(*L_done);
1119 } else {
1120 __ bind(L_through);
1121 }
1122 }
1123
1124 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1125 Label L_slow;
1126
1127 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1128 if (_needs_keep_alive_barrier) {
1129 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1130 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1131 __ ldrb(_tmp1, gc_state_fast);
1132 maybe_far_jump_if_zero(masm, _tmp1);
1133 }
1134
1135 // If weak references are being processed, weak/phantom loads need to go slow,
1136 // regardless of their cset status.
1137 if (_needs_load_ref_weak_barrier) {
1138 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1139 __ ldrb(_tmp1, gc_state_fast);
1140 __ cbnz(_tmp1, L_slow);
1141 }
1142
1143 // Cset-check. Fall-through to slow if in collection set.
1144 bool is_aot = AOTCodeCache::is_on_for_dump();
1145 if (!is_aot) {
1146 __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1147 if (_narrow) {
1148 __ decode_heap_oop_not_null(_tmp2, _obj);
1149 __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1150 } else {
1151 __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1152 }
1153 } else {
1154 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1155 if (_narrow) {
1156 __ decode_heap_oop_not_null(_tmp1, _obj);
1157 } else {
1158 __ mov(_tmp1, _obj);
1159 }
1160 __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1161 __ ldrw(_tmp2, Address(_tmp2));
1162 __ lsrv(_tmp2, _tmp1, _tmp2);
1163 __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1164 __ ldr(_tmp1, Address(_tmp1));
1165 __ add(_tmp1, _tmp1, _tmp2);
1166 }
1167 __ ldrb(_tmp1, Address(_tmp1, 0));
1168 maybe_far_jump_if_zero(masm, _tmp1);
1169
1170 // Slow path
1171 __ bind(L_slow);
1172
1173 // Obj is the result, need to temporarily stop preserving it.
1174 bool is_obj_preserved = is_preserved(_obj);
1175 if (is_obj_preserved) {
1176 dont_preserve(_obj);
1177 }
1178 {
1179 SaveLiveRegisters slr(&masm, this);
1180
1181 // Shuffle in the arguments. The end result should be:
1182 // c_rarg0 <-- obj
1183 // c_rarg1 <-- lea(addr)
1184 if (c_rarg0 == _obj) {
1185 __ lea(c_rarg1, _addr);
1186 } else if (c_rarg1 == _obj) {
1187 // Set up arguments in reverse, and then flip them
1188 __ lea(c_rarg0, _addr);
|
957 // tmp2 = effective address
958 __ lea(tmp2, address);
959
960 // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
961 __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
962
963 if (UseCondCardMark) {
964 Label L_already_dirty;
965 __ ldrb(tmp1, Address(tmp2));
966 __ cbz(tmp1, L_already_dirty);
967 __ strb(zr, Address(tmp2));
968 __ bind(L_already_dirty);
969 } else {
970 __ strb(zr, Address(tmp2));
971 }
972 }
973
974 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
975 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
976 PhaseOutput* const output = Compile::current()->output();
977
978 // Emit the unconditional branch in the first version of the method.
979 // Let the rest of runtime figure out how to manage it.
980 if (output->in_scratch_emit_size()) {
981 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
982 // We'll use that information to decide whether we need a far jump to the
983 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
984 // because otherwise it will be rebound when we later emit the instructions
985 // for real.
986 __ nop();
987 } else {
988 __ relocate(barrier_Relocation::spec(), ShenandoahThreadLocalData::gc_state_to_fast_array_index(test_state));
989 __ b(*entry());
990
991 // #ifdef ASSERT
992 // Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
993 // __ ldrb(rscratch1, gc_state_fast);
994 // __ cbz(rscratch1, *continuation());
995 // __ hlt(0); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
996 // #endif
997 }
998
999 // This is were the slowpath stub will return to or the code above will
1000 // jump to if the checks are false
1001 __ bind(*continuation());
1002 }
1003
1004 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
1005 NativeInstruction* ni = nativeInstruction_at(pc);
1006 assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
1007 NativeJump* jmp = nativeJump_at(pc);
1008 return jmp->jump_destination();
1009 }
1010
1011 void insert_nop(address pc) {
1012 *(pc + 0) = 0x1F;
1013 *(pc + 1) = 0x20;
1014 *(pc + 2) = 0x03;
1015 *(pc + 3) = 0xD5;
1016 ICache::invalidate_range(pc, 4);
1017 }
1018
1019 bool is_nop(address pc) {
1020 if (*(pc + 0) != 0x1F) return false;
1021 if (*(pc + 1) != 0x20) return false;
1022 if (*(pc + 2) != 0x03) return false;
1023 if (*(pc + 3) != 0xD5) return false;
1024 return true;
1025 }
1026
1027 void check_at(bool cond, address pc, const char* msg) {
1028 assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
1029 msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
1030 }
1031
1032 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
1033 NativeInstruction* ni = nativeInstruction_at(pc);
1034 return ni->is_jump();
1035 }
1036
1037 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
1038 NativeInstruction* ni = nativeInstruction_at(pc);
1039 if (ni->is_jump()) {
1040 insert_nop(pc);
1041 } else {
1042 check_at(is_nop(pc), pc, "Should already be nop");
1043 }
1044 }
1045
1046 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
1047 NativeInstruction* ni = nativeInstruction_at(pc);
1048 if (is_nop(pc)) {
1049 NativeJump::insert(pc, stub_addr);
1050 } else {
1051 check_at(ni->is_jump(), pc, "Should already be jump");
1052 check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
1053 }
1054 }
1055
1056 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1057 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1058 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1059 PhaseOutput* const output = Compile::current()->output();
1060
1061 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
1062 // We'll use that information to decide whether we need a far jump to the
1063 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
1064 // because otherwise it will be rebound when we later emit the instructions
1065 // for real.
1066 if (!output->in_scratch_emit_size()) {
1067 __ bind(*entry());
1068 }
1069
1070 // If we need to load ourselves, do it here.
1071 if (_do_load) {
1072 if (_narrow) {
1073 __ ldrw(_obj, _addr);
1074 } else {
1075 __ ldr(_obj, _addr);
1076 }
1077 }
1078
1079 // If the object is null, there is no point in applying barriers.
1080 maybe_far_jump_if_zero(masm, _obj, continuation());
1081
1082 // We need to make sure that loads done by callers survive across slow-path calls.
1083 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1084 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1085 if (!_do_load || needs_both_barriers) {
1086 preserve(_obj);
1087 }
1088
1089 // Go for barriers. Barriers can return straight to continuation, as long
1090 // as another barrier is not needed and we can reach the fastpath.
1091 if (needs_both_barriers) {
1092 // The Load match rule in the .ad file may have legitimized the load
1093 // address using a TEMP register and in that case we need to explicitly
1094 // preserve them here, because the RA does not consider TEMP as live-in,
1095 // and the KA runtime call may clobber them and cause a crash on the
1096 // subsequent LRB stub.
1097 if (_addr.base() != noreg) {
1098 preserve(_addr.base());
1099 }
1100 if (_addr.index() != noreg) {
1101 preserve(_addr.index());
1102 }
1103 keepalive(masm, nullptr);
1104 lrb(masm);
1105 } else if (_needs_keep_alive_barrier) {
1106 keepalive(masm, continuation());
1107 } else if (_needs_load_ref_barrier) {
1108 lrb(masm);
1109 } else {
1110 ShouldNotReachHere();
1111 }
1112 }
1113
1114 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) {
1115 assert(L_target == continuation(), "Should be");
1116 if (_needs_far_jump) {
1117 Label L_short_jump;
1118 __ cbnz(reg, L_short_jump);
1119 __ b(*L_target);
1120 __ bind(L_short_jump);
1121 } else {
1122 __ cbz(reg, *L_target);
1123 }
1124 }
1125
// Emits the keep-alive part of the barrier stub: after re-checking the
// MARKING state byte, _obj is stored into the thread-local SATB mark queue
// buffer, and the runtime is called instead when the queue index reads zero
// (buffer full).
//
//   masm   - assembler receiving the instructions
//   L_done - label branched to when MARKING reads zero and after the runtime
//            call; when null, L_through is used for the zero case and is
//            bound at the end, so execution falls through to whatever is
//            emitted next
//
// _tmp1 and _tmp2 are used as scratch registers.
1126 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
// Thread-relative (rthread) locations: the fast gc-state slot for MARKING,
// and the SATB queue index and buffer fields.
1127 Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1128 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1129 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1130 Label L_through, L_slowpath;
1131
1132 // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1133 // Therefore, alas we need a separate check here. TODO: Figure this out.
1134 __ ldrb(_tmp1, gcstate);
// Zero state byte: leave via L_done (possibly with a far jump) when a label
// was supplied, otherwise skip ahead to L_through.
1135 if (L_done != nullptr) {
1136 maybe_far_jump_if_zero(masm, _tmp1, L_done);
1137 } else {
1138 __ cbz(_tmp1, L_through);
1139 }
1140
1141 // Fast-path: put object into buffer.
1142 // If buffer is already full, go slow.
1143 __ ldr(_tmp1, index);
1144 __ cbz(_tmp1, L_slowpath);
// Move the index down by one word and write it back; the new value is the
// byte offset of the slot that receives the object.
1145 __ sub(_tmp1, _tmp1, wordSize);
1146 __ str(_tmp1, index);
1147 __ ldr(_tmp2, buffer);
1148
1149 // Store the object in queue.
1150 // If object is narrow, we need to decode it before inserting.
1151 if (_narrow) {
// Compute the slot address first: _tmp1 is reused for the decoded oop.
1152 __ add(_tmp2, _tmp2, _tmp1);
1153 __ decode_heap_oop_not_null(_tmp1, _obj);
1154 __ str(_tmp1, Address(_tmp2));
1155 } else {
1156 // Buffer is 64-bit address, must be in base register.
1157 __ str(_obj, Address(_tmp2, _tmp1));
1168 __ bind(L_slowpath);
1169
1170 {
// NOTE(review): presumably saves the live registers here and restores them
// when slr goes out of scope -- confirm against SaveLiveRegisters.
1171 SaveLiveRegisters slr(&masm, this);
1172
1173 // Go to runtime and handle the rest there.
// The object is the single argument; the call target is materialized in lr.
1174 __ mov(c_rarg0, _obj);
1175 __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
1176 __ blr(lr);
1177 }
// With a caller-supplied label, branch there; otherwise bind L_through here.
1178 if (L_done != nullptr) {
1179 __ b(*L_done);
1180 } else {
1181 __ bind(L_through);
1182 }
1183 }
1184
1185 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1186 Label L_slow;
1187
1188 // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1189 // Therefore, alas we need a separate check here. TODO: Figure this out.
1190 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1191 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1192 __ ldrb(_tmp1, gc_state_fast);
1193 maybe_far_jump_if_zero(masm, _tmp1, continuation());
1194
1195 // If weak references are being processed, weak/phantom loads need to go slow,
1196 // regardless of their cset status.
1197 if (_needs_load_ref_weak_barrier) {
1198 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1199 __ ldrb(_tmp1, gc_state_fast);
1200 __ cbnz(_tmp1, L_slow);
1201 }
1202
1203 // Cset-check. Fall-through to slow if in collection set.
1204 bool is_aot = AOTCodeCache::is_on_for_dump();
1205 if (!is_aot) {
1206 __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1207 if (_narrow) {
1208 __ decode_heap_oop_not_null(_tmp2, _obj);
1209 __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1210 } else {
1211 __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1212 }
1213 } else {
1214 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1215 if (_narrow) {
1216 __ decode_heap_oop_not_null(_tmp1, _obj);
1217 } else {
1218 __ mov(_tmp1, _obj);
1219 }
1220 __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1221 __ ldrw(_tmp2, Address(_tmp2));
1222 __ lsrv(_tmp2, _tmp1, _tmp2);
1223 __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1224 __ ldr(_tmp1, Address(_tmp1));
1225 __ add(_tmp1, _tmp1, _tmp2);
1226 }
1227 __ ldrb(_tmp1, Address(_tmp1, 0));
1228 maybe_far_jump_if_zero(masm, _tmp1, continuation());
1229
1230 // Slow path
1231 __ bind(L_slow);
1232
1233 // Obj is the result, need to temporarily stop preserving it.
1234 bool is_obj_preserved = is_preserved(_obj);
1235 if (is_obj_preserved) {
1236 dont_preserve(_obj);
1237 }
1238 {
1239 SaveLiveRegisters slr(&masm, this);
1240
1241 // Shuffle in the arguments. The end result should be:
1242 // c_rarg0 <-- obj
1243 // c_rarg1 <-- lea(addr)
1244 if (c_rarg0 == _obj) {
1245 __ lea(c_rarg1, _addr);
1246 } else if (c_rarg1 == _obj) {
1247 // Set up arguments in reverse, and then flip them
1248 __ lea(c_rarg0, _addr);
|