17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "utilities/macros.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45 #ifdef COMPILER2
46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
47 #endif
48
49 #define __ masm->
50
51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
52 Register src, Register dst, Register count) {
53
54 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
55
56 if (is_reference_type(type)) {
1004 __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1005 Address card_address(tmp1, 0);
1006
1007 assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1008 Label L_done;
1009 if (UseCondCardMark) {
1010 __ cmpb(card_address, 0);
1011 __ jccb(Assembler::equal, L_done);
1012 }
1013 if (UseCompressedOops && CompressedOops::base() == nullptr) {
1014 __ movb(card_address, r12);
1015 } else {
1016 __ movb(card_address, 0);
1017 }
1018 __ bind(L_done);
1019 }
1020
1021 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
1022 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1023
1024 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1025 __ cmpb(gc_state_fast, 0);
1026 __ jcc(Assembler::notEqual, *entry());
1027 __ bind(*continuation());
1028 }
1029
1030 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1031 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1032 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1033
1034 // On x86, there is a significant penalty with unaligned branch target, for example
1035 // when the target instruction straggles the fetch line. It makes (performance) sense
1036 // to spend some code size to align the target better.
1037 __ align(16);
1038 __ bind(*entry());
1039
1040 // If we need to load ourselves, do it here.
1041 if (_do_load) {
1042 if (_narrow) {
1043 __ movl(_obj, _addr);
1044 } else {
1045 __ movq(_obj, _addr);
1046 }
1047 }
1048
1049 // If the object is null, there is no point in applying barriers.
1050 maybe_far_jump_if_zero(masm, _obj);
1051
1052 // We need to make sure that loads done by callers survive across slow-path calls.
1053 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1054 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1055 if (!_do_load || needs_both_barriers) {
1056 preserve(_obj);
1057 }
1058
1059 // Go for barriers. Barriers can return straight to continuation, as long
1060 // as another barrier is not needed.
1061 if (needs_both_barriers) {
1062 keepalive(masm, nullptr);
1063 lrb(masm);
1064 } else if (_needs_keep_alive_barrier) {
1065 keepalive(masm, continuation());
1066 } else if (_needs_load_ref_barrier) {
1067 lrb(masm);
1068 } else {
1069 ShouldNotReachHere();
1070 }
1071 }
1072
1073 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1074 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1075 Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1076 Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1077
1078 Label L_through, L_pop_and_slow;
1079
1080 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1081 if (_needs_load_ref_barrier) {
1082 assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
1083 __ cmpb(gc_state_fast, 0);
1084 __ jcc(Assembler::equal, L_through);
1085 }
1086
1087 // Need temp to work, allocate one now.
1088 bool tmp_live;
1089 Register tmp = select_temp_register(tmp_live);
1090 if (tmp_live) {
1091 __ push(tmp);
1092 }
1093
1094 // Fast-path: put object into buffer.
1095 // If buffer is already full, go slow.
1096 __ movptr(tmp, index);
1097 __ subptr(tmp, wordSize);
1098 __ jccb(Assembler::below, L_pop_and_slow);
1099 __ movptr(index, tmp);
1100 __ addptr(tmp, buffer);
1101
1102 // Store the object in queue.
1103 // If object is narrow, we need to decode it before inserting.
1133 // Shuffle in the arguments. The end result should be:
1134 // c_rarg0 <-- obj
1135 if (c_rarg0 != _obj) {
1136 __ mov(c_rarg0, _obj);
1137 }
1138
1139 // Go to runtime and handle the rest there.
1140 // Use rax as scratch, as it will be saved if live.
1141 __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1142 }
1143 if (L_done != nullptr) {
1144 __ jmp(*L_done);
1145 } else {
1146 __ bind(L_through);
1147 }
1148 }
1149
1150 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1151 Label L_pop_and_slow, L_slow;
1152
1153 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1154 if (_needs_keep_alive_barrier) {
1155 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1156 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1157 __ cmpb(gc_state_fast, 0);
1158 __ jcc(Assembler::equal, *continuation());
1159 }
1160
1161 // If weak references are being processed, weak/phantom loads need to go slow,
1162 // regardless of their cset status.
1163 if (_needs_load_ref_weak_barrier) {
1164 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1165 __ cmpb(gc_state_fast, 0);
1166 __ jccb(Assembler::notEqual, L_slow);
1167 }
1168
1169 bool is_aot = AOTCodeCache::is_on_for_dump();
1170
1171 // Need temp to work, allocate one now.
1172 bool tmp_live;
1173 Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1174 if (tmp_live) {
1175 __ push(tmp);
1176 }
1177
1178 // Compute the cset bitmap index
1179 if (_narrow) {
1273 }
1274 if (is_obj_preserved) {
1275 preserve(_obj);
1276 }
1277
1278 __ jmp(*continuation());
1279 }
1280
1281 int ShenandoahBarrierStubC2::available_gp_registers() {
1282 return Register::available_gp_registers();
1283 }
1284
1285 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1286 return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1287 }
1288
1289 void ShenandoahBarrierStubC2::post_init() {
1290 // Do nothing.
1291 }
1292
1293 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
1294 if (_narrow) {
1295 __ testl(reg, reg);
1296 } else {
1297 __ testq(reg, reg);
1298 }
1299 __ jcc(Assembler::zero, *continuation());
1300 }
1301
1302 #endif // COMPILER2
|
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "nativeInst_x86.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "utilities/macros.hpp"
41 #ifdef COMPILER1
42 #include "c1/c1_LIRAssembler.hpp"
43 #include "c1/c1_MacroAssembler.hpp"
44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
45 #endif
46 #ifdef COMPILER2
47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
48 #endif
49
50 #define __ masm->
51
52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
53 Register src, Register dst, Register count) {
54
55 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
56
57 if (is_reference_type(type)) {
1005 __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1006 Address card_address(tmp1, 0);
1007
1008 assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1009 Label L_done;
1010 if (UseCondCardMark) {
1011 __ cmpb(card_address, 0);
1012 __ jccb(Assembler::equal, L_done);
1013 }
1014 if (UseCompressedOops && CompressedOops::base() == nullptr) {
1015 __ movb(card_address, r12);
1016 } else {
1017 __ movb(card_address, 0);
1018 }
1019 __ bind(L_done);
1020 }
1021
1022 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
1023 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1024
1025 // Emit the unconditional branch in the first version of the method.
1026 // Let the rest of runtime figure out how to manage it.
1027 __ relocate(barrier_Relocation::spec(), ShenandoahThreadLocalData::gc_state_to_fast_array_index(test_state));
1028 __ jmp(*entry(), /* maybe_short = */ false);
1029
1030 #ifdef ASSERT
1031 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1032 __ cmpb(gc_state_fast, 0);
1033 __ jccb(Assembler::zero, *continuation());
1034 __ hlt(); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
1035 #endif
1036 // TODO: When barriers are consistently turned off at the end of the cycle, assert that barrier is NOP-ed.
1037
1038 __ bind(*continuation());
1039 }
1040
1041 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
1042 NativeInstruction* ni = nativeInstruction_at(pc);
1043 assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
1044 NativeJump* jmp = nativeJump_at(pc);
1045 return jmp->jump_destination();
1046 }
1047
1048 void insert_5_byte_nop(address pc) {
1049 *(pc + 0) = 0x0F;
1050 *(pc + 1) = 0x1F;
1051 *(pc + 2) = 0x44;
1052 *(pc + 3) = 0x00;
1053 *(pc + 4) = 0x00;
1054 ICache::invalidate_range(pc, 5);
1055 }
1056
1057 bool is_5_byte_nop(address pc) {
1058 if (*(pc + 0) != 0x0F) return false;
1059 if (*(pc + 1) != 0x1F) return false;
1060 if (*(pc + 2) != 0x44) return false;
1061 if (*(pc + 3) != 0x00) return false;
1062 if (*(pc + 4) != 0x00) return false;
1063 return true;
1064 }
1065
1066 void check_at(bool cond, address pc, const char* msg) {
1067 assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
1068 msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
1069 }
1070
1071 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
1072 NativeInstruction* ni = nativeInstruction_at(pc);
1073 return ni->is_jump();
1074 }
1075
1076 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
1077 NativeInstruction* ni = nativeInstruction_at(pc);
1078 if (ni->is_jump()) {
1079 insert_5_byte_nop(pc);
1080 } else {
1081 check_at(is_5_byte_nop(pc), pc, "Should already be nop");
1082 }
1083 }
1084
1085 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
1086 NativeInstruction* ni = nativeInstruction_at(pc);
1087 if (is_5_byte_nop(pc)) {
1088 NativeJump::insert(pc, stub_addr);
1089 } else {
1090 check_at(ni->is_jump(), pc, "Should already be jump");
1091 check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
1092 }
1093 }
1094
1095 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1096 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1097 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1098
1099 // On x86, there is a significant penalty with unaligned branch target, for example
1100 // when the target instruction straggles the fetch line. It makes (performance) sense
1101 // to spend some code size to align the target better.
1102 __ align(16);
1103 __ bind(*entry());
1104
1105 // If we need to load ourselves, do it here.
1106 if (_do_load) {
1107 if (_narrow) {
1108 __ movl(_obj, _addr);
1109 } else {
1110 __ movq(_obj, _addr);
1111 }
1112 }
1113
1114 // If the object is null, there is no point in applying barriers.
1115 maybe_far_jump_if_zero(masm, _obj, continuation());
1116
1117 // We need to make sure that loads done by callers survive across slow-path calls.
1118 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1119 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1120 if (!_do_load || needs_both_barriers) {
1121 preserve(_obj);
1122 }
1123
1124 // Go for barriers. Barriers can return straight to continuation, as long
1125 // as another barrier is not needed.
1126 if (needs_both_barriers) {
1127 keepalive(masm, nullptr);
1128 lrb(masm);
1129 } else if (_needs_keep_alive_barrier) {
1130 keepalive(masm, continuation());
1131 } else if (_needs_load_ref_barrier) {
1132 lrb(masm);
1133 } else {
1134 ShouldNotReachHere();
1135 }
1136 }
1137
1138 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1139 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1140 Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1141 Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1142
1143 Label L_through, L_pop_and_slow;
1144
1145 // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1146 // Therefore, alas we need a separate check here. TODO: Figure this out.
1147 __ cmpb(gc_state_fast, 0);
1148 if (L_done != nullptr) {
1149 __ jcc(Assembler::equal, *L_done);
1150 } else {
1151 __ jcc(Assembler::equal, L_through);
1152 }
1153
1154 // Need temp to work, allocate one now.
1155 bool tmp_live;
1156 Register tmp = select_temp_register(tmp_live);
1157 if (tmp_live) {
1158 __ push(tmp);
1159 }
1160
1161 // Fast-path: put object into buffer.
1162 // If buffer is already full, go slow.
1163 __ movptr(tmp, index);
1164 __ subptr(tmp, wordSize);
1165 __ jccb(Assembler::below, L_pop_and_slow);
1166 __ movptr(index, tmp);
1167 __ addptr(tmp, buffer);
1168
1169 // Store the object in queue.
1170 // If object is narrow, we need to decode it before inserting.
1200 // Shuffle in the arguments. The end result should be:
1201 // c_rarg0 <-- obj
1202 if (c_rarg0 != _obj) {
1203 __ mov(c_rarg0, _obj);
1204 }
1205
1206 // Go to runtime and handle the rest there.
1207 // Use rax as scratch, as it will be saved if live.
1208 __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1209 }
1210 if (L_done != nullptr) {
1211 __ jmp(*L_done);
1212 } else {
1213 __ bind(L_through);
1214 }
1215 }
1216
1217 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1218 Label L_pop_and_slow, L_slow;
1219
1220 // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1221 // Therefore, alas we need a separate check here. TODO: Figure this out.
1222 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1223 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1224 __ cmpb(gc_state_fast, 0);
1225 __ jcc(Assembler::equal, *continuation());
1226
1227 // If weak references are being processed, weak/phantom loads need to go slow,
1228 // regardless of their cset status.
1229 if (_needs_load_ref_weak_barrier) {
1230 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1231 __ cmpb(gc_state_fast, 0);
1232 __ jccb(Assembler::notEqual, L_slow);
1233 }
1234
1235 bool is_aot = AOTCodeCache::is_on_for_dump();
1236
1237 // Need temp to work, allocate one now.
1238 bool tmp_live;
1239 Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1240 if (tmp_live) {
1241 __ push(tmp);
1242 }
1243
1244 // Compute the cset bitmap index
1245 if (_narrow) {
1339 }
1340 if (is_obj_preserved) {
1341 preserve(_obj);
1342 }
1343
1344 __ jmp(*continuation());
1345 }
1346
1347 int ShenandoahBarrierStubC2::available_gp_registers() {
1348 return Register::available_gp_registers();
1349 }
1350
1351 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1352 return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1353 }
1354
1355 void ShenandoahBarrierStubC2::post_init() {
1356 // Do nothing.
1357 }
1358
1359 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) {
1360 if (_narrow) {
1361 __ testl(reg, reg);
1362 } else {
1363 __ testq(reg, reg);
1364 }
1365 __ jcc(Assembler::zero, *L_target);
1366 }
1367
1368 #endif // COMPILER2
|