25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "utilities/macros.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45
46 #define __ masm->
47
48 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
49 Register src, Register dst, Register count) {
50
51 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
52
53 if (is_reference_type(type)) {
54 if (ShenandoahCardBarrier) {
55 bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
56 bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
57 bool obj_int = (type == T_OBJECT) && UseCompressedOops;
58
59 // We need to save the original element count because the array copy stub
60 // will destroy the value and we need it for the card marking barrier.
61 if (!checkcast) {
62 if (!obj_int) {
63 // Save count for barrier
64 __ movptr(r11, count);
884 assert(!is_native, "weak must not be called off-heap");
885 if (UseCompressedOops) {
886 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
887 } else {
888 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
889 }
890 } else {
891 assert(is_phantom, "only remaining strength");
892 assert(is_native, "phantom must only be called off-heap");
893 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
894 }
895
896 __ restore_live_registers_except_rax(true);
897
898 __ epilogue();
899 }
900
901 #undef __
902
903 #endif // COMPILER1
|
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "utilities/macros.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45 #ifdef COMPILER2
46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
47 #endif
48
49 #define __ masm->
50
51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
52 Register src, Register dst, Register count) {
53
54 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
55
56 if (is_reference_type(type)) {
57 if (ShenandoahCardBarrier) {
58 bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
59 bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
60 bool obj_int = (type == T_OBJECT) && UseCompressedOops;
61
62 // We need to save the original element count because the array copy stub
63 // will destroy the value and we need it for the card marking barrier.
64 if (!checkcast) {
65 if (!obj_int) {
66 // Save count for barrier
67 __ movptr(r11, count);
887 assert(!is_native, "weak must not be called off-heap");
888 if (UseCompressedOops) {
889 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
890 } else {
891 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
892 }
893 } else {
894 assert(is_phantom, "only remaining strength");
895 assert(is_native, "phantom must only be called off-heap");
896 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
897 }
898
899 __ restore_live_registers_except_rax(true);
900
901 __ epilogue();
902 }
903
904 #undef __
905
906 #endif // COMPILER1
907
908 #ifdef COMPILER2
909
910 #undef __
911 #define __ masm->
912
913 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow) {
914 // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
915 if (narrow) {
916 __ movl(dst, src);
917 } else {
918 __ movq(dst, src);
919 }
920
921 ShenandoahBarrierStubC2::load_post(masm, node, dst, src, noreg, noreg, narrow);
922 }
923
924 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
925 Address dst, bool dst_narrow,
926 Register src, bool src_narrow,
927 Register tmp) {
928
929 ShenandoahBarrierStubC2::store_pre(masm, node, tmp, dst, noreg, noreg, dst_narrow);
930
931 // Need to encode into tmp, because we cannot clobber src.
932 if (dst_narrow && !src_narrow) {
933 __ movq(tmp, src);
934 if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
935 __ encode_heap_oop(tmp);
936 } else {
937 __ encode_heap_oop_not_null(tmp);
938 }
939 src = tmp;
940 }
941
942 // Do the actual store
943 if (dst_narrow) {
944 __ movl(dst, src);
945 } else {
946 __ movq(dst, src);
947 }
948
949 ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp, noreg);
950 }
951
952 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm,
953 Register res, Address addr,
954 Register oldval, Register newval, Register tmp,
955 bool narrow) {
956
957 assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
958
959 // Oldval and newval can be in the same register, but all other registers should be
960 // distinct for extra safety, as we shuffle register values around.
961 assert_different_registers(oldval, tmp, addr.base(), addr.index());
962 assert_different_registers(newval, tmp, addr.base(), addr.index());
963
964 ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
965
966 // CAS!
967 __ lock();
968 if (narrow) {
969 __ cmpxchgl(newval, addr);
970 } else {
971 __ cmpxchgptr(newval, addr);
972 }
973
974 // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
975 if (res != noreg) {
976 __ setcc(Assembler::equal, res);
977 }
978
979 ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
980 }
981
982 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp, bool narrow) {
983 assert_different_registers(newval, tmp, addr.base(), addr.index());
984
985 ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
986
987 if (narrow) {
988 __ xchgl(newval, addr);
989 } else {
990 __ xchgq(newval, addr);
991 }
992
993 ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
994 }
995
996 #undef __
997 #define __ masm.
998
999 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address addr, Register tmp1, Register tmp2) {
1000 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1001
1002 __ lea(tmp1, addr);
1003 __ shrptr(tmp1, CardTable::card_shift());
1004 __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1005 Address card_address(tmp1, 0);
1006
1007 assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1008 Label L_done;
1009 if (UseCondCardMark) {
1010 __ cmpb(card_address, 0);
1011 __ jccb(Assembler::equal, L_done);
1012 }
1013 if (UseCompressedOops && CompressedOops::base() == nullptr) {
1014 __ movb(card_address, r12);
1015 } else {
1016 __ movb(card_address, 0);
1017 }
1018 __ bind(L_done);
1019 }
1020
1021 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
1022 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1023
1024 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1025 __ cmpb(gc_state_fast, 0);
1026 __ jcc(Assembler::notEqual, *entry());
1027 __ bind(*continuation());
1028 }
1029
1030 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1031 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1032 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1033
1034 // On x86, there is a significant penalty with unaligned branch target, for example
1035 // when the target instruction straggles the fetch line. It makes (performance) sense
1036 // to spend some code size to align the target better.
1037 __ align(16);
1038 __ bind(*entry());
1039
1040 // If we need to load ourselves, do it here.
1041 if (_do_load) {
1042 if (_narrow) {
1043 __ movl(_obj, _addr);
1044 } else {
1045 __ movq(_obj, _addr);
1046 }
1047 }
1048
1049 // If the object is null, there is no point in applying barriers.
1050 maybe_far_jump_if_zero(masm, _obj);
1051
1052 // We need to make sure that loads done by callers survive across slow-path calls.
1053 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1054 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1055 if (!_do_load || needs_both_barriers) {
1056 preserve(_obj);
1057 }
1058
1059 // Go for barriers. Barriers can return straight to continuation, as long
1060 // as another barrier is not needed.
1061 if (needs_both_barriers) {
1062 keepalive(masm, nullptr);
1063 lrb(masm);
1064 } else if (_needs_keep_alive_barrier) {
1065 keepalive(masm, continuation());
1066 } else if (_needs_load_ref_barrier) {
1067 lrb(masm);
1068 } else {
1069 ShouldNotReachHere();
1070 }
1071 }
1072
1073 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1074 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1075 Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1076 Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1077
1078 Label L_through, L_pop_and_slow;
1079
1080 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1081 if (_needs_load_ref_barrier) {
1082 assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
1083 __ cmpb(gc_state_fast, 0);
1084 __ jcc(Assembler::equal, L_through);
1085 }
1086
1087 // Need temp to work, allocate one now.
1088 bool tmp_live;
1089 Register tmp = select_temp_register(tmp_live);
1090 if (tmp_live) {
1091 __ push(tmp);
1092 }
1093
1094 // Fast-path: put object into buffer.
1095 // If buffer is already full, go slow.
1096 __ movptr(tmp, index);
1097 __ subptr(tmp, wordSize);
1098 __ jccb(Assembler::below, L_pop_and_slow);
1099 __ movptr(index, tmp);
1100 __ addptr(tmp, buffer);
1101
1102 // Store the object in queue.
1103 // If object is narrow, we need to decode it before inserting.
1104 // We can skip the re-encoding if we know that object is not preserved.
1105 if (_narrow) {
1106 __ decode_heap_oop_not_null(_obj);
1107 }
1108 __ movptr(Address(tmp, 0), _obj);
1109 if (_narrow && is_preserved(_obj)) {
1110 __ encode_heap_oop_not_null(_obj);
1111 }
1112
1113 // Fast-path exits here.
1114 if (tmp_live) {
1115 __ pop(tmp);
1116 }
1117
1118 if (L_done != nullptr) {
1119 __ jmp(*L_done);
1120 } else {
1121 __ jmp(L_through);
1122 }
1123
1124 // Slow-path: call runtime to handle.
1125 // Need to pop tmp immediately for stack to remain aligned.
1126 __ bind(L_pop_and_slow);
1127 if (tmp_live) {
1128 __ pop(tmp);
1129 }
1130 {
1131 SaveLiveRegisters slr(&masm, this);
1132
1133 // Shuffle in the arguments. The end result should be:
1134 // c_rarg0 <-- obj
1135 if (c_rarg0 != _obj) {
1136 __ mov(c_rarg0, _obj);
1137 }
1138
1139 // Go to runtime and handle the rest there.
1140 __ call(RuntimeAddress(keepalive_runtime_entry_addr()));
1141 }
1142 if (L_done != nullptr) {
1143 __ jmp(*L_done);
1144 } else {
1145 __ bind(L_through);
1146 }
1147 }
1148
// Emits the load-reference-barrier (LRB) part of the stub for the value in _obj.
// Emitted sequence, in order:
//   1. when a keep-alive barrier is also requested: a gc-state check that
//      leaves for continuation() if the selected state byte is zero;
//   2. for weak barriers: a WEAK_ROOTS check that goes straight to the slow path;
//   3. a byte test in the collection-set table, indexed by the object address
//      shifted right by the region size shift;
//   4. the slow path: call lrb_runtime_entry_addr() with (obj, lea(addr)) and
//      copy the result from rax into _obj.
// Every emitted path ends at continuation().
1149 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1150 Label L_pop_and_slow, L_slow;
1151
1152 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1153 if (_needs_keep_alive_barrier) {
1154 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1155 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1156 __ cmpb(gc_state_fast, 0);
1157 __ jcc(Assembler::equal, *continuation());
1158 }
1159
1160 // If weak references are being processed, weak/phantom loads need to go slow,
1161 // regardless of their cset status.
// No temp register has been pushed at this point, so the branch targets
// L_slow, which is bound after the pop that belongs to L_pop_and_slow.
1162 if (_needs_load_ref_weak_barrier) {
1163 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1164 __ cmpb(gc_state_fast, 0);
1165 __ jccb(Assembler::notEqual, L_slow);
1166 }
1167
// When dumping AOT code, rcx is excluded from temp selection below, because
// the AOT branch loads the region shift into rcx.
1168 bool is_aot = AOTCodeCache::is_on_for_dump();
1169
1170 // Need temp to work, allocate one now.
1171 bool tmp_live;
1172 Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1173 if (tmp_live) {
1174 __ push(tmp);
1175 }
1176
1177 // Compute the cset bitmap index
// tmp receives the (decoded) object address; _obj itself is left untouched.
1178 if (_narrow) {
1179 __ decode_heap_oop_not_null(tmp, _obj);
1180 } else {
1181 __ movptr(tmp, _obj);
1182 }
1183
// Three ways to form the address of the cset byte, chosen at code generation
// time: table address as a 32-bit displacement, table address and shift
// loaded from the AOT runtime constants, or table address as a full 64-bit
// immediate in a second temp.
1184 Address cset_addr_arg;
1185 intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
1186 if (!is_aot && cset_addr < INT32_MAX) {
1187 // Cset bitmap is at easily encodeable address. Just use it as displacement.
1188 __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1189 cset_addr_arg = Address(tmp, checked_cast<int>(cset_addr));
1190 } else {
1191 bool tmp2_live;
1192 Register tmp2 = select_temp_register(tmp2_live, /* skip_reg1 = */ tmp, /* skip_reg2 = */ is_aot ? rcx : noreg);
1193 if (tmp2_live) {
1194 __ push(tmp2);
1195 }
1196 if (is_aot) {
1197 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
// rcx is saved and restored around its use here; presumably shrptr(tmp)
// without an immediate shifts by the count held in rcx -- confirm against
// the MacroAssembler definition.
1198 assert_different_registers(tmp, tmp2, rcx);
1199 __ push(rcx);
1200 __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1201 __ movl(rcx, Address(rcx));
1202 __ shrptr(tmp);
1203 __ pop(rcx);
1204 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1205 __ addptr(tmp, Address(tmp2));
1206 } else {
1207 // Cset bitmap is far away. Add its address fully.
1208 __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1209 __ movptr(tmp2, cset_addr);
1210 __ addptr(tmp, tmp2);
1211 }
1212 if (tmp2_live) {
1213 __ pop(tmp2);
1214 }
1215 cset_addr_arg = Address(tmp, 0);
1216 }
1217
1218 // Cset-check. Fall-through to slow if in collection set.
// If tmp was pushed, both outcomes have to pop it: the not-in-cset path pops
// right here and leaves, the in-cset path pops at L_pop_and_slow.
1219 __ cmpb(cset_addr_arg, 0);
1220 if (tmp_live) {
1221 __ jccb(Assembler::notEqual, L_pop_and_slow);
1222 __ pop(tmp);
1223 __ jmp(*continuation());
1224 } else {
1225 // Nothing else to do, jump back
1226 __ jcc(Assembler::equal, *continuation());
1227 }
1228
1229 // Slow path
1230 __ bind(L_pop_and_slow);
1231 // Need to pop tmp immediately for stack to remain aligned.
1232 if (tmp_live) {
1233 __ pop(tmp);
1234 }
// Entry for the weak-roots check above, which branches here before any push.
1235 __ bind(L_slow);
1236
1237 // Obj is the result, need to temporarily stop preserving it.
// NOTE(review): presumably this keeps SaveLiveRegisters from restoring the
// old value over the result written below -- confirm.
1238 bool is_obj_preserved = is_preserved(_obj);
1239 if (is_obj_preserved) {
1240 dont_preserve(_obj);
1241 }
1242 {
1243 SaveLiveRegisters slr(&masm, this);
1244
1245 assert_different_registers(rax, c_rarg0, c_rarg1);
1246
1247 // Shuffle in the arguments. The end result should be:
1248 // c_rarg0 <-- obj
1249 // c_rarg1 <-- lea(addr)
// The three cases differ only in where _obj currently lives, so that neither
// argument register is overwritten before its old content has been consumed.
1250 if (_obj == c_rarg0) {
1251 __ lea(c_rarg1, _addr);
1252 } else if (_obj == c_rarg1) {
1253 // Set up arguments in reverse, and then flip them
1254 __ lea(c_rarg0, _addr);
1255 __ xchgptr(c_rarg0, c_rarg1);
1256 } else {
1257 assert_different_registers(_obj, c_rarg0, c_rarg1);
1258 __ lea(c_rarg1, _addr);
1259 __ movptr(c_rarg0, _obj);
1260 }
1261
1262 // Go to runtime and handle the rest there.
1263 __ call(RuntimeAddress(lrb_runtime_entry_addr()));
1264
1265 // Save the result where needed.
// Narrow results are always copied with a 32-bit move, even when _obj is rax;
// wide results are copied only when _obj is a different register.
1266 if (_narrow) {
1267 __ movl(_obj, rax);
1268 } else if (_obj != rax) {
1269 __ movptr(_obj, rax);
1270 }
1271 }
// Restore the preserved status that was dropped before the call.
1272 if (is_obj_preserved) {
1273 preserve(_obj);
1274 }
1275
1276 __ jmp(*continuation());
1277 }
1278
1279 int ShenandoahBarrierStubC2::available_gp_registers() {
1280 return Register::available_gp_registers();
1281 }
1282
1283 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1284 return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1285 }
1286
1287 void ShenandoahBarrierStubC2::post_init() {
1288 // Do nothing.
1289 }
1290
1291 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
1292 if (_narrow) {
1293 __ testl(reg, reg);
1294 } else {
1295 __ testq(reg, reg);
1296 }
1297 __ jcc(Assembler::zero, *continuation());
1298 }
1299
1300 #endif // COMPILER2
|