< prev index next >

src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp

Print this page

  24  */
  25 
  26 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  27 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  28 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  30 #include "gc/shenandoah/shenandoahForwarding.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  33 #include "gc/shenandoah/shenandoahRuntime.hpp"
  34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  35 #include "interpreter/interpreter.hpp"
  36 #include "runtime/javaThread.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "utilities/macros.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_LIRAssembler.hpp"
  41 #include "c1/c1_MacroAssembler.hpp"
  42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  43 #endif



  44 
  45 #define __ masm->
  46 
  47 static void save_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
  48   if (handle_gpr) {
  49     __ push_IU_state();
  50   }
  51 
  52   if (handle_fp) {
  53     // Some paths can be reached from the c2i adapter with live fp arguments in registers.
  54     assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call");
  55 
  56     const int xmm_size = wordSize * 2;
  57     __ subptr(rsp, xmm_size * 8);
  58     __ movdbl(Address(rsp, xmm_size * 0), xmm0);
  59     __ movdbl(Address(rsp, xmm_size * 1), xmm1);
  60     __ movdbl(Address(rsp, xmm_size * 2), xmm2);
  61     __ movdbl(Address(rsp, xmm_size * 3), xmm3);
  62     __ movdbl(Address(rsp, xmm_size * 4), xmm4);
  63     __ movdbl(Address(rsp, xmm_size * 5), xmm5);

 974     assert(!is_native, "weak must not be called off-heap");
 975     if (UseCompressedOops) {
 976       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
 977     } else {
 978       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
 979     }
 980   } else {
 981     assert(is_phantom, "only remaining strength");
 982     assert(is_native, "phantom must only be called off-heap");
 983     __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
 984   }
 985 
 986   __ restore_live_registers_except_rax(true);
 987 
 988   __ epilogue();
 989 }
 990 
 991 #undef __
 992 
 993 #endif // COMPILER1


















































































































































































































































































































































































































































































































































































































































  24  */
  25 
  26 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  27 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  28 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  30 #include "gc/shenandoah/shenandoahForwarding.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  33 #include "gc/shenandoah/shenandoahRuntime.hpp"
  34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  35 #include "interpreter/interpreter.hpp"
  36 #include "runtime/javaThread.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "utilities/macros.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_LIRAssembler.hpp"
  41 #include "c1/c1_MacroAssembler.hpp"
  42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  43 #endif
  44 #ifdef COMPILER2
  45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  46 #endif
  47 
  48 #define __ masm->
  49 
  50 static void save_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
  51   if (handle_gpr) {
  52     __ push_IU_state();
  53   }
  54 
  55   if (handle_fp) {
  56     // Some paths can be reached from the c2i adapter with live fp arguments in registers.
  57     assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call");
  58 
  59     const int xmm_size = wordSize * 2;
  60     __ subptr(rsp, xmm_size * 8);
  61     __ movdbl(Address(rsp, xmm_size * 0), xmm0);
  62     __ movdbl(Address(rsp, xmm_size * 1), xmm1);
  63     __ movdbl(Address(rsp, xmm_size * 2), xmm2);
  64     __ movdbl(Address(rsp, xmm_size * 3), xmm3);
  65     __ movdbl(Address(rsp, xmm_size * 4), xmm4);
  66     __ movdbl(Address(rsp, xmm_size * 5), xmm5);

 977     assert(!is_native, "weak must not be called off-heap");
 978     if (UseCompressedOops) {
 979       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
 980     } else {
 981       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
 982     }
 983   } else {
 984     assert(is_phantom, "only remaining strength");
 985     assert(is_native, "phantom must only be called off-heap");
 986     __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
 987   }
 988 
 989   __ restore_live_registers_except_rax(true);
 990 
 991   __ epilogue();
 992 }
 993 
 994 #undef __
 995 
 996 #endif // COMPILER1
 997 
 998 #ifdef COMPILER2
 999 #undef __
1000 #define __ masm->
1001 
1002 Register ShenandoahBarrierStubC2::select_temp_register(Address addr, Register reg1, Register reg2) {
1003   Register tmp = noreg;
1004   for (int i = 0; i < 8; i++) {
1005     Register r = as_Register(i);
1006     if (r != rsp && r != rbp && r != reg1 && r != reg2 && r != addr.base() && r != addr.index()) {
1007       if (tmp == noreg) {
1008         tmp = r;
1009         break;
1010       }
1011     }
1012   }
1013 
1014   assert(tmp != noreg, "successfully allocated");
1015   assert_different_registers(tmp, reg1, reg2);
1016   assert_different_registers(tmp, addr.base());
1017   assert_different_registers(tmp, addr.index());
1018   return tmp;
1019 }
1020 
1021 void ShenandoahBarrierSetAssembler::gc_state_check_c2(MacroAssembler* masm, const char test_state, BarrierStubC2* slow_stub) {
1022   if (ShenandoahGCStateCheckRemove) {
1023     // Unrealistic: remove all barrier fastpath checks.
1024   } else if (ShenandoahGCStateCheckHotpatch) {
1025     // In the ideal world, we would hot-patch the branch to slow stub with a single
1026     // (unconditional) jump or nop, based on our current GC state. Jump to near target
1027     // within the nmethod (at 32-bit offset) takes 6 bytes.
1028     __ nop(6);
1029   } else {
1030     Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1031     __ testb(gc_state, test_state);
1032     __ jcc(Assembler::notZero, *slow_stub->entry());
1033     __ bind(*slow_stub->continuation());
1034   }
1035 }
1036 
1037 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src) {
1038   // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
1039   if (node->bottom_type()->isa_narrowoop()) {
1040     __ movl(dst, src);
1041   } else {
1042     __ movq(dst, src);
1043   }
1044 
1045   // Emit barrier if needed
1046   if (!ShenandoahSkipBarriers && ShenandoahLoadBarrierStubC2::needs_barrier(node)) {
1047     Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
1048 
1049     ShenandoahLoadBarrierStubC2* const stub = ShenandoahLoadBarrierStubC2::create(node, dst, src);
1050 
1051     char check = 0;
1052     check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node)    ? ShenandoahHeap::MARKING : 0;
1053     check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node)      ? ShenandoahHeap::HAS_FORWARDED : 0;
1054     check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
1055     gc_state_check_c2(masm, check, stub);
1056   }
1057 }
1058 
// Emit an oop store with Shenandoah GC barriers for C2-compiled code.
// Order matters: the SATB (keep-alive) pre-barrier must read the old value
// before the store overwrites it, so all barrier code is emitted before the
// actual store. dst_narrow/src_narrow describe the compressed-oop encoding
// of the destination slot and the source register, respectively; tmp is a
// scratch register the caller allows us to clobber.
void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
                                             Address dst, bool dst_narrow,
                                             Register src, bool src_narrow,
                                             Register tmp) {
  // Emit barrier if needed
  if (!ShenandoahSkipBarriers && ShenandoahStoreBarrierStubC2::needs_barrier(node)) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    if (ShenandoahStoreBarrierStubC2::needs_keep_alive_barrier(node)) {
      // Slow path loads the previous value and records it in the SATB queue.
      ShenandoahStoreBarrierStubC2* const stub = ShenandoahStoreBarrierStubC2::create(node, dst, dst_narrow, src, src_narrow, tmp);
      stub->dont_preserve(tmp); // temp, no need to preserve it

      gc_state_check_c2(masm, ShenandoahHeap::MARKING, stub);
    }

    if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
      card_barrier_c2(masm, dst, tmp);
    }
  }

  // Need to encode into tmp, because we cannot clobber src.
  // TODO: Maybe there is a matcher way to test that src is unused after this?
  if (dst_narrow && !src_narrow) {
    __ movq(tmp, src);
    if (ShenandoahStoreBarrierStubC2::src_not_null(node)) {
      __ encode_heap_oop_not_null(tmp);
    } else {
      __ encode_heap_oop(tmp);
    }
    // From here on, store the (encoded) copy instead of the caller's register.
    src = tmp;
  }

  // Do the actual store
  if (dst_narrow) {
    __ movl(dst, src);
  } else {
    __ movq(dst, src);
  }
}
1098 
// Emit a compare-and-exchange (exchange == true, result is the witness left
// in oldval/rax) or compare-and-swap (exchange == false, boolean promoted
// into res) on an oop location, with Shenandoah barriers, for C2 code.
// The fast path is an optimistic CAS; the slow stub handles false negatives
// (from-space witness values) and SATB recording of the previous value.
// NOTE(review): the maybe_null parameter is not referenced in this body —
// confirm whether it is still needed by the matcher-side callers.
void ShenandoahBarrierSetAssembler::cae_c2(const MachNode* node, MacroAssembler* masm,
              Register res, Address addr, Register oldval, Register newval,
              Register tmp1, Register tmp2, bool exchange, bool maybe_null, bool narrow) {

  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert(narrow == UseCompressedOops, "should match");

  // Oldval and newval can be in the same register, but all other registers should be
  // distinct for extra safety, as we shuffle register values around.
  assert_different_registers(oldval, tmp1, tmp2, addr.base(), addr.index());
  assert_different_registers(newval, tmp1, tmp2, addr.base(), addr.index());

  // Remember oldval for retry logic in slow path. We need to do it here,
  // because it will be overwritten by the fast-path CAS.
  if (ShenandoahCASBarrierStubC2::needs_barrier(node)) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
    __ movptr(tmp2, oldval);
  }

  // Fast-path: Try to CAS optimistically.
  __ lock();
  if (narrow) {
    __ cmpxchgl(newval, addr);
  } else {
    __ cmpxchgptr(newval, addr);
  }

  // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
  // This would be the final result if we do not go slow.
  if (!exchange) {
    assert(res != noreg, "need result register");
    __ setcc(Assembler::equal, res);
  } else {
    assert(res == noreg, "no result expected");
  }

  if (!ShenandoahSkipBarriers && ShenandoahCASBarrierStubC2::needs_barrier(node)) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    if (ShenandoahCASBarrierStubC2::needs_load_ref_barrier(node) || ShenandoahCASBarrierStubC2::needs_keep_alive_barrier(node)) {
      ShenandoahCASBarrierStubC2* const stub = ShenandoahCASBarrierStubC2::create(node, addr, oldval, newval, res, tmp1, tmp2, narrow, exchange);
      if (res != noreg) {
        stub->dont_preserve(res);  // set at the end, no need to save
      }
      stub->dont_preserve(oldval); // saved explicitly
      stub->dont_preserve(tmp1);   // temp, no need to save
      stub->preserve(tmp2);        // carries oldval for final retry, must be saved

      // On success, we need to write to SATB if MARKING is set in GCState.
      // On failure, we need to run LRB and retry CAS if HAS_FORWARDED is set in GCState.
      // For CAE there is no boolean result register, so latch the success flag
      // into tmp1 for the stub to test.
      if (exchange) {
        __ setcc(Assembler::equal, tmp1);
      }

      char state = 0;
      state |= ShenandoahCASBarrierStubC2::needs_load_ref_barrier(node)   ? ShenandoahHeap::HAS_FORWARDED : 0;
      state |= ShenandoahCASBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
      gc_state_check_c2(masm, state, stub);
    }

    // NOTE(review): uses the Store stub's card-barrier predicate here —
    // presumably the predicate is shared across store-like nodes; confirm this
    // is intended rather than a CAS-specific predicate.
    if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
      card_barrier_c2(masm, addr, tmp1);
    }
  }
}
1164 
// Emit an atomic exchange (get-and-set) on an oop location with Shenandoah
// barriers for C2-compiled code. After the xchg, newval holds the previous
// value, which is run through the load barrier (same mid/slow path as a
// plain load). tmp is a caller-provided scratch register for the card mark.
void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp) {
  assert_different_registers(newval, tmp, addr.base(), addr.index());

  // xchg is implicitly locked on x86; pick width by the node's oop encoding.
  if (node->bottom_type()->isa_narrowoop()) {
    __ xchgl(newval, addr);
  } else {
    __ xchgq(newval, addr);
  }

  if (!ShenandoahSkipBarriers && (ShenandoahLoadBarrierStubC2::needs_barrier(node) || ShenandoahStoreBarrierStubC2::needs_card_barrier(node))) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    if (ShenandoahLoadBarrierStubC2::needs_barrier(node)) {
      // newval now carries the fetched previous value; barrier it like a load result.
      ShenandoahLoadBarrierStubC2* const stub = ShenandoahLoadBarrierStubC2::create(node, newval, addr);

      char check = 0;
      check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node)    ? ShenandoahHeap::MARKING : 0;
      check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node)      ? ShenandoahHeap::HAS_FORWARDED : 0;
      check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
      gc_state_check_c2(masm, check, stub);
    }

    if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
      card_barrier_c2(masm, addr, tmp);
    }
  }
}
1192 
// Emit the card-table post-barrier: dirty the card covering dst.
// Computes the card address from the store address and the thread-local
// (biased) card table base, then writes the dirty value (0) into it.
// tmp is clobbered.
void ShenandoahBarrierSetAssembler::card_barrier_c2(MacroAssembler* masm, Address dst, Register tmp) {
  // TODO: Might be a good place to implement some filters here.
  // For example, G1 only flips card marks for stores within a single region.

  __ lea(tmp, dst);
  __ shrptr(tmp, CardTable::card_shift());
  __ addptr(tmp, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
  Address card_address(tmp, 0);

  assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
  Label L_done;
  if (UseCondCardMark) {
    // Avoid the write (and its cache traffic) if the card is already dirty.
    __ cmpb(card_address, 0);
    __ jccb(Assembler::equal, L_done);
  }
  if (UseCompressedOops && CompressedOops::base() == nullptr) {
    // NOTE(review): relies on r12 being the compressed-oops heap-base register,
    // which holds zero when base() == nullptr; storing the register avoids a
    // byte-immediate encoding. Confirm r12 is guaranteed zero on all paths here.
    __ movb(card_address, r12);
  } else {
    __ movb(card_address, 0);
  }
  __ bind(L_done);
}
1215 
1216 #undef __
1217 #define __ masm.
1218 
1219 void ShenandoahLoadBarrierStubC2::emit_code(MacroAssembler& masm) {
1220   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1221 
1222   __ bind(*entry());
1223 
1224   Register tmp = select_temp_register(_src, _dst);
1225 
1226   Label L_lrb_done, L_lrb_slow;
1227   Label L_keepalive_done, L_keepalive_pack_and_done, L_keepalive_slow;
1228   Label L_done;
1229 
1230   // If the object is null, there is no point in applying barriers.
1231   if (_narrow) {
1232     __ testl(_dst, _dst);
1233   } else {
1234     __ testptr(_dst, _dst);
1235   }
1236   if (!_needs_keep_alive_barrier && _needs_load_ref_barrier) {
1237     __ jccb(Assembler::equal, L_done);
1238   } else {
1239     __ jcc(Assembler::equal, L_done);
1240   }
1241 
1242   // Lay out barrier mid-paths here. The goal is to do quick checks/actions
1243   // that can be done without going to slowpath calls. This also allows doing
1244   // shorter branches, where possible.
1245 
1246   if (_needs_keep_alive_barrier) {
1247     // Runtime check for keep-alive, in case the other barrier is enabled.
1248     // Otherwise the fastpath check already checked it.
1249     if (_needs_load_ref_barrier) {
1250       Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1251       __ testb(gc_state, ShenandoahHeap::MARKING);
1252       __ jccb(Assembler::zero, L_keepalive_done);
1253     }
1254 
1255     // If object is narrow, we need to decode it first.
1256     if (_narrow) {
1257       __ decode_heap_oop_not_null(_dst);
1258     }
1259 
1260     // Can we store a value in the given thread's buffer?
1261     // (The index field is typed as size_t.)
1262     Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1263     Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1264 
1265     __ push(tmp);
1266     __ movptr(tmp, index);
1267     __ testptr(tmp, tmp);
1268     __ jcc(Assembler::zero, L_keepalive_slow);
1269     // The buffer is not full, store value into it.
1270     __ subptr(tmp, wordSize);
1271     __ movptr(index, tmp);
1272     __ addptr(tmp, buffer);
1273     __ movptr(Address(tmp, 0), _dst);
1274     __ pop(tmp);
1275 
1276     __ bind(L_keepalive_pack_and_done);
1277     if (_narrow) {
1278       __ encode_heap_oop(_dst);
1279     }
1280     __ bind(L_keepalive_done);
1281   }
1282 
1283   if (_needs_load_ref_barrier) {
1284     bool is_weak = (_node->barrier_data() & ShenandoahBitStrong) == 0;
1285 
1286     // Runtime check for KA, in case the other barrier is enabled.
1287     // Otherwise the fastpath check already checked it.
1288     if (_needs_keep_alive_barrier) {
1289       Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1290       __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED | (is_weak ? ShenandoahHeap::WEAK_ROOTS : 0));
1291       __ jccb(Assembler::zero, L_lrb_done);
1292     }
1293 
1294     // Collection set check. Only really applies to strong loads, as weak/phantom loads
1295     // are handled in runtime.
1296     __ push(tmp);
1297     if (!is_weak) {
1298       if (_narrow) {
1299         __ decode_heap_oop_not_null(tmp, _dst);
1300       } else {
1301         __ movptr(tmp, _dst);
1302       }
1303       __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1304       // Check if cset address is in good spot to just use it as offset. It almost always is.
1305       Address cset_addr_arg;
1306       intptr_t cset_addr = (intptr_t) ShenandoahHeap::in_cset_fast_test_addr();
1307       if ((cset_addr >> 3) < INT32_MAX) {
1308         assert(is_aligned(cset_addr, 8), "Sanity");
1309         cset_addr_arg = Address(tmp, checked_cast<int>(cset_addr >> 3), Address::times_8);
1310       } else {
1311         __ addptr(tmp, cset_addr);
1312         cset_addr_arg = Address(tmp, 0);
1313       }
1314       __ cmpb(cset_addr_arg, 0);
1315       __ jccb(Assembler::notEqual, L_lrb_slow);
1316       __ pop(tmp); // Slow path had popped for us otherwise
1317     } else {
1318       __ jmpb(L_lrb_slow);
1319     }
1320 
1321     __ bind(L_lrb_done);
1322   }
1323 
1324   // Exit here.
1325   __ bind(L_done);
1326   __ jmp(*continuation());
1327 
1328   // Slow paths here. LRB slow path goes first: this allows the short branches from LRB fastpath,
1329   // the overwhelmingly major case.
1330   if (_needs_load_ref_barrier) {
1331     __ bind(L_lrb_slow);
1332     __ pop(tmp); // Immediately pop tmp to make sure the stack is aligned
1333 
1334       // If object is narrow, we need to decode it first.
1335     if (_narrow) {
1336       __ decode_heap_oop_not_null(_dst);
1337     }
1338 
1339     dont_preserve(_dst); // For LRB we must not preserve _dst
1340     {
1341       SaveLiveRegisters save_registers(&masm, this);
1342 
1343       // Shuffle in the arguments. The end result should be:
1344       //   c_rarg0 <-- _dst
1345       //   c_rarg1 <-- lea(_src)
1346       if (c_rarg0 == _dst) {
1347         __ lea(c_rarg1, _src);
1348       } else if (c_rarg1 == _dst) {
1349         // Set up arguments in reverse, and then flip them
1350         __ lea(c_rarg0, _src);
1351         __ xchgptr(c_rarg0, c_rarg1);
1352       } else {
1353         assert_different_registers(c_rarg1, _dst);
1354         __ lea(c_rarg1, _src);
1355         __ movptr(c_rarg0, _dst);
1356       }
1357 
1358       address entry;
1359       if (_narrow) {
1360         if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
1361           entry = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
1362         } else if ((_node->barrier_data() & ShenandoahBitWeak) != 0) {
1363           entry = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
1364         } else if ((_node->barrier_data() & ShenandoahBitPhantom) != 0) {
1365           entry = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow);
1366         }
1367       } else {
1368         if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
1369           entry = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
1370         } else if ((_node->barrier_data() & ShenandoahBitWeak) != 0) {
1371           entry = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
1372         } else if ((_node->barrier_data() & ShenandoahBitPhantom) != 0) {
1373           entry = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
1374         }
1375       }
1376       __ call(RuntimeAddress(entry), rax);
1377       assert(!save_registers.contains(_dst), "must not save result register");
1378       __ movptr(_dst, rax);
1379     }
1380     if (_narrow) {
1381       __ encode_heap_oop(_dst);
1382     }
1383     __ jmp(L_lrb_done);
1384   }
1385 
1386   if (_needs_keep_alive_barrier) {
1387     __ bind(L_keepalive_slow);
1388     __ pop(tmp); // Immediately pop to make sure the stack is aligned
1389 
1390     preserve(_dst); // For SATB we must preserve _dst
1391     {
1392       SaveLiveRegisters save_registers(&masm, this);
1393       if (c_rarg0 != _dst) {
1394         __ mov(c_rarg0, _dst);
1395       }
1396       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre)), rax);
1397     }
1398     __ jmp(L_keepalive_pack_and_done);
1399   }
1400 }
1401 
// Out-of-line (slow path) code for the C2 store-barrier stub: the SATB
// (keep-alive) pre-write barrier. Loads the previous value from the
// destination slot; if non-null, records it in the thread's SATB mark
// queue, falling back to the runtime when the queue buffer is full.
void ShenandoahStoreBarrierStubC2::emit_code(MacroAssembler& masm) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  __ bind(*entry());

  Label L_runtime, L_preval_null;

  // We need 2 temp registers for this code to work.
  // _tmp is already allocated and will carry preval for the call.
  // Allocate the other one now.
  Register tmp2 = select_temp_register(_dst, _src, _tmp);

  Register preval = _tmp;

  // Load value from memory
  if (_dst_narrow) {
    __ movl(preval, _dst);
  } else {
    __ movq(preval, _dst);
  }

  // Is the previous value null? Null previous values need no SATB record.
  __ testptr(preval, preval);
  __ jccb(Assembler::equal, L_preval_null);

  // SATB queue stores decoded oops; unpack a narrow previous value.
  if (_dst_narrow) {
    __ decode_heap_oop_not_null(preval);
  }

  // Can we store a value in the given thread's buffer?
  // (The index field is typed as size_t.)
  Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  // Index 0 means the buffer is full; otherwise bump it down one word
  // and store the previous value at buffer + new index.
  Register slot = tmp2;
  __ push(tmp2);
  __ movptr(slot, index);
  __ testptr(slot, slot);
  __ jccb(Assembler::zero, L_runtime);
  __ subptr(slot, wordSize);
  __ movptr(index, slot);
  __ addptr(slot, buffer);
  __ movptr(Address(slot, 0), preval);
  __ pop(tmp2);

  // Exit here
  __ bind(L_preval_null);
  __ jmp(*continuation());

  __ bind(L_runtime);
  __ pop(tmp2); // Immediately pop tmp to make sure the stack is aligned
  {
    SaveLiveRegisters save_registers(&masm, this);
    if (c_rarg0 != preval) {
      __ mov(c_rarg0, preval);
    }
    // rax is a caller-saved, non-argument-passing register, so it does not
    // interfere with c_rarg0 or c_rarg1. If it contained any live value before
    // entering this stub, it is saved at this point, and restored after the
    // call. If it did not contain any live value, it is free to be used. In
    // either case, it is safe to use it here as a call scratch register.
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre)), rax);
  }
  __ jmp(*continuation());
}
1467 
1468 void ShenandoahCASBarrierStubC2::emit_code(MacroAssembler& masm) {
1469   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1470 
1471   __ bind(*entry());
1472 
1473   Label L_final;
1474   Label L_succeded;
1475 
1476   // check if first CAS succeded, if it did we just need to write to SATB
1477   Register tst = _cae ? _tmp1 : _result;
1478   __ testq(tst, tst);
1479   __ jnz(L_succeded);
1480 
1481 
1482   // LRB + CAS Retry
1483             // CAS has failed because the value held at addr does not match expected.
1484             // This may be a false negative because the version in memory might be
1485             // the from-space version of the same object we currently hold to-space
1486             // reference for.
1487             //
1488             // To resolve this, we need to pass the location through the LRB fixup,
1489             // this will make sure that the location has only to-space pointers.
1490             // To avoid calling into runtime often, we cset-check the object first.
1491             // We can inline most of the work here, but there is little point,
1492             // as CAS failures over cset locations must be rare. This fast-slow split
1493             // matches what we do for normal LRB.
1494 
1495             assert(_expected == rax, "expected must be rax");
1496 
1497             // Non-strong references should always go to runtime. We do not expect
1498             // CASes over non-strong locations.
1499             assert((_node->barrier_data() & ShenandoahBitStrong) != 0, "Only strong references for CASes");
1500 
1501             // (Compressed) failure witness is in _expected.
1502             // Unpack it and check if it is in collection set.
1503             if (_narrow) {
1504               __ decode_heap_oop(_expected);
1505             }
1506             __ movptr(_tmp1, _expected);
1507             __ shrptr(_tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1508             __ addptr(_tmp1, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
1509             __ cmpb(Address(_tmp1, 0), 0);
1510             __ jcc(Assembler::zero, L_final);
1511 
1512             {
1513               SaveLiveRegisters save_registers(&masm, this);
1514               // Load up failure witness again.
1515               if (c_rarg0 != _expected) {
1516                 __ movptr(c_rarg0, _expected);
1517               }
1518               __ lea(c_rarg1, _addr);
1519 
1520               if (_narrow) {
1521                 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), 2);
1522               } else {
1523                 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), 2);
1524               }
1525               // We have called LRB to fix up the heap location. We do not care about its result,
1526               // as we will just try to CAS the location again.
1527             }
1528 
1529             __ bind(L_final);
1530 
1531             // Try to CAS again with the original expected value.
1532             // At this point, there can no longer be false negatives.
1533             __ movptr(_expected, _tmp2);
1534             __ lock();
1535             if (_narrow) {
1536               __ cmpxchgl(_new_val, _addr);
1537             } else {
1538               __ cmpxchgptr(_new_val, _addr);
1539             }
1540             if (!_cae) {
1541               assert(_result != noreg, "need result register");
1542               __ setcc(Assembler::equal, _result);
1543             } else {
1544               assert(_result == noreg, "no result expected");
1545             }
1546             // If the retry did not succeed skip SATB
1547             __ jcc(Assembler::notEqual, *continuation());
1548 
1549 
1550 
1551     // SATB
1552     __ bind(L_succeded);
1553             Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1554             Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1555 
1556             Label L_satb_pack_and_done, L_runtime;
1557 
1558             Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1559             __ testb(gc_state, ShenandoahHeap::MARKING);
1560             __ jcc(Assembler::zero, *continuation());
1561 
1562             // Paranoia: CAS has succeded, so what was in memory is definitely oldval.
1563             // Instead of pulling it from other code paths, pull it from stashed value.
1564             // TODO: Figure out better way to do this.
1565             __ movptr(_expected, _tmp2);
1566 
1567             // Is the previous value null?
1568             __ cmpptr(_expected, NULL_WORD);
1569             __ jcc(Assembler::equal, *continuation());
1570 
1571             if (_narrow) {
1572               __ decode_heap_oop_not_null(_expected);
1573             }
1574 
1575             // Can we store a value in the given thread's buffer?
1576             // (The index field is typed as size_t.)
1577             __ movptr(_tmp1, index);
1578             __ testptr(_tmp1, _tmp1);
1579             __ jccb(Assembler::zero, L_runtime);
1580             // The buffer is not full, store value into it.
1581             __ subptr(_tmp1, wordSize);
1582             __ movptr(index, _tmp1);
1583             __ addptr(_tmp1, buffer);
1584             __ movptr(Address(_tmp1, 0), _expected);
1585 
1586             __ bind(L_satb_pack_and_done);
1587             if (_narrow) {
1588               __ encode_heap_oop_not_null(_expected);
1589             }
1590             __ jmp(*continuation());
1591 
1592             __ bind(L_runtime);
1593 
1594             // Expected register should not be clobbered.
1595             preserve(_expected);
1596 
1597             // Carry the CAS/CAE result over the slowpath call
1598             if (_cae) {
1599               assert(_result == noreg, "no result expected");
1600             } else {
1601               assert(_result != noreg, "need result register");
1602               preserve(_result);
1603             }
1604             {
1605               SaveLiveRegisters save_registers(&masm, this);
1606               if (c_rarg0 != _expected) {
1607                 __ mov(c_rarg0, _expected);
1608               }
1609               // rax is a caller-saved, non-argument-passing register, so it does not
1610               // interfere with c_rarg0 or c_rarg1. If it contained any live value before
1611               // entering this stub, it is saved at this point, and restored after the
1612               // call. If it did not contain any live value, it is free to be used. In
1613               // either case, it is safe to use it here as a call scratch register.
1614               __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre)), rax);
1615             }
1616             __ jmp(L_satb_pack_and_done);
1617 
1618     __ jmp(*continuation());
1619 }
1620 #undef __
1621 #endif
< prev index next >