< prev index next >

src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp

Print this page

  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahForwarding.hpp"
  32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "runtime/javaThread.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "utilities/macros.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_LIRAssembler.hpp"
  42 #include "c1/c1_MacroAssembler.hpp"
  43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  44 #endif



  45 
  46 #define __ masm->
  47 
  48 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
  49                                                        Register src, Register dst, Register count) {
  50 
  51   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  52 
  53   if (is_reference_type(type)) {
  54     if (ShenandoahCardBarrier) {
  55       bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
  56       bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
  57       bool obj_int = (type == T_OBJECT) && UseCompressedOops;
  58 
  59       // We need to save the original element count because the array copy stub
  60       // will destroy the value and we need it for the card marking barrier.
  61       if (!checkcast) {
  62         if (!obj_int) {
  63           // Save count for barrier
  64           __ movptr(r11, count);

 884     assert(!is_native, "weak must not be called off-heap");
 885     if (UseCompressedOops) {
 886       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
 887     } else {
 888       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
 889     }
 890   } else {
 891     assert(is_phantom, "only remaining strength");
 892     assert(is_native, "phantom must only be called off-heap");
 893     __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
 894   }
 895 
 896   __ restore_live_registers_except_rax(true);
 897 
 898   __ epilogue();
 899 }
 900 
 901 #undef __
 902 
 903 #endif // COMPILER1











































































































































































































































































































































































































  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahForwarding.hpp"
  32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "runtime/javaThread.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "utilities/macros.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_LIRAssembler.hpp"
  42 #include "c1/c1_MacroAssembler.hpp"
  43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  44 #endif
  45 #ifdef COMPILER2
  46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  47 #endif
  48 
  49 #define __ masm->
  50 
  51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
  52                                                        Register src, Register dst, Register count) {
  53 
  54   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  55 
  56   if (is_reference_type(type)) {
  57     if (ShenandoahCardBarrier) {
  58       bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
  59       bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
  60       bool obj_int = (type == T_OBJECT) && UseCompressedOops;
  61 
  62       // We need to save the original element count because the array copy stub
  63       // will destroy the value and we need it for the card marking barrier.
  64       if (!checkcast) {
  65         if (!obj_int) {
  66           // Save count for barrier
  67           __ movptr(r11, count);

 887     assert(!is_native, "weak must not be called off-heap");
 888     if (UseCompressedOops) {
 889       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
 890     } else {
 891       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
 892     }
 893   } else {
 894     assert(is_phantom, "only remaining strength");
 895     assert(is_native, "phantom must only be called off-heap");
 896     __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
 897   }
 898 
 899   __ restore_live_registers_except_rax(true);
 900 
 901   __ epilogue();
 902 }
 903 
 904 #undef __
 905 
 906 #endif // COMPILER1
 907 
 908 #ifdef COMPILER2
 909 
 // Re-point the `__` shorthand at `masm->`: the C2 entry points that follow
 // take the assembler as a pointer parameter named masm.
 910 #undef __
 911 #define __ masm->
 912 
 // Emits a C2 load from `src` into `dst`, then calls
 // ShenandoahBarrierStubC2::load_post for whatever follow-up code that hook emits.
 //   node   - forwarded unchanged to load_post
 //   masm   - assembler that receives the code
 //   dst    - register receiving the loaded value
 //   src    - memory operand that is read
 //   narrow - true emits a 32-bit movl, false a 64-bit movq; also forwarded to load_post
 913 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow) {
 914   // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
 915   if (narrow) {
 916     __ movl(dst, src);
 917   } else {
 918     __ movq(dst, src);
 919   }
 920 
       // Runs strictly after the load so that the load stays the first emitted instruction.
 921   ShenandoahBarrierStubC2::load_post(masm, node, dst, src, noreg, noreg, narrow);
 922 }
 923 
 // Emits a C2 store of `src` to `dst`, bracketed by the
 // ShenandoahBarrierStubC2::store_pre and store_post hooks.
 //   node       - passed to both hooks; its barrier_data() is also checked for
 //                ShenandoahBitNotNull when choosing the encode variant
 //   masm       - assembler that receives the code
 //   dst        - memory operand that is written
 //   dst_narrow - true stores 32 bits (movl), false stores 64 bits (movq)
 //   src        - register holding the value to store; not modified here
 //   src_narrow - whether `src` already holds the narrow form
 //   tmp        - scratch register: handed to store_pre and store_post, and
 //                used for the encoded copy of `src` when one is needed
 924 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
 925                                              Address dst, bool dst_narrow,
 926                                              Register src, bool src_narrow,
 927                                              Register tmp) {
 928 
 929   ShenandoahBarrierStubC2::store_pre(masm, node, tmp, dst, noreg, noreg, dst_narrow);
 930 
 931   // Need to encode into tmp, because we cannot clobber src.
 932   if (dst_narrow && !src_narrow) {
 933     __ movq(tmp, src);
         // The plain encode is used unless the node is flagged ShenandoahBitNotNull.
 934     if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
 935       __ encode_heap_oop(tmp);
 936     } else {
 937       __ encode_heap_oop_not_null(tmp);
 938     }
         // From here on the store reads the encoded copy instead of the caller's register.
 939     src = tmp;
 940   }
 941 
 942   // Do the actual store
 943   if (dst_narrow) {
 944     __ movl(dst, src);
 945   } else {
 946     __ movq(dst, src);
 947   }
 948 
 949   ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp, noreg);
 950 }
 951 
 // Emits a C2 compare-and-swap on `addr`, bracketed by the
 // ShenandoahBarrierStubC2::load_store_pre and load_store_post hooks.
 //   node   - forwarded to both hooks
 //   masm   - assembler that receives the code
 //   res    - when not noreg, receives the setcc(equal) outcome of the cmpxchg
 //   addr   - memory operand being updated
 //   oldval - expected value; asserted to be rax
 //   newval - replacement value
 //   tmp    - scratch register handed to the hooks; asserted distinct from
 //            oldval, newval and the base/index registers of `addr`
 //   narrow - true emits cmpxchgl, false emits cmpxchgptr
 952 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm,
 953                                                        Register res, Address addr,
 954                                                        Register oldval, Register newval, Register tmp,
 955                                                        bool narrow) {
 956 
 957   assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
 958 
 959   // Oldval and newval can be in the same register, but all other registers should be
 960   // distinct for extra safety, as we shuffle register values around.
 961   assert_different_registers(oldval, tmp, addr.base(), addr.index());
 962   assert_different_registers(newval, tmp, addr.base(), addr.index());
 963 
 964   ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
 965 
 966   // CAS!
 967   __ lock();
 968   if (narrow) {
 969     __ cmpxchgl(newval, addr);
 970   } else {
 971     __ cmpxchgptr(newval, addr);
 972   }
 973 
 974   // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
       // Nothing is emitted between the cmpxchg and this setcc, so it observes the cmpxchg's condition.
 975   if (res != noreg) {
 976     __ setcc(Assembler::equal, res);
 977   }
 978 
 979   ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
 980 }
 981 
 // Emits a C2 exchange of `newval` with the memory at `addr`, bracketed by the
 // ShenandoahBarrierStubC2::load_store_pre and load_store_post hooks.
 //   node   - forwarded to both hooks
 //   masm   - assembler that receives the code
 //   newval - register swapped with memory by the xchg
 //   addr   - memory operand being exchanged
 //   tmp    - scratch register handed to the hooks; asserted distinct from
 //            newval and the base/index registers of `addr`
 //   narrow - true emits xchgl, false emits xchgq
 982 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp, bool narrow) {
 983   assert_different_registers(newval, tmp, addr.base(), addr.index());
 984 
 985   ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
 986 
 987   if (narrow) {
 988     __ xchgl(newval, addr);
 989   } else {
 990     __ xchgq(newval, addr);
 991   }
 992 
 993   ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
 994 }
 995 
 // Switch the `__` shorthand to `masm.`: the ShenandoahBarrierStubC2 methods
 // that follow receive the assembler as a reference named masm.
 996 #undef __
 997 #define __ masm.
 998 
 // Emits code that writes a zero byte into the card table entry for `addr`.
 // The entry address is built in tmp1 as
 //   (effective address of addr >> CardTable::card_shift())
 //     + the value loaded from the thread's card_table_offset() slot.
 // Zero is the dirty value (asserted below). With UseCondCardMark the store is
 // skipped when the entry already reads zero.
 //   masm - assembler that receives the code
 //   addr - memory operand whose card is marked
 //   tmp1 - clobbered; holds the card entry address
 //   tmp2 - not used by this body
 999 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address addr, Register tmp1, Register tmp2) {
1000   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1001 
1002   __ lea(tmp1, addr);
1003   __ shrptr(tmp1, CardTable::card_shift());
1004   __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1005   Address card_address(tmp1, 0);
1006 
1007   assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1008   Label L_done;
1009   if (UseCondCardMark) {
1010     __ cmpb(card_address, 0);
1011     __ jccb(Assembler::equal, L_done);
1012   }
       // NOTE(review): storing r12 instead of an immediate presumably relies on r12
       // holding zero when the compressed-oops base is null - confirm.
1013   if (UseCompressedOops && CompressedOops::base() == nullptr) {
1014     __ movb(card_address, r12);
1015   } else {
1016     __ movb(card_address, 0);
1017   }
1018   __ bind(L_done);
1019 }
1020 
// Emits the inline check that diverts execution into this stub.
// The byte at gc_state_fast_array_offset(test_state) in the current thread is
// compared with zero; a non-zero byte jumps to entry(). continuation() is bound
// directly after the branch, so it labels the instruction following the check.
//   masm       - assembler that receives the code
//   test_state - selects which thread-local gc-state byte is tested
//   tmp        - not used by this body
1021 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
1022   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1023 
1024   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1025   __ cmpb(gc_state_fast, 0);
1026   __ jcc(Assembler::notEqual, *entry());
1027   __ bind(*continuation());
1028 }
1029 
// Emits the out-of-line body of the stub, beginning at entry().
// Sequence: align and bind entry(); optionally load _obj from _addr; jump to
// continuation() when _obj is zero; then emit the keep-alive barrier, the
// load-reference barrier, or both, depending on the _needs_* flags.
//   masm - assembler that receives the code
1030 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1031   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1032   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1033 
1034   // On x86, there is a significant penalty with unaligned branch target, for example
1035   // when the target instruction straddles the fetch line. It makes (performance) sense
1036   // to spend some code size to align the target better.
1037   __ align(16);
1038   __ bind(*entry());
1039 
1040   // If we need to load ourselves, do it here.
1041   if (_do_load) {
1042     if (_narrow) {
1043       __ movl(_obj, _addr);
1044     } else {
1045       __ movq(_obj, _addr);
1046     }
1047   }
1048 
1049   // If the object is null, there is no point in applying barriers.
1050   maybe_far_jump_if_zero(masm, _obj);
1051 
1052   // We need to make sure that loads done by callers survive across slow-path calls.
1053   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1054   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1055   if (!_do_load || needs_both_barriers) {
1056     preserve(_obj);
1057   }
1058 
1059   // Go for barriers. Barriers can return straight to continuation, as long
1060   // as another barrier is not needed.
1061   if (needs_both_barriers) {
         // nullptr makes keepalive() end by falling through, so lrb() is emitted right behind it.
1062     keepalive(masm, nullptr);
1063     lrb(masm);
1064   } else if (_needs_keep_alive_barrier) {
1065     keepalive(masm, continuation());
1066   } else if (_needs_load_ref_barrier) {
1067     lrb(masm);
1068   } else {
1069     ShouldNotReachHere();
1070   }
1071 }
1072 
// Emits the keep-alive barrier: records _obj in the current thread's SATB mark queue.
// Fast path: lower the queue index by wordSize and store _obj at buffer + index.
// Slow path (the subtraction borrows): call keepalive_runtime_entry_addr() with
// _obj in c_rarg0, inside a SaveLiveRegisters scope.
//   masm   - assembler that receives the code
//   L_done - label to jump to when finished; nullptr means the emitted code
//            simply falls through at its end (asserted to be the case whenever
//            _needs_load_ref_barrier is set)
1073 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1074   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1075   Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1076   Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1077 
1078   Label L_through, L_pop_and_slow;
1079 
1080   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1081   if (_needs_load_ref_barrier) {
1082     assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
         // MARKING byte is zero: skip the whole keep-alive sequence.
1083     __ cmpb(gc_state_fast, 0);
1084     __ jcc(Assembler::equal, L_through);
1085   }
1086 
1087   // Need temp to work, allocate one now.
       // NOTE(review): tmp_live presumably reports that the chosen register holds a
       // live value and must be saved around its use here - confirm in select_temp_register.
1088   bool tmp_live;
1089   Register tmp = select_temp_register(tmp_live);
1090   if (tmp_live) {
1091     __ push(tmp);
1092   }
1093 
1094   // Fast-path: put object into buffer.
1095   // If buffer is already full, go slow.
1096   __ movptr(tmp, index);
1097   __ subptr(tmp, wordSize);
       // Taken when the subtraction borrowed, i.e. the index was below wordSize.
1098   __ jccb(Assembler::below, L_pop_and_slow);
1099   __ movptr(index, tmp);
       // tmp = buffer pointer loaded from the thread + new index: the slot to fill.
1100   __ addptr(tmp, buffer);
1101 
1102   // Store the object in queue.
1103   // If object is narrow, we need to decode it before inserting.
1104   // We can skip the re-encoding if we know that object is not preserved.
1105   if (_narrow) {
1106     __ decode_heap_oop_not_null(_obj);
1107   }
1108   __ movptr(Address(tmp, 0), _obj);
1109   if (_narrow && is_preserved(_obj)) {
1110     __ encode_heap_oop_not_null(_obj);
1111   }
1112 
1113   // Fast-path exits here.
1114   if (tmp_live) {
1115     __ pop(tmp);
1116   }
1117 
1118   if (L_done != nullptr) {
1119     __ jmp(*L_done);
1120   } else {
         // Jump over the slow-path code emitted below to the fall-through point.
1121     __ jmp(L_through);
1122   }
1123 
1124   // Slow-path: call runtime to handle.
1125   // Need to pop tmp immediately for stack to remain aligned.
1126   __ bind(L_pop_and_slow);
1127   if (tmp_live) {
1128     __ pop(tmp);
1129   }
1130   {
1131     SaveLiveRegisters slr(&masm, this);
1132 
1133     // Shuffle in the arguments. The end result should be:
1134     //   c_rarg0 <-- obj
1135     if (c_rarg0 != _obj) {
1136       __ mov(c_rarg0, _obj);
1137     }
1138 
1139     // Go to runtime and handle the rest there.
1140     __ call(RuntimeAddress(keepalive_runtime_entry_addr()));
1141   }
1142   if (L_done != nullptr) {
1143     __ jmp(*L_done);
1144   } else {
         // No target supplied: execution continues with whatever is emitted after this barrier.
1145     __ bind(L_through);
1146   }
1147 }
1148 
// Emits the load-reference barrier for _obj.
// Exits to continuation() without calling the runtime when
//   - the keep-alive barrier is also enabled and the relevant gc-state byte is zero, or
//   - the collection-set table byte looked up for _obj is zero.
// When a weak barrier is needed and the WEAK_ROOTS byte is non-zero, the cset
// lookup is skipped and the slow path is taken directly.
// Slow path: call lrb_runtime_entry_addr() with c_rarg0 = _obj and
// c_rarg1 = effective address of _addr, then copy rax into _obj and jump to
// continuation().
//   masm - assembler that receives the code
1149 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1150   Label L_pop_and_slow, L_slow;
1151 
1152   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1153   if (_needs_keep_alive_barrier) {
1154     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1155     Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1156     __ cmpb(gc_state_fast, 0);
1157     __ jcc(Assembler::equal, *continuation());
1158   }
1159 
1160   // If weak references are being processed, weak/phantom loads need to go slow,
1161   // regardless of their cset status.
1162   if (_needs_load_ref_weak_barrier) {
1163     Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1164     __ cmpb(gc_state_fast, 0);
1165     __ jccb(Assembler::notEqual, L_slow);
1166   }
1167 
       // When dumping AOT code, the region shift and cset table base are read from
       // AOTRuntimeConstants by the emitted code instead of being embedded as constants.
1168   bool is_aot = AOTCodeCache::is_on_for_dump();
1169 
1170   // Need temp to work, allocate one now.
       // rcx is excluded under AOT because the AOT path below loads the shift amount into it.
1171   bool tmp_live;
1172   Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1173   if (tmp_live) {
1174     __ push(tmp);
1175   }
1176 
1177   // Compute the cset bitmap index
1178   if (_narrow) {
1179     __ decode_heap_oop_not_null(tmp, _obj);
1180   } else {
1181     __ movptr(tmp, _obj);
1182   }
1183 
1184   Address cset_addr_arg;
1185   intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
1186   if (!is_aot && cset_addr < INT32_MAX) {
1187     // Cset bitmap is at easily encodeable address. Just use it as displacement.
1188     __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1189     cset_addr_arg = Address(tmp, checked_cast<int>(cset_addr));
1190   } else {
1191     bool tmp2_live;
1192     Register tmp2 = select_temp_register(tmp2_live, /* skip_reg1 = */ tmp, /* skip_reg2 = */ is_aot ? rcx : noreg);
1193     if (tmp2_live) {
1194       __ push(tmp2);
1195     }
1196     if (is_aot) {
1197       // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1198       assert_different_registers(tmp, tmp2, rcx);
           // Borrow rcx: load the shift amount into it, shift tmp, then restore rcx.
           // NOTE(review): the one-argument shrptr is presumably the shift-by-CL form - confirm.
1199       __ push(rcx);
1200       __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1201       __ movl(rcx, Address(rcx));
1202       __ shrptr(tmp);
1203       __ pop(rcx);
           // tmp += the value stored at cset_base_address().
1204       __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1205       __ addptr(tmp, Address(tmp2));
1206     } else {
1207       // Cset bitmap is far away. Add its address fully.
1208       __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1209       __ movptr(tmp2, cset_addr);
1210       __ addptr(tmp, tmp2);
1211     }
1212     if (tmp2_live) {
1213       __ pop(tmp2);
1214     }
1215     cset_addr_arg = Address(tmp, 0);
1216   }
1217 
1218   // Cset-check. Fall-through to slow if in collection set.
       // The compare is emitted before any pop of tmp because cset_addr_arg is addressed through tmp.
1219   __ cmpb(cset_addr_arg, 0);
1220   if (tmp_live) {
1221     __ jccb(Assembler::notEqual, L_pop_and_slow);
1222     __ pop(tmp);
1223     __ jmp(*continuation());
1224   } else {
1225     // Nothing else to do, jump back
1226     __ jcc(Assembler::equal, *continuation());
1227   }
1228 
1229   // Slow path
1230   __ bind(L_pop_and_slow);
1231   // Need to pop tmp immediately for stack to remain aligned.
1232   if (tmp_live) {
1233     __ pop(tmp);
1234   }
       // The weak-roots branch above lands here; it is taken before tmp is pushed, so no pop is needed.
1235   __ bind(L_slow);
1236 
1237   // Obj is the result, need to temporarily stop preserving it.
1238   bool is_obj_preserved = is_preserved(_obj);
1239   if (is_obj_preserved) {
1240     dont_preserve(_obj);
1241   }
1242   {
1243     SaveLiveRegisters slr(&masm, this);
1244 
1245     assert_different_registers(rax, c_rarg0, c_rarg1);
1246 
1247     // Shuffle in the arguments. The end result should be:
1248     //   c_rarg0 <-- obj
1249     //   c_rarg1 <-- lea(addr)
1250     if (_obj == c_rarg0) {
1251       __ lea(c_rarg1, _addr);
1252     } else if (_obj == c_rarg1) {
1253       // Set up arguments in reverse, and then flip them
1254       __ lea(c_rarg0, _addr);
1255       __ xchgptr(c_rarg0, c_rarg1);
1256     } else {
1257       assert_different_registers(_obj, c_rarg0, c_rarg1);
           // lea comes first so _addr is evaluated before c_rarg0 is overwritten.
1258       __ lea(c_rarg1, _addr);
1259       __ movptr(c_rarg0, _obj);
1260     }
1261 
1262     // Go to runtime and handle the rest there.
1263     __ call(RuntimeAddress(lrb_runtime_entry_addr()));
1264 
1265     // Save the result where needed.
         // NOTE(review): the narrow movl is emitted even when _obj is rax, presumably
         // to clear the upper 32 bits - confirm.
1266     if (_narrow) {
1267       __ movl(_obj, rax);
1268     } else if (_obj != rax) {
1269       __ movptr(_obj, rax);
1270     }
1271   }
       // Restore the preservation state changed before the call.
1272   if (is_obj_preserved) {
1273     preserve(_obj);
1274   }
1275 
1276   __ jmp(*continuation());
1277 }
1278 
// Returns the value of Register::available_gp_registers() unchanged.
1279 int ShenandoahBarrierStubC2::available_gp_registers() {
1280   return Register::available_gp_registers();
1281 }
1282 
// Returns true when r is rsp, rbp, r12_heapbase or r15_thread, and false for
// every other register.
1283 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1284   return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1285 }
1286 
// Intentionally empty in this file.
// NOTE(review): presumably a hook whose callers live elsewhere - confirm before removing.
1287 void ShenandoahBarrierStubC2::post_init() {
1288   // Do nothing.
1289 }
1290 
// Emits a test of reg against itself (32-bit when _narrow is set, 64-bit
// otherwise) followed by a jcc to continuation() that is taken when reg is zero.
//   masm - assembler that receives the code
//   reg  - register to test; not modified
1291 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
1292   if (_narrow) {
1293     __ testl(reg, reg);
1294   } else {
1295     __ testq(reg, reg);
1296   }
1297   __ jcc(Assembler::zero, *continuation());
1298 }
1299 
1300 #endif // COMPILER2
< prev index next >