< prev index next >

src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp

Print this page

  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahForwarding.hpp"
  32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interpreter.hpp"

  37 #include "runtime/javaThread.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "utilities/macros.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_LIRAssembler.hpp"
  42 #include "c1/c1_MacroAssembler.hpp"
  43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  44 #endif
  45 #ifdef COMPILER2
  46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  47 #endif
  48 
  49 #define __ masm->
  50 
  51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
  52                                                        Register src, Register dst, Register count) {
  53 
  54   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  55 
  56   if (is_reference_type(type)) {

1004   __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1005   Address card_address(tmp1, 0);
1006 
1007   assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1008   Label L_done;
1009   if (UseCondCardMark) {
1010     __ cmpb(card_address, 0);
1011     __ jccb(Assembler::equal, L_done);
1012   }
1013   if (UseCompressedOops && CompressedOops::base() == nullptr) {
1014     __ movb(card_address, r12);
1015   } else {
1016     __ movb(card_address, 0);
1017   }
1018   __ bind(L_done);
1019 }
1020 
1021 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
       // Emits the inline check that guards this stub: compare the thread-local
       // fast gc-state byte selected by test_state with zero, branch to entry()
       // when it is non-zero, and bind continuation() right after the branch.
       // The tmp register is not referenced anywhere in this body.
1022   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1023 






       // The gc-state slot is addressed relative to r15_thread.
1024   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1025   __ cmpb(gc_state_fast, 0);
       // Non-zero slot: take the out-of-line stub; otherwise fall through.
1026   __ jcc(Assembler::notEqual, *entry());




       // Both the fall-through path and jumps to continuation() land here.
1027   __ bind(*continuation());
1028 }
1029 






















































1030 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
       // Emits the out-of-line stub body: an aligned entry() label, an optional
       // load of _obj from _addr, a null filter that jumps to continuation(),
       // and then the keep-alive and/or load-reference barrier code.
1031   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1032   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1033 
1034   // On x86, there is a significant penalty with unaligned branch target, for example
1035   // when the target instruction straddles the fetch line. It makes (performance) sense
1036   // to spend some code size to align the target better.
1037   __ align(16);
1038   __ bind(*entry());
1039 
1040   // If we need to load ourselves, do it here.
1041   if (_do_load) {
         // Narrow values are loaded with a 32-bit move, others with a 64-bit move.
1042     if (_narrow) {
1043       __ movl(_obj, _addr);
1044     } else {
1045       __ movq(_obj, _addr);
1046     }
1047   }
1048 
1049   // If the object is null, there is no point in applying barriers.
1050   maybe_far_jump_if_zero(masm, _obj);
1051 
1052   // We need to make sure that loads done by callers survive across slow-path calls.
1053   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1054   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1055   if (!_do_load || needs_both_barriers) {
1056     preserve(_obj);
1057   }
1058 
1059   // Go for barriers. Barriers can return straight to continuation, as long
1060   // as another barrier is not needed.
1061   if (needs_both_barriers) {
         // With a null L_done, keepalive() binds its exit label at its end instead
         // of jumping away, so control falls into the LRB code emitted next.
1062     keepalive(masm, nullptr);
1063     lrb(masm);
1064   } else if (_needs_keep_alive_barrier) {
1065     keepalive(masm, continuation());
1066   } else if (_needs_load_ref_barrier) {
1067     lrb(masm);
1068   } else {
         // Excluded by the assert at the top of this function.
1069     ShouldNotReachHere();
1070   }
1071 }
1072 
1073 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1074   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1075   Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1076   Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1077 
1078   Label L_through, L_pop_and_slow;
1079 
1080   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1081   if (_needs_load_ref_barrier) {
1082     assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
1083     __ cmpb(gc_state_fast, 0);


1084     __ jcc(Assembler::equal, L_through);
1085   }
1086 
1087   // Need temp to work, allocate one now.
1088   bool tmp_live;
1089   Register tmp = select_temp_register(tmp_live);
1090   if (tmp_live) {
1091     __ push(tmp);
1092   }
1093 
1094   // Fast-path: put object into buffer.
1095   // If buffer is already full, go slow.
1096   __ movptr(tmp, index);
1097   __ subptr(tmp, wordSize);
1098   __ jccb(Assembler::below, L_pop_and_slow);
1099   __ movptr(index, tmp);
1100   __ addptr(tmp, buffer);
1101 
1102   // Store the object in queue.
1103   // If object is narrow, we need to decode it before inserting.

1133     // Shuffle in the arguments. The end result should be:
1134     //   c_rarg0 <-- obj
1135     if (c_rarg0 != _obj) {
1136       __ mov(c_rarg0, _obj);
1137     }
1138 
1139     // Go to runtime and handle the rest there.
1140     // Use rax as scratch, as it will be saved if live.
1141     __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1142   }
1143   if (L_done != nullptr) {
1144     __ jmp(*L_done);
1145   } else {
1146     __ bind(L_through);
1147   }
1148 }
1149 
1150 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1151   Label L_pop_and_slow, L_slow;
1152 
1153   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1154   if (_needs_keep_alive_barrier) {
1155     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1156     Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1157     __ cmpb(gc_state_fast, 0);
1158     __ jcc(Assembler::equal, *continuation());
1159   }
1160 
1161   // If weak references are being processed, weak/phantom loads need to go slow,
1162   // regardless of their cset status.
1163   if (_needs_load_ref_weak_barrier) {
1164     Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1165     __ cmpb(gc_state_fast, 0);
1166     __ jccb(Assembler::notEqual, L_slow);
1167   }
1168 
1169   bool is_aot = AOTCodeCache::is_on_for_dump();
1170 
1171   // Need temp to work, allocate one now.
1172   bool tmp_live;
1173   Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1174   if (tmp_live) {
1175     __ push(tmp);
1176   }
1177 
1178   // Compute the cset bitmap index
1179   if (_narrow) {

1273   }
1274   if (is_obj_preserved) {
1275     preserve(_obj);
1276   }
1277 
1278   __ jmp(*continuation());
1279 }
1280 
1281 int ShenandoahBarrierStubC2::available_gp_registers() {
       // Forwards to Register::available_gp_registers() and returns its result unchanged.
1282   return Register::available_gp_registers();
1283 }
1284 
1285 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
       // True exactly for rsp, rbp, r12_heapbase and r15_thread.
       // NOTE(review): presumably these are excluded when picking temp registers - confirm at the callers.
1286   return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1287 }
1288 
1289 void ShenandoahBarrierStubC2::post_init() {
       // Intentionally empty: this x86 implementation performs no work here.
1290   // Do nothing.
1291 }
1292 
1293 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
       // Emits a zero test of reg followed by a conditional jump to continuation().
       // The test is 32 bits wide when _narrow is set and 64 bits wide otherwise.
1294   if (_narrow) {
1295     __ testl(reg, reg);
1296   } else {
1297     __ testq(reg, reg);
1298   }
       // Uses jcc rather than the byte-displacement jccb used elsewhere in this file.
1299   __ jcc(Assembler::zero, *continuation());
1300 }
1301 
1302 #endif // COMPILER2

  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahForwarding.hpp"
  32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "nativeInst_x86.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "utilities/macros.hpp"
  41 #ifdef COMPILER1
  42 #include "c1/c1_LIRAssembler.hpp"
  43 #include "c1/c1_MacroAssembler.hpp"
  44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  45 #endif
  46 #ifdef COMPILER2
  47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  48 #endif
  49 
  50 #define __ masm->
  51 
  52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
  53                                                        Register src, Register dst, Register count) {
  54 
  55   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  56 
  57   if (is_reference_type(type)) {

1005   __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1006   Address card_address(tmp1, 0);
1007 
1008   assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1009   Label L_done;
1010   if (UseCondCardMark) {
1011     __ cmpb(card_address, 0);
1012     __ jccb(Assembler::equal, L_done);
1013   }
1014   if (UseCompressedOops && CompressedOops::base() == nullptr) {
1015     __ movb(card_address, r12);
1016   } else {
1017     __ movb(card_address, 0);
1018   }
1019   __ bind(L_done);
1020 }
1021 
1022 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
       // Emits the inline entry to this stub as an unconditional jump to entry(),
       // tagged with a barrier relocation that carries the fast-array index for
       // test_state, and binds continuation() after it.
       // The tmp register is not referenced anywhere in this body.
1023   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1024 
1025   // Emit the unconditional branch in the first version of the method.
1026   // Let the rest of runtime figure out how to manage it.
1027   __ relocate(barrier_Relocation::spec(), ShenandoahThreadLocalData::gc_state_to_fast_array_index(test_state));
       // NOTE(review): presumably this is the jump that patch_branch_to_nop() and
       // patch_nop_to_branch() rewrite, and maybe_short = false keeps it at a
       // size the 5-byte NOP can replace - confirm.
1028   __ jmp(*entry(), /* maybe_short = */ false);
1029 
1030 #ifdef ASSERT
       // Debug-only check. It sits behind an unconditional jump, so it can only
       // execute once that jump is no longer in place. It continues normally when
       // the gc-state slot is zero and halts otherwise.
1031   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1032   __ cmpb(gc_state_fast, 0);
1033   __ jccb(Assembler::zero, *continuation());
1034   __ hlt(); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
1035 #endif
1036   // TODO: When barriers are consistently turned off at the end of the cycle, assert that barrier is NOP-ed.
1037 
1038   __ bind(*continuation());
1039 }
1040 
1041 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
       // Returns the destination of the jump instruction located at pc.
       // Asserts that the instruction at pc is a jump.
1042   NativeInstruction* ni = nativeInstruction_at(pc);
1043   assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
1044   NativeJump* jmp = nativeJump_at(pc);
1045   return jmp->jump_destination();
1046 }
1047 
1048 void insert_5_byte_nop(address pc) {
       // Overwrites the five bytes at pc with 0F 1F 44 00 00, the 5-byte x86 NOP
       // encoding, then invalidates the instruction cache for that range.
       // NOTE(review): the bytes are stored one at a time, so the patch is not a
       // single atomic write - confirm that no thread can execute at pc while
       // this runs.
1049   *(pc + 0) = 0x0F;
1050   *(pc + 1) = 0x1F;
1051   *(pc + 2) = 0x44;
1052   *(pc + 3) = 0x00;
1053   *(pc + 4) = 0x00;
1054   ICache::invalidate_range(pc, 5);
1055 }
1056 
1057 bool is_5_byte_nop(address pc) {
       // Returns true only if the five bytes at pc are exactly 0F 1F 44 00 00,
       // the sequence written by insert_5_byte_nop(). Stops at the first mismatch.
1058   if (*(pc + 0) != 0x0F) return false;
1059   if (*(pc + 1) != 0x1F) return false;
1060   if (*(pc + 2) != 0x44) return false;
1061   if (*(pc + 3) != 0x00) return false;
1062   if (*(pc + 4) != 0x00) return false;
1063   return true;
1064 }
1065 
1066 void check_at(bool cond, address pc, const char* msg) {
       // Asserts cond. The failure message contains msg, the pc value and the
       // five bytes found at pc printed as hex.
1067   assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
1068          msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
1069 }
1070 
1071 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
       // Reports whether the instruction at pc is currently a jump.
1072   NativeInstruction* ni = nativeInstruction_at(pc);
1073   return ni->is_jump();
1074 }
1075 
1076 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
       // Replaces a jump at pc with the 5-byte NOP. If pc does not hold a jump,
       // only asserts that the NOP is already there.
       // NOTE(review): assumes the jump being replaced occupies exactly five bytes - TODO confirm.
1077   NativeInstruction* ni = nativeInstruction_at(pc);
1078   if (ni->is_jump()) {
1079     insert_5_byte_nop(pc);
1080   } else {
1081     check_at(is_5_byte_nop(pc), pc, "Should already be nop");
1082   }
1083 }
1084 
1085 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
       // Replaces the 5-byte NOP at pc with a jump to stub_addr. If pc does not
       // hold that NOP, only asserts that a jump to stub_addr is already there.
1086   NativeInstruction* ni = nativeInstruction_at(pc);
1087   if (is_5_byte_nop(pc)) {
1088     NativeJump::insert(pc, stub_addr);
1089   } else {
         // ni is consulted only on this path.
1090     check_at(ni->is_jump(), pc, "Should already be jump");
1091     check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
1092   }
1093 }
1094 
1095 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
       // Emits the out-of-line stub body: an aligned entry() label, an optional
       // load of _obj from _addr, a null filter that jumps to continuation(),
       // and then the keep-alive and/or load-reference barrier code.
1096   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1097   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1098 
1099   // On x86, there is a significant penalty with unaligned branch target, for example
1100   // when the target instruction straddles the fetch line. It makes (performance) sense
1101   // to spend some code size to align the target better.
1102   __ align(16);
1103   __ bind(*entry());
1104 
1105   // If we need to load ourselves, do it here.
1106   if (_do_load) {
         // Narrow values are loaded with a 32-bit move, others with a 64-bit move.
1107     if (_narrow) {
1108       __ movl(_obj, _addr);
1109     } else {
1110       __ movq(_obj, _addr);
1111     }
1112   }
1113 
1114   // If the object is null, there is no point in applying barriers.
1115   maybe_far_jump_if_zero(masm, _obj, continuation());
1116 
1117   // We need to make sure that loads done by callers survive across slow-path calls.
1118   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1119   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1120   if (!_do_load || needs_both_barriers) {
1121     preserve(_obj);
1122   }
1123 
1124   // Go for barriers. Barriers can return straight to continuation, as long
1125   // as another barrier is not needed.
1126   if (needs_both_barriers) {
         // With a null L_done, keepalive() binds its exit label at its end instead
         // of jumping away, so control falls into the LRB code emitted next.
1127     keepalive(masm, nullptr);
1128     lrb(masm);
1129   } else if (_needs_keep_alive_barrier) {
1130     keepalive(masm, continuation());
1131   } else if (_needs_load_ref_barrier) {
1132     lrb(masm);
1133   } else {
         // Excluded by the assert at the top of this function.
1134     ShouldNotReachHere();
1135   }
1136 }
1137 
1138 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1139   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1140   Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1141   Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1142 
1143   Label L_through, L_pop_and_slow;
1144 
1145   // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1146   // Therefore, alas we need a separate check here. TODO: Figure this out.
1147   __ cmpb(gc_state_fast, 0);
1148   if (L_done != nullptr) {
1149     __ jcc(Assembler::equal, *L_done);
1150   } else {
1151     __ jcc(Assembler::equal, L_through);
1152   }
1153 
1154   // Need temp to work, allocate one now.
1155   bool tmp_live;
1156   Register tmp = select_temp_register(tmp_live);
1157   if (tmp_live) {
1158     __ push(tmp);
1159   }
1160 
1161   // Fast-path: put object into buffer.
1162   // If buffer is already full, go slow.
1163   __ movptr(tmp, index);
1164   __ subptr(tmp, wordSize);
1165   __ jccb(Assembler::below, L_pop_and_slow);
1166   __ movptr(index, tmp);
1167   __ addptr(tmp, buffer);
1168 
1169   // Store the object in queue.
1170   // If object is narrow, we need to decode it before inserting.

1200     // Shuffle in the arguments. The end result should be:
1201     //   c_rarg0 <-- obj
1202     if (c_rarg0 != _obj) {
1203       __ mov(c_rarg0, _obj);
1204     }
1205 
1206     // Go to runtime and handle the rest there.
1207     // Use rax as scratch, as it will be saved if live.
1208     __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1209   }
1210   if (L_done != nullptr) {
1211     __ jmp(*L_done);
1212   } else {
1213     __ bind(L_through);
1214   }
1215 }
1216 
1217 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1218   Label L_pop_and_slow, L_slow;
1219 
1220   // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1221   // Therefore, alas we need a separate check here. TODO: Figure this out.
1222   char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1223   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1224   __ cmpb(gc_state_fast, 0);
1225   __ jcc(Assembler::equal, *continuation());

1226 
1227   // If weak references are being processed, weak/phantom loads need to go slow,
1228   // regardless of their cset status.
1229   if (_needs_load_ref_weak_barrier) {
1230     Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1231     __ cmpb(gc_state_fast, 0);
1232     __ jccb(Assembler::notEqual, L_slow);
1233   }
1234 
1235   bool is_aot = AOTCodeCache::is_on_for_dump();
1236 
1237   // Need temp to work, allocate one now.
1238   bool tmp_live;
1239   Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1240   if (tmp_live) {
1241     __ push(tmp);
1242   }
1243 
1244   // Compute the cset bitmap index
1245   if (_narrow) {

1339   }
1340   if (is_obj_preserved) {
1341     preserve(_obj);
1342   }
1343 
1344   __ jmp(*continuation());
1345 }
1346 
1347 int ShenandoahBarrierStubC2::available_gp_registers() {
       // Forwards to Register::available_gp_registers() and returns its result unchanged.
1348   return Register::available_gp_registers();
1349 }
1350 
1351 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
       // True exactly for rsp, rbp, r12_heapbase and r15_thread.
       // NOTE(review): presumably these are excluded when picking temp registers - confirm at the callers.
1352   return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1353 }
1354 
1355 void ShenandoahBarrierStubC2::post_init() {
       // Intentionally empty: this x86 implementation performs no work here.
1356   // Do nothing.
1357 }
1358 
1359 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) {
       // Emits a zero test of reg followed by a conditional jump to *L_target.
       // The test is 32 bits wide when _narrow is set and 64 bits wide otherwise.
       // L_target is dereferenced unconditionally, so it must not be null.
1360   if (_narrow) {
1361     __ testl(reg, reg);
1362   } else {
1363     __ testq(reg, reg);
1364   }
       // Uses jcc rather than the byte-displacement jccb used elsewhere in this file.
1365   __ jcc(Assembler::zero, *L_target);
1366 }
1367 
1368 #endif // COMPILER2
< prev index next >