< prev index next >

src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp

Print this page

  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"

  33 #include "gc/shenandoah/shenandoahRuntime.hpp"
  34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  35 #include "interpreter/interpreter.hpp"

  36 #include "runtime/javaThread.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "utilities/macros.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_LIRAssembler.hpp"
  41 #include "c1/c1_MacroAssembler.hpp"
  42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  43 #endif
  44 #ifdef COMPILER2
  45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  46 #endif
  47 
  48 #define __ masm->
  49 
  50 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
  51                                                        Register src, Register dst, Register count) {
  52 
  53   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  54 
  55   if (is_reference_type(type)) {

 858   __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
 859   Address card_address(tmp1, 0);
 860 
 861   assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
 862   Label L_done;
 863   if (UseCondCardMark) {
 864     __ cmpb(card_address, 0);
 865     __ jccb(Assembler::equal, L_done);
 866   }
 867   if (UseCompressedOops && CompressedOops::base() == nullptr) {
 868     __ movb(card_address, r12);
 869   } else {
 870     __ movb(card_address, 0);
 871   }
 872   __ bind(L_done);
 873 }
 874 
 875 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
       // Emits the inline check that decides whether to enter this stub:
       //   - compares the byte at the r15_thread-relative offset returned by
       //     ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state) against zero;
       //   - branches to entry() when that byte is non-zero;
       //   - binds continuation() immediately after the branch, so a zero byte falls through.
       // The `tmp` parameter is not referenced in this body.
       // NOTE(review): skip_counter presumably excludes the emitted instructions from
       // inline-size accounting for its scope -- confirm against InlineSkippedInstructionsCounter.
 876   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 877 
 878   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
 879   __ cmpb(gc_state_fast, 0);
 880   __ jcc(Assembler::notEqual, *entry());


 881   __ bind(*continuation());
 882 }
 883 






















































 884 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
 885   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 886   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
 887 
 888   // On x86, there is a significant penalty with an unaligned branch target, for example
 889   // when the target instruction straddles the fetch line. It makes (performance) sense
 890   // to spend some code size to align the target better.
 891   __ align(16);
 892   __ bind(*entry());
 893 
 894   // If we need to load ourselves, do it here.
 895   if (_do_load) {
 896     if (_narrow) {
 897       __ movl(_obj, _addr);
 898     } else {
 899       __ movq(_obj, _addr);
 900     }
 901   }
 902 
 903   // If the object is null, there is no point in applying barriers.

 908   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
 909   if (!_do_load || needs_both_barriers) {
 910     preserve(_obj);
 911   }
 912 
 913   // Go for barriers. Barriers can return straight to continuation, as long
 914   // as another barrier is not needed.
 915   if (needs_both_barriers) {
 916     keepalive(masm, nullptr);
 917     lrb(masm);
 918   } else if (_needs_keep_alive_barrier) {
 919     keepalive(masm, continuation());
 920   } else if (_needs_load_ref_barrier) {
 921     lrb(masm);
 922   } else {
 923     ShouldNotReachHere();
 924   }
 925 }
 926 
 927 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
 928   Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
 929   Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 930   Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 931 
 932   Label L_through, L_pop_and_slow;
 933 
 934   // If another barrier is enabled as well, do a runtime check for a specific barrier.
 935   if (_needs_load_ref_barrier) {
 936     assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
 937     __ cmpb(gc_state_fast, 0);
 938     __ jcc(Assembler::equal, L_through);







 939   }
 940 
 941   // Need temp to work, allocate one now.
 942   bool tmp_live;
 943   Register tmp = select_temp_register(tmp_live);
 944   if (tmp_live) {
 945     __ push(tmp);
 946   }
 947 
 948   // Fast-path: put object into buffer.
 949   // If buffer is already full, go slow.
 950   __ movptr(tmp, index);
 951   __ subptr(tmp, wordSize);
 952   __ jccb(Assembler::below, L_pop_and_slow);
 953   __ movptr(index, tmp);
 954   __ addptr(tmp, buffer);
 955 
 956   // Store the object in queue.
 957   // If object is narrow, we need to decode it before inserting.
 958   // We can skip the re-encoding if we know that object is not preserved.

 987     // Shuffle in the arguments. The end result should be:
 988     //   c_rarg0 <-- obj
 989     if (c_rarg0 != _obj) {
 990       __ mov(c_rarg0, _obj);
 991     }
 992 
 993     // Go to runtime and handle the rest there.
 994     // Use rax as scratch, as it will be saved if live.
 995     __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
 996   }
 997   if (L_done != nullptr) {
 998     __ jmp(*L_done);
 999   } else {
1000     __ bind(L_through);
1001   }
1002 }
1003 
1004 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1005   Label L_pop_and_slow, L_slow;
1006 
1007   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1008   if (_needs_keep_alive_barrier) {
1009     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1010     Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1011     __ cmpb(gc_state_fast, 0);
1012     __ jcc(Assembler::equal, *continuation());
1013   }
1014 
1015   // If weak references are being processed, weak/phantom loads need to go slow,
1016   // regardless of their cset status.
1017   if (_needs_load_ref_weak_barrier) {
1018     Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1019     __ cmpb(gc_state_fast, 0);
1020     __ jccb(Assembler::notEqual, L_slow);












1021   }
1022 
1023   bool is_aot = AOTCodeCache::is_on_for_dump();
1024 
1025   // Need temp to work, allocate one now.
1026   bool tmp_live;
1027   Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1028   if (tmp_live) {
1029     __ push(tmp);
1030   }
1031 
1032   // Compute the cset bitmap index
1033   if (_narrow) {
1034     __ decode_heap_oop_not_null(tmp, _obj);
1035   } else {
1036     __ movptr(tmp, _obj);
1037   }
1038 
1039   Address cset_addr_arg;
1040   intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());

  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  33 #include "gc/shenandoah/shenandoahNMethod.inline.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "nativeInst_x86.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "utilities/macros.hpp"
  41 #ifdef COMPILER1
  42 #include "c1/c1_LIRAssembler.hpp"
  43 #include "c1/c1_MacroAssembler.hpp"
  44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  45 #endif
  46 #ifdef COMPILER2
  47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  48 #endif
  49 
  50 #define __ masm->
  51 
  52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
  53                                                        Register src, Register dst, Register count) {
  54 
  55   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  56 
  57   if (is_reference_type(type)) {

 860   __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
 861   Address card_address(tmp1, 0);
 862 
 863   assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
 864   Label L_done;
 865   if (UseCondCardMark) {
 866     __ cmpb(card_address, 0);
 867     __ jccb(Assembler::equal, L_done);
 868   }
 869   if (UseCompressedOops && CompressedOops::base() == nullptr) {
 870     __ movb(card_address, r12);
 871   } else {
 872     __ movb(card_address, 0);
 873   }
 874   __ bind(L_done);
 875 }
 876 
 877 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
       // Emits the patchable entry into this stub: an unconditional jump to entry(),
       // annotated with a barrier relocation whose value is derived from test_state via
       // ShenandoahNMethod::gc_state_to_reloc(). continuation() is bound right after the jump.
       // The `tmp` parameter is not referenced in this body.
       // NOTE(review): maybe_short = false presumably forces the long jump encoding so the
       // site has the same size as the 5-byte nop used by patch_branch_to_nop() -- confirm.
 878   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 879 
 880   // Emit the unconditional branch in the first version of the method.
 881   // Let the rest of runtime figure out how to manage it.
 882   __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(test_state));
 883   __ jmp(*entry(), /* maybe_short = */ false);
 884 
 885   __ bind(*continuation());
 886 }
 887 
 888 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
       // Returns the destination of the jump instruction located at pc.
       // Asserts that the instruction at pc is a jump; the assert message indicates this
       // is expected to be called on the initially emitted (unpatched) code.
 889   NativeInstruction* ni = nativeInstruction_at(pc);
 890   assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
 891   NativeJump* jmp = nativeJump_at(pc);
 892   return jmp->jump_destination();
 893 }
 894 
 895 static void insert_5_byte_nop(address pc) {
       // Overwrites the five bytes at pc with the sequence 0F 1F 44 00 00 (a 5-byte x86 nop)
       // and then invalidates the instruction cache for that 5-byte range.
       // NOTE(review): the bytes are stored one at a time, not as a single atomic write;
       // assumes callers guarantee no thread executes this site during patching -- confirm.
 896   *(pc + 0) = 0x0F;
 897   *(pc + 1) = 0x1F;
 898   *(pc + 2) = 0x44;
 899   *(pc + 3) = 0x00;
 900   *(pc + 4) = 0x00;
 901   ICache::invalidate_range(pc, 5);
 902 }
 903 
 904 static bool is_5_byte_nop(address pc) {
       // Returns true only if the five bytes at pc are exactly 0F 1F 44 00 00, i.e. the
       // same sequence that insert_5_byte_nop() writes. Returns false at the first mismatch.
 905   if (*(pc + 0) != 0x0F) return false;
 906   if (*(pc + 1) != 0x1F) return false;
 907   if (*(pc + 2) != 0x44) return false;
 908   if (*(pc + 3) != 0x00) return false;
 909   if (*(pc + 4) != 0x00) return false;
 910   return true;
 911 }
 912 
 913 static void check_at(bool cond, address pc, const char* msg) {
       // Asserts that cond holds. On failure the message contains msg, the address pc,
       // and the five bytes found at pc printed as hex, to help diagnose a mis-patched site.
 914   assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
 915          msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
 916 }
 917 
 918 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
       // Returns true when the instruction at pc is a jump. The patching routines in this
       // file switch a site between a jump and a 5-byte nop, so a jump means the branch
       // to the stub is currently in place.
 919   NativeInstruction* ni = nativeInstruction_at(pc);
 920   return ni->is_jump();
 921 }
 922 
 923 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
       // Replaces the jump at pc with a 5-byte nop.
       // If the instruction at pc is not a jump, nothing is written; the code only
       // asserts that the site already holds the 5-byte nop.
 924   NativeInstruction* ni = nativeInstruction_at(pc);
 925   if (ni->is_jump()) {
 926     insert_5_byte_nop(pc);
 927   } else {
 928     check_at(is_5_byte_nop(pc), pc, "Should already be nop");
 929   }
 930 }
 931 
 932 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
       // Replaces the 5-byte nop at pc with a jump to stub_addr (via NativeJump::insert).
       // If the site does not hold the 5-byte nop, nothing is written; the code only
       // asserts that it already holds a jump whose destination equals stub_addr.
 933   NativeInstruction* ni = nativeInstruction_at(pc);
 934   if (is_5_byte_nop(pc)) {
 935     NativeJump::insert(pc, stub_addr);
 936   } else {
 937     check_at(ni->is_jump(), pc, "Should already be jump");
 938     check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
 939   }
 940 }
 941 
 942 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
 943   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 944   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
 945 
 946   // On x86, there is a significant penalty with unaligned branch target, for example
 947   // when the target instruction straddles the fetch line. It makes (performance) sense
 948   // to spend some code size to align the target better.
 949   __ align(16);
 950   __ bind(*entry());
 951 
 952   // If we need to load ourselves, do it here.
 953   if (_do_load) {
 954     if (_narrow) {
 955       __ movl(_obj, _addr);
 956     } else {
 957       __ movq(_obj, _addr);
 958     }
 959   }
 960 
 961   // If the object is null, there is no point in applying barriers.

 966   bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
 967   if (!_do_load || needs_both_barriers) {
 968     preserve(_obj);
 969   }
 970 
 971   // Go for barriers. Barriers can return straight to continuation, as long
 972   // as another barrier is not needed.
 973   if (needs_both_barriers) {
 974     keepalive(masm, nullptr);
 975     lrb(masm);
 976   } else if (_needs_keep_alive_barrier) {
 977     keepalive(masm, continuation());
 978   } else if (_needs_load_ref_barrier) {
 979     lrb(masm);
 980   } else {
 981     ShouldNotReachHere();
 982   }
 983 }
 984 
 985 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {

 986   Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
 987   Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
 988 
 989   Label L_through, L_pop_and_slow;
 990 
 991   // If another barrier is enabled as well, do a check for a specific barrier.
 992   if (_needs_load_ref_barrier) {
 993     assert(L_done == nullptr, "Should be");
 994     // Emit the unconditional branch in the first version of the method.
 995     // Let the rest of runtime figure out how to manage it.
 996     // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
 997     char state_to_check = ShenandoahHeap::MARKING;
 998     Label L_over;
 999     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1000     __ jmp(L_over, /* maybe_short = */ false);
1001     __ jmp(L_through);
1002     __ bind(L_over);
1003   }
1004 
1005   // Need temp to work, allocate one now.
1006   bool tmp_live;
1007   Register tmp = select_temp_register(tmp_live);
1008   if (tmp_live) {
1009     __ push(tmp);
1010   }
1011 
1012   // Fast-path: put object into buffer.
1013   // If buffer is already full, go slow.
1014   __ movptr(tmp, index);
1015   __ subptr(tmp, wordSize);
1016   __ jccb(Assembler::below, L_pop_and_slow);
1017   __ movptr(index, tmp);
1018   __ addptr(tmp, buffer);
1019 
1020   // Store the object in queue.
1021   // If object is narrow, we need to decode it before inserting.
1022   // We can skip the re-encoding if we know that object is not preserved.

1051     // Shuffle in the arguments. The end result should be:
1052     //   c_rarg0 <-- obj
1053     if (c_rarg0 != _obj) {
1054       __ mov(c_rarg0, _obj);
1055     }
1056 
1057     // Go to runtime and handle the rest there.
1058     // Use rax as scratch, as it will be saved if live.
1059     __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1060   }
1061   if (L_done != nullptr) {
1062     __ jmp(*L_done);
1063   } else {
1064     __ bind(L_through);
1065   }
1066 }
1067 
1068 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1069   Label L_pop_and_slow, L_slow;
1070 








1071   // If weak references are being processed, weak/phantom loads need to go slow,
1072   // regardless of their cset status.
1073   if (_needs_load_ref_weak_barrier) {
1074     char state_to_check = ShenandoahHeap::WEAK_ROOTS;
1075     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1076     __ jmp(L_slow, /* maybe_short = */ false);
1077   }
1078 
1079   if (_needs_keep_alive_barrier) {
1080     // Emit the unconditional branch in the first version of the method.
1081     // Let the rest of runtime figure out how to manage it.
1082     // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
1083     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1084     Label L_over;
1085     __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1086     __ jmp(L_over, /* maybe_short = */ false);
1087     __ jmp(*continuation());
1088     __ bind(L_over);
1089   }
1090 
1091   bool is_aot = AOTCodeCache::is_on_for_dump();
1092 
1093   // Need temp to work, allocate one now.
1094   bool tmp_live;
1095   Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1096   if (tmp_live) {
1097     __ push(tmp);
1098   }
1099 
1100   // Compute the cset bitmap index
1101   if (_narrow) {
1102     __ decode_heap_oop_not_null(tmp, _obj);
1103   } else {
1104     __ movptr(tmp, _obj);
1105   }
1106 
1107   Address cset_addr_arg;
1108   intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
< prev index next >