< prev index next >

src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp

Print this page

  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahForwarding.hpp"
  32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interp_masm.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_LIRAssembler.hpp"
  42 #include "c1/c1_MacroAssembler.hpp"
  43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  44 #endif




  45 
  46 #define __ masm->
  47 
  48 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
  49                                                        Register src, Register dst, Register count, RegSet saved_regs) {
  50   if (is_oop) {
  51     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  52     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  53 
  54       Label done;
  55 
  56       // Avoid calling runtime if count == 0
  57       __ cbz(count, done);
  58 
  59       // Is GC active?
  60       assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
  61       assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
  62       Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  63       __ ldrb(rscratch1, gc_state);
  64       if (ShenandoahSATBBarrier && dest_uninitialized) {

 814       __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
 815     } else {
 816       __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
 817     }
 818   } else {
 819     assert(is_phantom, "only remaining strength");
 820     assert(is_native, "phantom must only be called off-heap");
 821     __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
 822   }
 823   __ blr(lr);
 824   __ mov(rscratch1, r0);
 825   __ pop_call_clobbered_registers();
 826   __ mov(r0, rscratch1);
 827 
 828   __ epilogue();
 829 }
 830 
 831 #undef __
 832 
 833 #endif // COMPILER1




























































































































































































































































































































































































  25  */
  26 
  27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
  28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  31 #include "gc/shenandoah/shenandoahForwarding.hpp"
  32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  34 #include "gc/shenandoah/shenandoahRuntime.hpp"
  35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  36 #include "interpreter/interp_masm.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_LIRAssembler.hpp"
  42 #include "c1/c1_MacroAssembler.hpp"
  43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  44 #endif
  45 #ifdef COMPILER2
  46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  47 #include "opto/output.hpp"
  48 #endif
  49 
  50 #define __ masm->
  51 
  52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
  53                                                        Register src, Register dst, Register count, RegSet saved_regs) {
  54   if (is_oop) {
  55     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  56     if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
  57 
  58       Label done;
  59 
  60       // Avoid calling runtime if count == 0
  61       __ cbz(count, done);
  62 
  63       // Is GC active?
  64       assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
  65       assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
  66       Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  67       __ ldrb(rscratch1, gc_state);
  68       if (ShenandoahSATBBarrier && dest_uninitialized) {

 818       __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
 819     } else {
 820       __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
 821     }
 822   } else {
 823     assert(is_phantom, "only remaining strength");
 824     assert(is_native, "phantom must only be called off-heap");
 825     __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
 826   }
 827   __ blr(lr);
 828   __ mov(rscratch1, r0);
 829   __ pop_call_clobbered_registers();
 830   __ mov(r0, rscratch1);
 831 
 832   __ epilogue();
 833 }
 834 
 835 #undef __
 836 
 837 #endif // COMPILER1
 838 
 839 #ifdef COMPILER2
 840 
 841 #undef __
 842 #define __ masm->
 843 
 844 
 845 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire) {
       // Emits the load for a C2 node into dst, followed by whatever
       // ShenandoahBarrierStubC2::load_post emits for that node.
       //   is_narrow  - use the word forms (ldrw/ldarw) instead of ldr/ldar.
       //   is_acquire - use the load-acquire forms (ldar/ldarw).
       //   tmp1, tmp2 - not touched here; passed through to load_post.
       // NOTE(review): the acquire forms are handed src.base() only, so any
       // offset/index carried by src is not applied on that path. Presumably
       // callers pass a base-only address when is_acquire is set -- confirm.
 846   // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
 847   if (is_narrow) {
 848     if (is_acquire) {
 849       __ ldarw(dst, src.base());
 850     } else {
 851       __ ldrw(dst, src);
 852     }
 853   } else {
 854     if (is_acquire) {
 855       __ ldar(dst, src.base());
 856     } else {
 857       __ ldr(dst, src);
 858     }
 859   }
 860 
 861   ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);
 862 }
 863 
 864 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
 865     Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3, bool is_volatile) {
       // Emits a store of src to dst for a C2 node, bracketed by
       // ShenandoahBarrierStubC2::store_pre (before) and store_post (after).
       //   dst_narrow  - the destination slot is word-sized (strw/stlrw).
       //   src_narrow  - src is already in encoded form; when false and
       //                 dst_narrow is true, src is encoded into tmp2 first.
       //   is_volatile - use the store-release forms (stlr/stlrw).
       // NOTE(review): the store-release forms are handed dst.base() only, so any
       // offset/index in dst is not applied on that path -- confirm callers only
       // pass a base-only address when is_volatile is set.
 866 
 867   ShenandoahBarrierStubC2::store_pre(masm, node, tmp1, dst, tmp2, tmp3, dst_narrow);
 868 
 869   // Do the actual store
 870   if (dst_narrow) {
 871     if (!src_narrow) {
 872       // Need to encode into tmp2, because we cannot clobber src.
           // The not-null encode variant is used only when the node's barrier
           // data has ShenandoahBitNotNull set.
 873       if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
 874         __ encode_heap_oop(tmp2, src);
 875       } else {
 876         __ encode_heap_oop_not_null(tmp2, src);
 877       }
           // From here on the value to store lives in tmp2.
 878       src = tmp2;
 879     }
 880 
 881     if (is_volatile) {
 882       __ stlrw(src, dst.base());
 883     } else {
 884       __ strw(src, dst);
 885     }
 886   } else {
 887     if (is_volatile) {
 888       __ stlr(src, dst.base());
 889     } else {
 890       __ str(src, dst);
 891     }
 892   }
 893 
       // tmp2 is handed to store_post as a temp; the store above has already
       // been emitted, so an encoded value held in tmp2 is no longer needed.
 894   ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);
 895 }
 896 
 897 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
 898     Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool weak, bool acquire) {
       // Emits a compare-and-swap on [addr] for a C2 node, bracketed by
       // ShenandoahBarrierStubC2::load_store_pre and load_store_post.
       //   exchange - res is passed to cmpxchg as its result register; otherwise
       //              cmpxchg gets noreg and res is set from the EQ flag below.
       //   narrow   - operate on a word instead of an xword.
       //   weak     - forwarded to cmpxchg as-is.
       //   acquire  - forwarded to cmpxchg; release is always requested.
 899   Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;
 900 
 901   ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, addr, tmp2, tmp3, narrow);
 902 
 903   // CAS!
 904   __ cmpxchg(addr, oldval, newval, op_size, acquire, /* release */ true, weak, exchange ? res : noreg);
 905 
 906   // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
 907   if (!exchange) {
 908     assert(res != noreg, "need result register");
 909     __ cset(res, Assembler::EQ);
 910   }
 911 
 912   ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
 913 }
 914 
 915 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
 916     Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
       // Emits an atomic exchange on [addr] for a C2 node (preval/newval are
       // passed straight to the atomic_xchg* helpers), bracketed by
       // ShenandoahBarrierStubC2::load_store_pre and load_store_post.
       // Unlike the other *_c2 entry points, narrowness is not a parameter: it is
       // derived from the node's bottom type.
 917   bool is_narrow = node->bottom_type()->isa_narrowoop();
 918 
 919   ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, addr, tmp2, tmp3, is_narrow);
 920 
       // Pick the exchange helper by width (w suffix = narrow) and by whether
       // the "al" variant is requested via is_acquire.
 921   if (is_narrow) {
 922     if (is_acquire) {
 923       __ atomic_xchgalw(preval, newval, addr);
 924     } else {
 925       __ atomic_xchgw(preval, newval, addr);
 926     }
 927   } else {
 928     if (is_acquire) {
 929       __ atomic_xchgal(preval, newval, addr);
 930     } else {
 931       __ atomic_xchg(preval, newval, addr);
 932     }
 933   }
 934 
 935   ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
 936 }
 937 
 938 #undef __
 939 #define __ masm.
 940 
 941 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
       // Emits code that marks the card covering `address` as dirty.
       // Both tmp1 and tmp2 are clobbered.
       // The dirty value is asserted to be 0, which is why the mark is a plain
       // byte store of zr and the "already dirty" test is a cbz.
 942   assert(CardTable::dirty_card_val() == 0, "must be");
 943   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 944 
 945   // tmp1 = card table base (holder)
 946   Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
 947   __ ldr(tmp1, curr_ct_holder_addr);
 948 
 949   // tmp2 = effective address
 950   __ lea(tmp2, address);
 951 
 952   // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
 953   __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
 954 
 955   if (UseCondCardMark) {
         // Read the card first and skip the store when it is already dirty.
         // tmp1 (the table base) is no longer needed and is reused for the value.
 956     Label L_already_dirty;
 957     __ ldrb(tmp1, Address(tmp2));
 958     __ cbz(tmp1, L_already_dirty);
 959     __ strb(zr, Address(tmp2));
 960     __ bind(L_already_dirty);
 961   } else {
 962     __ strb(zr, Address(tmp2));
 963   }
 964 }
 965 
 966 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
       // Emits the fast-path check: load the thread-local gc_state_fast byte for
       // test_state into tmp and branch to this stub's entry() when it is
       // non-zero. Binds continuation() right after the check. tmp is clobbered.
 967   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 968 
 969   Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
 970   if (_needs_far_jump) {
         // Far form: skip over an unconditional branch to the stub when the
         // byte is zero, instead of branching to the stub with cbnz directly.
 971     __ ldrb(tmp, gc_state_fast);
 972     __ cbz(tmp, *continuation());
 973     __ b(*entry());
 974   } else {
 975     __ ldrb(tmp, gc_state_fast);
 976     __ cbnz(tmp, *entry());
 977   }
 978 
 979   // This is where the slowpath stub will return to, or where the code above
 980   // will jump to if the check is false.
 981   __ bind(*continuation());
 982 }
 983 
 984 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
       // Emits the out-of-line stub body, starting at entry(): optionally load
       // the value into _obj, bail out to continuation() when it is null, then
       // emit the keep-alive barrier, the load-reference barrier, or both,
       // depending on _needs_keep_alive_barrier / _needs_load_ref_barrier.
 985   Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
 986   assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
 987 
 988   __ bind(*entry());
 989 
 990   // If we need to load ourselves, do it here.
 991   if (_do_load) {
 992     if (_narrow) {
 993       __ ldrw(_obj, _addr);
 994     } else {
 995       __ ldr(_obj, _addr);
 996     }
 997   }
 998 
 999   // If the object is null, there is no point in applying barriers.
1000   maybe_far_jump_if_zero(masm, _obj);
1001 
1002   // We need to make sure that loads done by callers survive across slow-path calls.
1003   // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1004   if (!_do_load || (_needs_keep_alive_barrier && _needs_load_ref_barrier)) {
1005     preserve(_obj);
1006   }
1007 
1008   // Go for barriers. Barriers can return straight to continuation, as long
1009   // as another barrier is not needed and we can reach the fastpath.
1010   if (_needs_keep_alive_barrier && _needs_load_ref_barrier) {
         // Both: keepalive gets no exit label so it falls through into lrb,
         // and lrb ends with the branch to continuation().
1011     keepalive(masm, nullptr);
1012     lrb(masm);
1013   } else if (_needs_keep_alive_barrier) {
1014     keepalive(masm, continuation());
1015   } else if (_needs_load_ref_barrier) {
1016     lrb(masm);
1017   } else {
         // Excluded by the assert at the top of this function.
1018     ShouldNotReachHere();
1019   }
1020 }
1021 
1022 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
       // Emits "branch to continuation() if reg == 0".
       // When _needs_far_jump is set, the test is inverted (cbnz) around an
       // unconditional b, so the jump to continuation() does not depend on the
       // reach of the compare-and-branch instruction.
1023   if (_needs_far_jump) {
1024     Label L_short_jump;
1025     __ cbnz(reg, L_short_jump);
1026     __ b(*continuation());
1027     __ bind(L_short_jump);
1028   } else {
1029     __ cbz(reg, *continuation());
1030   }
1031 }
1032 
1033 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
       // Emits the keep-alive barrier for _obj: try to push it into the
       // thread-local SATB mark queue buffer, and call the runtime when the
       // queue index is zero.
       //   L_done - where both the fast and the slow path branch when finished.
       //            When nullptr, both paths instead end at the local L_through
       //            label, i.e. execution continues with whatever the caller
       //            emits next.
       // Clobbers _tmp1 and _tmp2.
1034   Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1035   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1036   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1037   Label L_through, L_slowpath;
1038 
1039   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1040   if (_needs_load_ref_barrier) {
1041     assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
         // Skip the whole keep-alive sequence when the MARKING fast byte is zero.
1042     __ ldrb(_tmp1, gcstate);
1043     __ cbz(_tmp1, L_through);
1044   }
1045 
1046   // Fast-path: put object into buffer.
1047   // If buffer is already full, go slow.
1048   __ ldr(_tmp1, index);
1049   __ cbz(_tmp1, L_slowpath);
       // Step the index down by one word and write it back; _tmp1 then holds
       // the offset of the slot to fill.
1050   __ sub(_tmp1, _tmp1, wordSize);
1051   __ str(_tmp1, index);
1052   __ ldr(_tmp2, buffer);
1053 
1054   // Store the object in queue.
1055   // If object is narrow, we need to decode it before inserting.
1056   if (_narrow) {
         // Fold the offset into _tmp2 first so _tmp1 is free to hold the
         // decoded object.
1057     __ add(_tmp2, _tmp2, _tmp1);
1058     __ decode_heap_oop_not_null(_tmp1, _obj);
1059     __ str(_tmp1, Address(_tmp2));
1060   } else {
1061     // Buffer is 64-bit address, must be in base register.
1062     __ str(_obj, Address(_tmp2, _tmp1));
1063   }
1064 
1065   // Fast-path exits here.
1066   if (L_done != nullptr) {
1067     __ b(*L_done);
1068   } else {
1069     __ b(L_through);
1070   }
1071 
1072   // Slow-path: call runtime to handle.
1073   __ bind(L_slowpath);
1074 
1075   // The Load match rule in the .ad file may have legitimized the load address
1076   // using a TEMP register and in that case we need to explicitly preserve them
1077   // here because the RA does not consider TEMP as live-in, of course.
1078   if (_needs_load_ref_barrier) {
1079     if (_addr.base() != noreg) {
1080       preserve(_addr.base());
1081     }
1082     if (_addr.index() != noreg) {
1083       preserve(_addr.index());
1084     }
1085   }
1086 
1087   {
         // Scope for SaveLiveRegisters: the runtime call is emitted while slr
         // is alive.
1088     SaveLiveRegisters slr(&masm, this);
1089 
1090     // Go to runtime and handle the rest there.
1091     __ mov(c_rarg0, _obj);
1092     __ mov(lr, keepalive_runtime_entry_addr());
1093     __ blr(lr);
1094   }
1095 
       // Slow-path exit: branch to the caller's label, or bind L_through here so
       // both paths fall out of this sequence.
1096   if (L_done != nullptr) {
1097     __ b(*L_done);
1098   } else {
1099     __ bind(L_through);
1100   }
1101 }
1102 
1103 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
       // Emits the load-reference barrier for _obj. Fast exits go to
       // continuation() via maybe_far_jump_if_zero; otherwise the runtime entry
       // returned by lrb_runtime_entry_addr() is called with (obj, &addr) and
       // its result replaces _obj. Always ends with a branch to continuation().
       // Clobbers _tmp1 and, for narrow values, _tmp2.
1104   Label L_slow;
1105 
1106   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1107   if (_needs_keep_alive_barrier) {
1108     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1109     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1110     __ ldrb(_tmp1, gc_state_fast);
1111     maybe_far_jump_if_zero(masm, _tmp1);
1112   }
1113 
1114   // If weak references are being processed, weak/phantom loads need to go slow,
1115   // regardless of their cset status.
1116   if (_needs_load_ref_weak_barrier) {
1117     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1118     __ ldrb(_tmp1, gc_state_fast);
1119     __ cbnz(_tmp1, L_slow);
1120   }
1121 
1122   // Cset-check. Fall-through to slow if in collection set.
       // The table byte is looked up at in_cset_fast_test_addr() plus the object
       // address shifted right by the region-size shift; a zero byte exits to
       // continuation().
1123   __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1124   if (_narrow) {
1125     __ decode_heap_oop_not_null(_tmp2, _obj);
1126     __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1127   } else {
1128     __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1129   }
1130   __ ldrb(_tmp1, Address(_tmp1, 0));
1131   maybe_far_jump_if_zero(masm, _tmp1);
1132 
1133   // Slow path
1134   __ bind(L_slow);
1135 
1136   // Obj is the result, need to temporarily stop preserving it.
1137   bool is_obj_preserved = is_preserved(_obj);
1138   if (is_obj_preserved) {
1139     dont_preserve(_obj);
1140   }
1141   {
1142     SaveLiveRegisters slr(&masm, this);
1143 
1144     // Shuffle in the arguments. The end result should be:
1145     //   c_rarg0 <-- obj
1146     //   c_rarg1 <-- lea(addr)
1147     if (c_rarg0 == _obj) {
           // obj is already in place; only the address needs materializing.
1148       __ lea(c_rarg1, _addr);
1149     } else if (c_rarg1 == _obj) {
1150       // Set up arguments in reverse, and then flip them
1151       __ lea(c_rarg0, _addr);
1152       // flip them
1153       __ mov(_tmp1, c_rarg0);
1154       __ mov(c_rarg0, c_rarg1);
1155       __ mov(c_rarg1, _tmp1);
1156     } else {
           // Address first, then obj: c_rarg0 is only overwritten after the lea
           // has read _addr.
1157       assert_different_registers(c_rarg1, _obj);
1158       __ lea(c_rarg1, _addr);
1159       __ mov(c_rarg0, _obj);
1160     }
1161 
1162     // Go to runtime and handle the rest there.
1163     __ mov(lr, lrb_runtime_entry_addr());
1164     __ blr(lr);
1165 
1166     // Save the result where needed. Narrow entries return narrowOop (32 bits)
1167     // and AAPCS does not guarantee the upper 32 bits of x0 are zero.
1168     if (_narrow) {
1169       __ movw(_obj, r0);
1170     } else if (_obj != r0) {
1171       __ mov(_obj, r0);
1172     }
1173   }
       // Re-establish preservation of _obj if it was in effect on entry.
1174   if (is_obj_preserved) {
1175     preserve(_obj);
1176   }
1177 
1178   __ b(*continuation());
1179 }
1180 
1181 int ShenandoahBarrierStubC2::available_gp_registers() {
       // Not used in this file: calls Unimplemented(). The return statement
       // only satisfies the signature.
1182   Unimplemented(); // Not used
1183   return 0;
1184 }
1185 
1186 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
       // Not used in this file: calls Unimplemented(). The return statement
       // only satisfies the signature; r is ignored.
1187   Unimplemented(); // Not used
1188   return true;
1189 }
1190 
1191 void ShenandoahBarrierStubC2::post_init() {
       // Decides whether this stub must use far branches (_needs_far_jump),
       // based on the current compilation's estimated code size.
1192   // If we are in scratch emit mode we assume worst case, and force the use of
1193   // far branches.
1194   PhaseOutput* const output = Compile::current()->output();
1195   if (output->in_scratch_emit_size()) {
1196     _needs_far_jump = true;
1197     return;
1198   }
1199 
1200   // The formula below is based on how c2 estimates initial buffer size for a
1201   // compilation. See C2Compiler::initial_code_buffer_size. The logic
1202   // implemented in this stub only uses short jumps (cbz, cbnz) if the
1203   // aggregation of all relevant code sections of a method fit in 1MB. We could
1204   // be more aggressive and try and compute the distance between the fastpath
1205   // branch and the stub entry but in practice not many methods reach the 1MB
1206   // size.
1207   const BufferSizingData* sizing = output->buffer_sizing_data();
1208   const int code_size = sizing->_code + sizing->_stub +
1209     PhaseOutput::MAX_inst_size + PhaseOutput::MAX_stubs_size + NativeCall::byte_size();
1210 
1211   // Maximum backward range is 1M. Maximum forward reach is 1M - 4bytes.
       // The smaller (forward) reach is used as the threshold; an estimate equal
       // to it already selects far jumps.
1212   const int cond_branch_max_reach = (int)(1*M - 4);
1213   _needs_far_jump = code_size >= cond_branch_max_reach;
1214 }
1215 
1216 #endif // COMPILER2
< prev index next >