25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interp_masm.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45
46 #define __ masm->
47
48 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
49 Register src, Register dst, Register count, RegSet saved_regs) {
50 if (is_oop) {
51 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
52 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
53
54 Label done;
55
56 // Avoid calling runtime if count == 0
57 __ cbz(count, done);
58
59 // Is GC active?
60 assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
61 assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
62 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
63 __ ldrb(rscratch1, gc_state);
64 if (ShenandoahSATBBarrier && dest_uninitialized) {
814 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
815 } else {
816 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
817 }
818 } else {
819 assert(is_phantom, "only remaining strength");
820 assert(is_native, "phantom must only be called off-heap");
821 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
822 }
823 __ blr(lr);
824 __ mov(rscratch1, r0);
825 __ pop_call_clobbered_registers();
826 __ mov(r0, rscratch1);
827
828 __ epilogue();
829 }
830
831 #undef __
832
833 #endif // COMPILER1
|
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interp_masm.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45 #ifdef COMPILER2
46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
47 #include "opto/output.hpp"
48 #endif
49
50 #define __ masm->
51
52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
53 Register src, Register dst, Register count, RegSet saved_regs) {
54 if (is_oop) {
55 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
56 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
57
58 Label done;
59
60 // Avoid calling runtime if count == 0
61 __ cbz(count, done);
62
63 // Is GC active?
64 assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
65 assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
66 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
67 __ ldrb(rscratch1, gc_state);
68 if (ShenandoahSATBBarrier && dest_uninitialized) {
818 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
819 } else {
820 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
821 }
822 } else {
823 assert(is_phantom, "only remaining strength");
824 assert(is_native, "phantom must only be called off-heap");
825 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
826 }
827 __ blr(lr);
828 __ mov(rscratch1, r0);
829 __ pop_call_clobbered_registers();
830 __ mov(r0, rscratch1);
831
832 __ epilogue();
833 }
834
835 #undef __
836
837 #endif // COMPILER1
838
839 #ifdef COMPILER2
840
841 #undef __
842 #define __ masm->
843
844
845 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire) {
846 // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
847 if (is_narrow) {
848 if (is_acquire) {
849 __ ldarw(dst, src.base());
850 } else {
851 __ ldrw(dst, src);
852 }
853 } else {
854 if (is_acquire) {
855 __ ldar(dst, src.base());
856 } else {
857 __ ldr(dst, src);
858 }
859 }
860
861 ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);
862 }
863
864 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
865 Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3, bool is_volatile) {
866
867 ShenandoahBarrierStubC2::store_pre(masm, node, tmp1, dst, tmp2, tmp3, dst_narrow);
868
869 // Do the actual store
870 if (dst_narrow) {
871 if (!src_narrow) {
872 // Need to encode into rscratch, because we cannot clobber src.
873 if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
874 __ encode_heap_oop(tmp2, src);
875 } else {
876 __ encode_heap_oop_not_null(tmp2, src);
877 }
878 src = tmp2;
879 }
880
881 if (is_volatile) {
882 __ stlrw(src, dst.base());
883 } else {
884 __ strw(src, dst);
885 }
886 } else {
887 if (is_volatile) {
888 __ stlr(src, dst.base());
889 } else {
890 __ str(src, dst);
891 }
892 }
893
894 ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);
895 }
896
897 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
898 Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool weak, bool acquire) {
899 Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;
900
901 ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, addr, tmp2, tmp3, narrow);
902
903 // CAS!
904 __ cmpxchg(addr, oldval, newval, op_size, acquire, /* release */ true, weak, exchange ? res : noreg);
905
906 // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
907 if (!exchange) {
908 assert(res != noreg, "need result register");
909 __ cset(res, Assembler::EQ);
910 }
911
912 ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
913 }
914
915 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
916 Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
917 bool is_narrow = node->bottom_type()->isa_narrowoop();
918
919 ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, addr, tmp2, tmp3, is_narrow);
920
921 if (is_narrow) {
922 if (is_acquire) {
923 __ atomic_xchgalw(preval, newval, addr);
924 } else {
925 __ atomic_xchgw(preval, newval, addr);
926 }
927 } else {
928 if (is_acquire) {
929 __ atomic_xchgal(preval, newval, addr);
930 } else {
931 __ atomic_xchg(preval, newval, addr);
932 }
933 }
934
935 ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
936 }
937
938 #undef __
939 #define __ masm.
940
941 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
942 assert(CardTable::dirty_card_val() == 0, "must be");
943 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
944
945 // tmp1 = card table base (holder)
946 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
947 __ ldr(tmp1, curr_ct_holder_addr);
948
949 // tmp2 = effective address
950 __ lea(tmp2, address);
951
952 // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
953 __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
954
955 if (UseCondCardMark) {
956 Label L_already_dirty;
957 __ ldrb(tmp1, Address(tmp2));
958 __ cbz(tmp1, L_already_dirty);
959 __ strb(zr, Address(tmp2));
960 __ bind(L_already_dirty);
961 } else {
962 __ strb(zr, Address(tmp2));
963 }
964 }
965
966 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
967 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
968
969 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
970 if (_needs_far_jump) {
971 __ ldrb(tmp, gc_state_fast);
972 __ cbz(tmp, *continuation());
973 __ b(*entry());
974 } else {
975 __ ldrb(tmp, gc_state_fast);
976 __ cbnz(tmp, *entry());
977 }
978
979 // This is were the slowpath stub will return to or the code above will
980 // jump to if the checks are false
981 __ bind(*continuation());
982 }
983
984 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
985 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
986 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
987
988 __ bind(*entry());
989
990 // If we need to load ourselves, do it here.
991 if (_do_load) {
992 if (_narrow) {
993 __ ldrw(_obj, _addr);
994 } else {
995 __ ldr(_obj, _addr);
996 }
997 }
998
999 // If the object is null, there is no point in applying barriers.
1000 maybe_far_jump_if_zero(masm, _obj);
1001
1002 // We need to make sure that loads done by callers survive across slow-path calls.
1003 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1004 if (!_do_load || (_needs_keep_alive_barrier && _needs_load_ref_barrier)) {
1005 preserve(_obj);
1006 }
1007
1008 // Go for barriers. Barriers can return straight to continuation, as long
1009 // as another barrier is not needed and we can reach the fastpath.
1010 if (_needs_keep_alive_barrier && _needs_load_ref_barrier) {
1011 keepalive(masm, nullptr);
1012 lrb(masm);
1013 } else if (_needs_keep_alive_barrier) {
1014 keepalive(masm, continuation());
1015 } else if (_needs_load_ref_barrier) {
1016 lrb(masm);
1017 } else {
1018 ShouldNotReachHere();
1019 }
1020 }
1021
1022 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
1023 if (_needs_far_jump) {
1024 Label L_short_jump;
1025 __ cbnz(reg, L_short_jump);
1026 __ b(*continuation());
1027 __ bind(L_short_jump);
1028 } else {
1029 __ cbz(reg, *continuation());
1030 }
1031 }
1032
1033 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1034 Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1035 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1036 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1037 Label L_through, L_slowpath;
1038
1039 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1040 if (_needs_load_ref_barrier) {
1041 assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
1042 __ ldrb(_tmp1, gcstate);
1043 __ cbz(_tmp1, L_through);
1044 }
1045
1046 // Fast-path: put object into buffer.
1047 // If buffer is already full, go slow.
1048 __ ldr(_tmp1, index);
1049 __ cbz(_tmp1, L_slowpath);
1050 __ sub(_tmp1, _tmp1, wordSize);
1051 __ str(_tmp1, index);
1052 __ ldr(_tmp2, buffer);
1053
1054 // Store the object in queue.
1055 // If object is narrow, we need to decode it before inserting.
1056 if (_narrow) {
1057 __ add(_tmp2, _tmp2, _tmp1);
1058 __ decode_heap_oop_not_null(_tmp1, _obj);
1059 __ str(_tmp1, Address(_tmp2));
1060 } else {
1061 // Buffer is 64-bit address, must be in base register.
1062 __ str(_obj, Address(_tmp2, _tmp1));
1063 }
1064
1065 // Fast-path exits here.
1066 if (L_done != nullptr) {
1067 __ b(*L_done);
1068 } else {
1069 __ b(L_through);
1070 }
1071
1072 // Slow-path: call runtime to handle.
1073 __ bind(L_slowpath);
1074
1075 // The Load match rule in the .ad file may have legitimized the load address
1076 // using a TEMP register and in that case we need to explicitly preserve them
1077 // here because the RA does not consider TEMP as live-in, of course.
1078 if (_needs_load_ref_barrier) {
1079 if (_addr.base() != noreg) {
1080 preserve(_addr.base());
1081 }
1082 if (_addr.index() != noreg) {
1083 preserve(_addr.index());
1084 }
1085 }
1086
1087 {
1088 SaveLiveRegisters slr(&masm, this);
1089
1090 // Go to runtime and handle the rest there.
1091 __ mov(c_rarg0, _obj);
1092 __ mov(lr, keepalive_runtime_entry_addr());
1093 __ blr(lr);
1094 }
1095
1096 if (L_done != nullptr) {
1097 __ b(*L_done);
1098 } else {
1099 __ bind(L_through);
1100 }
1101 }
1102
1103 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1104 Label L_slow;
1105
1106 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1107 if (_needs_keep_alive_barrier) {
1108 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1109 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1110 __ ldrb(_tmp1, gc_state_fast);
1111 maybe_far_jump_if_zero(masm, _tmp1);
1112 }
1113
1114 // If weak references are being processed, weak/phantom loads need to go slow,
1115 // regardless of their cset status.
1116 if (_needs_load_ref_weak_barrier) {
1117 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1118 __ ldrb(_tmp1, gc_state_fast);
1119 __ cbnz(_tmp1, L_slow);
1120 }
1121
1122 // Cset-check. Fall-through to slow if in collection set.
1123 __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1124 if (_narrow) {
1125 __ decode_heap_oop_not_null(_tmp2, _obj);
1126 __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1127 } else {
1128 __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1129 }
1130 __ ldrb(_tmp1, Address(_tmp1, 0));
1131 maybe_far_jump_if_zero(masm, _tmp1);
1132
1133 // Slow path
1134 __ bind(L_slow);
1135
1136 // Obj is the result, need to temporarily stop preserving it.
1137 bool is_obj_preserved = is_preserved(_obj);
1138 if (is_obj_preserved) {
1139 dont_preserve(_obj);
1140 }
1141 {
1142 SaveLiveRegisters slr(&masm, this);
1143
1144 // Shuffle in the arguments. The end result should be:
1145 // c_rarg0 <-- obj
1146 // c_rarg1 <-- lea(addr)
1147 if (c_rarg0 == _obj) {
1148 __ lea(c_rarg1, _addr);
1149 } else if (c_rarg1 == _obj) {
1150 // Set up arguments in reverse, and then flip them
1151 __ lea(c_rarg0, _addr);
1152 // flip them
1153 __ mov(_tmp1, c_rarg0);
1154 __ mov(c_rarg0, c_rarg1);
1155 __ mov(c_rarg1, _tmp1);
1156 } else {
1157 assert_different_registers(c_rarg1, _obj);
1158 __ lea(c_rarg1, _addr);
1159 __ mov(c_rarg0, _obj);
1160 }
1161
1162 // Go to runtime and handle the rest there.
1163 __ mov(lr, lrb_runtime_entry_addr());
1164 __ blr(lr);
1165
1166 // Save the result where needed. Narrow entries return narrowOop (32 bits)
1167 // and AAPCS does not guarantee the upper 32 bits of x0 are zero.
1168 if (_narrow) {
1169 __ movw(_obj, r0);
1170 } else if (_obj != r0) {
1171 __ mov(_obj, r0);
1172 }
1173 }
1174 if (is_obj_preserved) {
1175 preserve(_obj);
1176 }
1177
1178 __ b(*continuation());
1179 }
1180
1181 int ShenandoahBarrierStubC2::available_gp_registers() {
1182 Unimplemented(); // Not used
1183 return 0;
1184 }
1185
1186 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1187 Unimplemented(); // Not used
1188 return true;
1189 }
1190
1191 void ShenandoahBarrierStubC2::post_init() {
1192 // If we are in scratch emit mode we assume worst case, and force the use of
1193 // far branches.
1194 PhaseOutput* const output = Compile::current()->output();
1195 if (output->in_scratch_emit_size()) {
1196 _needs_far_jump = true;
1197 return;
1198 }
1199
1200 // The formula below is based on how c2 estimates initial buffer size for a
1201 // compilation. See C2Compiler::initial_code_buffer_size. The logic
1202 // implemented in this stub only uses short jumps (cbz, cbnz) if the
1203 // aggregation of all relevant code sections of a method fit in 1MB. We could
1204 // be more aggressive and try and compute the distance between the fastpath
1205 // branch and the stub entry but in practice not many methods reach the 1MB
1206 // size.
1207 const BufferSizingData* sizing = output->buffer_sizing_data();
1208 const int code_size = sizing->_code + sizing->_stub +
1209 PhaseOutput::MAX_inst_size + PhaseOutput::MAX_stubs_size + NativeCall::byte_size();
1210
1211 // Maximum backward range is 1M. Maximum forward reach is 1M - 4bytes.
1212 const int cond_branch_max_reach = (int)(1*M - 4);
1213 _needs_far_jump = code_size >= cond_branch_max_reach;
1214 }
1215
1216 #endif // COMPILER2
|