13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahRuntime.hpp"
34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
35 #include "interpreter/interpreter.hpp"
36 #include "runtime/javaThread.hpp"
37 #include "runtime/sharedRuntime.hpp"
38 #include "utilities/macros.hpp"
39 #ifdef COMPILER1
40 #include "c1/c1_LIRAssembler.hpp"
41 #include "c1/c1_MacroAssembler.hpp"
42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
43 #endif
44 #ifdef COMPILER2
45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
46 #endif
47
48 #define __ masm->
49
50 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
51 Register src, Register dst, Register count) {
52
53 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
54
55 if (is_reference_type(type)) {
858 __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
859 Address card_address(tmp1, 0);
860
861 assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
862 Label L_done;
863 if (UseCondCardMark) {
864 __ cmpb(card_address, 0);
865 __ jccb(Assembler::equal, L_done);
866 }
867 if (UseCompressedOops && CompressedOops::base() == nullptr) {
868 __ movb(card_address, r12);
869 } else {
870 __ movb(card_address, 0);
871 }
872 __ bind(L_done);
873 }
874
// Emits the inline fast-path check that guards this barrier stub.
//
// The emitted code compares a thread-local byte (addressed off r15_thread at
// the offset returned by gc_state_fast_array_offset(test_state)) against zero
// and branches to entry() when it is non-zero. continuation() is bound right
// after the branch, so it marks the fall-through position.
//
//   masm       - assembler that receives the emitted instructions
//   test_state - selects which gc-state byte is tested
//   tmp        - not referenced by this implementation
875 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
876 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
877
// Byte to test, addressed relative to the current thread register.
878 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
879 __ cmpb(gc_state_fast, 0);
// Non-zero byte: take the stub. Zero: fall through to continuation().
880 __ jcc(Assembler::notEqual, *entry());
881 __ bind(*continuation());
882 }
883
// Emits the out-of-line body of this barrier stub.
//
// entry() is bound at a 16-byte aligned position. The object is then loaded
// from _addr when _do_load is set, and finally the keep-alive and/or
// load-reference barrier sequences are emitted according to
// _needs_keep_alive_barrier and _needs_load_ref_barrier. At least one of the
// two flags must be set; this is asserted on entry.
884 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
885 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
886 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
887
888 // On x86, there is a significant penalty with unaligned branch target, for example
889 // when the target instruction straddles the fetch line. It makes (performance) sense
890 // to spend some code size to align the target better.
891 __ align(16);
892 __ bind(*entry());
893
894 // If we need to load ourselves, do it here.
895 if (_do_load) {
896 if (_narrow) {
// Narrow values are loaded with a 32-bit move, others with a 64-bit move.
897 __ movl(_obj, _addr);
898 } else {
899 __ movq(_obj, _addr);
900 }
901 }
902
903 // If the object is null, there is no point in applying barriers.
908 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
909 if (!_do_load || needs_both_barriers) {
// NOTE(review): preserve() is defined outside this view; presumably it marks
// _obj as a value that must stay intact across the barrier code - confirm.
910 preserve(_obj);
911 }
912
913 // Go for barriers. Barriers can return straight to continuation, as long
914 // as another barrier is not needed.
915 if (needs_both_barriers) {
// With a null label keepalive() does not jump away at its end, so the
// load-reference barrier emitted next is reached by fall-through.
916 keepalive(masm, nullptr);
917 lrb(masm);
918 } else if (_needs_keep_alive_barrier) {
// Only keep-alive is needed: it finishes by jumping to continuation().
919 keepalive(masm, continuation());
920 } else if (_needs_load_ref_barrier) {
921 lrb(masm);
922 } else {
// Excluded by the assert at the top of this function.
923 ShouldNotReachHere();
924 }
925 }
926
927 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
928 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
929 Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
930 Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
931
932 Label L_through, L_pop_and_slow;
933
934 // If another barrier is enabled as well, do a runtime check for a specific barrier.
935 if (_needs_load_ref_barrier) {
936 assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
937 __ cmpb(gc_state_fast, 0);
938 __ jcc(Assembler::equal, L_through);
939 }
940
941 // Need temp to work, allocate one now.
942 bool tmp_live;
943 Register tmp = select_temp_register(tmp_live);
944 if (tmp_live) {
945 __ push(tmp);
946 }
947
948 // Fast-path: put object into buffer.
949 // If buffer is already full, go slow.
950 __ movptr(tmp, index);
951 __ subptr(tmp, wordSize);
952 __ jccb(Assembler::below, L_pop_and_slow);
953 __ movptr(index, tmp);
954 __ addptr(tmp, buffer);
955
956 // Store the object in queue.
957 // If object is narrow, we need to decode it before inserting.
958 // We can skip the re-encoding if we know that object is not preserved.
987 // Shuffle in the arguments. The end result should be:
988 // c_rarg0 <-- obj
989 if (c_rarg0 != _obj) {
990 __ mov(c_rarg0, _obj);
991 }
992
993 // Go to runtime and handle the rest there.
994 // Use rax as scratch, as it will be saved if live.
995 __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
996 }
997 if (L_done != nullptr) {
998 __ jmp(*L_done);
999 } else {
1000 __ bind(L_through);
1001 }
1002 }
1003
1004 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1005 Label L_pop_and_slow, L_slow;
1006
1007 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1008 if (_needs_keep_alive_barrier) {
1009 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1010 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1011 __ cmpb(gc_state_fast, 0);
1012 __ jcc(Assembler::equal, *continuation());
1013 }
1014
1015 // If weak references are being processed, weak/phantom loads need to go slow,
1016 // regardless of their cset status.
1017 if (_needs_load_ref_weak_barrier) {
1018 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1019 __ cmpb(gc_state_fast, 0);
1020 __ jccb(Assembler::notEqual, L_slow);
1021 }
1022
1023 bool is_aot = AOTCodeCache::is_on_for_dump();
1024
1025 // Need temp to work, allocate one now.
1026 bool tmp_live;
1027 Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1028 if (tmp_live) {
1029 __ push(tmp);
1030 }
1031
1032 // Compute the cset bitmap index
1033 if (_narrow) {
1034 __ decode_heap_oop_not_null(tmp, _obj);
1035 } else {
1036 __ movptr(tmp, _obj);
1037 }
1038
1039 Address cset_addr_arg;
1040 intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahNMethod.inline.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "nativeInst_x86.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "utilities/macros.hpp"
41 #ifdef COMPILER1
42 #include "c1/c1_LIRAssembler.hpp"
43 #include "c1/c1_MacroAssembler.hpp"
44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
45 #endif
46 #ifdef COMPILER2
47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
48 #endif
49
50 #define __ masm->
51
52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
53 Register src, Register dst, Register count) {
54
55 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
56
57 if (is_reference_type(type)) {
860 __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
861 Address card_address(tmp1, 0);
862
863 assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
864 Label L_done;
865 if (UseCondCardMark) {
866 __ cmpb(card_address, 0);
867 __ jccb(Assembler::equal, L_done);
868 }
869 if (UseCompressedOops && CompressedOops::base() == nullptr) {
870 __ movb(card_address, r12);
871 } else {
872 __ movb(card_address, 0);
873 }
874 __ bind(L_done);
875 }
876
// Emits the inline entry site for this barrier stub as a patchable location.
//
// The site consists of a barrier relocation, tagged with the value returned
// by ShenandoahNMethod::gc_state_to_reloc(test_state), followed by an
// unconditional jump to entry(). continuation() is bound right after the jump.
//
//   masm       - assembler that receives the emitted instructions
//   test_state - gc-state value used only to derive the relocation tag
//   tmp        - not referenced by this implementation
877 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
878 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
879
880 // Emit the unconditional branch in the first version of the method.
881 // Let the rest of runtime figure out how to manage it.
882 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(test_state));
// NOTE(review): the short encoding is disallowed here, presumably so the site
// has the 5-byte size that patch_branch_to_nop()/patch_nop_to_branch() rewrite
// - confirm.
883 __ jmp(*entry(), /* maybe_short = */ false);
884
885 __ bind(*continuation());
886 }
887
// Returns the destination of the jump instruction located at pc.
//
// The instruction at pc is expected to be a jump; this is checked with an
// assert only, so the caller is responsible for passing a site that still
// holds its jump.
888 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
889 NativeInstruction* ni = nativeInstruction_at(pc);
890 assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
891 NativeJump* jmp = nativeJump_at(pc);
892 return jmp->jump_destination();
893 }
894
// Overwrites the five bytes starting at pc with 0F 1F 44 00 00, the x86
// 5-byte NOP encoding, and then invalidates the instruction cache for that
// range. The byte pattern must stay in sync with is_5_byte_nop().
//
// NOTE(review): the bytes are stored one at a time, so the update is not a
// single atomic write; presumably callers guarantee that no thread executes
// this location while it is being patched - confirm.
895 static void insert_5_byte_nop(address pc) {
896 *(pc + 0) = 0x0F;
897 *(pc + 1) = 0x1F;
898 *(pc + 2) = 0x44;
899 *(pc + 3) = 0x00;
900 *(pc + 4) = 0x00;
901 ICache::invalidate_range(pc, 5);
902 }
903
// Returns true when the five bytes starting at pc are exactly 0F 1F 44 00 00,
// i.e. the pattern written by insert_5_byte_nop(); returns false on the first
// byte that differs.
904 static bool is_5_byte_nop(address pc) {
905 if (*(pc + 0) != 0x0F) return false;
906 if (*(pc + 1) != 0x1F) return false;
907 if (*(pc + 2) != 0x44) return false;
908 if (*(pc + 3) != 0x00) return false;
909 if (*(pc + 4) != 0x00) return false;
910 return true;
911 }
912
// Asserts cond. On failure the message contains msg, the address pc and the
// five bytes found at pc printed as hex, to help diagnose an unexpected
// instruction at a patch site.
//
//   cond - condition that is expected to hold
//   pc   - code address being checked; five bytes are read from it for the message
//   msg  - short description of the expectation
913 static void check_at(bool cond, address pc, const char* msg) {
914 assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
915 msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
916 }
917
// Returns true when the instruction at pc is a jump. In this file the only
// other state a patch site is put into is the 5-byte NOP written by
// patch_branch_to_nop().
918 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
919 NativeInstruction* ni = nativeInstruction_at(pc);
920 return ni->is_jump();
921 }
922
// Replaces the jump at pc with a 5-byte NOP.
//
// If the site does not hold a jump, nothing is written; the site is then
// expected to already hold the 5-byte NOP, which is checked with an assert.
923 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
924 NativeInstruction* ni = nativeInstruction_at(pc);
925 if (ni->is_jump()) {
926 insert_5_byte_nop(pc);
927 } else {
// Already patched: only verify the expected contents.
928 check_at(is_5_byte_nop(pc), pc, "Should already be nop");
929 }
930 }
931
// Replaces the 5-byte NOP at pc with a jump to stub_addr.
//
// If the site does not hold the NOP, nothing is written; the site is then
// expected to already hold a jump whose destination is stub_addr, and both
// expectations are checked with asserts.
//
//   pc        - address of the patch site
//   stub_addr - destination the jump at pc must have
932 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
933 NativeInstruction* ni = nativeInstruction_at(pc);
934 if (is_5_byte_nop(pc)) {
935 NativeJump::insert(pc, stub_addr);
936 } else {
// Already a branch: verify it is a jump and that it targets the same stub.
937 check_at(ni->is_jump(), pc, "Should already be jump");
938 check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
939 }
940 }
941
// Emits the out-of-line body of this barrier stub.
//
// entry() is bound at a 16-byte aligned position. The object is then loaded
// from _addr when _do_load is set, and finally the keep-alive and/or
// load-reference barrier sequences are emitted according to
// _needs_keep_alive_barrier and _needs_load_ref_barrier. At least one of the
// two flags must be set; this is asserted on entry.
942 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
943 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
944 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
945
946 // On x86, there is a significant penalty with unaligned branch target, for example
947 // when the target instruction straddles the fetch line. It makes (performance) sense
948 // to spend some code size to align the target better.
949 __ align(16);
950 __ bind(*entry());
951
952 // If we need to load ourselves, do it here.
953 if (_do_load) {
954 if (_narrow) {
// Narrow values are loaded with a 32-bit move, others with a 64-bit move.
955 __ movl(_obj, _addr);
956 } else {
957 __ movq(_obj, _addr);
958 }
959 }
960
961 // If the object is null, there is no point in applying barriers.
966 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
967 if (!_do_load || needs_both_barriers) {
// NOTE(review): preserve() is defined outside this view; presumably it marks
// _obj as a value that must stay intact across the barrier code - confirm.
968 preserve(_obj);
969 }
970
971 // Go for barriers. Barriers can return straight to continuation, as long
972 // as another barrier is not needed.
973 if (needs_both_barriers) {
// With a null label keepalive() does not jump away at its end, so the
// load-reference barrier emitted next is reached by fall-through.
974 keepalive(masm, nullptr);
975 lrb(masm);
976 } else if (_needs_keep_alive_barrier) {
// Only keep-alive is needed: it finishes by jumping to continuation().
977 keepalive(masm, continuation());
978 } else if (_needs_load_ref_barrier) {
979 lrb(masm);
980 } else {
// Excluded by the assert at the top of this function.
981 ShouldNotReachHere();
982 }
983 }
984
985 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
986 Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
987 Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
988
989 Label L_through, L_pop_and_slow;
990
991 // If another barrier is enabled as well, do a check for a specific barrier.
992 if (_needs_load_ref_barrier) {
993 assert(L_done == nullptr, "Should be");
994 // Emit the unconditional branch in the first version of the method.
995 // Let the rest of runtime figure out how to manage it.
996 // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
997 char state_to_check = ShenandoahHeap::MARKING;
998 Label L_over;
999 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1000 __ jmp(L_over, /* maybe_short = */ false);
1001 __ jmp(L_through);
1002 __ bind(L_over);
1003 }
1004
1005 // Need temp to work, allocate one now.
1006 bool tmp_live;
1007 Register tmp = select_temp_register(tmp_live);
1008 if (tmp_live) {
1009 __ push(tmp);
1010 }
1011
1012 // Fast-path: put object into buffer.
1013 // If buffer is already full, go slow.
1014 __ movptr(tmp, index);
1015 __ subptr(tmp, wordSize);
1016 __ jccb(Assembler::below, L_pop_and_slow);
1017 __ movptr(index, tmp);
1018 __ addptr(tmp, buffer);
1019
1020 // Store the object in queue.
1021 // If object is narrow, we need to decode it before inserting.
1022 // We can skip the re-encoding if we know that object is not preserved.
1051 // Shuffle in the arguments. The end result should be:
1052 // c_rarg0 <-- obj
1053 if (c_rarg0 != _obj) {
1054 __ mov(c_rarg0, _obj);
1055 }
1056
1057 // Go to runtime and handle the rest there.
1058 // Use rax as scratch, as it will be saved if live.
1059 __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1060 }
1061 if (L_done != nullptr) {
1062 __ jmp(*L_done);
1063 } else {
1064 __ bind(L_through);
1065 }
1066 }
1067
1068 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1069 Label L_pop_and_slow, L_slow;
1070
1071 // If weak references are being processed, weak/phantom loads need to go slow,
1072 // regardless of their cset status.
1073 if (_needs_load_ref_weak_barrier) {
1074 char state_to_check = ShenandoahHeap::WEAK_ROOTS;
1075 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1076 __ jmp(L_slow, /* maybe_short = */ false);
1077 }
1078
1079 if (_needs_keep_alive_barrier) {
1080 // Emit the unconditional branch in the first version of the method.
1081 // Let the rest of runtime figure out how to manage it.
1082 // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
1083 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1084 Label L_over;
1085 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
1086 __ jmp(L_over, /* maybe_short = */ false);
1087 __ jmp(*continuation());
1088 __ bind(L_over);
1089 }
1090
1091 bool is_aot = AOTCodeCache::is_on_for_dump();
1092
1093 // Need temp to work, allocate one now.
1094 bool tmp_live;
1095 Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1096 if (tmp_live) {
1097 __ push(tmp);
1098 }
1099
1100 // Compute the cset bitmap index
1101 if (_narrow) {
1102 __ decode_heap_oop_not_null(tmp, _obj);
1103 } else {
1104 __ movptr(tmp, _obj);
1105 }
1106
1107 Address cset_addr_arg;
1108 intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
|