706 }
707
708 // If we could not find a non-live register, select the live fallback:
709 if (tmp == noreg) {
710 tmp = fallback_live;
711 selected_live = true;
712 } else {
713 selected_live = false;
714 }
715
716 assert(tmp != noreg, "successfully selected");
717 assert_different_registers(tmp, reg1);
718 assert_different_registers(tmp, addr.base());
719 assert_different_registers(tmp, addr.index());
720 return tmp;
721 }
722
// Emit the fastpath check: test one bit of the thread-local fast gc-state
// byte and enter the slow-path stub when it is set. 'test_state' is the
// gc-state mask that gc_state_to_fast_bit() maps to a single bit position.
void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
  // Do not count the barrier check against inlining size heuristics.
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(test_state);
  Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
  __ ldrb(rscratch1, gc_state_fast);
  if (_use_trampoline) {
    // Trampoline was placed within tbnz reach (see post_init); it forwards
    // to the real stub entry with an unconditional branch.
    __ tbnz(rscratch1, bit_to_check, _trampoline_entry);
  } else {
    // Stub may be out of test-and-branch range: invert the test to skip over
    // an unconditional branch, which has a much larger reach.
    __ tbz(rscratch1, bit_to_check, *continuation());
    __ b(*entry());
  }
  // This is where the slowpath stub will return to or the code above will
  // jump to if the checks are false
  __ bind(*continuation());
}
739
740 bool needs_acquiring_load_exclusive(const MachNode *n) {
741 assert(n->is_CAS(true), "expecting a compare and swap");
742 if (n->is_CAS(false)) {
743 assert(n->has_trailing_membar(), "expected trailing membar");
744 } else {
745 return n->has_trailing_membar();
746 }
747
748 // so we can just return true here
749 return true;
750 }
751
752 #undef __
753 #define __ masm->
754
755 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
756 Register oldval, Register newval, bool exchange, bool narrow, bool weak) {
757 bool acquire = needs_acquiring_load_exclusive(node);
910
911 // rscratch2 = addr
912 __ lea(rscratch2, address);
913
914 // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
915 __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
916
917 if (UseCondCardMark) {
918 Label L_already_dirty;
919 __ ldrb(rscratch1, Address(rscratch2));
920 __ cbz(rscratch1, L_already_dirty);
921 __ strb(zr, Address(rscratch2));
922 __ bind(L_already_dirty);
923 } else {
924 __ strb(zr, Address(rscratch2));
925 }
926 }
927 #undef __
928 #define __ masm.
929
930 // Only handles forward branch jumps, target_offset >= branch_offset
931 static bool aarch64_test_and_branch_reachable(int branch_offset, int target_offset) {
932 assert(branch_offset >= 0, "branch to stub offsets must be positive");
933 assert(target_offset >= 0, "offset in stubs section must be positive");
934 assert(target_offset >= branch_offset, "forward branches only, branch_offset -> target_offset");
935 return (target_offset - branch_offset) < (int)(32*K);
936 }
937
938 void ShenandoahBarrierStubC2::post_init(int offset) {
939 // If we are in scratch emit mode we assume worst case,
940 // and use no trampolines.
941 PhaseOutput* const output = Compile::current()->output();
942 if (output->in_scratch_emit_size()) {
943 return;
944 }
945
946 // Assume that each trampoline is one single instruction and that the stubs
947 // will follow immediately after the _code section. We emit trampolines until
948 // we can no longer do it.
949 const int code_size = output->buffer_sizing_data()->_code;
950 const int trampoline_offset = trampoline_stubs_count() * NativeInstruction::instruction_size;
951 _use_trampoline = aarch64_test_and_branch_reachable(_fastpath_branch_offset, code_size + trampoline_offset);
952 if (_use_trampoline) {
953 inc_trampoline_stubs_count();
954 }
955 }
956
// Two-phase stub emission: on the first call this emits (at most) the
// trampoline and re-registers the stub; the re-registered copy then emits
// the actual slow-path code via emit_code_actual().
void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
  if (_do_emit_actual) {
    emit_code_actual(masm);
    return;
  }

  if (_use_trampoline) {
    // Emit the trampoline and jump to real entry.
    const int target_offset = __ offset();
    assert(aarch64_test_and_branch_reachable(_fastpath_branch_offset, target_offset), "trampoline should be reachable");
    __ bind(_trampoline_entry);
    __ b(*entry());
  }

  // Do it again, this time with actual emits.
  _do_emit_actual = true;
  ShenandoahBarrierStubC2::register_stub(this);
}
975
976 void ShenandoahBarrierStubC2::load_and_decode(MacroAssembler& masm, Label& target_if_null) {
977 if (_do_load) {
978 // Fastpath sets _obj==noreg if it tells the slowpath to do the load
979 _obj = rscratch2;
980
981 // This does the load and the decode if necessary
982 __ load_heap_oop(_obj, _addr, noreg, noreg, AS_RAW);
983
984 __ cbz(_obj, target_if_null);
985 } else {
986 // If object is narrow, we need to decode it because everything else later
987 // will need full oops.
988 if (_narrow) {
989 if (_maybe_null) {
990 __ decode_heap_oop(_obj);
991 } else {
992 __ decode_heap_oop_not_null(_obj);
993 }
994 }
995
996 if (_maybe_null) {
997 __ cbz(_obj, target_if_null);
998 }
999 }
1000 }
1001
1002 void ShenandoahBarrierStubC2::reencode_if_needed(MacroAssembler& masm) {
1003 // If object is narrow, we need to encode it before exiting.
1004 // For encoding, dst can only turn null if we are dealing with weak loads.
1005 // Otherwise, we have already null-checked. We can skip all this if we performed
1006 // the load ourselves, which means the value is not used by caller.
1007 if (!_do_load && _narrow) {
1008 if (_needs_load_ref_weak_barrier) {
1009 __ encode_heap_oop(_obj);
1010 } else {
1011 __ encode_heap_oop_not_null(_obj);
1012 }
1013 }
1014 }
1015
// Emit the actual slow-path stub body: load/decode the oop, run the
// keep-alive (SATB) and load-reference barriers, re-encode, and return.
void ShenandoahBarrierStubC2::emit_code_actual(MacroAssembler& masm) {
  assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
  __ bind(*entry());

  // Null values skip all barrier work by branching to the continuation.
  load_and_decode(masm, *continuation());

  // SATB keep-alive barrier; rscratch1 serves as a temp.
  keepalive(masm, _obj, rscratch1);

  // Load-reference barrier; goes slow when the object is in the collection set.
  lrb(masm, _obj, _addr, rscratch1);

  reencode_if_needed(masm);

  __ b(*continuation());
}
1030
1031 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1) {
1032 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1033 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1034 Label L_runtime;
1035 Label L_done;
1036
1037 // The node doesn't even need keepalive barrier, just don't check anything else
1038 if (!_needs_keep_alive_barrier) {
1039 return ;
1040 }
1041
1042 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1043 if (_needs_load_ref_barrier) {
1044 Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1045 __ ldrb(tmp1, gcs_addr);
1046 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
1047 }
1048
1049 // If buffer is full, call into runtime.
1050 __ ldr(tmp1, index);
1051 __ cbz(tmp1, L_runtime);
1052
1053 bool selected_live = false;
1054 Register tmp2 = select_temp_register(selected_live, _addr, obj);
1055 if (selected_live) {
1056 push_save_register(masm, tmp2);
1057 }
1058
1059 // The buffer is not full, store value into it.
1060 __ sub(tmp1, tmp1, wordSize);
1061 __ str(tmp1, index);
1062 __ ldr(tmp2, buffer);
1063 __ str(obj, Address(tmp2, tmp1));
1064 __ b(L_done);
1065
1066 // Runtime call
1067 __ bind(L_runtime);
1081 if (clobbered_c_rarg0) {
1082 pop_save_register(masm, c_rarg0);
1083 }
1084 }
1085
1086 __ bind(L_done);
1087
1088 if (selected_live) {
1089 pop_save_register(masm, tmp2);
1090 }
1091 }
1092
1093 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp) {
1094 Label L_done, L_slow;
1095
1096 // The node doesn't even need LRB barrier, just don't check anything else
1097 if (!_needs_load_ref_barrier) {
1098 return ;
1099 }
1100
1101 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1102 if (_needs_keep_alive_barrier) {
1103 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1104 int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
1105 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
1106 __ ldrb(tmp, gc_state_fast);
1107 __ tbz(tmp, bit_to_check, L_done);
1108 }
1109
  // If weak references are being processed, weak/phantom loads need to go slow,
  // regardless of their cset status.
1112 if (_needs_load_ref_weak_barrier) {
1113 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1114 __ ldrb(tmp, gc_state);
1115 __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_slow);
1116 }
1117
1118 // Cset-check. Fall-through to slow if in collection set.
1119 assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
1120 __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
1121 __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1122 __ ldrb(tmp, Address(tmp, 0));
1123 __ cbz(tmp, L_done);
1124
1125 // Slow path
1126 __ bind(L_slow);
1127 dont_preserve(obj);
1128 {
|
706 }
707
708 // If we could not find a non-live register, select the live fallback:
709 if (tmp == noreg) {
710 tmp = fallback_live;
711 selected_live = true;
712 } else {
713 selected_live = false;
714 }
715
716 assert(tmp != noreg, "successfully selected");
717 assert_different_registers(tmp, reg1);
718 assert_different_registers(tmp, addr.base());
719 assert_different_registers(tmp, addr.index());
720 return tmp;
721 }
722
// Emit the hot-patchable fastpath entry into the slow-path stub.
// NOTE(review): 'test_state' is unused here — the initial code version
// always branches; later state changes are handled by patching this site
// (see patch_branch_to_nop / patch_nop_to_branch).
void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
  // Do not count the barrier check against inlining size heuristics.
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  // Emit the unconditional branch in the first version of the method.
  // Let the rest of runtime figure out how to manage it.
  // The relocation marks this site so the runtime can find and patch it.
  __ relocate(barrier_Relocation::spec());
  __ b(*entry());

#ifdef ASSERT
  // Debug-only check: if the branch above has been NOP-ed out and we fall
  // through, the fast gc-state byte must be zero, otherwise the barrier was
  // disabled while the heap still needs it.
  Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
  __ ldrb(rscratch1, gc_state_fast);
  __ cbz(rscratch1, *continuation());
  __ hlt(0); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
#endif
  __ bind(*continuation());
}
739
740 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
741 NativeInstruction* ni = nativeInstruction_at(pc);
742 assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
743 NativeJump* jmp = nativeJump_at(pc);
744 return jmp->jump_destination();
745 }
746
747 void insert_nop(address pc) {
748 *(pc + 0) = 0x1F;
749 *(pc + 1) = 0x20;
750 *(pc + 2) = 0x03;
751 *(pc + 3) = 0xD5;
752 ICache::invalidate_range(pc, 4);
753 }
754
755 bool is_nop(address pc) {
756 if (*(pc + 0) != 0x1F) return false;
757 if (*(pc + 1) != 0x20) return false;
758 if (*(pc + 2) != 0x03) return false;
759 if (*(pc + 3) != 0xD5) return false;
760 return true;
761 }
762
763 void check_at(bool cond, address pc, const char* msg) {
764 assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
765 msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
766 }
767
768 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
769 NativeInstruction* ni = nativeInstruction_at(pc);
770 if (ni->is_jump()) {
771 insert_nop(pc);
772 } else {
773 check_at(is_nop(pc), pc, "Should already be nop");
774 }
775 }
776
777 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
778 NativeInstruction* ni = nativeInstruction_at(pc);
779 if (is_nop(pc)) {
780 NativeJump::insert(pc, stub_addr);
781 } else {
782 check_at(ni->is_jump(), pc, "Should already be jump");
783 check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
784 }
785 }
786
787 bool needs_acquiring_load_exclusive(const MachNode *n) {
788 assert(n->is_CAS(true), "expecting a compare and swap");
789 if (n->is_CAS(false)) {
790 assert(n->has_trailing_membar(), "expected trailing membar");
791 } else {
792 return n->has_trailing_membar();
793 }
794
795 // so we can just return true here
796 return true;
797 }
798
799 #undef __
800 #define __ masm->
801
802 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
803 Register oldval, Register newval, bool exchange, bool narrow, bool weak) {
804 bool acquire = needs_acquiring_load_exclusive(node);
957
958 // rscratch2 = addr
959 __ lea(rscratch2, address);
960
961 // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
962 __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
963
964 if (UseCondCardMark) {
965 Label L_already_dirty;
966 __ ldrb(rscratch1, Address(rscratch2));
967 __ cbz(rscratch1, L_already_dirty);
968 __ strb(zr, Address(rscratch2));
969 __ bind(L_already_dirty);
970 } else {
971 __ strb(zr, Address(rscratch2));
972 }
973 }
974 #undef __
975 #define __ masm.
976
977 void ShenandoahBarrierStubC2::load_and_decode(MacroAssembler& masm, Label& target_if_null) {
978 if (_do_load) {
979 // Fastpath sets _obj==noreg if it tells the slowpath to do the load
980 _obj = rscratch2;
981
982 // This does the load and the decode if necessary
983 __ load_heap_oop(_obj, _addr, noreg, noreg, AS_RAW);
984
985 __ cbz(_obj, target_if_null);
986 } else {
987 // If object is narrow, we need to decode it because everything else later
988 // will need full oops.
989 if (_narrow) {
990 if (_maybe_null) {
991 __ decode_heap_oop(_obj);
992 } else {
993 __ decode_heap_oop_not_null(_obj);
994 }
995 }
996
997 if (_maybe_null) {
998 __ cbz(_obj, target_if_null);
999 }
1000 }
1001 }
1002
1003 void ShenandoahBarrierStubC2::reencode_if_needed(MacroAssembler& masm) {
1004 // If object is narrow, we need to encode it before exiting.
1005 // For encoding, dst can only turn null if we are dealing with weak loads.
1006 // Otherwise, we have already null-checked. We can skip all this if we performed
1007 // the load ourselves, which means the value is not used by caller.
1008 if (!_do_load && _narrow) {
1009 if (_needs_load_ref_weak_barrier) {
1010 __ encode_heap_oop(_obj);
1011 } else {
1012 __ encode_heap_oop_not_null(_obj);
1013 }
1014 }
1015 }
1016
// Emit the slow-path stub body: load/decode the oop, run the keep-alive
// (SATB) and load-reference barriers, re-encode, and return.
void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
  assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
  // Stub entry: the fastpath's branch (see enter_if_gc_state) lands here.
  __ bind(*entry());

  // Null values skip all barrier work by branching to the continuation.
  load_and_decode(masm, *continuation());

  // SATB keep-alive barrier; rscratch1 serves as a temp.
  keepalive(masm, _obj, rscratch1);

  // Load-reference barrier; goes slow when the object is in the collection set.
  lrb(masm, _obj, _addr, rscratch1);

  reencode_if_needed(masm);

  __ b(*continuation());
}
1031
1032 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1) {
1033 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1034 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1035 Label L_runtime;
1036 Label L_done;
1037
1038 // The node doesn't even need keepalive barrier, just don't check anything else
1039 if (!_needs_keep_alive_barrier) {
1040 return ;
1041 }
1042
1043 // Hotpatched GC checks only care about idle/non-idle state, so we need to check specific state.
1044 Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1045 __ ldrb(tmp1, gcs_addr);
1046 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
1047
1048 // If buffer is full, call into runtime.
1049 __ ldr(tmp1, index);
1050 __ cbz(tmp1, L_runtime);
1051
1052 bool selected_live = false;
1053 Register tmp2 = select_temp_register(selected_live, _addr, obj);
1054 if (selected_live) {
1055 push_save_register(masm, tmp2);
1056 }
1057
1058 // The buffer is not full, store value into it.
1059 __ sub(tmp1, tmp1, wordSize);
1060 __ str(tmp1, index);
1061 __ ldr(tmp2, buffer);
1062 __ str(obj, Address(tmp2, tmp1));
1063 __ b(L_done);
1064
1065 // Runtime call
1066 __ bind(L_runtime);
1080 if (clobbered_c_rarg0) {
1081 pop_save_register(masm, c_rarg0);
1082 }
1083 }
1084
1085 __ bind(L_done);
1086
1087 if (selected_live) {
1088 pop_save_register(masm, tmp2);
1089 }
1090 }
1091
1092 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp) {
1093 Label L_done, L_slow;
1094
1095 // The node doesn't even need LRB barrier, just don't check anything else
1096 if (!_needs_load_ref_barrier) {
1097 return ;
1098 }
1099
1100 // Hotpatched GC checks only care about idle/non-idle state, so we need to check again.
1101 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1102 int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
1103 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
1104 __ ldrb(tmp, gc_state_fast);
1105 __ tbz(tmp, bit_to_check, L_done);
1106
  // If weak references are being processed, weak/phantom loads need to go slow,
  // regardless of their cset status.
1109 if (_needs_load_ref_weak_barrier) {
1110 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1111 __ ldrb(tmp, gc_state);
1112 __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_slow);
1113 }
1114
1115 // Cset-check. Fall-through to slow if in collection set.
1116 assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
1117 __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
1118 __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1119 __ ldrb(tmp, Address(tmp, 0));
1120 __ cbz(tmp, L_done);
1121
1122 // Slow path
1123 __ bind(L_slow);
1124 dont_preserve(obj);
1125 {
|