686 }
687
688 // If we could not find a non-live register, select the live fallback:
689 if (tmp == noreg) {
690 tmp = fallback_live;
691 selected_live = true;
692 } else {
693 selected_live = false;
694 }
695
696 assert(tmp != noreg, "successfully selected");
697 assert_different_registers(tmp, reg1);
698 assert_different_registers(tmp, addr.base());
699 assert_different_registers(tmp, addr.index());
700 return tmp;
701 }
702
// Emit the inline fastpath check for this barrier stub: load the thread-local
// "fast" gc-state byte and enter the slowpath only when the bit derived from
// test_state is set. test_state is a mask of ShenandoahHeap gc-state flags
// (callers OR together MARKING / HAS_FORWARDED / WEAK_ROOTS as needed).
703 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
// Count these instructions as "skipped" inline bytes for size accounting.
704 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
705
706 int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(test_state);
707 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
708 __ ldrb(rscratch1, gc_state_fast);
709 if (_test_and_branch_reachable) {
// The stub's trampoline/entry is within tbnz range: a single test-and-branch
// reaches it directly.
710 __ tbnz(rscratch1, bit_to_check, _test_and_branch_reachable_entry);
711 } else {
// Stub is out of test-and-branch range: skip over an unconditional jump to
// the stub entry instead.
712 __ tbz(rscratch1, bit_to_check, *continuation());
713 __ b(*entry());
714 }
715 // This is where the slowpath stub will return to, or where the code above
716 // will jump to if the checks are false
717 __ bind(*continuation());
718 }
719
720 bool needs_acquiring_load_exclusive(const MachNode *n) {
721 assert(n->is_CAS(true), "expecting a compare and swap");
722 if (n->is_CAS(false)) {
723 assert(n->has_trailing_membar(), "expected trailing membar");
724 } else {
725 return n->has_trailing_membar();
726 }
727
728 // so we can just return true here
729 return true;
730 }
731
732 #undef __
733 #define __ masm->
734
735 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
736 Register oldval, Register newval, bool exchange, bool narrow, bool weak) {
737 bool acquire = needs_acquiring_load_exclusive(node);
860 } else {
861 if (acquire) {
862 __ ldar(dst, src.base());
863 } else {
864 __ ldr(dst, src);
865 }
866 }
867
868 // Post-barrier: LRB / KA / weak-root processing.
869 if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
870 ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, dst, src, narrow, /* do_load: */ false, __ offset());
871 char check = 0;
872 check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
873 check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
874 check |= ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
875 stub->enter_if_gc_state(*masm, check);
876 }
877 }
878
879 void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Address address) {
880 if (!ShenandoahBarrierStubC2::needs_card_barrier(node)) {
881 return;
882 }
883
884 assert(CardTable::dirty_card_val() == 0, "must be");
885 Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
886
887 // rscratch1 = card table base (holder)
888 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
889 __ ldr(rscratch1, curr_ct_holder_addr);
890
891 // rscratch2 = addr
892 __ lea(rscratch2, address);
893
894 // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
895 __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
896
897 if (UseCondCardMark) {
898 Label L_already_dirty;
899 __ ldrb(rscratch1, Address(rscratch2));
900 __ cbz(rscratch1, L_already_dirty);
927 PhaseOutput* const output = Compile::current()->output();
928 if (output->in_scratch_emit_size()) {
929 return;
930 }
931
932 // Assume that each trampoline is one single instruction and that the stubs
933 // will follow immediatelly after the _code section. Therefore, we are
934 // checking if the distance between the fastpath branch and the
935 // trampoline/entry of the current Stub is less than 32K.
936 const int code_size = output->buffer_sizing_data()->_code;
937 const int trampoline_offset = trampoline_stubs_count() * NativeInstruction::instruction_size;
938 _test_and_branch_reachable = aarch64_test_and_branch_reachable(_fastpath_branch_offset, code_size + trampoline_offset);
939 if (_test_and_branch_reachable) {
940 inc_trampoline_stubs_count();
941 }
942 }
943
944 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
945 // If we reach here with _skip_trampoline set it means that earlier we
946 // emitted a trampoline to this stub and now we need to emit the actual stub.
947 if (_skip_trampoline) {
948 emit_code_actual(masm);
949 } else {
950 _skip_trampoline = true;
951
952 // The fastpath executes two branch instructions to reach this stub, let's
953 // just emit the stub here and not add a third one.
954 if (!_test_and_branch_reachable) {
955 // By registering the stub again, after setting _skip_trampoline, we'll
956 // effectivelly cause the stub to be emitted the next time ::emit_code is
957 // called.
958 ShenandoahBarrierStubC2::register_stub(this);
959 return;
960 }
961
962 // This is entry point when coming from fastpath, IFF it's able to reach here
963 // with a test and branch instruction, otherwise the entry is
964 // ShenandoahBarrierStubC2::entry();
965 const int target_offset = __ offset();
966 __ bind(_test_and_branch_reachable_entry);
967
1059 if (selected_live) {
1060 pop_save_register(masm, _obj);
1061 }
1062
1063 // Go back to fast path
1064 __ b(*continuation());
1065 }
1066
1067 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1, Register tmp2) {
1068 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1069 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1070 Label L_runtime;
1071 Label L_done;
1072
1073 // The node doesn't even need keepalive barrier, just don't check anything else
1074 if (!_needs_keep_alive_barrier) {
1075 return ;
1076 }
1077
1078 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1079 if (_needs_load_ref_barrier) {
1080 Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1081 __ ldrb(tmp1, gcs_addr);
1082 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
1083 }
1084
1085 // If buffer is full, call into runtime.
1086 __ ldr(tmp1, index);
1087 __ cbz(tmp1, L_runtime);
1088
1089 // The buffer is not full, store value into it.
1090 __ sub(tmp1, tmp1, wordSize);
1091 __ str(tmp1, index);
1092 __ ldr(tmp2, buffer);
1093 __ str(obj, Address(tmp2, tmp1));
1094 __ b(L_done);
1095
1096 // Runtime call
1097 __ bind(L_runtime);
1098
1099 preserve(obj);
1109
1110 // Restore the clobbered registers.
1111 if (clobbered_c_rarg0) {
1112 pop_save_register(masm, c_rarg0);
1113 }
1114 }
1115
1116 __ bind(L_done);
1117 }
1118
1119 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp) {
1120 Label L_done;
1121
1122 // The node doesn't even need LRB barrier, just don't check anything else
1123 if (!_needs_load_ref_barrier) {
1124 return ;
1125 }
1126
1127 if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
1128 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1129 if (_needs_keep_alive_barrier) {
1130 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1131 int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
1132 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
1133 __ ldrb(tmp, gc_state_fast);
1134 __ tbz(tmp, bit_to_check, L_done);
1135 }
1136
1137 // Weak/phantom loads always need to go to runtime. For strong refs we
1138 // check if the object in cset, if they are not, then we are done with LRB.
1139 assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
1140 __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
1141 __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1142 __ ldrb(tmp, Address(tmp, 0));
1143 __ cbz(tmp, L_done);
1144 }
1145
1146 dont_preserve(obj);
1147 {
1148 // Shuffle in the arguments. The end result should be:
1149 // c_rarg0 <-- obj
|
686 }
687
688 // If we could not find a non-live register, select the live fallback:
689 if (tmp == noreg) {
690 tmp = fallback_live;
691 selected_live = true;
692 } else {
693 selected_live = false;
694 }
695
696 assert(tmp != noreg, "successfully selected");
697 assert_different_registers(tmp, reg1);
698 assert_different_registers(tmp, addr.base());
699 assert_different_registers(tmp, addr.index());
700 return tmp;
701 }
702
// Emit the inline fastpath check for this barrier stub. Three modes:
//   - ShenandoahGCStateCheckRemove: emit nothing (measurement-only mode);
//   - ShenandoahGCStateCheckHotpatch: emit a relocated unconditional branch
//     to the stub; the runtime later rewrites it between branch and NOP
//     (see patch_branch_to_nop / patch_nop_to_branch below);
//   - default: test the thread-local "fast" gc-state bit derived from
//     test_state (a mask of ShenandoahHeap gc-state flags) and branch to the
//     slowpath only when it is set.
703 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
// Count these instructions as "skipped" inline bytes for size accounting.
704 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
705
706 if (ShenandoahGCStateCheckRemove) {
707 // Unrealistic: remove all barrier fastpath checks.
708 } else if (ShenandoahGCStateCheckHotpatch) {
709 // Emit the unconditional branch in the first version of the method.
710 // Let the rest of runtime figure out how to manage it.
// The relocation marks this instruction so the patching code can find it.
711 __ relocate(barrier_Relocation::spec());
712 __ b(*entry());
713
714 #ifdef ASSERT
// Debug-only sanity check: if this branch has been NOP-ed out, the gc state
// must be idle (zero); otherwise a required barrier was skipped.
715 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
716 __ ldrb(rscratch1, gc_state_fast);
717 __ cbz(rscratch1, *continuation());
718 __ hlt(0); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
719 #endif
720 __ bind(*continuation());
721 } else {
722 int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(test_state);
723 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
724 __ ldrb(rscratch1, gc_state_fast);
725 if (_test_and_branch_reachable) {
// Stub is within tbnz range: one test-and-branch reaches it directly.
726 __ tbnz(rscratch1, bit_to_check, _test_and_branch_reachable_entry);
727 } else {
// Out of test-and-branch range: skip over an unconditional jump instead.
728 __ tbz(rscratch1, bit_to_check, *continuation());
729 __ b(*entry());
730 }
731
732 // This is where the slowpath stub will return to, or where the code above
733 // will jump to if the checks are false
734 __ bind(*continuation());
735 }
736 }
737
738 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
739 NativeInstruction* ni = nativeInstruction_at(pc);
740 assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
741 NativeJump* jmp = nativeJump_at(pc);
742 return jmp->jump_destination();
743 }
744
745 void insert_nop(address pc) {
746 *(pc + 0) = 0x1F;
747 *(pc + 1) = 0x20;
748 *(pc + 2) = 0x03;
749 *(pc + 3) = 0xD5;
750 ICache::invalidate_range(pc, 4);
751 }
752
753 bool is_nop(address pc) {
754 if (*(pc + 0) != 0x1F) return false;
755 if (*(pc + 1) != 0x20) return false;
756 if (*(pc + 2) != 0x03) return false;
757 if (*(pc + 3) != 0xD5) return false;
758 return true;
759 }
760
761 void check_at(bool cond, address pc, const char* msg) {
762 assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
763 msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
764 }
765
766 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
767 NativeInstruction* ni = nativeInstruction_at(pc);
768 if (ni->is_jump()) {
769 insert_nop(pc);
770 } else {
771 check_at(is_nop(pc), pc, "Should already be nop");
772 }
773 }
774
775 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
776 NativeInstruction* ni = nativeInstruction_at(pc);
777 if (is_nop(pc)) {
778 NativeJump::insert(pc, stub_addr);
779 } else {
780 check_at(ni->is_jump(), pc, "Should already be jump");
781 check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
782 }
783 }
784
785 bool needs_acquiring_load_exclusive(const MachNode *n) {
786 assert(n->is_CAS(true), "expecting a compare and swap");
787 if (n->is_CAS(false)) {
788 assert(n->has_trailing_membar(), "expected trailing membar");
789 } else {
790 return n->has_trailing_membar();
791 }
792
793 // so we can just return true here
794 return true;
795 }
796
797 #undef __
798 #define __ masm->
799
800 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
801 Register oldval, Register newval, bool exchange, bool narrow, bool weak) {
802 bool acquire = needs_acquiring_load_exclusive(node);
925 } else {
926 if (acquire) {
927 __ ldar(dst, src.base());
928 } else {
929 __ ldr(dst, src);
930 }
931 }
932
933 // Post-barrier: LRB / KA / weak-root processing.
934 if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
935 ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, dst, src, narrow, /* do_load: */ false, __ offset());
936 char check = 0;
937 check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
938 check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
939 check |= ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
940 stub->enter_if_gc_state(*masm, check);
941 }
942 }
943
944 void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Address address) {
945 if (ShenandoahSkipBarriers || (node->barrier_data() & ShenandoahBitCardMark) == 0) {
946 return;
947 }
948
949 assert(CardTable::dirty_card_val() == 0, "must be");
950 Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
951
952 // rscratch1 = card table base (holder)
953 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
954 __ ldr(rscratch1, curr_ct_holder_addr);
955
956 // rscratch2 = addr
957 __ lea(rscratch2, address);
958
959 // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
960 __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
961
962 if (UseCondCardMark) {
963 Label L_already_dirty;
964 __ ldrb(rscratch1, Address(rscratch2));
965 __ cbz(rscratch1, L_already_dirty);
992 PhaseOutput* const output = Compile::current()->output();
993 if (output->in_scratch_emit_size()) {
994 return;
995 }
996
997 // Assume that each trampoline is one single instruction and that the stubs
998 // will follow immediatelly after the _code section. Therefore, we are
999 // checking if the distance between the fastpath branch and the
1000 // trampoline/entry of the current Stub is less than 32K.
1001 const int code_size = output->buffer_sizing_data()->_code;
1002 const int trampoline_offset = trampoline_stubs_count() * NativeInstruction::instruction_size;
1003 _test_and_branch_reachable = aarch64_test_and_branch_reachable(_fastpath_branch_offset, code_size + trampoline_offset);
1004 if (_test_and_branch_reachable) {
1005 inc_trampoline_stubs_count();
1006 }
1007 }
1008
1009 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1010 // If we reach here with _skip_trampoline set it means that earlier we
1011 // emitted a trampoline to this stub and now we need to emit the actual stub.
1012 if (ShenandoahGCStateCheckHotpatch || _skip_trampoline) {
1013 emit_code_actual(masm);
1014 } else {
1015 _skip_trampoline = true;
1016
1017 // The fastpath executes two branch instructions to reach this stub, let's
1018 // just emit the stub here and not add a third one.
1019 if (!_test_and_branch_reachable) {
1020 // By registering the stub again, after setting _skip_trampoline, we'll
1021 // effectivelly cause the stub to be emitted the next time ::emit_code is
1022 // called.
1023 ShenandoahBarrierStubC2::register_stub(this);
1024 return;
1025 }
1026
1027 // This is entry point when coming from fastpath, IFF it's able to reach here
1028 // with a test and branch instruction, otherwise the entry is
1029 // ShenandoahBarrierStubC2::entry();
1030 const int target_offset = __ offset();
1031 __ bind(_test_and_branch_reachable_entry);
1032
1124 if (selected_live) {
1125 pop_save_register(masm, _obj);
1126 }
1127
1128 // Go back to fast path
1129 __ b(*continuation());
1130 }
1131
1132 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1, Register tmp2) {
1133 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1134 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1135 Label L_runtime;
1136 Label L_done;
1137
1138 // The node doesn't even need keepalive barrier, just don't check anything else
1139 if (!_needs_keep_alive_barrier) {
1140 return ;
1141 }
1142
1143 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1144 // Hotpatched GC checks only care about idle/non-idle state, so needs a check anyhow.
1145 if (_needs_load_ref_barrier || ShenandoahGCStateCheckHotpatch) {
1146 Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1147 __ ldrb(tmp1, gcs_addr);
1148 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
1149 }
1150
1151 // If buffer is full, call into runtime.
1152 __ ldr(tmp1, index);
1153 __ cbz(tmp1, L_runtime);
1154
1155 // The buffer is not full, store value into it.
1156 __ sub(tmp1, tmp1, wordSize);
1157 __ str(tmp1, index);
1158 __ ldr(tmp2, buffer);
1159 __ str(obj, Address(tmp2, tmp1));
1160 __ b(L_done);
1161
1162 // Runtime call
1163 __ bind(L_runtime);
1164
1165 preserve(obj);
1175
1176 // Restore the clobbered registers.
1177 if (clobbered_c_rarg0) {
1178 pop_save_register(masm, c_rarg0);
1179 }
1180 }
1181
1182 __ bind(L_done);
1183 }
1184
1185 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp) {
1186 Label L_done;
1187
1188 // The node doesn't even need LRB barrier, just don't check anything else
1189 if (!_needs_load_ref_barrier) {
1190 return ;
1191 }
1192
1193 if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
1194 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1195 // Hotpatched GC checks only care about idle/non-idle state, so needs a check anyhow.
1196 if (_needs_keep_alive_barrier || ShenandoahGCStateCheckHotpatch) {
1197 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1198 int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
1199 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
1200 __ ldrb(tmp, gc_state_fast);
1201 __ tbz(tmp, bit_to_check, L_done);
1202 }
1203
1204 // Weak/phantom loads always need to go to runtime. For strong refs we
1205 // check if the object in cset, if they are not, then we are done with LRB.
1206 assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
1207 __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
1208 __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1209 __ ldrb(tmp, Address(tmp, 0));
1210 __ cbz(tmp, L_done);
1211 }
1212
1213 dont_preserve(obj);
1214 {
1215 // Shuffle in the arguments. The end result should be:
1216 // c_rarg0 <-- obj
|