13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahRuntime.hpp"
34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
35 #include "interpreter/interp_masm.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #ifdef COMPILER1
40 #include "c1/c1_LIRAssembler.hpp"
41 #include "c1/c1_MacroAssembler.hpp"
42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
43 #endif
44 #ifdef COMPILER2
45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
46 #include "opto/output.hpp"
47 #endif
48
49 #define __ masm->
50
51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
52 Register src, Register dst, Register count, RegSet saved_regs) {
53 if (is_oop) {
54 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
55 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
56
749 // tmp2 = effective address
750 __ lea(tmp2, address);
751
752 // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
753 __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
754
755 if (UseCondCardMark) {
756 Label L_already_dirty;
757 __ ldrb(tmp1, Address(tmp2));
758 __ cbz(tmp1, L_already_dirty);
759 __ strb(zr, Address(tmp2));
760 __ bind(L_already_dirty);
761 } else {
762 __ strb(zr, Address(tmp2));
763 }
764 }
765
766 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
767 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
768 PhaseOutput* const output = Compile::current()->output();
769 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
770
771 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
772 // We'll use that information to decide whether we need a far jump to the
773 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
774 // because otherwise it will be rebound when we later emit the instructions
775 // for real.
776 if (_needs_far_jump) {
777 __ ldrb(tmp, gc_state_fast);
778 __ cbz(tmp, *continuation());
779 __ b(output->in_scratch_emit_size() ? *continuation() : *entry());
780 } else {
781 __ ldrb(tmp, gc_state_fast);
782 __ cbnz(tmp, output->in_scratch_emit_size() ? *continuation() : *entry());
783 }
784
785 // This is were the slowpath stub will return to or the code above will
786 // jump to if the checks are false
787 __ bind(*continuation());
788 }
789
790 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
791 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
792 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
793 PhaseOutput* const output = Compile::current()->output();
794
795 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
796 // We'll use that information to decide whether we need a far jump to the
797 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
798 // because otherwise it will be rebound when we later emit the instructions
799 // for real.
800 if (!output->in_scratch_emit_size()) {
801 __ bind(*entry());
802 }
803
804 // If we need to load ourselves, do it here.
805 if (_do_load) {
806 if (_narrow) {
807 __ ldrw(_obj, _addr);
808 } else {
809 __ ldr(_obj, _addr);
840 keepalive(masm, continuation());
841 } else if (_needs_load_ref_barrier) {
842 lrb(masm);
843 } else {
844 ShouldNotReachHere();
845 }
846 }
847
848 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
849 if (_needs_far_jump) {
850 Label L_short_jump;
851 __ cbnz(reg, L_short_jump);
852 __ b(*continuation());
853 __ bind(L_short_jump);
854 } else {
855 __ cbz(reg, *continuation());
856 }
857 }
858
859 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
860 Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
861 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
862 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
863 Label L_through, L_slowpath;
864
865 // If another barrier is enabled as well, do a runtime check for a specific barrier.
866 if (_needs_load_ref_barrier) {
867 assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
868 __ ldrb(_tmp1, gcstate);
869 __ cbz(_tmp1, L_through);
870 }
871
872 // Fast-path: put object into buffer.
873 // If buffer is already full, go slow.
874 __ ldr(_tmp1, index);
875 __ cbz(_tmp1, L_slowpath);
876 __ sub(_tmp1, _tmp1, wordSize);
877 __ str(_tmp1, index);
878 __ ldr(_tmp2, buffer);
879
880 // Store the object in queue.
881 // If object is narrow, we need to decode it before inserting.
882 if (_narrow) {
883 __ add(_tmp2, _tmp2, _tmp1);
884 __ decode_heap_oop_not_null(_tmp1, _obj);
885 __ str(_tmp1, Address(_tmp2));
886 } else {
887 // Buffer is 64-bit address, must be in base register.
888 __ str(_obj, Address(_tmp2, _tmp1));
889 }
899 __ bind(L_slowpath);
900
901 {
902 SaveLiveRegisters slr(&masm, this);
903
904 // Go to runtime and handle the rest there.
905 __ mov(c_rarg0, _obj);
906 __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
907 __ blr(lr);
908 }
909 if (L_done != nullptr) {
910 __ b(*L_done);
911 } else {
912 __ bind(L_through);
913 }
914 }
915
916 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
917 Label L_slow;
918
919 // If another barrier is enabled as well, do a runtime check for a specific barrier.
920 if (_needs_keep_alive_barrier) {
921 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
922 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
923 __ ldrb(_tmp1, gc_state_fast);
924 maybe_far_jump_if_zero(masm, _tmp1);
925 }
926
927 // If weak references are being processed, weak/phantom loads need to go slow,
928 // regardless of their cset status.
929 if (_needs_load_ref_weak_barrier) {
930 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
931 __ ldrb(_tmp1, gc_state_fast);
932 __ cbnz(_tmp1, L_slow);
933 }
934
935 // Cset-check. Fall-through to slow if in collection set.
936 bool is_aot = AOTCodeCache::is_on_for_dump();
937 if (!is_aot) {
938 __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
939 if (_narrow) {
940 __ decode_heap_oop_not_null(_tmp2, _obj);
941 __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
942 } else {
943 __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
944 }
945 } else {
946 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
947 if (_narrow) {
948 __ decode_heap_oop_not_null(_tmp1, _obj);
949 } else {
950 __ mov(_tmp1, _obj);
951 }
952 __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahNMethod.inline.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interp_masm.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "nativeInst_aarch64.hpp"
39 #include "runtime/javaThread.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #ifdef COMPILER1
42 #include "c1/c1_LIRAssembler.hpp"
43 #include "c1/c1_MacroAssembler.hpp"
44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
45 #endif
46 #ifdef COMPILER2
47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
48 #include "opto/output.hpp"
49 #endif
50
51 #define __ masm->
52
53 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
54 Register src, Register dst, Register count, RegSet saved_regs) {
55 if (is_oop) {
56 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
57 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
58
751 // tmp2 = effective address
752 __ lea(tmp2, address);
753
754 // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
755 __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
756
757 if (UseCondCardMark) {
758 Label L_already_dirty;
759 __ ldrb(tmp1, Address(tmp2));
760 __ cbz(tmp1, L_already_dirty);
761 __ strb(zr, Address(tmp2));
762 __ bind(L_already_dirty);
763 } else {
764 __ strb(zr, Address(tmp2));
765 }
766 }
767
768 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
769 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
770 PhaseOutput* const output = Compile::current()->output();
771
772 // Emit the unconditional branch in the first version of the method.
773 // Let the rest of runtime figure out how to manage it.
774 if (output->in_scratch_emit_size()) {
775 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
776 // We'll use that information to decide whether we need a far jump to the
777 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
778 // because otherwise it will be rebound when we later emit the instructions
779 // for real.
780 __ nop();
781 } else {
782 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(test_state));
783 __ b(*entry());
784 }
785
786 // This is were the slowpath stub will return to or the code above will
787 // jump to if the checks are false
788 __ bind(*continuation());
789 }
790
791 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
792 NativeInstruction* ni = nativeInstruction_at(pc);
793 assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
794 NativeJump* jmp = nativeJump_at(pc);
795 return jmp->jump_destination();
796 }
797
798 static bool is_nop(address pc) {
799 if (*(pc + 0) != 0x1F) return false;
800 if (*(pc + 1) != 0x20) return false;
801 if (*(pc + 2) != 0x03) return false;
802 if (*(pc + 3) != 0xD5) return false;
803 return true;
804 }
805
806 static void insert_nop(address pc) {
807 *reinterpret_cast<int32_t*>(pc) = 0xD503201F;
808 assert(is_nop(pc), "Should be");
809 ICache::invalidate_range(pc, 4);
810 }
811
812 static void check_at(bool cond, address pc, const char* msg) {
813 assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x",
814 msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3));
815 }
816
817 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
818 NativeInstruction* ni = nativeInstruction_at(pc);
819 return ni->is_jump();
820 }
821
822 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
823 NativeInstruction* ni = nativeInstruction_at(pc);
824 if (ni->is_jump()) {
825 insert_nop(pc);
826 } else {
827 check_at(is_nop(pc), pc, "Should already be nop");
828 }
829 }
830
831 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
832 NativeInstruction* ni = nativeInstruction_at(pc);
833 if (is_nop(pc)) {
834 NativeJump::insert(pc, stub_addr);
835 } else {
836 check_at(ni->is_jump(), pc, "Should already be jump");
837 check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
838 }
839 }
840
841 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
842 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
843 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
844 PhaseOutput* const output = Compile::current()->output();
845
846 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
847 // We'll use that information to decide whether we need a far jump to the
848 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
849 // because otherwise it will be rebound when we later emit the instructions
850 // for real.
851 if (!output->in_scratch_emit_size()) {
852 __ bind(*entry());
853 }
854
855 // If we need to load ourselves, do it here.
856 if (_do_load) {
857 if (_narrow) {
858 __ ldrw(_obj, _addr);
859 } else {
860 __ ldr(_obj, _addr);
891 keepalive(masm, continuation());
892 } else if (_needs_load_ref_barrier) {
893 lrb(masm);
894 } else {
895 ShouldNotReachHere();
896 }
897 }
898
899 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
900 if (_needs_far_jump) {
901 Label L_short_jump;
902 __ cbnz(reg, L_short_jump);
903 __ b(*continuation());
904 __ bind(L_short_jump);
905 } else {
906 __ cbz(reg, *continuation());
907 }
908 }
909
910 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
911 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
912 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
913 Label L_through, L_slowpath;
914
915 // If another barrier is enabled as well, do a check for a specific barrier.
916 if (_needs_load_ref_barrier) {
917 assert(L_done == nullptr, "Should be");
918 // Emit the unconditional branch in the first version of the method.
919 // Let the rest of runtime figure out how to manage it.
920 // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
921 char state_to_check = ShenandoahHeap::MARKING;
922 Label L_over;
923 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
924 __ b(L_over);
925 __ b(L_through);
926 __ bind(L_over);
927 }
928
929 // Fast-path: put object into buffer.
930 // If buffer is already full, go slow.
931 __ ldr(_tmp1, index);
932 __ cbz(_tmp1, L_slowpath);
933 __ sub(_tmp1, _tmp1, wordSize);
934 __ str(_tmp1, index);
935 __ ldr(_tmp2, buffer);
936
937 // Store the object in queue.
938 // If object is narrow, we need to decode it before inserting.
939 if (_narrow) {
940 __ add(_tmp2, _tmp2, _tmp1);
941 __ decode_heap_oop_not_null(_tmp1, _obj);
942 __ str(_tmp1, Address(_tmp2));
943 } else {
944 // Buffer is 64-bit address, must be in base register.
945 __ str(_obj, Address(_tmp2, _tmp1));
946 }
956 __ bind(L_slowpath);
957
958 {
959 SaveLiveRegisters slr(&masm, this);
960
961 // Go to runtime and handle the rest there.
962 __ mov(c_rarg0, _obj);
963 __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
964 __ blr(lr);
965 }
966 if (L_done != nullptr) {
967 __ b(*L_done);
968 } else {
969 __ bind(L_through);
970 }
971 }
972
973 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
974 Label L_slow;
975
976 // If weak references are being processed, weak/phantom loads need to go slow,
977 // regardless of their cset status.
978 if (_needs_load_ref_weak_barrier) {
979 char state_to_check = ShenandoahHeap::WEAK_ROOTS;
980 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
981 __ b(L_slow);
982 }
983
984 if (_needs_keep_alive_barrier) {
985 // Emit the unconditional branch in the first version of the method.
986 // Let the rest of runtime figure out how to manage it.
987 // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
988 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
989 Label L_over;
990 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
991 __ b(L_over);
992 __ b(*continuation());
993 __ bind(L_over);
994 }
995
996 // Cset-check. Fall-through to slow if in collection set.
997 bool is_aot = AOTCodeCache::is_on_for_dump();
998 if (!is_aot) {
999 __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1000 if (_narrow) {
1001 __ decode_heap_oop_not_null(_tmp2, _obj);
1002 __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1003 } else {
1004 __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1005 }
1006 } else {
1007 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1008 if (_narrow) {
1009 __ decode_heap_oop_not_null(_tmp1, _obj);
1010 } else {
1011 __ mov(_tmp1, _obj);
1012 }
1013 __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
|