25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interp_masm.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45
46 #define __ masm->
47
48 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
49 Register src, Register dst, Register count, RegSet saved_regs) {
50 if (is_oop) {
51 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
52 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
53
54 Label done;
55
56 // Avoid calling runtime if count == 0
57 __ cbz(count, done);
58
59 // Is GC active?
60 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
61 __ ldrb(rscratch1, gc_state);
62 if (ShenandoahSATBBarrier && dest_uninitialized) {
63 __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
64 } else {
610 if (is_cae) {
611 // We're falling through to done to indicate success. Success
612 // with is_cae is denoted by returning the value of expected as
613 // result.
614 __ mov(tmp2, expected);
615 }
616
617 __ bind(done);
618       // At entry to done, the Z (EQ) flag is on iff the CAS
619 // operation was successful. Additionally, if is_cae, tmp2 holds
620 // the value most recently fetched from addr. In this case, success
621 // is denoted by tmp2 matching expected.
622
623 if (is_cae) {
624 __ mov(result, tmp2);
625 } else {
626 __ cset(result, Assembler::EQ);
627 }
628 }
629
630 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
631 Register start, Register count, Register scratch) {
632 assert(ShenandoahCardBarrier, "Should have been checked by caller");
633
634 Label L_loop, L_done;
635 const Register end = count;
636
637 // Zero count? Nothing to do.
638 __ cbz(count, L_done);
639
640 // end = start + count << LogBytesPerHeapOop
641 // last element address to make inclusive
642 __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
643 __ sub(end, end, BytesPerHeapOop);
644 __ lsr(start, start, CardTable::card_shift());
645 __ lsr(end, end, CardTable::card_shift());
646
647 // number of bytes to copy
648 __ sub(count, end, start);
649
|
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interp_masm.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45 #ifdef COMPILER2
46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
47 #include "opto/output.hpp"
48 #endif
49
50 #define __ masm->
51
52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
53 Register src, Register dst, Register count, RegSet saved_regs) {
54 if (is_oop) {
55 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
56 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
57
58 Label done;
59
60 // Avoid calling runtime if count == 0
61 __ cbz(count, done);
62
63 // Is GC active?
64 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
65 __ ldrb(rscratch1, gc_state);
66 if (ShenandoahSATBBarrier && dest_uninitialized) {
67 __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
68 } else {
614 if (is_cae) {
615 // We're falling through to done to indicate success. Success
616 // with is_cae is denoted by returning the value of expected as
617 // result.
618 __ mov(tmp2, expected);
619 }
620
621 __ bind(done);
622       // At entry to done, the Z (EQ) flag is on iff the CAS
623 // operation was successful. Additionally, if is_cae, tmp2 holds
624 // the value most recently fetched from addr. In this case, success
625 // is denoted by tmp2 matching expected.
626
627 if (is_cae) {
628 __ mov(result, tmp2);
629 } else {
630 __ cset(result, Assembler::EQ);
631 }
632 }
633
634 #ifdef COMPILER2
635 #undef __
636 #define __ masm.
637
638 bool ShenandoahBarrierStubC2::push_save_register_if_live(MacroAssembler& masm, Register reg) {
639 if (is_live(reg)) {
640 push_save_register(masm, reg);
641 return true;
642 } else {
643 return false;
644 }
645 }
646
// Spill 'reg' into this stub's dedicated stack save slot.
// push_save_slot()/pop_save_slot() presumably manage a small LIFO of slots —
// pushes and pops must therefore be strictly paired; verify against the header.
void ShenandoahBarrierStubC2::push_save_register(MacroAssembler& masm, Register reg) {
  __ str(reg, Address(sp, push_save_slot()));
}
650
// Reload 'reg' from this stub's stack save slot; counterpart of
// push_save_register(). Must be called in reverse order of the pushes.
void ShenandoahBarrierStubC2::pop_save_register(MacroAssembler& masm, Register reg) {
  __ ldr(reg, Address(sp, pop_save_slot()));
}
654
655 bool ShenandoahBarrierStubC2::has_live_vector_registers() {
656 RegMaskIterator rmi(preserve_set());
657 while (rmi.has_next()) {
658 const OptoReg::Name opto_reg = rmi.next();
659 const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
660 if (vm_reg->is_Register()) {
661 // Not a vector
662 } else if (vm_reg->is_FloatRegister()) {
663 // Maybe vector, assume the worst right now
664 return true;
665 } else if (vm_reg->is_PRegister()) {
666 // Vector-related register
667 return true;
668 } else {
669 fatal("Unexpected register type");
670 }
671 }
672 return false;
673 }
674
675 bool ShenandoahBarrierStubC2::is_live(Register reg) {
676 // TODO: Precompute the generic register map for faster lookups.
677 RegMaskIterator rmi(preserve_set());
678 while (rmi.has_next()) {
679 const OptoReg::Name opto_reg = rmi.next();
680 const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
681 if (vm_reg->is_Register() && reg == vm_reg->as_Register()) {
682 return true;
683 }
684 }
685 return false;
686 }
687
// Select a general-purpose scratch register that is distinct from 'reg1' and
// from the base/index registers of 'addr'. Prefers a register that is NOT
// live across the stub; if every candidate is live, falls back to the first
// live candidate and signals that through 'selected_live' so the caller can
// save/restore it around its use.
Register ShenandoahBarrierStubC2::select_temp_register(bool& selected_live, Address addr, Register reg1) {
  Register tmp = noreg;
  Register fallback_live = noreg;

  // Try to select non-live first:
  for (int i = 0; i < Register::number_of_registers; i++) {
    Register r = as_Register(i);
    // Exclude reserved/special registers (frame pointer, stack pointer, link
    // register, compressed-heap base, thread register, both assembler
    // scratches) and anything the caller needs to remain intact.
    if (r != rfp && r != sp && r != lr &&
        r != rheapbase && r != rthread &&
        r != rscratch1 && r != rscratch2 &&
        r != reg1 && r != addr.base() && r != addr.index()) {
      if (!is_live(r)) {
        // First non-live candidate wins.
        tmp = r;
        break;
      } else if (fallback_live == noreg) {
        // Remember the first live candidate in case no dead one exists.
        fallback_live = r;
      }
    }
  }

  // If we could not find a non-live register, select the live fallback:
  if (tmp == noreg) {
    tmp = fallback_live;
    selected_live = true;
  } else {
    selected_live = false;
  }

  assert(tmp != noreg, "successfully selected");
  assert_different_registers(tmp, reg1);
  assert_different_registers(tmp, addr.base());
  assert_different_registers(tmp, addr.index());
  return tmp;
}
722
// Emit the inline fast-path test for this barrier stub: load the thread-local
// "fast" gc-state byte and branch to the out-of-line stub when the bit
// corresponding to 'test_state' is set. Clobbers rscratch1.
void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
  // Exclude these barrier instructions from inline-size accounting.
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(test_state);
  Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
  __ ldrb(rscratch1, gc_state_fast);
  if (_use_trampoline) {
    // A near trampoline was reserved (see post_init), so the short-range
    // tbnz can reach it directly.
    __ tbnz(rscratch1, bit_to_check, _trampoline_entry);
  } else {
    // Stub may be out of tbnz range: invert the test and fall back to an
    // unconditional branch, which has a much larger reach.
    __ tbz(rscratch1, bit_to_check, *continuation());
    __ b(*entry());
  }
  // This is where the slowpath stub will return to, or where the code above
  // will jump to if the checks are false.
  __ bind(*continuation());
}
739
740 #undef __
741 #define __ masm->
742
// Emit a compare-and-set / compare-and-exchange on an oop field with the
// Shenandoah barriers the node requires.
//   res      - boolean success result, or the witnessed value when 'exchange'
//   addr     - register holding the field address
//   oldval   - expected value; newval - replacement value
//   narrow   - operands are compressed oops (32-bit CAS)
//   weak     - weak CAS (may fail spuriously); acquire - acquire semantics
void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
                      Register oldval, Register newval, bool exchange, bool narrow, bool weak, bool acquire) {
  Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;

  // Pre-barrier covers several things:
  // a. Avoids false positives from CAS encountering to-space memory values.
  // b. Satisfies the need for LRB for the CAE result.
  // c. Records old value for the sake of SATB.
  //
  // (a) and (b) are covered because load barrier does memory location fixup.
  // (c) is covered by KA on the current memory value.
  if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
    // obj == noreg with do_load == true: the stub loads the current value itself.
    ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, noreg, addr, narrow, /* do_load: */ true, __ offset());
    char check = 0;
    check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
    check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
    assert(!ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node), "Not supported for CAS");
    stub->enter_if_gc_state(*masm, check);
  }

  // CAS!
  __ cmpxchg(addr, oldval, newval, op_size, acquire, /* release */ true, weak, exchange ? res : noreg);

  // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
  if (!exchange) {
    assert(res != noreg, "need result register");
    __ cset(res, Assembler::EQ);
  }

  // Post-barrier deals with card updates.
  card_barrier_c2(node, masm, Address(addr, 0));
}
775
// Emit an atomic get-and-set (swap) on an oop field with the Shenandoah
// barriers the node requires.
//   preval - receives the previous memory value; newval - value to install
//   addr   - register holding the field address; acquire - acquire semantics
void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
                      Register newval, Register addr, bool acquire) {
  // Compressed-oop swap when the node's type is a narrow oop.
  bool narrow = node->bottom_type()->isa_narrowoop();

  // Pre-barrier covers several things:
  // a. Satisfies the need for LRB for the GAS result.
  // b. Records old value for the sake of SATB.
  //
  // (a) is covered because load barrier does memory location fixup.
  // (b) is covered by KA on the current memory value.
  if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
    // obj == noreg with do_load == true: the stub loads the current value itself.
    ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, noreg, addr, narrow, /* do_load: */ true, __ offset());
    char check = 0;
    check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
    check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
    assert(!ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node), "Not supported for GAS");
    stub->enter_if_gc_state(*masm, check);
  }

  // Pick the xchg variant by width (narrow vs full oop) and ordering.
  if (narrow) {
    if (acquire) {
      __ atomic_xchgalw(preval, newval, addr);
    } else {
      __ atomic_xchgw(preval, newval, addr);
    }
  } else {
    if (acquire) {
      __ atomic_xchgal(preval, newval, addr);
    } else {
      __ atomic_xchg(preval, newval, addr);
    }
  }

  // Post-barrier deals with card updates.
  card_barrier_c2(node, masm, Address(addr, 0));
}
812
// Emit an oop store with the Shenandoah barriers the node requires.
//   dst        - destination address; dst_narrow - store as compressed oop
//   src        - value to store; src_narrow - value already compressed
//   is_volatile - use a releasing store (stlr/stlrw)
// May clobber rscratch1 when the value has to be compressed on the fly.
void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
                     Register src, bool src_narrow, bool is_volatile) {

  // Pre-barrier: SATB, keep-alive the current memory value.
  if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
    assert(!ShenandoahBarrierStubC2::needs_load_ref_barrier(node), "Should not be required for stores");
    ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, noreg, dst, dst_narrow, /* do_load: */ true, __ offset());
    stub->enter_if_gc_state(*masm, ShenandoahHeap::MARKING);
  }

  // Do the actual store
  if (dst_narrow) {
    if (!src_narrow) {
      // Need to encode into rscratch, because we cannot clobber src.
      // TODO: Maybe there is a matcher way to test that src is unused after this?
      __ mov(rscratch1, src);
      if (ShenandoahBarrierStubC2::maybe_null(node)) {
        __ encode_heap_oop(rscratch1);
      } else {
        __ encode_heap_oop_not_null(rscratch1);
      }
      src = rscratch1;
    }

    if (is_volatile) {
      // Releasing stores take a plain base register, not a full Address.
      __ stlrw(src, dst.base());
    } else {
      __ strw(src, dst);
    }
  } else {
    if (is_volatile) {
      __ stlr(src, dst.base());
    } else {
      __ str(src, dst);
    }
  }

  // Post-barrier: card updates.
  card_barrier_c2(node, masm, dst);
}
853
// Emit an oop load with the Shenandoah barriers the node requires.
//   dst - destination register; src - source address
//   is_narrow  - load a compressed oop (32-bit load)
//   is_acquire - use an acquiring load (ldar/ldarw)
void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool is_narrow, bool is_acquire) {
  // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
  if (is_narrow) {
    if (is_acquire) {
      // Acquiring loads take a plain base register, not a full Address.
      __ ldarw(dst, src.base());
    } else {
      __ ldrw(dst, src);
    }
  } else {
    if (is_acquire) {
      __ ldar(dst, src.base());
    } else {
      __ ldr(dst, src);
    }
  }

  // Post-barrier: LRB / KA / weak-root processing.
  if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
    // do_load == false: the loaded value is already in dst.
    ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, dst, src, is_narrow, /* do_load: */ false, __ offset());
    char check = 0;
    check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
    check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
    check |= ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
    stub->enter_if_gc_state(*masm, check);
  }
}
880
// Emit the post-store card-table update for 'address': mark the covering card
// dirty (dirty == 0, so a strb of zr suffices). With UseCondCardMark, the
// store is skipped when the card is already dirty to reduce write traffic.
// Clobbers rscratch1 and rscratch2.
void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Address address) {
  if (!ShenandoahBarrierStubC2::needs_card_barrier(node)) {
    return;
  }

  // The zr store below only works because dirty is zero.
  assert(CardTable::dirty_card_val() == 0, "must be");
  // Exclude these barrier instructions from inline-size accounting.
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

  // rscratch1 = card table base (holder)
  Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
  __ ldr(rscratch1, curr_ct_holder_addr);

  // rscratch2 = addr
  __ lea(rscratch2, address);

  // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
  __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());

  if (UseCondCardMark) {
    Label L_already_dirty;
    __ ldrb(rscratch1, Address(rscratch2));
    // Card value zero means already dirty: skip the redundant store.
    __ cbz(rscratch1, L_already_dirty);
    __ strb(zr, Address(rscratch2));
    __ bind(L_already_dirty);
  } else {
    __ strb(zr, Address(rscratch2));
  }
}
909 #undef __
910 #define __ masm.
911
912 // Only handles forward branch jumps, target_offset >= branch_offset
913 static bool aarch64_test_and_branch_reachable(int branch_offset, int target_offset) {
914 assert(branch_offset >= 0, "branch to stub offsets must be positive");
915 assert(target_offset >= 0, "offset in stubs section must be positive");
916 assert(target_offset >= branch_offset, "forward branches only, branch_offset -> target_offset");
917 return (target_offset - branch_offset) < (int)(32*K);
918 }
919
// Decide, after code sizing, whether this stub's fast-path tb(n)z can reach a
// near trampoline; if so, reserve one. Called with the branch's code offset
// already recorded in _fastpath_branch_offset.
void ShenandoahBarrierStubC2::post_init(int offset) {
  // If we are in scratch emit mode we assume worst case,
  // and use no trampolines.
  PhaseOutput* const output = Compile::current()->output();
  if (output->in_scratch_emit_size()) {
    return;
  }

  // Assume that each trampoline is one single instruction and that the stubs
  // will follow immediately after the _code section. We emit trampolines until
  // we can no longer do it.
  const int code_size = output->buffer_sizing_data()->_code;
  const int trampoline_offset = trampoline_stubs_count() * NativeInstruction::instruction_size;
  // Reachable only if the trampoline (placed right after the code section,
  // behind the trampolines already reserved) is within tb(n)z range.
  _use_trampoline = aarch64_test_and_branch_reachable(_fastpath_branch_offset, code_size + trampoline_offset);
  if (_use_trampoline) {
    inc_trampoline_stubs_count();
  }
}
938
// Emit this stub's out-of-line code. Called twice per stub: the first call
// (with _do_emit_actual false) only emits the optional trampoline and
// re-registers the stub; the second call emits the real slow path.
void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
  // Exclude stub instructions from inline-size accounting.
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");

  if (_do_emit_actual) {
    Label L_done;

    __ bind(*entry());

    // Load/decode the object; jumps to L_done when it turns out null.
    load_and_decode(masm, L_done);

    // SATB keep-alive enqueue of the old value (if required).
    keepalive(masm, _obj, rscratch1);

    // Load-reference barrier: fix up to-space reference (if required).
    lrb(masm, _obj, _addr, rscratch1);

    // Re-compress the oop when the caller expects a narrow value.
    reencode_if_needed(masm);

    __ bind(L_done);
    __ b(*continuation());
  } else {
    // If we'll need a trampoline for this stub emit it here.
    if (_use_trampoline) {
      const int target_offset = __ offset();
      assert(aarch64_test_and_branch_reachable(_fastpath_branch_offset, target_offset), "trampoline should be reachable");
      __ bind(_trampoline_entry);
      __ b(*entry());
    }

    // Register this stub, this time with actual emits.
    _do_emit_actual = true;
    ShenandoahBarrierStubC2::register_stub(this);
  }
}
973
// Make _obj hold the full (decoded) oop to process, loading it from _addr
// when the fast path delegated the load to us. Branches to 'target_if_null'
// when the value is (or may be) null, since null needs no barrier work.
void ShenandoahBarrierStubC2::load_and_decode(MacroAssembler& masm, Label& target_if_null) {
  if (_do_load) {
    // Fastpath sets _obj==noreg if it tells the slowpath to do the load
    _obj = rscratch2;

    // This does the load and the decode if necessary
    __ load_heap_oop(_obj, _addr, noreg, noreg, AS_RAW);

    __ cbz(_obj, target_if_null);
  } else {
    // If object is narrow, we need to decode it because everything else later
    // will need full oops.
    if (_narrow) {
      if (_maybe_null) {
        __ decode_heap_oop(_obj);
      } else {
        __ decode_heap_oop_not_null(_obj);
      }
    }

    // Null check only needed when the node admits null values.
    if (_maybe_null) {
      __ cbz(_obj, target_if_null);
    }
  }
}
999
1000 void ShenandoahBarrierStubC2::reencode_if_needed(MacroAssembler& masm) {
1001 // If object is narrow, we need to encode it before exiting.
1002 // For encoding, dst can only turn null if we are dealing with weak loads.
1003 // Otherwise, we have already null-checked. We can skip all this if we performed
1004 // the load ourselves, which means the value is not used by caller.
1005 if (!_do_load && _narrow) {
1006 if (_needs_load_ref_weak_barrier) {
1007 __ encode_heap_oop(_obj);
1008 } else {
1009 __ encode_heap_oop_not_null(_obj);
1010 }
1011 }
1012 }
1013
// SATB keep-alive slow path: enqueue 'obj' into the thread-local SATB mark
// queue, calling into the runtime when the queue buffer is full. 'tmp1' is
// clobbered; a second temp is chosen dynamically and saved/restored if live.
// 'L_done_unused' is kept for signature compatibility and not used here.
void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1, Label* L_done_unused) {
  Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
  Label L_runtime;
  Label L_done;

  // The node doesn't even need keepalive barrier, just don't check anything else
  if (!_needs_keep_alive_barrier) {
    return ;
  }

  // If another barrier is enabled as well, do a runtime check for a specific barrier.
  if (_needs_load_ref_barrier) {
    Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(tmp1, gcs_addr);
    // Not marking: keep-alive not needed, skip straight to the end.
    __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
  }

  // If buffer is full, call into runtime.
  // Index 0 means no space left in the buffer.
  __ ldr(tmp1, index);
  __ cbz(tmp1, L_runtime);

  // Need one more register for the enqueue; save it around use if it is live.
  bool selected_live = false;
  Register tmp2 = select_temp_register(selected_live, _addr, obj);
  if (selected_live) {
    push_save_register(masm, tmp2);
  }

  // The buffer is not full, store value into it.
  // The index counts down in bytes; store obj at buffer + new index.
  __ sub(tmp1, tmp1, wordSize);
  __ str(tmp1, index);
  __ ldr(tmp2, buffer);
  __ str(obj, Address(tmp2, tmp1));
  __ b(L_done);

  // Runtime call
  __ bind(L_runtime);

  preserve(obj);
  {
    // Shuffle obj into c_rarg0, saving c_rarg0 first if it carries a live value.
    bool clobbered_c_rarg0 = false;
    if (c_rarg0 != obj) {
      clobbered_c_rarg0 = push_save_register_if_live(masm, c_rarg0);
      __ mov(c_rarg0, obj);
    }

    // Go to runtime stub and handle the rest there.
    __ far_call(RuntimeAddress(keepalive_runtime_entry_addr()));

    // Restore the clobbered registers.
    if (clobbered_c_rarg0) {
      pop_save_register(masm, c_rarg0);
    }
  }

  __ bind(L_done);

  if (selected_live) {
    pop_save_register(masm, tmp2);
  }
}
1075
// Load-reference-barrier slow path: if 'obj' is in the collection set (or a
// weak load during weak-root processing), call the LRB runtime to obtain the
// to-space copy and leave the result in 'obj'. 'tmp' is clobbered.
// 'L_done_unused' is kept for signature compatibility and not used here.
void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp, Label* L_done_unused) {
  Label L_done, L_slow;

  // The node doesn't even need LRB barrier, just don't check anything else
  if (!_needs_load_ref_barrier) {
    return ;
  }

  // If another barrier is enabled as well, do a runtime check for a specific barrier.
  if (_needs_keep_alive_barrier) {
    char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
    int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
    Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
    __ ldrb(tmp, gc_state_fast);
    // Relevant gc phase not active: LRB not needed.
    __ tbz(tmp, bit_to_check, L_done);
  }

  // If weak references are being processed, weak/phantom loads need to go slow,
  // regardless of their cset status.
  if (_needs_load_ref_weak_barrier) {
    Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(tmp, gc_state);
    __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_slow);
  }

  // Cset-check. Fall-through to slow if in collection set.
  // Index the per-region cset byte map by (obj >> region_size_shift).
  assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
  __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
  __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ ldrb(tmp, Address(tmp, 0));
  __ cbz(tmp, L_done);

  // Slow path
  __ bind(L_slow);
  // The runtime will write the fixed-up value back, obj need not be preserved.
  dont_preserve(obj);
  {
    // Shuffle in the arguments. The end result should be:
    //   c_rarg0 <-- obj
    //   c_rarg1 <-- lea(addr)
    //
    // Save clobbered registers before overwriting them, unless they
    // carry obj, which would be overwritten on return.
    bool clobbered_c_rarg0 = false;
    bool clobbered_c_rarg1 = false;
    bool clobbered_r0 = false;

    if (c_rarg0 == obj) {
      clobbered_c_rarg1 = push_save_register_if_live(masm, c_rarg1);
      __ lea(c_rarg1, addr);
    } else if (c_rarg1 == obj) {
      // Set up arguments in reverse, and then flip them
      clobbered_c_rarg0 = push_save_register_if_live(masm, c_rarg0);
      __ lea(c_rarg0, addr);
      // flip them
      __ mov(rscratch1, c_rarg0);
      __ mov(c_rarg0, c_rarg1);
      __ mov(c_rarg1, rscratch1);
    } else {
      assert_different_registers(c_rarg1, obj);
      clobbered_c_rarg0 = push_save_register_if_live(masm, c_rarg0);
      clobbered_c_rarg1 = push_save_register_if_live(masm, c_rarg1);
      // lea before mov: addr may be based on c_rarg0, which mov would clobber.
      __ lea(c_rarg1, addr);
      __ mov(c_rarg0, obj);
    }

    // The runtime call will clobber r0 at return. If obj isn't r0 then we need
    // to save obj.
    if (obj != r0) {
      clobbered_r0 = push_save_register_if_live(masm, r0);
    }

    // Go to runtime stub and handle the rest there.
    __ far_call(RuntimeAddress(lrb_runtime_entry_addr()));

    // Save the result where needed and restore the clobbered registers.
    if (obj != r0) {
      __ mov(obj, r0);
    }
    if (clobbered_r0) {
      pop_save_register(masm, r0);
    }
    if (clobbered_c_rarg1) {
      pop_save_register(masm, c_rarg1);
    }
    if (clobbered_c_rarg0) {
      pop_save_register(masm, c_rarg0);
    }
  }

  __ bind(L_done);
}
1167
1168 #undef __
1169 #define __ masm->
1170
1171 #endif // COMPILER2
1172
1173 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
1174 Register start, Register count, Register scratch) {
1175 assert(ShenandoahCardBarrier, "Should have been checked by caller");
1176
1177 Label L_loop, L_done;
1178 const Register end = count;
1179
1180 // Zero count? Nothing to do.
1181 __ cbz(count, L_done);
1182
1183 // end = start + count << LogBytesPerHeapOop
1184 // last element address to make inclusive
1185 __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
1186 __ sub(end, end, BytesPerHeapOop);
1187 __ lsr(start, start, CardTable::card_shift());
1188 __ lsr(end, end, CardTable::card_shift());
1189
1190 // number of bytes to copy
1191 __ sub(count, end, start);
1192
|