< prev index next >

src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp

Print this page

 706   }
 707 
 708   // If we could not find a non-live register, select the live fallback:
 709   if (tmp == noreg) {
 710     tmp = fallback_live;
 711     selected_live = true;
 712   } else {
 713     selected_live = false;
 714   }
 715 
 716   assert(tmp != noreg, "successfully selected");
 717   assert_different_registers(tmp, reg1);
 718   assert_different_registers(tmp, addr.base());
 719   assert_different_registers(tmp, addr.index());
 720   return tmp;
 721 }
 722 
// Emits the fastpath check for this barrier stub: load the fast gc-state byte
// from the current thread and branch into the stub when the bit corresponding
// to `test_state` is set. The branch goes either directly via a single
// test-and-branch to the stub's trampoline (when post_init decided the
// trampoline is reachable) or, when out of tbnz range, via an inverted test
// plus an unconditional branch to the stub entry.
void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(test_state);
  Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
  __ ldrb(rscratch1, gc_state_fast);
  if (_use_trampoline) {
    // Trampoline is within tbnz reach: one instruction enters the stub.
    __ tbnz(rscratch1, bit_to_check, _trampoline_entry);
  } else {
    // Stub too far for a test-and-branch: invert the test to skip over an
    // unconditional branch that can reach the stub entry.
    __ tbz(rscratch1, bit_to_check, *continuation());
    __ b(*entry());
  }
  // This is where the slowpath stub will return to or the code above will
  // jump to if the checks are false
  __ bind(*continuation());
}
 739 
 740 bool needs_acquiring_load_exclusive(const MachNode *n) {
 741   assert(n->is_CAS(true), "expecting a compare and swap");
 742   if (n->is_CAS(false)) {
 743     assert(n->has_trailing_membar(), "expected trailing membar");
 744   } else {
 745     return n->has_trailing_membar();
 746   }
 747 
 748   // so we can just return true here
 749   return true;
 750 }
 751 
 752 #undef __
 753 #define __ masm->
 754 
 755 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
 756     Register oldval, Register newval, bool exchange, bool narrow, bool weak) {
 757   bool acquire = needs_acquiring_load_exclusive(node);

 910 
 911   // rscratch2 = addr
 912   __ lea(rscratch2, address);
 913 
 914   // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
 915   __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
 916 
 917   if (UseCondCardMark) {
 918     Label L_already_dirty;
 919     __ ldrb(rscratch1, Address(rscratch2));
 920     __ cbz(rscratch1, L_already_dirty);
 921     __ strb(zr, Address(rscratch2));
 922     __ bind(L_already_dirty);
 923   } else {
 924     __ strb(zr, Address(rscratch2));
 925   }
 926 }
 927 #undef __
 928 #define __ masm.
 929 
 930 // Only handles forward branch jumps, target_offset >= branch_offset
 931 static bool aarch64_test_and_branch_reachable(int branch_offset, int target_offset) {
 932   assert(branch_offset >= 0, "branch to stub offsets must be positive");
 933   assert(target_offset >= 0, "offset in stubs section must be positive");
 934   assert(target_offset >= branch_offset, "forward branches only, branch_offset -> target_offset");
 935   return (target_offset - branch_offset) < (int)(32*K);
 936 }
 937 
 938 void ShenandoahBarrierStubC2::post_init(int offset) {
 939   // If we are in scratch emit mode we assume worst case,
 940   // and use no trampolines.
 941   PhaseOutput* const output = Compile::current()->output();
 942   if (output->in_scratch_emit_size()) {
 943     return;
 944   }
 945 
 946   // Assume that each trampoline is one single instruction and that the stubs
 947   // will follow immediately after the _code section. We emit trampolines until
 948   // we can no longer do it.
 949   const int code_size = output->buffer_sizing_data()->_code;
 950   const int trampoline_offset = trampoline_stubs_count() * NativeInstruction::instruction_size;
 951   _use_trampoline = aarch64_test_and_branch_reachable(_fastpath_branch_offset, code_size + trampoline_offset);
 952   if (_use_trampoline) {
 953     inc_trampoline_stubs_count();
 954   }
 955 }
 956 
// Two-pass stub emission. On the first call this emits (at most) the
// one-instruction trampoline, then re-registers the stub so a second call
// happens; the second call (with _do_emit_actual set) emits the real
// slow-path body via emit_code_actual.
void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
  if (_do_emit_actual) {
    // Second pass: emit the actual slow-path code.
    emit_code_actual(masm);
    return;
  }

  if (_use_trampoline) {
    // Emit the trampoline and jump to real entry.
    const int target_offset = __ offset();
    assert(aarch64_test_and_branch_reachable(_fastpath_branch_offset, target_offset), "trampoline should be reachable");
    __ bind(_trampoline_entry);
    __ b(*entry());
  }

  // Do it again, this time with actual emits.
  _do_emit_actual = true;
  ShenandoahBarrierStubC2::register_stub(this);
}
 975 
// Makes sure _obj holds the full (decoded) oop that the barriers will operate
// on, branching to `target_if_null` when the value is null so null oops skip
// all barrier work.
void ShenandoahBarrierStubC2::load_and_decode(MacroAssembler& masm, Label& target_if_null) {
  if (_do_load) {
    // Fastpath sets _obj==noreg if it tells the slowpath to do the load
    _obj = rscratch2;

    // This does the load and the decode if necessary
    __ load_heap_oop(_obj, _addr, noreg, noreg, AS_RAW);

    __ cbz(_obj, target_if_null);
  } else {
    // If object is narrow, we need to decode it because everything else later
    // will need full oops.
    if (_narrow) {
      if (_maybe_null) {
        __ decode_heap_oop(_obj);
      } else {
        __ decode_heap_oop_not_null(_obj);
      }
    }

    if (_maybe_null) {
      // The caller-supplied value may be null; route nulls past the barriers.
      __ cbz(_obj, target_if_null);
    }
  }
}
1001 
1002 void ShenandoahBarrierStubC2::reencode_if_needed(MacroAssembler& masm) {
1003   // If object is narrow, we need to encode it before exiting.
1004   // For encoding, dst can only turn null if we are dealing with weak loads.
1005   // Otherwise, we have already null-checked. We can skip all this if we performed
1006   // the load ourselves, which means the value is not used by caller.
1007   if (!_do_load && _narrow) {
1008     if (_needs_load_ref_weak_barrier) {
1009       __ encode_heap_oop(_obj);
1010     } else {
1011       __ encode_heap_oop_not_null(_obj);
1012     }
1013   }
1014 }
1015 
// Slow-path body of the stub: materialize/decode the oop, apply the SATB
// keep-alive barrier and the load-reference barrier, re-encode if necessary,
// then jump back to the fastpath continuation.
void ShenandoahBarrierStubC2::emit_code_actual(MacroAssembler& masm) {
  assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
  __ bind(*entry());

  // A null oop needs no barrier work; branch straight to the continuation.
  load_and_decode(masm, *continuation());

  keepalive(masm, _obj, rscratch1);

  lrb(masm, _obj, _addr, rscratch1);

  reencode_if_needed(masm);

  __ b(*continuation());
}
1030 
1031 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1) {
1032   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1033   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1034   Label L_runtime;
1035   Label L_done;
1036 
1037   // The node doesn't even need keepalive barrier, just don't check anything else
1038   if (!_needs_keep_alive_barrier) {
1039     return ;
1040   }
1041 
1042   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1043   if (_needs_load_ref_barrier) {
1044     Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1045     __ ldrb(tmp1, gcs_addr);
1046     __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
1047   }
1048 
1049   // If buffer is full, call into runtime.
1050   __ ldr(tmp1, index);
1051   __ cbz(tmp1, L_runtime);
1052 
1053   bool selected_live = false;
1054   Register tmp2 = select_temp_register(selected_live, _addr, obj);
1055   if (selected_live) {
1056     push_save_register(masm, tmp2);
1057   }
1058 
1059   // The buffer is not full, store value into it.
1060   __ sub(tmp1, tmp1, wordSize);
1061   __ str(tmp1, index);
1062   __ ldr(tmp2, buffer);
1063   __ str(obj, Address(tmp2, tmp1));
1064   __ b(L_done);
1065 
1066   // Runtime call
1067   __ bind(L_runtime);

1081     if (clobbered_c_rarg0) {
1082       pop_save_register(masm, c_rarg0);
1083     }
1084   }
1085 
1086   __ bind(L_done);
1087 
1088   if (selected_live) {
1089     pop_save_register(masm, tmp2);
1090   }
1091 }
1092 
1093 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp) {
1094   Label L_done, L_slow;
1095 
1096   // The node doesn't even need LRB barrier, just don't check anything else
1097   if (!_needs_load_ref_barrier) {
1098     return ;
1099   }
1100 
1101   // If another barrier is enabled as well, do a runtime check for a specific barrier.
1102   if (_needs_keep_alive_barrier) {
1103     char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1104     int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
1105     Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
1106     __ ldrb(tmp, gc_state_fast);
1107     __ tbz(tmp, bit_to_check, L_done);
1108   }
1109 
1110   // If weak references are being processed, weak/phantom loads need to go slow,
1111   // regadless of their cset status.
1112   if (_needs_load_ref_weak_barrier) {
1113     Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1114     __ ldrb(tmp, gc_state);
1115     __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_slow);
1116   }
1117 
1118   // Cset-check. Fall-through to slow if in collection set.
1119   assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
1120   __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
1121   __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1122   __ ldrb(tmp, Address(tmp, 0));
1123   __ cbz(tmp, L_done);
1124 
1125   // Slow path
1126   __ bind(L_slow);
1127   dont_preserve(obj);
1128   {

 706   }
 707 
 708   // If we could not find a non-live register, select the live fallback:
 709   if (tmp == noreg) {
 710     tmp = fallback_live;
 711     selected_live = true;
 712   } else {
 713     selected_live = false;
 714   }
 715 
 716   assert(tmp != noreg, "successfully selected");
 717   assert_different_registers(tmp, reg1);
 718   assert_different_registers(tmp, addr.base());
 719   assert_different_registers(tmp, addr.index());
 720   return tmp;
 721 }
 722 
// Hotpatchable fastpath: emits an unconditional branch into the stub that the
// runtime later patches to a NOP (and back) depending on GC state
// (see patch_branch_to_nop / patch_nop_to_branch).
// NOTE(review): test_state is not consulted in this version -- the patching
// machinery only distinguishes idle/non-idle; confirm this is intended.
void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  // Emit the unconditional branch in the first version of the method.
  // Let the rest of runtime figure out how to manage it.
  __ relocate(barrier_Relocation::spec());
  __ b(*entry());

#ifdef ASSERT
  // Debug builds: if the branch above gets NOP-ed out, the fast gc-state byte
  // must be zero; otherwise trap to catch a mispatched barrier.
  Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
  __ ldrb(rscratch1, gc_state_fast);
  __ cbz(rscratch1, *continuation());
  __ hlt(0); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
#endif
  __ bind(*continuation());
}
 739 
 740 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
 741   NativeInstruction* ni = nativeInstruction_at(pc);
 742   assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
 743   NativeJump* jmp = nativeJump_at(pc);
 744   return jmp->jump_destination();
 745 }
 746 
 747 void insert_nop(address pc) {
 748   *(pc + 0) = 0x1F;
 749   *(pc + 1) = 0x20;
 750   *(pc + 2) = 0x03;
 751   *(pc + 3) = 0xD5;
 752   ICache::invalidate_range(pc, 4);
 753 }
 754 
 755 bool is_nop(address pc) {
 756   if (*(pc + 0) != 0x1F) return false;
 757   if (*(pc + 1) != 0x20) return false;
 758   if (*(pc + 2) != 0x03) return false;
 759   if (*(pc + 3) != 0xD5) return false;
 760   return true;
 761 }
 762 
// Patching sanity check: assert `cond`, reporting the PC and the raw code
// bytes there on failure.
// NOTE(review): dumps five bytes while the patched instructions are four
// bytes wide (see insert_nop/is_nop) -- confirm the fifth byte is intended.
void check_at(bool cond, address pc, const char* msg) {
  assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
         msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
}
 767 
 768 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
 769   NativeInstruction* ni = nativeInstruction_at(pc);
 770   if (ni->is_jump()) {
 771     insert_nop(pc);
 772   } else {
 773     check_at(is_nop(pc), pc, "Should already be nop");
 774   }
 775 }
 776 
 777 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
 778   NativeInstruction* ni = nativeInstruction_at(pc);
 779   if (is_nop(pc)) {
 780     NativeJump::insert(pc, stub_addr);
 781   } else {
 782     check_at(ni->is_jump(), pc, "Should already be jump");
 783     check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
 784   }



 785 }
 786 
 787 bool needs_acquiring_load_exclusive(const MachNode *n) {
 788   assert(n->is_CAS(true), "expecting a compare and swap");
 789   if (n->is_CAS(false)) {
 790     assert(n->has_trailing_membar(), "expected trailing membar");
 791   } else {
 792     return n->has_trailing_membar();
 793   }
 794 
 795   // so we can just return true here
 796   return true;
 797 }
 798 
 799 #undef __
 800 #define __ masm->
 801 
 802 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
 803     Register oldval, Register newval, bool exchange, bool narrow, bool weak) {
 804   bool acquire = needs_acquiring_load_exclusive(node);

 957 
 958   // rscratch2 = addr
 959   __ lea(rscratch2, address);
 960 
 961   // rscratch2 = &card_table[ addr >> CardTable::card_shift() ]
 962   __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
 963 
 964   if (UseCondCardMark) {
 965     Label L_already_dirty;
 966     __ ldrb(rscratch1, Address(rscratch2));
 967     __ cbz(rscratch1, L_already_dirty);
 968     __ strb(zr, Address(rscratch2));
 969     __ bind(L_already_dirty);
 970   } else {
 971     __ strb(zr, Address(rscratch2));
 972   }
 973 }
 974 #undef __
 975 #define __ masm.
 976 














































// Makes sure _obj holds the full (decoded) oop that the barriers will operate
// on, branching to `target_if_null` when the value is null so null oops skip
// all barrier work.
void ShenandoahBarrierStubC2::load_and_decode(MacroAssembler& masm, Label& target_if_null) {
  if (_do_load) {
    // Fastpath sets _obj==noreg if it tells the slowpath to do the load
    _obj = rscratch2;

    // This does the load and the decode if necessary
    __ load_heap_oop(_obj, _addr, noreg, noreg, AS_RAW);

    __ cbz(_obj, target_if_null);
  } else {
    // If object is narrow, we need to decode it because everything else later
    // will need full oops.
    if (_narrow) {
      if (_maybe_null) {
        __ decode_heap_oop(_obj);
      } else {
        __ decode_heap_oop_not_null(_obj);
      }
    }

    if (_maybe_null) {
      // The caller-supplied value may be null; route nulls past the barriers.
      __ cbz(_obj, target_if_null);
    }
  }
}
1002 
1003 void ShenandoahBarrierStubC2::reencode_if_needed(MacroAssembler& masm) {
1004   // If object is narrow, we need to encode it before exiting.
1005   // For encoding, dst can only turn null if we are dealing with weak loads.
1006   // Otherwise, we have already null-checked. We can skip all this if we performed
1007   // the load ourselves, which means the value is not used by caller.
1008   if (!_do_load && _narrow) {
1009     if (_needs_load_ref_weak_barrier) {
1010       __ encode_heap_oop(_obj);
1011     } else {
1012       __ encode_heap_oop_not_null(_obj);
1013     }
1014   }
1015 }
1016 
// Emits the slow-path body of the stub: materialize/decode the oop, apply the
// SATB keep-alive barrier and the load-reference barrier, re-encode if
// necessary, then jump back to the fastpath continuation.
void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
  assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
  __ bind(*entry());

  // A null oop needs no barrier work; branch straight to the continuation.
  load_and_decode(masm, *continuation());

  keepalive(masm, _obj, rscratch1);

  lrb(masm, _obj, _addr, rscratch1);

  reencode_if_needed(masm);

  __ b(*continuation());
}
1031 
1032 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Register obj, Register tmp1) {
1033   Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1034   Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1035   Label L_runtime;
1036   Label L_done;
1037 
1038   // The node doesn't even need keepalive barrier, just don't check anything else
1039   if (!_needs_keep_alive_barrier) {
1040     return ;
1041   }
1042 
1043   // Hotpatched GC checks only care about idle/non-idle state, so we need to check specific state.
1044   Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1045   __ ldrb(tmp1, gcs_addr);
1046   __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);


1047 
1048   // If buffer is full, call into runtime.
1049   __ ldr(tmp1, index);
1050   __ cbz(tmp1, L_runtime);
1051 
1052   bool selected_live = false;
1053   Register tmp2 = select_temp_register(selected_live, _addr, obj);
1054   if (selected_live) {
1055     push_save_register(masm, tmp2);
1056   }
1057 
1058   // The buffer is not full, store value into it.
1059   __ sub(tmp1, tmp1, wordSize);
1060   __ str(tmp1, index);
1061   __ ldr(tmp2, buffer);
1062   __ str(obj, Address(tmp2, tmp1));
1063   __ b(L_done);
1064 
1065   // Runtime call
1066   __ bind(L_runtime);

1080     if (clobbered_c_rarg0) {
1081       pop_save_register(masm, c_rarg0);
1082     }
1083   }
1084 
1085   __ bind(L_done);
1086 
1087   if (selected_live) {
1088     pop_save_register(masm, tmp2);
1089   }
1090 }
1091 
1092 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Register obj, Address addr, Register tmp) {
1093   Label L_done, L_slow;
1094 
1095   // The node doesn't even need LRB barrier, just don't check anything else
1096   if (!_needs_load_ref_barrier) {
1097     return ;
1098   }
1099 
1100   // Hotpatched GC checks only care about idle/non-idle state, so we need to check again.
1101   char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1102   int bit_to_check = ShenandoahThreadLocalData::gc_state_to_fast_bit(state_to_check);
1103   Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_offset()));
1104   __ ldrb(tmp, gc_state_fast);
1105   __ tbz(tmp, bit_to_check, L_done);


1106 
1107   // If weak references are being processed, weak/phantom loads need to go slow,
1108   // regadless of their cset status.
1109   if (_needs_load_ref_weak_barrier) {
1110     Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1111     __ ldrb(tmp, gc_state);
1112     __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_slow);
1113   }
1114 
1115   // Cset-check. Fall-through to slow if in collection set.
1116   assert(ShenandoahHeapRegion::region_size_bytes_shift_jint() <= 63, "Maximum shift of the add is 63");
1117   __ mov(tmp, ShenandoahHeap::in_cset_fast_test_addr());
1118   __ add(tmp, tmp, obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1119   __ ldrb(tmp, Address(tmp, 0));
1120   __ cbz(tmp, L_done);
1121 
1122   // Slow path
1123   __ bind(L_slow);
1124   dont_preserve(obj);
1125   {
< prev index next >