1631
// Byte offset from the start of a dynamic Java call sequence to the return
// address: the sequence is 4 instructions (movz, movk, movk, bl) * 4 bytes.
1632 int MachCallDynamicJavaNode::ret_addr_offset()
1633 {
1634 return 16; // movz, movk, movk, bl
1635 }
1636
// Byte offset from the start of a runtime call sequence to the return address.
1637 int MachCallRuntimeNode::ret_addr_offset() {
1638 // for generated stubs the call will be
1639 // bl(addr)
1640 // or with far branches
1641 // bl(trampoline_stub)
1642 // for real runtime callouts it will be six instructions
1643 // see aarch64_enc_java_to_runtime
1644 // adr(rscratch2, retaddr)
1645 // lea(rscratch1, RuntimeAddress(addr)
1646 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1647 // blr(rscratch1)
// A target inside the code cache is reachable via a single bl (possibly
// through a trampoline stub), so the return address is one instruction in.
1648 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1649 if (cb) {
1650 return 1 * NativeInstruction::instruction_size;
1651 } else {
// Out-of-code-cache callout: the six-instruction sequence documented above
// (lea expands to 3 instructions for an absolute 48-bit address).
1652 return 6 * NativeInstruction::instruction_size;
1653 }
1654 }
1655
1656 //=============================================================================
1657
1658 #ifndef PRODUCT
// Debug-only pretty printer for the breakpoint pseudo-instruction.
1659 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1660 st->print("BREAKPOINT");
1661 }
1662 #endif
1663
// Emit a hardware breakpoint (AArch64 BRK #0).
1664 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1665 __ brk(0);
1666 }
1667
// Size of the emitted breakpoint; defer to the generic measure-by-emitting path.
1668 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1669 return MachNode::size(ra_);
1670 }
1739 if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
1740 st->print("\n\t");
1741 st->print("ldr rscratch1, [guard]\n\t");
1742 st->print("dmb ishld\n\t");
1743 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1744 st->print("cmp rscratch1, rscratch2\n\t");
1745 st->print("b.eq skip");
1746 st->print("\n\t");
1747 st->print("blr #nmethod_entry_barrier_stub\n\t");
1748 st->print("b skip\n\t");
1749 st->print("guard: int\n\t");
1750 st->print("\n\t");
1751 st->print("skip:\n\t");
1752 }
1753 }
1754 #endif
1755
// Emit the method prolog: patchable nop, optional class-init barrier,
// stack-bang check, frame construction, and the nmethod entry barrier.
1756 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1757 Compile* C = ra_->C;
1758
1759 // n.b. frame size includes space for return pc and rfp
1760 const int framesize = C->output()->frame_size_in_bytes();
1761
1762 // insert a nop at the start of the prolog so we can patch in a
1763 // branch if we need to invalidate the method later
1764 __ nop();
1765
1766 if (C->clinit_barrier_on_entry()) {
1767 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1768
1769 Label L_skip_barrier;
1770
// Re-dispatch through the wrong-method stub if the holder class is still
// initializing on another thread.
1771 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1772 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1773 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1774 __ bind(L_skip_barrier);
1775 }
1776
// SVE: restore the all-true predicate register that calls may have clobbered.
1777 if (C->max_vector_size() > 0) {
1778 __ reinitialize_ptrue();
1779 }
1780
1781 int bangsize = C->output()->bang_size_in_bytes();
1782 if (C->output()->need_stack_bang(bangsize))
1783 __ generate_stack_overflow_check(bangsize);
1784
1785 __ build_frame(framesize);
1786
1787 if (C->stub_function() == nullptr) {
1788 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1789 if (BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
1790 // Dummy labels for just measuring the code size
1791 Label dummy_slow_path;
1792 Label dummy_continuation;
1793 Label dummy_guard;
1794 Label* slow_path = &dummy_slow_path;
1795 Label* continuation = &dummy_continuation;
1796 Label* guard = &dummy_guard;
1797 if (!Compile::current()->output()->in_scratch_emit_size()) {
1798 // Use real labels from actual stub when not emitting code for the purpose of measuring its size
1799 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
1800 Compile::current()->output()->add_stub(stub);
1801 slow_path = &stub->entry();
1802 continuation = &stub->continuation();
1803 guard = &stub->guard();
1804 }
1805 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
1806 bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
1807 }
1808 }
1809
1810 if (VerifyStackAtCalls) {
1811 Unimplemented();
1812 }
1813
// Frame is fully built at this offset; debug info / deopt may rely on it.
1814 C->output()->set_frame_complete(__ offset());
1815
1816 if (C->has_mach_constant_base_node()) {
1817 // NOTE: We set the table base offset here because users might be
1818 // emitted before MachConstantBaseNode.
1819 ConstantTable& constant_table = C->output()->constant_table();
1820 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1821 }
1822 }
1823
// Prolog size varies with barriers and frame size; measure by re-emitting.
1824 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1825 {
1826 return MachNode::size(ra_); // too many variables; just compute it
1827 // the hard way
1828 }
1829
// Number of relocation entries contributed by the prolog (none).
1830 int MachPrologNode::reloc() const
1831 {
1832 return 0;
1833 }
1834
1835 //=============================================================================
1836
1837 #ifndef PRODUCT
// Debug-only pretty printer mirroring MachEpilogNode::emit: frame pop,
// optional ROP-protection epilogue, and the return safepoint poll.
1838 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1839 Compile* C = ra_->C;
1840 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1841
1842 st->print("# pop frame %d\n\t",framesize);
1843
1844 if (framesize == 0) {
1845 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1846 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
// Small frame: lr/rfp restored from a scaled offset, then one sp add.
1847 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1848 st->print("add sp, sp, #%d\n\t", framesize);
1849 } else {
// NOTE(review): large-frame path — presumably preceded by an sp adjustment
// (lines appear elided in this dump); verify against remove_frame().
1852 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1853 }
1854 if (VM_Version::use_rop_protection()) {
// autiaz authenticates lr; the dummy load faults early if it was tampered.
1855 st->print("autiaz\n\t");
1856 st->print("ldr zr, [lr]\n\t");
1857 }
1858
1859 if (do_polling() && C->is_method_compilation()) {
1860 st->print("# test polling word\n\t");
1861 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1862 st->print("cmp sp, rscratch1\n\t");
1863 st->print("bhi #slow_path");
1864 }
1865 }
1866 #endif
1867
// Emit the method epilog: tear down the frame, check reserved-stack access,
// and perform the return-site safepoint poll.
1868 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1869 Compile* C = ra_->C;
1870 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1871
1872 __ remove_frame(framesize);
1873
1874 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1875 __ reserved_stack_check();
1876 }
1877
1878 if (do_polling() && C->is_method_compilation()) {
// When only measuring size, branch to a dummy label instead of allocating
// a real C2SafepointPollStub.
1879 Label dummy_label;
1880 Label* code_stub = &dummy_label;
1881 if (!C->output()->in_scratch_emit_size()) {
1882 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1883 C->output()->add_stub(stub);
1884 code_stub = &stub->entry();
1885 }
1886 __ relocate(relocInfo::poll_return_type);
1887 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1888 }
1889 }
1890
// Epilog size depends on frame size and polling; measure by re-emitting.
1891 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1892 // Variable size. Determine dynamically.
1893 return MachNode::size(ra_);
1894 }
1895
1896 int MachEpilogNode::reloc() const {
1897 // Return number of relocatable values contained in this instruction.
1898 return 1; // 1 for polling page.
1899 }
1900
// Use the generic pipeline description for the epilog pseudo-instruction.
1901 const Pipeline * MachEpilogNode::pipeline() const {
1902 return MachNode::pipeline_class();
1903 }
1904
1905 //=============================================================================
1906
1907 static enum RC rc_class(OptoReg::Name reg) {
1908
1909 if (reg == OptoReg::Bad) {
1910 return rc_bad;
1911 }
1912
1913 // we have 32 int registers * 2 halves
1914 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1915
// Materialize the address of this node's stack slot (sp + offset) into its
// assigned register, for use as a BasicLock box pointer.
2171 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2172 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2173 int reg = ra_->get_encode(this);
2174
2175 // This add will handle any 24-bit signed offset. 24 bits allows an
2176 // 8 megabyte stack frame.
2177 __ add(as_Register(reg), sp, offset);
2178 }
2179
// Size of the emitted address computation: one add if the offset fits an
// add/sub immediate, otherwise two instructions (see emit's 24-bit note).
2180 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2181 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2182 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2183
2184 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2185 return NativeInstruction::instruction_size;
2186 } else {
2187 return 2 * NativeInstruction::instruction_size;
2188 }
2189 }
2190
2191 //=============================================================================
2192
2193 #ifndef PRODUCT
// Debug-only pretty printer for the unverified entry point: load receiver
// klass and the speculated klass from the inline-cache data, compare, and
// branch to the IC-miss stub on mismatch.
2194 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2195 {
2196 st->print_cr("# MachUEPNode");
2197 if (UseCompressedClassPointers) {
2198 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2199 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2200 st->print_cr("\tcmpw rscratch1, r10");
2201 } else {
// Fixed copy-pasted annotation: this branch handles the UNcompressed case.
2202 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2203 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# klass");
2204 st->print_cr("\tcmp rscratch1, r10");
2205 }
2206 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2207 }
2208 #endif
2209
// Emit the inline-cache receiver-klass check for the unverified entry point.
2210 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2211 {
2212 __ ic_check(InteriorEntryAlignment);
2213 }
2214
// UEP size varies with alignment padding; measure by re-emitting.
2215 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2216 {
2217 return MachNode::size(ra_);
2218 }
2219
2220 // REQUIRED EMIT CODE
2221
2222 //=============================================================================
2223
2224 // Emit exception handler code.
2225 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
2226 {
2227 // mov rscratch1 #exception_blob_entry_point
2228 // br rscratch1
2229 // Note that the code buffer's insts_mark is always relative to insts.
2230 // That's why we must use the macroassembler to generate a handler.
2231 address base = __ start_a_stub(size_exception_handler());
2232 if (base == nullptr) {
2233 ciEnv::current()->record_failure("CodeCache is full");
2234 return 0; // CodeBuffer::expand failed
2235 }
2236 int offset = __ offset();
2237 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2238 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2239 __ end_a_stub();
3653 %}
3654
// Encoding for a dynamic (inline-cache) Java call; bails out compilation if
// the code cache cannot hold the call site.
3655 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3656 int method_index = resolved_method_index(masm);
3657 address call = __ ic_call((address)$meth$$method, method_index);
3658 if (call == nullptr) {
3659 ciEnv::current()->record_failure("CodeCache is full");
3660 return;
3661 }
// post_call_nop marks the return site for deoptimization patching.
3662 __ post_call_nop();
// SVE: callee may have clobbered the all-true predicate; restore it.
3663 if (Compile::current()->max_vector_size() > 0) {
3664 __ reinitialize_ptrue();
3665 }
3666 %}
3667
// Post-call encoding; only used to verify stack depth when requested.
3668 enc_class aarch64_enc_call_epilog() %{
3669 if (VerifyStackAtCalls) {
3670 // Check that stack depth is unchanged: find majik cookie on stack
3671 __ call_Unimplemented();
3672 }
3673 %}
3674
3675 enc_class aarch64_enc_java_to_runtime(method meth) %{
3676 // some calls to generated routines (arraycopy code) are scheduled
3677 // by C2 as runtime calls. if so we can call them using a br (they
3678 // will be in a reachable segment) otherwise we have to use a blr
3679 // which loads the absolute address into a register.
3680 address entry = (address)$meth$$method;
3681 CodeBlob *cb = CodeCache::find_blob(entry);
3682 if (cb) {
3683 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3684 if (call == nullptr) {
3685 ciEnv::current()->record_failure("CodeCache is full");
3686 return;
3687 }
3688 __ post_call_nop();
3689 } else {
3690 Label retaddr;
3691 __ adr(rscratch2, retaddr);
3692 __ lea(rscratch1, RuntimeAddress(entry));
// Load a 64-bit immediate into a long register.
6660 instruct loadConL(iRegLNoSp dst, immL src)
6661 %{
6662 match(Set dst src);
6663
6664 ins_cost(INSN_COST);
6665 format %{ "mov $dst, $src\t# long" %}
6666
6667 ins_encode( aarch64_enc_mov_imm(dst, src) );
6668
6669 ins_pipe(ialu_imm);
6670 %}
6671
6672 // Load Pointer Constant
6673
// Load a pointer constant into a pointer register.
6674 instruct loadConP(iRegPNoSp dst, immP con)
6675 %{
6676 match(Set dst con);
6677
6678 ins_cost(INSN_COST * 4);
6679 format %{
// Dropped stray trailing "\n\t" — this is a single-line format string.
6680 "mov $dst, $con\t# ptr"
6681 %}
6682
6683 ins_encode(aarch64_enc_mov_p(dst, con));
6684
6685 ins_pipe(ialu_imm);
6686 %}
6687
6688 // Load Null Pointer Constant
6689
// Load the null pointer constant.
6690 instruct loadConP0(iRegPNoSp dst, immP0 con)
6691 %{
6692 match(Set dst con);
6693
6694 ins_cost(INSN_COST);
6695 format %{ "mov $dst, $con\t# nullptr ptr" %}
6696
6697 ins_encode(aarch64_enc_mov_p0(dst, con));
6698
6699 ins_pipe(ialu_imm);
6700 %}
7881 %}
7882
7883 // ============================================================================
7884 // Cast/Convert Instructions
7885
// Reinterpret a long as a pointer; mov elided when src and dst coincide.
7886 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7887 match(Set dst (CastX2P src));
7888
7889 ins_cost(INSN_COST);
7890 format %{ "mov $dst, $src\t# long -> ptr" %}
7891
7892 ins_encode %{
7893 if ($dst$$reg != $src$$reg) {
7894 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7895 }
7896 %}
7897
7898 ins_pipe(ialu_reg);
7899 %}
7900
// Reinterpret a pointer as a long; mov elided when src and dst coincide.
7901 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7902 match(Set dst (CastP2X src));
7903
7904 ins_cost(INSN_COST);
7905 format %{ "mov $dst, $src\t# ptr -> long" %}
7906
7907 ins_encode %{
7908 if ($dst$$reg != $src$$reg) {
7909 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7910 }
7911 %}
7912
7913 ins_pipe(ialu_reg);
7914 %}
7915
7916 // Convert oop into int for vectors alignment masking
7917 instruct convP2I(iRegINoSp dst, iRegP src) %{
7918 match(Set dst (ConvL2I (CastP2X src)));
7919
7920 ins_cost(INSN_COST);
14702
14703 match(Set dst (MoveL2D src));
14704
14705 effect(DEF dst, USE src);
14706
14707 ins_cost(INSN_COST);
14708
14709 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14710
14711 ins_encode %{
14712 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14713 %}
14714
14715 ins_pipe(fp_l2d);
14716
14717 %}
14718
14719 // ============================================================================
14720 // clearing of an array
14721
// Zero cnt words starting at base; both inputs are clobbered. Bails out
// compilation if zero_words cannot reach its stub (code cache full).
14722 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14723 %{
14724 match(Set dummy (ClearArray cnt base));
14725 effect(USE_KILL cnt, USE_KILL base, KILL cr);
14726
14727 ins_cost(4 * INSN_COST);
14728 format %{ "ClearArray $cnt, $base" %}
14729
14730 ins_encode %{
14731 address tpc = __ zero_words($base$$Register, $cnt$$Register);
14732 if (tpc == nullptr) {
14733 ciEnv::current()->record_failure("CodeCache is full");
14734 return;
14735 }
14736 %}
14737
14738 ins_pipe(pipe_class_memory);
14739 %}
14740
// Zero a compile-time-constant number of words below the block-zeroing
// threshold; uses an inline store sequence via a temp register.
14741 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
14742 %{
// Only when the constant count is small enough that inline stores beat
// the block-zeroing (DC ZVA) path.
14743 predicate((uint64_t)n->in(2)->get_long()
14744 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
14745 match(Set dummy (ClearArray cnt base));
14746 effect(TEMP temp, USE_KILL base, KILL cr);
14747
14748 ins_cost(4 * INSN_COST);
14749 format %{ "ClearArray $cnt, $base" %}
14750
14751 ins_encode %{
14752 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
14753 if (tpc == nullptr) {
14754 ciEnv::current()->record_failure("CodeCache is full");
14755 return;
14756 }
14757 %}
14758
14759 ins_pipe(pipe_class_memory);
14760 %}
14761
14762 // ============================================================================
14763 // Overflow Math Instructions
14764
16055
16056 // Call Runtime Instruction
16057
// Direct call to a leaf runtime routine (no Java frame, no safepoint).
16058 instruct CallLeafDirect(method meth)
16059 %{
16060 match(CallLeaf);
16061
16062 effect(USE meth);
16063
16064 ins_cost(CALL_COST);
16065
16066 format %{ "CALL, runtime leaf $meth" %}
16067
16068 ins_encode( aarch64_enc_java_to_runtime(meth) );
16069
16070 ins_pipe(pipe_class_call);
16071 %}
16072
16073 // Call Runtime Instruction
16074
// Direct call to a leaf runtime routine that does not touch FP registers.
16075 instruct CallLeafNoFPDirect(method meth)
16076 %{
16077 match(CallLeafNoFP);
16078
16079 effect(USE meth);
16080
16081 ins_cost(CALL_COST);
16082
16083 format %{ "CALL, runtime leaf nofp $meth" %}
16084
16085 ins_encode( aarch64_enc_java_to_runtime(meth) );
16086
16087 ins_pipe(pipe_class_call);
16088 %}
16089
16090 // Tail Call; Jump from runtime stub to Java code.
16091 // Also known as an 'interprocedural jump'.
16092 // Target of jump will eventually return to caller.
16093 // TailJump below removes the return address.
16094 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16095 %{
16096 match(TailCall jump_target method_ptr);
|
1631
// Byte offset from the start of a dynamic Java call sequence to the return
// address: 4 instructions (movz, movk, movk, bl) * 4 bytes.
1632 int MachCallDynamicJavaNode::ret_addr_offset()
1633 {
1634 return 16; // movz, movk, movk, bl
1635 }
1636
// Byte offset from the start of a runtime call sequence to the return address.
1637 int MachCallRuntimeNode::ret_addr_offset() {
1638 // for generated stubs the call will be
1639 // bl(addr)
1640 // or with far branches
1641 // bl(trampoline_stub)
1642 // for real runtime callouts it will be six instructions
1643 // see aarch64_enc_java_to_runtime
1644 // adr(rscratch2, retaddr)
1645 // lea(rscratch1, RuntimeAddress(addr)
1646 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1647 // blr(rscratch1)
1648 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1649 if (cb) {
1650 return 1 * NativeInstruction::instruction_size;
1651 } else if (_entry_point == nullptr) {
// Indirect leaf call through a register: a single blr (see CallLeafNoFPIndirect).
1652 // See CallLeafNoFPIndirect
1653 return 1 * NativeInstruction::instruction_size;
1654 } else {
1655 return 6 * NativeInstruction::instruction_size;
1656 }
1657 }
1658
1659 //=============================================================================
1660
1661 #ifndef PRODUCT
// Debug-only pretty printer for the breakpoint pseudo-instruction.
1662 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1663 st->print("BREAKPOINT");
1664 }
1665 #endif
1666
// Emit a hardware breakpoint (AArch64 BRK #0).
1667 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1668 __ brk(0);
1669 }
1670
// Size of the emitted breakpoint; defer to the generic measure-by-emitting path.
1671 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1672 return MachNode::size(ra_);
1673 }
1742 if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
1743 st->print("\n\t");
1744 st->print("ldr rscratch1, [guard]\n\t");
1745 st->print("dmb ishld\n\t");
1746 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1747 st->print("cmp rscratch1, rscratch2\n\t");
1748 st->print("b.eq skip");
1749 st->print("\n\t");
1750 st->print("blr #nmethod_entry_barrier_stub\n\t");
1751 st->print("b skip\n\t");
1752 st->print("guard: int\n\t");
1753 st->print("\n\t");
1754 st->print("skip:\n\t");
1755 }
1756 }
1757 #endif
1758
// Emit the method prolog (inline-type-aware variant): patchable nop,
// verified_entry frame setup, nmethod entry barrier, then bind the label the
// inline-type verified entry (MachVEPNode) branches to.
1759 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1760 Compile* C = ra_->C;
1761
1762 // insert a nop at the start of the prolog so we can patch in a
1763 // branch if we need to invalidate the method later
1764 __ nop();
1765
// Builds the frame; the 0 sp_inc means no stack repair is needed here.
1766 __ verified_entry(C, 0);
1767
1768 if (C->stub_function() == nullptr) {
1769 __ entry_barrier();
1770 }
1771
// Bind the real verified-entry label only when actually emitting code
// (not while measuring size, where the label would be bound twice).
1772 if (!Compile::current()->output()->in_scratch_emit_size()) {
1773 __ bind(*_verified_entry);
1774 }
1775
1776 if (VerifyStackAtCalls) {
1777 Unimplemented();
1778 }
1779
1780 C->output()->set_frame_complete(__ offset());
1781
1782 if (C->has_mach_constant_base_node()) {
1783 // NOTE: We set the table base offset here because users might be
1784 // emitted before MachConstantBaseNode.
1785 ConstantTable& constant_table = C->output()->constant_table();
1786 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1787 }
1788 }
1789
// Number of relocation entries contributed by the prolog (none).
1790 int MachPrologNode::reloc() const
1791 {
1792 return 0;
1793 }
1794
1795 //=============================================================================
1796
1797 #ifndef PRODUCT
// Debug-only pretty printer mirroring MachEpilogNode::emit: frame pop,
// optional ROP-protection epilogue, and the return safepoint poll.
1798 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1799 Compile* C = ra_->C;
1800 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1801
1802 st->print("# pop frame %d\n\t",framesize);
1803
1804 if (framesize == 0) {
1805 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1806 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1807 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1808 st->print("add sp, sp, #%d\n\t", framesize);
1809 } else {
// NOTE(review): large-frame path — presumably preceded by an sp adjustment
// (lines appear elided in this dump); verify against remove_frame().
1812 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1813 }
1814 if (VM_Version::use_rop_protection()) {
// autiaz authenticates lr; the dummy load faults early if it was tampered.
1815 st->print("autiaz\n\t");
1816 st->print("ldr zr, [lr]\n\t");
1817 }
1818
1819 if (do_polling() && C->is_method_compilation()) {
1820 st->print("# test polling word\n\t");
1821 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1822 st->print("cmp sp, rscratch1\n\t");
1823 st->print("bhi #slow_path");
1824 }
1825 }
1826 #endif
1827
// Emit the method epilog: tear down the frame (with stack repair for
// scalarized inline-type calling convention if needed), check reserved-stack
// access, and perform the return-site safepoint poll.
1828 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1829 Compile* C = ra_->C;
1830 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1831
1832 __ remove_frame(framesize, C->needs_stack_repair());
1833
1834 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1835 __ reserved_stack_check();
1836 }
1837
1838 if (do_polling() && C->is_method_compilation()) {
// When only measuring size, branch to a dummy label instead of allocating
// a real C2SafepointPollStub.
1839 Label dummy_label;
1840 Label* code_stub = &dummy_label;
1841 if (!C->output()->in_scratch_emit_size()) {
1842 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1843 C->output()->add_stub(stub);
1844 code_stub = &stub->entry();
1845 }
1846 __ relocate(relocInfo::poll_return_type);
1847 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1848 }
1849 }
1850
1851 int MachEpilogNode::reloc() const {
1852 // Return number of relocatable values contained in this instruction.
1853 return 1; // 1 for polling page.
1854 }
1855
// Use the generic pipeline description for the epilog pseudo-instruction.
1856 const Pipeline * MachEpilogNode::pipeline() const {
1857 return MachNode::pipeline_class();
1858 }
1859
1860 //=============================================================================
1861
1862 static enum RC rc_class(OptoReg::Name reg) {
1863
1864 if (reg == OptoReg::Bad) {
1865 return rc_bad;
1866 }
1867
1868 // we have 32 int registers * 2 halves
1869 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1870
// Materialize the address of this node's stack slot (sp + offset) into its
// assigned register, for use as a BasicLock box pointer.
2126 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2127 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2128 int reg = ra_->get_encode(this);
2129
2130 // This add will handle any 24-bit signed offset. 24 bits allows an
2131 // 8 megabyte stack frame.
2132 __ add(as_Register(reg), sp, offset);
2133 }
2134
// Size of the emitted address computation: one add if the offset fits an
// add/sub immediate, otherwise two instructions.
2135 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2136 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2137 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2138
2139 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2140 return NativeInstruction::instruction_size;
2141 } else {
2142 return 2 * NativeInstruction::instruction_size;
2143 }
2144 }
2145
2146 //=============================================================================
2147 #ifndef PRODUCT
// Debug-only pretty printer for the inline-type verified/unverified entry.
2148 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2149 {
2150 st->print_cr("# MachVEPNode");
2151 if (!_verified) {
2152 st->print_cr("\t load_class");
2153 } else {
2154 st->print_cr("\t unpack_inline_arg");
2155 }
2156 }
2157 #endif
2158
// Emit the inline-type entry point. Unverified: just the IC check.
// Verified: build a temporary frame, unpack scalarized inline-type args
// passed as oops, then branch to the method's real verified entry.
2159 void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
2160 {
2161 if (!_verified) {
2162 __ ic_check(1);
2163 } else {
2164 // insert a nop at the start of the prolog so we can patch in a
2165 // branch if we need to invalidate the method later
2166 __ nop();
2167
2168 // TODO 8284443 Avoid creation of temporary frame
2169 if (ra_->C->stub_function() == nullptr) {
// Temporary frame just to run the entry barrier, then torn down again.
2170 __ verified_entry(ra_->C, 0);
2171 __ entry_barrier();
2172 int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
2173 __ remove_frame(framesize, false);
2174 }
2175 // Unpack inline type args passed as oop and then jump to
2176 // the verified entry point (skipping the unverified entry).
2177 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2178 // Emit code for verified entry and save increment for stack repair on return
2179 __ verified_entry(ra_->C, sp_inc);
// While measuring size, branch to a dummy label; the real _verified_entry
// label is bound by MachPrologNode::emit.
2180 if (Compile::current()->output()->in_scratch_emit_size()) {
2181 Label dummy_verified_entry;
2182 __ b(dummy_verified_entry);
2183 } else {
2184 __ b(*_verified_entry);
2185 }
2186 }
2187 }
2188
2189 //=============================================================================
2190 #ifndef PRODUCT
// Debug-only pretty printer for the unverified entry point: load receiver
// klass and the speculated klass from the inline-cache data, compare, and
// branch to the IC-miss stub on mismatch.
2191 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2192 {
2193 st->print_cr("# MachUEPNode");
2194 if (UseCompressedClassPointers) {
2195 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2196 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2197 st->print_cr("\tcmpw rscratch1, r10");
2198 } else {
// Fixed copy-pasted annotation: this branch handles the UNcompressed case.
2199 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2200 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# klass");
2201 st->print_cr("\tcmp rscratch1, r10");
2202 }
2203 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2204 }
2205 #endif
2206
// Emit the inline-cache receiver-klass check for the unverified entry point.
2207 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2208 {
2209 __ ic_check(InteriorEntryAlignment);
2210 }
2211
2212 // REQUIRED EMIT CODE
2213
2214 //=============================================================================
2215
2216 // Emit exception handler code.
2217 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
2218 {
2219 // mov rscratch1 #exception_blob_entry_point
2220 // br rscratch1
2221 // Note that the code buffer's insts_mark is always relative to insts.
2222 // That's why we must use the macroassembler to generate a handler.
2223 address base = __ start_a_stub(size_exception_handler());
2224 if (base == nullptr) {
2225 ciEnv::current()->record_failure("CodeCache is full");
2226 return 0; // CodeBuffer::expand failed
2227 }
2228 int offset = __ offset();
2229 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2230 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2231 __ end_a_stub();
3645 %}
3646
// Encoding for a dynamic (inline-cache) Java call; bails out compilation if
// the code cache cannot hold the call site.
3647 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3648 int method_index = resolved_method_index(masm);
3649 address call = __ ic_call((address)$meth$$method, method_index);
3650 if (call == nullptr) {
3651 ciEnv::current()->record_failure("CodeCache is full");
3652 return;
3653 }
// post_call_nop marks the return site for deoptimization patching.
3654 __ post_call_nop();
// SVE: callee may have clobbered the all-true predicate; restore it.
3655 if (Compile::current()->max_vector_size() > 0) {
3656 __ reinitialize_ptrue();
3657 }
3658 %}
3659
// Post-call encoding (inline-type aware): optionally verify stack depth and,
// for calls returning an inline type as scalarized fields, synthesize the
// IsInit projection value and normalize r0 for use as an oop.
3660 enc_class aarch64_enc_call_epilog() %{
3661 if (VerifyStackAtCalls) {
3662 // Check that stack depth is unchanged: find majik cookie on stack
3663 __ call_Unimplemented();
3664 }
3665 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
3666 // The last return value is not set by the callee but used to pass IsInit information to compiled code.
3667 // Search for the corresponding projection, get the register and emit code that initialized it.
3668 uint con = (tf()->range_cc()->cnt() - 1);
3669 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3670 ProjNode* proj = fast_out(i)->as_Proj();
3671 if (proj->_con == con) {
3672 // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
3673 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3674 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
// If the projection lives on the stack, compute into a scratch register
// first, then spill it to the assigned stack slot.
3675 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3676 __ cmp(r0, zr);
3677 __ cset(toReg, Assembler::NE);
3678 if (reg->is_stack()) {
3679 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3680 __ str(toReg, Address(sp, st_off));
3681 }
3682 break;
3683 }
3684 }
3685 if (return_value_is_used()) {
3686 // An inline type is returned as fields in multiple registers.
3687 // R0 either contains an oop if the inline type is buffered or a pointer
3688 // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
3689 // if the lowest bit is set to allow C2 to use the oop after null checking.
3690 // r0 &= (r0 & 1) - 1
3691 __ andr(rscratch1, r0, 0x1);
3692 __ sub(rscratch1, rscratch1, 0x1);
3693 __ andr(r0, r0, rscratch1);
3694 }
3695 }
3696 %}
3697
3698 enc_class aarch64_enc_java_to_runtime(method meth) %{
3699 // some calls to generated routines (arraycopy code) are scheduled
3700 // by C2 as runtime calls. if so we can call them using a br (they
3701 // will be in a reachable segment) otherwise we have to use a blr
3702 // which loads the absolute address into a register.
3703 address entry = (address)$meth$$method;
3704 CodeBlob *cb = CodeCache::find_blob(entry);
3705 if (cb) {
3706 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3707 if (call == nullptr) {
3708 ciEnv::current()->record_failure("CodeCache is full");
3709 return;
3710 }
3711 __ post_call_nop();
3712 } else {
3713 Label retaddr;
3714 __ adr(rscratch2, retaddr);
3715 __ lea(rscratch1, RuntimeAddress(entry));
// Load a 64-bit immediate into a long register.
6683 instruct loadConL(iRegLNoSp dst, immL src)
6684 %{
6685 match(Set dst src);
6686
6687 ins_cost(INSN_COST);
6688 format %{ "mov $dst, $src\t# long" %}
6689
6690 ins_encode( aarch64_enc_mov_imm(dst, src) );
6691
6692 ins_pipe(ialu_imm);
6693 %}
6694
6695 // Load Pointer Constant
6696
// Load a pointer constant into a pointer register.
6697 instruct loadConP(iRegPNoSp dst, immP con)
6698 %{
6699 match(Set dst con);
6700
6701 ins_cost(INSN_COST * 4);
6702 format %{
6703 "mov $dst, $con\t# ptr"
6704 %}
6705
6706 ins_encode(aarch64_enc_mov_p(dst, con));
6707
6708 ins_pipe(ialu_imm);
6709 %}
6710
6711 // Load Null Pointer Constant
6712
// Load the null pointer constant.
6713 instruct loadConP0(iRegPNoSp dst, immP0 con)
6714 %{
6715 match(Set dst con);
6716
6717 ins_cost(INSN_COST);
6718 format %{ "mov $dst, $con\t# nullptr ptr" %}
6719
6720 ins_encode(aarch64_enc_mov_p0(dst, con));
6721
6722 ins_pipe(ialu_imm);
6723 %}
7904 %}
7905
7906 // ============================================================================
7907 // Cast/Convert Instructions
7908
// Reinterpret a long as a pointer; mov elided when src and dst coincide.
7909 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7910 match(Set dst (CastX2P src));
7911
7912 ins_cost(INSN_COST);
7913 format %{ "mov $dst, $src\t# long -> ptr" %}
7914
7915 ins_encode %{
7916 if ($dst$$reg != $src$$reg) {
7917 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7918 }
7919 %}
7920
7921 ins_pipe(ialu_reg);
7922 %}
7923
// Reinterpret a narrow (compressed) oop register as a long.
// NOTE(review): matches CastP2X with a narrow src operand — presumably the
// matcher selects this form when the input is a narrow oop; confirm.
7924 instruct castN2X(iRegLNoSp dst, iRegN src) %{
7925 match(Set dst (CastP2X src));
7926
7927 ins_cost(INSN_COST);
7928 format %{ "mov $dst, $src\t# ptr -> long" %}
7929
7930 ins_encode %{
7931 if ($dst$$reg != $src$$reg) {
7932 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7933 }
7934 %}
7935
7936 ins_pipe(ialu_reg);
7937 %}
7938
// Reinterpret a pointer as a long; mov elided when src and dst coincide.
7939 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7940 match(Set dst (CastP2X src));
7941
7942 ins_cost(INSN_COST);
7943 format %{ "mov $dst, $src\t# ptr -> long" %}
7944
7945 ins_encode %{
7946 if ($dst$$reg != $src$$reg) {
7947 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7948 }
7949 %}
7950
7951 ins_pipe(ialu_reg);
7952 %}
7953
7954 // Convert oop into int for vectors alignment masking
7955 instruct convP2I(iRegINoSp dst, iRegP src) %{
7956 match(Set dst (ConvL2I (CastP2X src)));
7957
7958 ins_cost(INSN_COST);
14740
14741 match(Set dst (MoveL2D src));
14742
14743 effect(DEF dst, USE src);
14744
14745 ins_cost(INSN_COST);
14746
14747 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14748
14749 ins_encode %{
14750 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14751 %}
14752
14753 ins_pipe(fp_l2d);
14754
14755 %}
14756
14757 // ============================================================================
14758 // clearing of an array
14759
// Zero cnt words at base (ClearArray with an explicit zero fill value);
// both count and base registers are clobbered.
14760 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
14761 %{
14762 match(Set dummy (ClearArray (Binary cnt base) zero))
14763 effect(USE_KILL cnt, USE_KILL base, KILL cr);
14764
14765 ins_cost(4 * INSN_COST);
14766 format %{ "ClearArray $cnt, $base" %}
14767
14768 ins_encode %{
14769 address tpc = __ zero_words($base$$Register, $cnt$$Register);
14770 if (tpc == nullptr) {
14771 ciEnv::current()->record_failure("CodeCache is full");
14772 return;
14773 }
14774 %}
14775
14776 ins_pipe(pipe_class_memory);
14777 %}
14778
// Fill cnt words at base with an arbitrary value (word-copy-only ClearArray,
// used e.g. to initialize flat inline-type arrays with a non-zero pattern).
14779 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
14780 %{
14781 predicate(((ClearArrayNode*)n)->word_copy_only());
14782 match(Set dummy (ClearArray (Binary cnt base) val));
14783 effect(USE_KILL cnt, USE_KILL base, KILL cr);
14784
14785 ins_cost(4 * INSN_COST);
14786 format %{ "ClearArray $cnt, $base, $val" %}
14787
14788 ins_encode %{
14789 __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
14790 %}
14791
14792 ins_pipe(pipe_class_memory);
14793 %}
14794
// Zero a compile-time-constant number of words below the block-zeroing
// threshold, only when a plain zero fill is wanted (not word-copy-only).
14795 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
14796 %{
14797 predicate((uint64_t)n->in(2)->get_long()
14798 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
14799 && !((ClearArrayNode*)n)->word_copy_only());
14800 match(Set dummy (ClearArray cnt base));
14801 effect(TEMP temp, USE_KILL base, KILL cr);
14802
14803 ins_cost(4 * INSN_COST);
14804 format %{ "ClearArray $cnt, $base" %}
14805
14806 ins_encode %{
14807 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
14808 if (tpc == nullptr) {
14809 ciEnv::current()->record_failure("CodeCache is full");
14810 return;
14811 }
14812 %}
14813
14814 ins_pipe(pipe_class_memory);
14815 %}
14816
14817 // ============================================================================
14818 // Overflow Math Instructions
14819
16110
16111 // Call Runtime Instruction
16112
// Direct call to a leaf runtime routine (no Java frame, no safepoint).
16113 instruct CallLeafDirect(method meth)
16114 %{
16115 match(CallLeaf);
16116
16117 effect(USE meth);
16118
16119 ins_cost(CALL_COST);
16120
16121 format %{ "CALL, runtime leaf $meth" %}
16122
16123 ins_encode( aarch64_enc_java_to_runtime(meth) );
16124
16125 ins_pipe(pipe_class_call);
16126 %}
16127
16128 // Call Runtime Instruction
16129
16130 // entry point is null, target holds the address to call
// Indirect leaf call through a register; selected when the call node has no
// static entry point (see the matching nullptr branch in
// MachCallRuntimeNode::ret_addr_offset).
16131 instruct CallLeafNoFPIndirect(iRegP target)
16132 %{
16133 predicate(n->as_Call()->entry_point() == nullptr);
16134
16135 match(CallLeafNoFP target);
16136
16137 ins_cost(CALL_COST);
16138
16139 format %{ "CALL, runtime leaf nofp indirect $target" %}
16140
16141 ins_encode %{
16142 __ blr($target$$Register);
16143 %}
16144
16145 ins_pipe(pipe_class_call);
16146 %}
16147
// Direct call to a leaf runtime routine that does not touch FP registers;
// only when a static entry point exists (otherwise CallLeafNoFPIndirect).
16148 instruct CallLeafNoFPDirect(method meth)
16149 %{
16150 predicate(n->as_Call()->entry_point() != nullptr);
16151
16152 match(CallLeafNoFP);
16153
16154 effect(USE meth);
16155
16156 ins_cost(CALL_COST);
16157
16158 format %{ "CALL, runtime leaf nofp $meth" %}
16159
16160 ins_encode( aarch64_enc_java_to_runtime(meth) );
16161
16162 ins_pipe(pipe_class_call);
16163 %}
16164
16165 // Tail Call; Jump from runtime stub to Java code.
16166 // Also known as an 'interprocedural jump'.
16167 // Target of jump will eventually return to caller.
16168 // TailJump below removes the return address.
16169 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16170 %{
16171 match(TailCall jump_target method_ptr);
|