1637
1638 int MachCallDynamicJavaNode::ret_addr_offset()
1639 {
1640 return 16; // movz, movk, movk, bl: four 4-byte instructions
1641 }
1642
1643 int MachCallRuntimeNode::ret_addr_offset() {
1644 // for generated stubs the call will be
1645 // bl(addr)
1646 // or with far branches
1647 // bl(trampoline_stub)
1648 // for real runtime callouts it will be six instructions
1649 // see aarch64_enc_java_to_runtime
1650 // adr(rscratch2, retaddr)
1651 // str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
1652 // lea(rscratch1, RuntimeAddress(addr))
1653 // blr(rscratch1)
1654 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1655 if (cb) {
1656 return 1 * NativeInstruction::instruction_size;
1657 } else {
1658 return 6 * NativeInstruction::instruction_size;
1659 }
1660 }
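// n.b. the six-instruction count above assumes that
// lea(rscratch1, RuntimeAddress(addr)) materializes the target with a
// movz/movk/movk sequence: adr (1) + str (1) + lea (3) + blr (1) = 6
// instructions of 4 bytes each.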
1661
1662 //=============================================================================
1663
1664 #ifndef PRODUCT
1665 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1666 st->print("BREAKPOINT");
1667 }
1668 #endif
1669
1670 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1671 __ brk(0);
1672 }
1673
1674 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1675 return MachNode::size(ra_);
1676 }
1745 if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
1746 st->print("\n\t");
1747 st->print("ldr rscratch1, [guard]\n\t");
1748 st->print("dmb ishld\n\t");
1749 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1750 st->print("cmp rscratch1, rscratch2\n\t");
1751 st->print("b.eq skip");
1752 st->print("\n\t");
1753 st->print("blr #nmethod_entry_barrier_stub\n\t");
1754 st->print("b skip\n\t");
1755 st->print("guard: int\n\t");
1756 st->print("\n\t");
1757 st->print("skip:\n\t");
1758 }
1759 }
1760 #endif
1761
1762 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1763 Compile* C = ra_->C;
1764
1765 // n.b. frame size includes space for return pc and rfp
1766 const int framesize = C->output()->frame_size_in_bytes();
1767
1768 // insert a nop at the start of the prolog so we can patch in a
1769 // branch if we need to invalidate the method later
1770 __ nop();
1771
1772 if (C->clinit_barrier_on_entry()) {
1773 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1774
1775 Label L_skip_barrier;
1776
1777 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1778 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1779 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1780 __ bind(L_skip_barrier);
1781 }
1782
1783 if (C->max_vector_size() > 0) {
1784 __ reinitialize_ptrue();
1785 }
1786
1787 int bangsize = C->output()->bang_size_in_bytes();
1788 if (C->output()->need_stack_bang(bangsize))
1789 __ generate_stack_overflow_check(bangsize);
1790
1791 __ build_frame(framesize);
1792
1793 if (C->stub_function() == nullptr) {
1794 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1795 if (BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
1796 // Dummy labels for just measuring the code size
1797 Label dummy_slow_path;
1798 Label dummy_continuation;
1799 Label dummy_guard;
1800 Label* slow_path = &dummy_slow_path;
1801 Label* continuation = &dummy_continuation;
1802 Label* guard = &dummy_guard;
1803 if (!Compile::current()->output()->in_scratch_emit_size()) {
1804 // Use real labels from the actual stub when we are not just measuring code size
1805 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
1806 Compile::current()->output()->add_stub(stub);
1807 slow_path = &stub->entry();
1808 continuation = &stub->continuation();
1809 guard = &stub->guard();
1810 }
1811 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
1812 bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
1813 }
1814 }
1815
1816 if (VerifyStackAtCalls) {
1817 Unimplemented();
1818 }
1819
1820 C->output()->set_frame_complete(__ offset());
1821
1822 if (C->has_mach_constant_base_node()) {
1823 // NOTE: We set the table base offset here because users of the constant
1824 // table might be emitted before MachConstantBaseNode.
1825 ConstantTable& constant_table = C->output()->constant_table();
1826 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1827 }
1828 }
1829
1830 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1831 {
1832 return MachNode::size(ra_); // too many variables; just compute it
1833 // the hard way
1834 }
1835
1836 int MachPrologNode::reloc() const
1837 {
1838 return 0;
1839 }
1840
1841 //=============================================================================
1842
1843 #ifndef PRODUCT
1844 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1845 Compile* C = ra_->C;
1846 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1847
1848 st->print("# pop frame %d\n\t",framesize);
1849
1850 if (framesize == 0) {
1851 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1852 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1853 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1854 st->print("add sp, sp, #%d\n\t", framesize);
1855 } else {
1858 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1859 }
1860 if (VM_Version::use_rop_protection()) {
1861 st->print("autiaz\n\t");
1862 st->print("ldr zr, [lr]\n\t");
1863 }
1864
1865 if (do_polling() && C->is_method_compilation()) {
1866 st->print("# test polling word\n\t");
1867 st->print("ldr rscratch1, [rthread, #%d]\n\t", in_bytes(JavaThread::polling_word_offset()));
1868 st->print("cmp sp, rscratch1\n\t");
1869 st->print("bhi #slow_path");
1870 }
1871 }
1872 #endif
1873
1874 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1875 Compile* C = ra_->C;
1876 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1877
1878 __ remove_frame(framesize);
1879
1880 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1881 __ reserved_stack_check();
1882 }
1883
1884 if (do_polling() && C->is_method_compilation()) {
1885 Label dummy_label;
1886 Label* code_stub = &dummy_label;
1887 if (!C->output()->in_scratch_emit_size()) {
1888 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1889 C->output()->add_stub(stub);
1890 code_stub = &stub->entry();
1891 }
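// (as with the entry barrier stub in the prolog, a dummy label stands in
// while C2 measures code size in a scratch buffer)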
1892 __ relocate(relocInfo::poll_return_type);
1893 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1894 }
1895 }
1896
1897 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1898 // Variable size. Determine dynamically.
1899 return MachNode::size(ra_);
1900 }
1901
1902 int MachEpilogNode::reloc() const {
1903 // Return number of relocatable values contained in this instruction.
1904 return 1; // 1 for polling page.
1905 }
1906
1907 const Pipeline * MachEpilogNode::pipeline() const {
1908 return MachNode::pipeline_class();
1909 }
1910
1911 //=============================================================================
1912
1913 static enum RC rc_class(OptoReg::Name reg) {
1914
1915 if (reg == OptoReg::Bad) {
1916 return rc_bad;
1917 }
1918
1919 // we have 32 int registers * 2 halves
1920 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
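// (with 32 general registers at 2 slots each this comes to 64 stack slots)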
1921
2177 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2178 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2179 int reg = ra_->get_encode(this);
2180
2181 // This add will handle any 24-bit signed offset. 24 bits allows an
2182 // 8 megabyte stack frame.
2183 __ add(as_Register(reg), sp, offset);
2184 }
2185
2186 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2187 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2188 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2189
2190 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2191 return NativeInstruction::instruction_size;
2192 } else {
2193 return 2 * NativeInstruction::instruction_size;
2194 }
2195 }
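// A sketch of the two cases size() distinguishes, assuming the usual
// MacroAssembler fallback for offsets outside add/sub immediate range:
//
//   add  Rd, sp, #offset      // offset encodable as a 12-bit (shifted) imm
//
//   mov  rscratch2, #offset   // otherwise materialize the offset first,
//   add  Rd, sp, rscratch2    // then add register-to-register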
2196
2197 //=============================================================================
2198
2199 #ifndef PRODUCT
2200 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2201 {
2202 st->print_cr("# MachUEPNode");
2203 if (UseCompressedClassPointers) {
2204 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2205 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2206 st->print_cr("\tcmpw rscratch1, r10");
2207 } else {
2208 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2209 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# klass");
2210 st->print_cr("\tcmp rscratch1, r10");
2211 }
2212 st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
2213 }
2214 #endif
2215
2216 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2217 {
2218 __ ic_check(InteriorEntryAlignment);
2219 }
2220
2221 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2222 {
2223 return MachNode::size(ra_);
2224 }
2225
2226 // REQUIRED EMIT CODE
2227
2228 //=============================================================================
2229
2230 // Emit exception handler code.
2231 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
2232 {
2233 // mov rscratch1 #exception_blob_entry_point
2234 // br rscratch1
2235 // Note that the code buffer's insts_mark is always relative to insts.
2236 // That's why we must use the macroassembler to generate a handler.
2237 address base = __ start_a_stub(size_exception_handler());
2238 if (base == nullptr) {
2239 ciEnv::current()->record_failure("CodeCache is full");
2240 return 0; // CodeBuffer::expand failed
2241 }
2242 int offset = __ offset();
2243 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2244 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2245 __ end_a_stub();
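// (far_jump above is assumed to expand to the mov + br pair sketched in
// the comment at the top of this method, or a single b when the target
// is within branch range)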
3678 %}
3679
3680 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3681 int method_index = resolved_method_index(masm);
3682 address call = __ ic_call((address)$meth$$method, method_index);
3683 if (call == nullptr) {
3684 ciEnv::current()->record_failure("CodeCache is full");
3685 return;
3686 }
3687 __ post_call_nop();
3688 if (Compile::current()->max_vector_size() > 0) {
3689 __ reinitialize_ptrue();
3690 }
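// (reinitialize_ptrue() above assumes p7 caches the all-true SVE predicate
// for C2 and that the callee may have clobbered it)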
3691 %}
3692
3693 enc_class aarch64_enc_call_epilog() %{
3694 if (VerifyStackAtCalls) {
3695 // Check that stack depth is unchanged: find majik cookie on stack
3696 __ call_Unimplemented();
3697 }
3698 %}
3699
3700 enc_class aarch64_enc_java_to_runtime(method meth) %{
3701 // some calls to generated routines (arraycopy code) are scheduled
3702 // by C2 as runtime calls. if so we can call them using a bl (they
3703 // will be in a reachable segment) otherwise we have to use a blr
3704 // which loads the absolute address into a register.
3705 address entry = (address)$meth$$method;
3706 CodeBlob *cb = CodeCache::find_blob(entry);
3707 if (cb) {
3708 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3709 if (call == nullptr) {
3710 ciEnv::current()->record_failure("CodeCache is full");
3711 return;
3712 }
3713 __ post_call_nop();
3714 } else {
3715 Label retaddr;
3716 // Make the anchor frame walkable
3717 __ adr(rscratch2, retaddr);
6773 instruct loadConL(iRegLNoSp dst, immL src)
6774 %{
6775 match(Set dst src);
6776
6777 ins_cost(INSN_COST);
6778 format %{ "mov $dst, $src\t# long" %}
6779
6780 ins_encode( aarch64_enc_mov_imm(dst, src) );
6781
6782 ins_pipe(ialu_imm);
6783 %}
6784
6785 // Load Pointer Constant
6786
6787 instruct loadConP(iRegPNoSp dst, immP con)
6788 %{
6789 match(Set dst con);
6790
6791 ins_cost(INSN_COST * 4);
6792 format %{
6793 "mov $dst, $con\t# ptr\n\t"
6794 %}
6795
6796 ins_encode(aarch64_enc_mov_p(dst, con));
6797
6798 ins_pipe(ialu_imm);
6799 %}
6800
6801 // Load Null Pointer Constant
6802
6803 instruct loadConP0(iRegPNoSp dst, immP0 con)
6804 %{
6805 match(Set dst con);
6806
6807 ins_cost(INSN_COST);
6808 format %{ "mov $dst, $con\t# nullptr ptr" %}
6809
6810 ins_encode(aarch64_enc_mov_p0(dst, con));
6811
6812 ins_pipe(ialu_imm);
6813 %}
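// (aarch64_enc_mov_p0 is assumed to reduce to a single mov from zr, which
// is why this costs plain INSN_COST versus 4 * INSN_COST for loadConP)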
7969 %}
7970
7971 // ============================================================================
7972 // Cast/Convert Instructions
7973
7974 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7975 match(Set dst (CastX2P src));
7976
7977 ins_cost(INSN_COST);
7978 format %{ "mov $dst, $src\t# long -> ptr" %}
7979
7980 ins_encode %{
7981 if ($dst$$reg != $src$$reg) {
7982 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7983 }
7984 %}
7985
7986 ins_pipe(ialu_reg);
7987 %}
7988
7989 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7990 match(Set dst (CastP2X src));
7991
7992 ins_cost(INSN_COST);
7993 format %{ "mov $dst, $src\t# ptr -> long" %}
7994
7995 ins_encode %{
7996 if ($dst$$reg != $src$$reg) {
7997 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7998 }
7999 %}
8000
8001 ins_pipe(ialu_reg);
8002 %}
8003
8004 // Convert oop into int for vector alignment masking
8005 instruct convP2I(iRegINoSp dst, iRegP src) %{
8006 match(Set dst (ConvL2I (CastP2X src)));
8007
8008 ins_cost(INSN_COST);
14794
14795 match(Set dst (MoveL2D src));
14796
14797 effect(DEF dst, USE src);
14798
14799 ins_cost(INSN_COST);
14800
14801 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14802
14803 ins_encode %{
14804 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14805 %}
14806
14807 ins_pipe(fp_l2d);
14808
14809 %}
14810
14811 // ============================================================================
14812 // clearing of an array
14813
14814 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14815 %{
14816 match(Set dummy (ClearArray cnt base));
14817 effect(USE_KILL cnt, USE_KILL base, KILL cr);
14818
14819 ins_cost(4 * INSN_COST);
14820 format %{ "ClearArray $cnt, $base" %}
14821
14822 ins_encode %{
14823 address tpc = __ zero_words($base$$Register, $cnt$$Register);
14824 if (tpc == nullptr) {
14825 ciEnv::current()->record_failure("CodeCache is full");
14826 return;
14827 }
14828 %}
14829
14830 ins_pipe(pipe_class_memory);
14831 %}
14832
14833 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
14834 %{
14835 predicate((uint64_t)n->in(2)->get_long()
14836 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
14837 match(Set dummy (ClearArray cnt base));
14838 effect(TEMP temp, USE_KILL base, KILL cr);
14839
14840 ins_cost(4 * INSN_COST);
14841 format %{ "ClearArray $cnt, $base" %}
14842
14843 ins_encode %{
14844 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
14845 if (tpc == nullptr) {
14846 ciEnv::current()->record_failure("CodeCache is full");
14847 return;
14848 }
14849 %}
14850
14851 ins_pipe(pipe_class_memory);
14852 %}
14853
14854 // ============================================================================
14855 // Overflow Math Instructions
14856
16167 %}
16168
16169 // Call Runtime Instruction without safepoint and with vector arguments
16170 instruct CallLeafDirectVector(method meth)
16171 %{
16172 match(CallLeafVector);
16173
16174 effect(USE meth);
16175
16176 ins_cost(CALL_COST);
16177
16178 format %{ "CALL, runtime leaf vector $meth" %}
16179
16180 ins_encode(aarch64_enc_java_to_runtime(meth));
16181
16182 ins_pipe(pipe_class_call);
16183 %}
16184
16185 // Call Runtime Instruction
16186
16187 instruct CallLeafNoFPDirect(method meth)
16188 %{
16189 match(CallLeafNoFP);
16190
16191 effect(USE meth);
16192
16193 ins_cost(CALL_COST);
16194
16195 format %{ "CALL, runtime leaf nofp $meth" %}
16196
16197 ins_encode( aarch64_enc_java_to_runtime(meth) );
16198
16199 ins_pipe(pipe_class_call);
16200 %}
16201
16202 // Tail Call; Jump from runtime stub to Java code.
16203 // Also known as an 'interprocedural jump'.
16204 // Target of jump will eventually return to caller.
16205 // TailJump below removes the return address.
16206 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16207 // emitted just above the TailCall which has reset rfp to the caller state.
16208 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
1637
1638 int MachCallDynamicJavaNode::ret_addr_offset()
1639 {
1640 return 16; // movz, movk, movk, bl: four 4-byte instructions
1641 }
1642
1643 int MachCallRuntimeNode::ret_addr_offset() {
1644 // for generated stubs the call will be
1645 // bl(addr)
1646 // or with far branches
1647 // bl(trampoline_stub)
1648 // for real runtime callouts it will be six instructions
1649 // see aarch64_enc_java_to_runtime
1650 // adr(rscratch2, retaddr)
1651 // str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
1652 // lea(rscratch1, RuntimeAddress(addr))
1653 // blr(rscratch1)
1654 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1655 if (cb) {
1656 return 1 * NativeInstruction::instruction_size;
1657 } else if (_entry_point == nullptr) {
1658 // See CallLeafNoFPIndirect
1659 return 1 * NativeInstruction::instruction_size;
1660 } else {
1661 return 6 * NativeInstruction::instruction_size;
1662 }
1663 }
1664
1665 //=============================================================================
1666
1667 #ifndef PRODUCT
1668 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1669 st->print("BREAKPOINT");
1670 }
1671 #endif
1672
1673 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1674 __ brk(0);
1675 }
1676
1677 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1678 return MachNode::size(ra_);
1679 }
1748 if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
1749 st->print("\n\t");
1750 st->print("ldr rscratch1, [guard]\n\t");
1751 st->print("dmb ishld\n\t");
1752 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1753 st->print("cmp rscratch1, rscratch2\n\t");
1754 st->print("b.eq skip");
1755 st->print("\n\t");
1756 st->print("blr #nmethod_entry_barrier_stub\n\t");
1757 st->print("b skip\n\t");
1758 st->print("guard: int\n\t");
1759 st->print("\n\t");
1760 st->print("skip:\n\t");
1761 }
1762 }
1763 #endif
1764
1765 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1766 Compile* C = ra_->C;
1767
1768 // insert a nop at the start of the prolog so we can patch in a
1769 // branch if we need to invalidate the method later
1770 __ nop();
1771
1772 __ verified_entry(C, 0);
1773
1774 if (C->stub_function() == nullptr) {
1775 __ entry_barrier();
1776 }
1777
1778 if (!Compile::current()->output()->in_scratch_emit_size()) {
1779 __ bind(*_verified_entry);
1780 }
1781
1782 if (VerifyStackAtCalls) {
1783 Unimplemented();
1784 }
1785
1786 C->output()->set_frame_complete(__ offset());
1787
1788 if (C->has_mach_constant_base_node()) {
1789 // NOTE: We set the table base offset here because users of the constant
1790 // table might be emitted before MachConstantBaseNode.
1791 ConstantTable& constant_table = C->output()->constant_table();
1792 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1793 }
1794 }
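// n.b. relative to the variant of this method shown earlier, frame setup
// (including the stack bang) appears folded into verified_entry() and the
// nmethod entry barrier machinery into entry_barrier().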
1795
1796 int MachPrologNode::reloc() const
1797 {
1798 return 0;
1799 }
1800
1801 //=============================================================================
1802
1803 #ifndef PRODUCT
1804 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1805 Compile* C = ra_->C;
1806 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1807
1808 st->print("# pop frame %d\n\t",framesize);
1809
1810 if (framesize == 0) {
1811 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1812 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1813 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1814 st->print("add sp, sp, #%d\n\t", framesize);
1815 } else {
1818 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1819 }
1820 if (VM_Version::use_rop_protection()) {
1821 st->print("autiaz\n\t");
1822 st->print("ldr zr, [lr]\n\t");
1823 }
1824
1825 if (do_polling() && C->is_method_compilation()) {
1826 st->print("# test polling word\n\t");
1827 st->print("ldr rscratch1, [rthread, #%d]\n\t", in_bytes(JavaThread::polling_word_offset()));
1828 st->print("cmp sp, rscratch1\n\t");
1829 st->print("bhi #slow_path");
1830 }
1831 }
1832 #endif
1833
1834 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1835 Compile* C = ra_->C;
1836 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1837
1838 __ remove_frame(framesize, C->needs_stack_repair());
1839
1840 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1841 __ reserved_stack_check();
1842 }
1843
1844 if (do_polling() && C->is_method_compilation()) {
1845 Label dummy_label;
1846 Label* code_stub = &dummy_label;
1847 if (!C->output()->in_scratch_emit_size()) {
1848 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1849 C->output()->add_stub(stub);
1850 code_stub = &stub->entry();
1851 }
1852 __ relocate(relocInfo::poll_return_type);
1853 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1854 }
1855 }
1856
1857 int MachEpilogNode::reloc() const {
1858 // Return number of relocatable values contained in this instruction.
1859 return 1; // 1 for polling page.
1860 }
1861
1862 const Pipeline * MachEpilogNode::pipeline() const {
1863 return MachNode::pipeline_class();
1864 }
1865
1866 //=============================================================================
1867
1868 static enum RC rc_class(OptoReg::Name reg) {
1869
1870 if (reg == OptoReg::Bad) {
1871 return rc_bad;
1872 }
1873
1874 // we have 32 int registers * 2 halves
1875 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1876
2132 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2133 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2134 int reg = ra_->get_encode(this);
2135
2136 // This add will handle any 24-bit signed offset. 24 bits allows an
2137 // 8 megabyte stack frame.
2138 __ add(as_Register(reg), sp, offset);
2139 }
2140
2141 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2142 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2143 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2144
2145 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2146 return NativeInstruction::instruction_size;
2147 } else {
2148 return 2 * NativeInstruction::instruction_size;
2149 }
2150 }
2151
2152 //=============================================================================
2153 #ifndef PRODUCT
2154 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2155 {
2156 st->print_cr("# MachVEPNode");
2157 if (!_verified) {
2158 st->print_cr("\t load_class");
2159 } else {
2160 st->print_cr("\t unpack_inline_arg");
2161 }
2162 }
2163 #endif
2164
2165 void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
2166 {
2167 if (!_verified) {
2168 __ ic_check(1);
2169 } else {
2170 // insert a nop at the start of the prolog so we can patch in a
2171 // branch if we need to invalidate the method later
2172 __ nop();
2173
2174 // TODO 8284443 Avoid creation of temporary frame
2175 if (ra_->C->stub_function() == nullptr) {
2176 __ verified_entry(ra_->C, 0);
2177 __ entry_barrier();
2178 int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
2179 __ remove_frame(framesize, false);
2180 }
2181 // Unpack inline type args passed as oop and then jump to
2182 // the verified entry point (skipping the unverified entry).
2183 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2184 // Emit code for verified entry and save increment for stack repair on return
2185 __ verified_entry(ra_->C, sp_inc);
2186 if (Compile::current()->output()->in_scratch_emit_size()) {
2187 Label dummy_verified_entry;
2188 __ b(dummy_verified_entry);
2189 } else {
2190 __ b(*_verified_entry);
2191 }
2192 }
2193 }
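// n.b. the unverified entry reduces to an inline cache check, while the
// verified entry unpacks scalarized inline-type args and then branches to
// the verified entry point bound by MachPrologNode above.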
2194
2195 //=============================================================================
2196 #ifndef PRODUCT
2197 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2198 {
2199 st->print_cr("# MachUEPNode");
2200 if (UseCompressedClassPointers) {
2201 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2202 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2203 st->print_cr("\tcmpw rscratch1, r10");
2204 } else {
2205 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2206 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# klass");
2207 st->print_cr("\tcmp rscratch1, r10");
2208 }
2209 st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
2210 }
2211 #endif
2212
2213 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2214 {
2215 __ ic_check(InteriorEntryAlignment);
2216 }
2217
2218 // REQUIRED EMIT CODE
2219
2220 //=============================================================================
2221
2222 // Emit exception handler code.
2223 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
2224 {
2225 // mov rscratch1 #exception_blob_entry_point
2226 // br rscratch1
2227 // Note that the code buffer's insts_mark is always relative to insts.
2228 // That's why we must use the macroassembler to generate a handler.
2229 address base = __ start_a_stub(size_exception_handler());
2230 if (base == nullptr) {
2231 ciEnv::current()->record_failure("CodeCache is full");
2232 return 0; // CodeBuffer::expand failed
2233 }
2234 int offset = __ offset();
2235 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2236 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2237 __ end_a_stub();
3670 %}
3671
3672 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3673 int method_index = resolved_method_index(masm);
3674 address call = __ ic_call((address)$meth$$method, method_index);
3675 if (call == nullptr) {
3676 ciEnv::current()->record_failure("CodeCache is full");
3677 return;
3678 }
3679 __ post_call_nop();
3680 if (Compile::current()->max_vector_size() > 0) {
3681 __ reinitialize_ptrue();
3682 }
3683 %}
3684
3685 enc_class aarch64_enc_call_epilog() %{
3686 if (VerifyStackAtCalls) {
3687 // Check that stack depth is unchanged: find majik cookie on stack
3688 __ call_Unimplemented();
3689 }
3690 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
3691 // The last return value is not set by the callee but used to pass IsInit information to compiled code.
3692 // Search for the corresponding projection, get the register and emit code that initializes it.
3693 uint con = (tf()->range_cc()->cnt() - 1);
3694 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3695 ProjNode* proj = fast_out(i)->as_Proj();
3696 if (proj->_con == con) {
3697 // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
3698 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3699 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
3700 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3701 __ cmp(r0, zr);
3702 __ cset(toReg, Assembler::NE);
3703 if (reg->is_stack()) {
3704 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3705 __ str(toReg, Address(sp, st_off));
3706 }
3707 break;
3708 }
3709 }
3710 if (return_value_is_used()) {
3711 // An inline type is returned as fields in multiple registers.
3712 // R0 either contains an oop if the inline type is buffered or a pointer
3713 // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
3714 // if the lowest bit is set, so that C2 can use the oop after null checking.
3715 // r0 &= (r0 & 1) - 1
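// A quick check of the identity with the two possible inputs:
//   lowest bit 1 (tagged InlineKlass*): r0 &= (1 - 1)  -> r0 = 0
//   lowest bit 0 (buffered oop):        r0 &= (0 - 1)  -> r0 unchanged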
3716 __ andr(rscratch1, r0, 0x1);
3717 __ sub(rscratch1, rscratch1, 0x1);
3718 __ andr(r0, r0, rscratch1);
3719 }
3720 }
3721 %}
3722
3723 enc_class aarch64_enc_java_to_runtime(method meth) %{
3724 // some calls to generated routines (arraycopy code) are scheduled
3725 // by C2 as runtime calls. if so we can call them using a bl (they
3726 // will be in a reachable segment) otherwise we have to use a blr
3727 // which loads the absolute address into a register.
3728 address entry = (address)$meth$$method;
3729 CodeBlob *cb = CodeCache::find_blob(entry);
3730 if (cb) {
3731 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3732 if (call == nullptr) {
3733 ciEnv::current()->record_failure("CodeCache is full");
3734 return;
3735 }
3736 __ post_call_nop();
3737 } else {
3738 Label retaddr;
3739 // Make the anchor frame walkable
3740 __ adr(rscratch2, retaddr);
6796 instruct loadConL(iRegLNoSp dst, immL src)
6797 %{
6798 match(Set dst src);
6799
6800 ins_cost(INSN_COST);
6801 format %{ "mov $dst, $src\t# long" %}
6802
6803 ins_encode( aarch64_enc_mov_imm(dst, src) );
6804
6805 ins_pipe(ialu_imm);
6806 %}
6807
6808 // Load Pointer Constant
6809
6810 instruct loadConP(iRegPNoSp dst, immP con)
6811 %{
6812 match(Set dst con);
6813
6814 ins_cost(INSN_COST * 4);
6815 format %{
6816 "mov $dst, $con\t# ptr"
6817 %}
6818
6819 ins_encode(aarch64_enc_mov_p(dst, con));
6820
6821 ins_pipe(ialu_imm);
6822 %}
6823
6824 // Load Null Pointer Constant
6825
6826 instruct loadConP0(iRegPNoSp dst, immP0 con)
6827 %{
6828 match(Set dst con);
6829
6830 ins_cost(INSN_COST);
6831 format %{ "mov $dst, $con\t# nullptr ptr" %}
6832
6833 ins_encode(aarch64_enc_mov_p0(dst, con));
6834
6835 ins_pipe(ialu_imm);
6836 %}
7992 %}
7993
7994 // ============================================================================
7995 // Cast/Convert Instructions
7996
7997 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7998 match(Set dst (CastX2P src));
7999
8000 ins_cost(INSN_COST);
8001 format %{ "mov $dst, $src\t# long -> ptr" %}
8002
8003 ins_encode %{
8004 if ($dst$$reg != $src$$reg) {
8005 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8006 }
8007 %}
8008
8009 ins_pipe(ialu_reg);
8010 %}
8011
8012 instruct castI2N(iRegNNoSp dst, iRegI src) %{
8013 match(Set dst (CastI2N src));
8014
8015 ins_cost(INSN_COST);
8016 format %{ "mov $dst, $src\t# int -> narrow ptr" %}
8017
8018 ins_encode %{
8019 if ($dst$$reg != $src$$reg) {
8020 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8021 }
8022 %}
8023
8024 ins_pipe(ialu_reg);
8025 %}
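// (castI2N, like the casts around it, is a raw register move: the int bits
// are reinterpreted as a narrow oop, with no encode or decode performed)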
8026
8027 instruct castN2X(iRegLNoSp dst, iRegN src) %{
8028 match(Set dst (CastP2X src));
8029
8030 ins_cost(INSN_COST);
8031 format %{ "mov $dst, $src\t# narrow ptr -> long" %}
8032
8033 ins_encode %{
8034 if ($dst$$reg != $src$$reg) {
8035 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8036 }
8037 %}
8038
8039 ins_pipe(ialu_reg);
8040 %}
8041
8042 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8043 match(Set dst (CastP2X src));
8044
8045 ins_cost(INSN_COST);
8046 format %{ "mov $dst, $src\t# ptr -> long" %}
8047
8048 ins_encode %{
8049 if ($dst$$reg != $src$$reg) {
8050 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8051 }
8052 %}
8053
8054 ins_pipe(ialu_reg);
8055 %}
8056
8057 // Convert oop into int for vector alignment masking
8058 instruct convP2I(iRegINoSp dst, iRegP src) %{
8059 match(Set dst (ConvL2I (CastP2X src)));
8060
8061 ins_cost(INSN_COST);
14847
14848 match(Set dst (MoveL2D src));
14849
14850 effect(DEF dst, USE src);
14851
14852 ins_cost(INSN_COST);
14853
14854 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14855
14856 ins_encode %{
14857 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14858 %}
14859
14860 ins_pipe(fp_l2d);
14861
14862 %}
14863
14864 // ============================================================================
14865 // clearing of an array
14866
14867 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
14868 %{
14869 match(Set dummy (ClearArray (Binary cnt base) zero));
14870 effect(USE_KILL cnt, USE_KILL base, KILL cr);
14871
14872 ins_cost(4 * INSN_COST);
14873 format %{ "ClearArray $cnt, $base" %}
14874
14875 ins_encode %{
14876 address tpc = __ zero_words($base$$Register, $cnt$$Register);
14877 if (tpc == nullptr) {
14878 ciEnv::current()->record_failure("CodeCache is full");
14879 return;
14880 }
14881 %}
14882
14883 ins_pipe(pipe_class_memory);
14884 %}
14885
14886 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
14887 %{
14888 predicate(((ClearArrayNode*)n)->word_copy_only());
14889 match(Set dummy (ClearArray (Binary cnt base) val));
14890 effect(USE_KILL cnt, USE_KILL base, KILL cr);
14891
14892 ins_cost(4 * INSN_COST);
14893 format %{ "ClearArray $cnt, $base, $val" %}
14894
14895 ins_encode %{
14896 __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
14897 %}
14898
14899 ins_pipe(pipe_class_memory);
14900 %}
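// n.b. fill_words is assumed to store $val into $cnt words starting at
// $base; the word_copy_only() predicate restricts this form to whole-word
// fills.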
14901
14902 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
14903 %{
14904 predicate((uint64_t)n->in(2)->get_long()
14905 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
14906 && !((ClearArrayNode*)n)->word_copy_only());
14907 match(Set dummy (ClearArray cnt base));
14908 effect(TEMP temp, USE_KILL base, KILL cr);
14909
14910 ins_cost(4 * INSN_COST);
14911 format %{ "ClearArray $cnt, $base" %}
14912
14913 ins_encode %{
14914 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
14915 if (tpc == nullptr) {
14916 ciEnv::current()->record_failure("CodeCache is full");
14917 return;
14918 }
14919 %}
14920
14921 ins_pipe(pipe_class_memory);
14922 %}
14923
14924 // ============================================================================
14925 // Overflow Math Instructions
14926
16237 %}
16238
16239 // Call Runtime Instruction without safepoint and with vector arguments
16240 instruct CallLeafDirectVector(method meth)
16241 %{
16242 match(CallLeafVector);
16243
16244 effect(USE meth);
16245
16246 ins_cost(CALL_COST);
16247
16248 format %{ "CALL, runtime leaf vector $meth" %}
16249
16250 ins_encode(aarch64_enc_java_to_runtime(meth));
16251
16252 ins_pipe(pipe_class_call);
16253 %}
16254
16255 // Call Runtime Instruction
16256
16257 // entry point is null; target holds the address to call
16258 instruct CallLeafNoFPIndirect(iRegP target)
16259 %{
16260 predicate(n->as_Call()->entry_point() == nullptr);
16261
16262 match(CallLeafNoFP target);
16263
16264 ins_cost(CALL_COST);
16265
16266 format %{ "CALL, runtime leaf nofp indirect $target" %}
16267
16268 ins_encode %{
16269 __ blr($target$$Register);
16270 %}
16271
16272 ins_pipe(pipe_class_call);
16273 %}
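// The single blr above is why MachCallRuntimeNode::ret_addr_offset()
// returns one instruction size when _entry_point is null.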
16274
16275 instruct CallLeafNoFPDirect(method meth)
16276 %{
16277 predicate(n->as_Call()->entry_point() != nullptr);
16278
16279 match(CallLeafNoFP);
16280
16281 effect(USE meth);
16282
16283 ins_cost(CALL_COST);
16284
16285 format %{ "CALL, runtime leaf nofp $meth" %}
16286
16287 ins_encode( aarch64_enc_java_to_runtime(meth) );
16288
16289 ins_pipe(pipe_class_call);
16290 %}
16291
16292 // Tail Call; Jump from runtime stub to Java code.
16293 // Also known as an 'interprocedural jump'.
16294 // Target of jump will eventually return to caller.
16295 // TailJump below removes the return address.
16296 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16297 // emitted just above the TailCall which has reset rfp to the caller state.
16298 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)