1677
1678 int MachCallDynamicJavaNode::ret_addr_offset()
1679 {
1680 return 16; // movz, movk, movk, bl
1681 }
1682
1683 int MachCallRuntimeNode::ret_addr_offset() {
1684 // for generated stubs the call will be
1685 // bl(addr)
1686 // or with far branches
1687 // bl(trampoline_stub)
1688 // for real runtime callouts it will be six instructions
1689 // see aarch64_enc_java_to_runtime
1690 // adr(rscratch2, retaddr)
1691 // str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
1692 // lea(rscratch1, RuntimeAddress(addr)
1693 // blr(rscratch1)
1694 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1695 if (cb) {
1696 return 1 * NativeInstruction::instruction_size;
1697 } else {
1698 return 6 * NativeInstruction::instruction_size;
1699 }
1700 }
1701
1702 //=============================================================================
1703
1704 #ifndef PRODUCT
1705 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1706 st->print("BREAKPOINT");
1707 }
1708 #endif
1709
// Emits the code for a breakpoint node: a single brk with immediate 0.
// ra_ is not used.
1710 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1711 __ brk(0);
1712 }
1713
1714 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1715 return MachNode::size(ra_);
1716 }
1785 if (C->stub_function() == nullptr) {
1786 st->print("\n\t");
1787 st->print("ldr rscratch1, [guard]\n\t");
1788 st->print("dmb ishld\n\t");
1789 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1790 st->print("cmp rscratch1, rscratch2\n\t");
1791 st->print("b.eq skip");
1792 st->print("\n\t");
1793 st->print("blr #nmethod_entry_barrier_stub\n\t");
1794 st->print("b skip\n\t");
1795 st->print("guard: int\n\t");
1796 st->print("\n\t");
1797 st->print("skip:\n\t");
1798 }
1799 }
1800 #endif
1801
// Emits the method prolog. In order: an optional class-initialization
// barrier, ptrue re-initialization when vectors are in use, an optional
// stack bang, the frame build, and (when not compiling a stub) the nmethod
// entry barrier. Finally it records the frame-complete offset and, if the
// compilation has a MachConstantBaseNode, the constant table base offset.
1802 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1803 Compile* C = ra_->C;
1804
1805 // n.b. frame size includes space for return pc and rfp
1806 const int framesize = C->output()->frame_size_in_bytes();
1807
1808 if (C->clinit_barrier_on_entry()) {
1809 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1810
1811 Label L_skip_barrier;
1812
// Load the holder class metadata and run the class-initialization barrier.
// Presumably the barrier branches to L_skip_barrier when the method may
// proceed (confirm against clinit_barrier); otherwise control falls
// through to the far jump to the handle-wrong-method stub.
1813 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1814 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1815 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1816 __ bind(L_skip_barrier);
1817 }
1818
// Only compilations that use vectors re-initialize ptrue.
1819 if (C->max_vector_size() > 0) {
1820 __ reinitialize_ptrue();
1821 }
1822
// Emit the stack overflow check only when the output phase asks for it.
1823 int bangsize = C->output()->bang_size_in_bytes();
1824 if (C->output()->need_stack_bang(bangsize))
1825 __ generate_stack_overflow_check(bangsize);
1826
1827 __ build_frame(framesize);
1828
// Stub compilations (stub_function() != nullptr) get no nmethod entry barrier.
1829 if (C->stub_function() == nullptr) {
1830 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1831 // Dummy labels for just measuring the code size
1832 Label dummy_slow_path;
1833 Label dummy_continuation;
1834 Label dummy_guard;
1835 Label* slow_path = &dummy_slow_path;
1836 Label* continuation = &dummy_continuation;
1837 Label* guard = &dummy_guard;
1838 if (!Compile::current()->output()->in_scratch_emit_size()) {
1839 // Use real labels from actual stub when not emitting code for the purpose of measuring its size
1840 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
1841 Compile::current()->output()->add_stub(stub);
1842 slow_path = &stub->entry();
1843 continuation = &stub->continuation();
1844 guard = &stub->guard();
1845 }
1846 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
1847 bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
1848 }
1849
// Stack verification at calls is not implemented on this platform.
1850 if (VerifyStackAtCalls) {
1851 Unimplemented();
1852 }
1853
// The frame is complete at the current code offset.
1854 C->output()->set_frame_complete(__ offset());
1855
1856 if (C->has_mach_constant_base_node()) {
1857 // NOTE: We set the table base offset here because users might be
1858 // emitted before MachConstantBaseNode.
1859 ConstantTable& constant_table = C->output()->constant_table();
1860 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1861 }
1862 }
1863
1864 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1865 {
1866 return MachNode::size(ra_); // too many variables; just compute it
1867 // the hard way
1868 }
1869
1870 int MachPrologNode::reloc() const
1871 {
1872 return 0;
1873 }
1874
1875 //=============================================================================
1876
1877 #ifndef PRODUCT
// Prints a debug listing of the epilog. The frame size in bytes is
// frame_slots() << LogBytesPerInt; the pop sequence shown depends on it.
1878 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1879 Compile* C = ra_->C;
1880 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1881
1882 st->print("# pop frame %d\n\t",framesize);
1883
1884 if (framesize == 0) {
// Shown as a single post-indexed ldp that pops two words.
1885 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1886 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
// Shown as an ldp from the top two words of the frame, then one add to release it.
1887 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1888 st->print("add sp, sp, #%d\n\t", framesize);
1889 } else {
1892 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1893 }
// Two extra instructions are listed when ROP protection is enabled.
1894 if (VM_Version::use_rop_protection()) {
1895 st->print("autiaz\n\t");
1896 st->print("ldr zr, [lr]\n\t");
1897 }
1898
// The return poll is listed only for polling method compilations.
1899 if (do_polling() && C->is_method_compilation()) {
1900 st->print("# test polling word\n\t");
1901 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1902 st->print("cmp sp, rscratch1\n\t");
1903 st->print("bhi #slow_path");
1904 }
1905 }
1906 #endif
1907
1908 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1909 Compile* C = ra_->C;
1910 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1911
1912 __ remove_frame(framesize);
1913
1914 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1915 __ reserved_stack_check();
1916 }
1917
1918 if (do_polling() && C->is_method_compilation()) {
1919 Label dummy_label;
1920 Label* code_stub = &dummy_label;
1921 if (!C->output()->in_scratch_emit_size()) {
1922 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1923 C->output()->add_stub(stub);
1924 code_stub = &stub->entry();
1925 }
1926 __ relocate(relocInfo::poll_return_type);
1927 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1928 }
1929 }
1930
1931 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1932 // Variable size. Determine dynamically.
1933 return MachNode::size(ra_);
1934 }
1935
1936 int MachEpilogNode::reloc() const {
1937 // Return number of relocatable values contained in this instruction.
1938 return 1; // 1 for polling page.
1939 }
1940
1941 const Pipeline * MachEpilogNode::pipeline() const {
1942 return MachNode::pipeline_class();
1943 }
1944
1945 //=============================================================================
1946
1947 static enum RC rc_class(OptoReg::Name reg) {
1948
1949 if (reg == OptoReg::Bad) {
1950 return rc_bad;
1951 }
1952
1953 // we have 32 int registers * 2 halves
1954 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1955
2211 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2212 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2213 int reg = ra_->get_encode(this);
2214
2215 // This add will handle any 24-bit signed offset. 24 bits allows an
2216 // 8 megabyte stack frame.
2217 __ add(as_Register(reg), sp, offset);
2218 }
2219
2220 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2221 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2222 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2223
2224 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2225 return NativeInstruction::instruction_size;
2226 } else {
2227 return 2 * NativeInstruction::instruction_size;
2228 }
2229 }
2230
2231 //=============================================================================
2232
2233 #ifndef PRODUCT
2234 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2235 {
2236 st->print_cr("# MachUEPNode");
2237 if (UseCompressedClassPointers) {
2238 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2239 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2240 st->print_cr("\tcmpw rscratch1, r10");
2241 } else {
2242 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2243 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2244 st->print_cr("\tcmp rscratch1, r10");
2245 }
2246 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2247 }
2248 #endif
2249
// Emits the unverified entry point by delegating to ic_check, passing
// InteriorEntryAlignment. ra_ is not used.
2250 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2251 {
2252 __ ic_check(InteriorEntryAlignment);
2253 }
2254
2255 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2256 {
2257 return MachNode::size(ra_);
2258 }
2259
2260 // REQUIRED EMIT CODE
2261
2262 //=============================================================================
2263
2264 // Emit exception handler code.
2265 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
2266 {
2267 // mov rscratch1 #exception_blob_entry_point
2268 // br rscratch1
2269 // Note that the code buffer's insts_mark is always relative to insts.
2270 // That's why we must use the macroassembler to generate a handler.
2271 address base = __ start_a_stub(size_exception_handler());
2272 if (base == nullptr) {
2273 ciEnv::current()->record_failure("CodeCache is full");
2274 return 0; // CodeBuffer::expand failed
2275 }
2276 int offset = __ offset();
2277 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2278 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2279 __ end_a_stub();
3741 %}
3742
// Encoding for a dynamic Java call: emits an inline-cache call to $meth.
// If ic_call returns nullptr the compilation is recorded as failed
// ("CodeCache is full") and nothing further is emitted. Otherwise a
// post-call nop follows, and ptrue is re-initialized when the compilation
// uses vectors.
3743 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3744 int method_index = resolved_method_index(masm);
3745 address call = __ ic_call((address)$meth$$method, method_index);
3746 if (call == nullptr) {
3747 ciEnv::current()->record_failure("CodeCache is full");
3748 return;
3749 }
3750 __ post_call_nop();
3751 if (Compile::current()->max_vector_size() > 0) {
3752 __ reinitialize_ptrue();
3753 }
3754 %}
3755
// Encoding emitted after a call. It emits code only when VerifyStackAtCalls
// is set, and then only a call_Unimplemented placeholder.
3756 enc_class aarch64_enc_call_epilog() %{
3757 if (VerifyStackAtCalls) {
3758 // Check that stack depth is unchanged: find magic cookie on stack
3759 __ call_Unimplemented();
3760 }
3761 %}
3762
3763 enc_class aarch64_enc_java_to_runtime(method meth) %{
3764 // some calls to generated routines (arraycopy code) are scheduled
3765 // by C2 as runtime calls. if so we can call them using a br (they
3766 // will be in a reachable segment) otherwise we have to use a blr
3767 // which loads the absolute address into a register.
3768 address entry = (address)$meth$$method;
3769 CodeBlob *cb = CodeCache::find_blob(entry);
3770 if (cb) {
3771 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3772 if (call == nullptr) {
3773 ciEnv::current()->record_failure("CodeCache is full");
3774 return;
3775 }
3776 __ post_call_nop();
3777 } else {
3778 Label retaddr;
3779 // Make the anchor frame walkable
3780 __ adr(rscratch2, retaddr);
// Matches a long immediate (immL) assigned to dst and encodes it with
// aarch64_enc_mov_imm at a cost of one instruction.
6922 instruct loadConL(iRegLNoSp dst, immL src)
6923 %{
6924 match(Set dst src);
6925
6926 ins_cost(INSN_COST);
6927 format %{ "mov $dst, $src\t# long" %}
6928
6929 ins_encode( aarch64_enc_mov_imm(dst, src) );
6930
6931 ins_pipe(ialu_imm);
6932 %}
6933
6934 // Load Pointer Constant
6935
6936 instruct loadConP(iRegPNoSp dst, immP con)
6937 %{
6938 match(Set dst con);
6939
6940 ins_cost(INSN_COST * 4);
6941 format %{
6942 "mov $dst, $con\t# ptr\n\t"
6943 %}
6944
6945 ins_encode(aarch64_enc_mov_p(dst, con));
6946
6947 ins_pipe(ialu_imm);
6948 %}
6949
6950 // Load Null Pointer Constant
6951
// Matches the null pointer immediate (immP0) assigned to dst and encodes it
// with aarch64_enc_mov_p0 at a cost of one instruction.
6952 instruct loadConP0(iRegPNoSp dst, immP0 con)
6953 %{
6954 match(Set dst con);
6955
6956 ins_cost(INSN_COST);
6957 format %{ "mov $dst, $con\t# nullptr ptr" %}
6958
6959 ins_encode(aarch64_enc_mov_p0(dst, con));
6960
6961 ins_pipe(ialu_imm);
6962 %}
8115 %}
8116
8117 // ============================================================================
8118 // Cast/Convert Instructions
8119
// CastX2P: long register to pointer register. Encoded as a register move,
// which is omitted when dst and src were allocated the same register.
8120 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8121 match(Set dst (CastX2P src));
8122
8123 ins_cost(INSN_COST);
8124 format %{ "mov $dst, $src\t# long -> ptr" %}
8125
8126 ins_encode %{
8127 if ($dst$$reg != $src$$reg) {
8128 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8129 }
8130 %}
8131
8132 ins_pipe(ialu_reg);
8133 %}
8134
// CastP2X: pointer register to long register. Encoded as a register move,
// which is omitted when dst and src were allocated the same register.
8135 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8136 match(Set dst (CastP2X src));
8137
8138 ins_cost(INSN_COST);
8139 format %{ "mov $dst, $src\t# ptr -> long" %}
8140
8141 ins_encode %{
8142 if ($dst$$reg != $src$$reg) {
8143 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8144 }
8145 %}
8146
8147 ins_pipe(ialu_reg);
8148 %}
8149
8150 // Convert oop into int for vectors alignment masking
8151 instruct convP2I(iRegINoSp dst, iRegP src) %{
8152 match(Set dst (ConvL2I (CastP2X src)));
8153
8154 ins_cost(INSN_COST);
15068
15069 match(Set dst (MoveL2D src));
15070
15071 effect(DEF dst, USE src);
15072
15073 ins_cost(INSN_COST);
15074
15075 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15076
15077 ins_encode %{
15078 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15079 %}
15080
15081 ins_pipe(fp_l2d);
15082
15083 %}
15084
15085 // ============================================================================
15086 // clearing of an array
15087
// ClearArray with the word count in a register. cnt is fixed to R11 and base
// to R10 by their operand classes; both are used and killed, and the flags
// are killed. The work is done by zero_words; a nullptr result is recorded
// as a compilation failure ("CodeCache is full").
15088 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
15089 %{
15090 match(Set dummy (ClearArray cnt base));
15091 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15092
15093 ins_cost(4 * INSN_COST);
15094 format %{ "ClearArray $cnt, $base" %}
15095
15096 ins_encode %{
15097 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15098 if (tpc == nullptr) {
15099 ciEnv::current()->record_failure("CodeCache is full");
15100 return;
15101 }
15102 %}
15103
15104 ins_pipe(pipe_class_memory);
15105 %}
15106
// ClearArray with a constant word count. It is selected only when input 2
// of the node (presumably the count; confirm) is, as an unsigned value,
// below BlockZeroingLowLimit >> LogBytesPerWord. base (R10) is used and
// killed, R11 is reserved as a temp, and the flags are killed. A nullptr
// result from zero_words is recorded as a compilation failure.
15107 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15108 %{
15109 predicate((uint64_t)n->in(2)->get_long()
15110 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
15111 match(Set dummy (ClearArray cnt base));
15112 effect(TEMP temp, USE_KILL base, KILL cr);
15113
15114 ins_cost(4 * INSN_COST);
15115 format %{ "ClearArray $cnt, $base" %}
15116
15117 ins_encode %{
15118 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15119 if (tpc == nullptr) {
15120 ciEnv::current()->record_failure("CodeCache is full");
15121 return;
15122 }
15123 %}
15124
15125 ins_pipe(pipe_class_memory);
15126 %}
15127
15128 // ============================================================================
15129 // Overflow Math Instructions
15130
16407 %}
16408
16409 // Call Runtime Instruction without safepoint and with vector arguments
// Matches CallLeafVector and emits the call with the shared
// aarch64_enc_java_to_runtime encoding.
16410 instruct CallLeafDirectVector(method meth)
16411 %{
16412 match(CallLeafVector);
16413
16414 effect(USE meth);
16415
16416 ins_cost(CALL_COST);
16417
16418 format %{ "CALL, runtime leaf vector $meth" %}
16419
16420 ins_encode(aarch64_enc_java_to_runtime(meth));
16421
16422 ins_pipe(pipe_class_call);
16423 %}
16424
16425 // Call Runtime Instruction
16426
// Matches CallLeafNoFP and emits the call with the shared
// aarch64_enc_java_to_runtime encoding.
16427 instruct CallLeafNoFPDirect(method meth)
16428 %{
16429 match(CallLeafNoFP);
16430
16431 effect(USE meth);
16432
16433 ins_cost(CALL_COST);
16434
16435 format %{ "CALL, runtime leaf nofp $meth" %}
16436
16437 ins_encode( aarch64_enc_java_to_runtime(meth) );
16438
16439 ins_pipe(pipe_class_call);
16440 %}
16441
16442 // Tail Call; Jump from runtime stub to Java code.
16443 // Also known as an 'interprocedural jump'.
16444 // Target of jump will eventually return to caller.
16445 // TailJump below removes the return address.
16446 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16447 // emitted just above the TailCall which has reset rfp to the caller state.
16448 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
|
1677
1678 int MachCallDynamicJavaNode::ret_addr_offset()
1679 {
1680 return 16; // movz, movk, movk, bl
1681 }
1682
1683 int MachCallRuntimeNode::ret_addr_offset() {
1684 // for generated stubs the call will be
1685 // bl(addr)
1686 // or with far branches
1687 // bl(trampoline_stub)
1688 // for real runtime callouts it will be six instructions
1689 // see aarch64_enc_java_to_runtime
1690 // adr(rscratch2, retaddr)
1691 // str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
1692 // lea(rscratch1, RuntimeAddress(addr)
1693 // blr(rscratch1)
1694 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1695 if (cb) {
1696 return 1 * NativeInstruction::instruction_size;
1697 } else if (_entry_point == nullptr) {
1698 // See CallLeafNoFPIndirect
1699 return 1 * NativeInstruction::instruction_size;
1700 } else {
1701 return 6 * NativeInstruction::instruction_size;
1702 }
1703 }
1704
1705 //=============================================================================
1706
1707 #ifndef PRODUCT
1708 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1709 st->print("BREAKPOINT");
1710 }
1711 #endif
1712
// Emits the code for a breakpoint node: a single brk with immediate 0.
// ra_ is not used.
1713 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1714 __ brk(0);
1715 }
1716
1717 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1718 return MachNode::size(ra_);
1719 }
1788 if (C->stub_function() == nullptr) {
1789 st->print("\n\t");
1790 st->print("ldr rscratch1, [guard]\n\t");
1791 st->print("dmb ishld\n\t");
1792 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1793 st->print("cmp rscratch1, rscratch2\n\t");
1794 st->print("b.eq skip");
1795 st->print("\n\t");
1796 st->print("blr #nmethod_entry_barrier_stub\n\t");
1797 st->print("b skip\n\t");
1798 st->print("guard: int\n\t");
1799 st->print("\n\t");
1800 st->print("skip:\n\t");
1801 }
1802 }
1803 #endif
1804
// Emits the method prolog: the verified entry sequence (with a stack
// increment argument of 0), the entry barrier when not compiling a stub,
// and the binding of the _verified_entry label. Finally it records the
// frame-complete offset and, if the compilation has a MachConstantBaseNode,
// the constant table base offset.
1805 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1806 Compile* C = ra_->C;
1807
1808
1809 __ verified_entry(C, 0);
1810
// Stub compilations (stub_function() != nullptr) get no entry barrier.
1811 if (C->stub_function() == nullptr) {
1812 __ entry_barrier();
1813 }
1814
// The label is bound only during real emission, not while the code is
// emitted merely to measure its size.
1815 if (!Compile::current()->output()->in_scratch_emit_size()) {
1816 __ bind(*_verified_entry);
1817 }
1818
// Stack verification at calls is not implemented on this platform.
1819 if (VerifyStackAtCalls) {
1820 Unimplemented();
1821 }
1822
// The frame is complete at the current code offset.
1823 C->output()->set_frame_complete(__ offset());
1824
1825 if (C->has_mach_constant_base_node()) {
1826 // NOTE: We set the table base offset here because users might be
1827 // emitted before MachConstantBaseNode.
1828 ConstantTable& constant_table = C->output()->constant_table();
1829 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1830 }
1831 }
1832
1833 int MachPrologNode::reloc() const
1834 {
1835 return 0;
1836 }
1837
1838 //=============================================================================
1839
1840 #ifndef PRODUCT
// Prints a debug listing of the epilog. The frame size in bytes is
// frame_slots() << LogBytesPerInt; the pop sequence shown depends on it.
1841 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1842 Compile* C = ra_->C;
1843 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1844
1845 st->print("# pop frame %d\n\t",framesize);
1846
1847 if (framesize == 0) {
// Shown as a single post-indexed ldp that pops two words.
1848 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1849 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
// Shown as an ldp from the top two words of the frame, then one add to release it.
1850 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1851 st->print("add sp, sp, #%d\n\t", framesize);
1852 } else {
1855 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1856 }
// Two extra instructions are listed when ROP protection is enabled.
1857 if (VM_Version::use_rop_protection()) {
1858 st->print("autiaz\n\t");
1859 st->print("ldr zr, [lr]\n\t");
1860 }
1861
// The return poll is listed only for polling method compilations.
1862 if (do_polling() && C->is_method_compilation()) {
1863 st->print("# test polling word\n\t");
1864 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1865 st->print("cmp sp, rscratch1\n\t");
1866 st->print("bhi #slow_path");
1867 }
1868 }
1869 #endif
1870
1871 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1872 Compile* C = ra_->C;
1873 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1874
1875 __ remove_frame(framesize, C->needs_stack_repair());
1876
1877 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1878 __ reserved_stack_check();
1879 }
1880
1881 if (do_polling() && C->is_method_compilation()) {
1882 Label dummy_label;
1883 Label* code_stub = &dummy_label;
1884 if (!C->output()->in_scratch_emit_size()) {
1885 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1886 C->output()->add_stub(stub);
1887 code_stub = &stub->entry();
1888 }
1889 __ relocate(relocInfo::poll_return_type);
1890 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1891 }
1892 }
1893
1894 int MachEpilogNode::reloc() const {
1895 // Return number of relocatable values contained in this instruction.
1896 return 1; // 1 for polling page.
1897 }
1898
1899 const Pipeline * MachEpilogNode::pipeline() const {
1900 return MachNode::pipeline_class();
1901 }
1902
1903 //=============================================================================
1904
1905 static enum RC rc_class(OptoReg::Name reg) {
1906
1907 if (reg == OptoReg::Bad) {
1908 return rc_bad;
1909 }
1910
1911 // we have 32 int registers * 2 halves
1912 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1913
2169 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2170 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2171 int reg = ra_->get_encode(this);
2172
2173 // This add will handle any 24-bit signed offset. 24 bits allows an
2174 // 8 megabyte stack frame.
2175 __ add(as_Register(reg), sp, offset);
2176 }
2177
2178 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2179 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2180 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2181
2182 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2183 return NativeInstruction::instruction_size;
2184 } else {
2185 return 2 * NativeInstruction::instruction_size;
2186 }
2187 }
2188
2189 ///=============================================================================
2190 #ifndef PRODUCT
2191 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2192 {
2193 st->print_cr("# MachVEPNode");
2194 if (!_verified) {
2195 st->print_cr("\t load_class");
2196 } else {
2197 st->print_cr("\t unpack_inline_arg");
2198 }
2199 }
2200 #endif
2201
// Emits an inline-type entry point. An unverified entry (_verified false)
// only emits the inline cache check. A verified entry unpacks the inline
// type arguments, emits the verified entry sequence with the resulting
// stack increment, and branches to _verified_entry.
2202 void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
2203 {
2204 if (!_verified) {
2205 __ ic_check(1);
2206 } else {
2207 // TODO 8284443 Avoid creation of temporary frame
// For non-stub compilations a temporary frame is set up around the entry
// barrier and removed again immediately afterwards.
2208 if (ra_->C->stub_function() == nullptr) {
2209 __ verified_entry(ra_->C, 0);
2210 __ entry_barrier();
2211 int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
2212 __ remove_frame(framesize, false);
2213 }
2214 // Unpack inline type args passed as oop and then jump to
2215 // the verified entry point (skipping the unverified entry).
2216 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2217 // Emit code for verified entry and save increment for stack repair on return
2218 __ verified_entry(ra_->C, sp_inc);
// While only measuring code size, branch to a throwaway label instead of
// the real _verified_entry label.
2219 if (Compile::current()->output()->in_scratch_emit_size()) {
2220 Label dummy_verified_entry;
2221 __ b(dummy_verified_entry);
2222 } else {
2223 __ b(*_verified_entry);
2224 }
2225 }
2226 }
2227
2228 //=============================================================================
2229 #ifndef PRODUCT
2230 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2231 {
2232 st->print_cr("# MachUEPNode");
2233 if (UseCompressedClassPointers) {
2234 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2235 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2236 st->print_cr("\tcmpw rscratch1, r10");
2237 } else {
2238 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2239 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2240 st->print_cr("\tcmp rscratch1, r10");
2241 }
2242 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2243 }
2244 #endif
2245
// Emits the unverified entry point by delegating to ic_check, passing
// InteriorEntryAlignment. ra_ is not used.
2246 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2247 {
2248 __ ic_check(InteriorEntryAlignment);
2249 }
2250
2251 // REQUIRED EMIT CODE
2252
2253 //=============================================================================
2254
2255 // Emit exception handler code.
2256 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
2257 {
2258 // mov rscratch1 #exception_blob_entry_point
2259 // br rscratch1
2260 // Note that the code buffer's insts_mark is always relative to insts.
2261 // That's why we must use the macroassembler to generate a handler.
2262 address base = __ start_a_stub(size_exception_handler());
2263 if (base == nullptr) {
2264 ciEnv::current()->record_failure("CodeCache is full");
2265 return 0; // CodeBuffer::expand failed
2266 }
2267 int offset = __ offset();
2268 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2269 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2270 __ end_a_stub();
3732 %}
3733
// Encoding for a dynamic Java call: emits an inline-cache call to $meth.
// If ic_call returns nullptr the compilation is recorded as failed
// ("CodeCache is full") and nothing further is emitted. Otherwise a
// post-call nop follows, and ptrue is re-initialized when the compilation
// uses vectors.
3734 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3735 int method_index = resolved_method_index(masm);
3736 address call = __ ic_call((address)$meth$$method, method_index);
3737 if (call == nullptr) {
3738 ciEnv::current()->record_failure("CodeCache is full");
3739 return;
3740 }
3741 __ post_call_nop();
3742 if (Compile::current()->max_vector_size() > 0) {
3743 __ reinitialize_ptrue();
3744 }
3745 %}
3746
// Encoding emitted after a call. With VerifyStackAtCalls it emits a
// call_Unimplemented placeholder. For calls that return an inline type as
// fields (the method is not a method handle intrinsic and its return type
// is loaded) it additionally materializes the null marker and, if the
// return value is used, clears r0 when its lowest bit is set.
3747 enc_class aarch64_enc_call_epilog() %{
3748 if (VerifyStackAtCalls) {
3749 // Check that stack depth is unchanged: find magic cookie on stack
3750 __ call_Unimplemented();
3751 }
3752 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
3753 // The last return value is not set by the callee but used to pass the null marker to compiled code.
3754 // Search for the corresponding projection, get the register and emit code that initialized it.
// con is the index of the last return value in the calling-convention range.
3755 uint con = (tf()->range_cc()->cnt() - 1);
3756 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3757 ProjNode* proj = fast_out(i)->as_Proj();
3758 if (proj->_con == con) {
3759 // Set null marker if r0 is non-null (a non-null value is returned buffered or scalarized)
3760 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3761 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
// A projection allocated to a stack slot is computed in rscratch1 and
// stored to that slot afterwards.
3762 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3763 __ cmp(r0, zr);
3764 __ cset(toReg, Assembler::NE);
3765 if (reg->is_stack()) {
3766 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3767 __ str(toReg, Address(sp, st_off));
3768 }
// Only the first matching projection is handled.
3769 break;
3770 }
3771 }
3772 if (return_value_is_used()) {
3773 // An inline type is returned as fields in multiple registers.
3774 // R0 either contains an oop if the inline type is buffered or a pointer
3775 // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
3776 // if the lowest bit is set to allow C2 to use the oop after null checking.
3777 // r0 &= (r0 & 1) - 1
3778 __ andr(rscratch1, r0, 0x1);
3779 __ sub(rscratch1, rscratch1, 0x1);
3780 __ andr(r0, r0, rscratch1);
3781 }
3782 }
3783 %}
3784
3785 enc_class aarch64_enc_java_to_runtime(method meth) %{
3786 // some calls to generated routines (arraycopy code) are scheduled
3787 // by C2 as runtime calls. if so we can call them using a br (they
3788 // will be in a reachable segment) otherwise we have to use a blr
3789 // which loads the absolute address into a register.
3790 address entry = (address)$meth$$method;
3791 CodeBlob *cb = CodeCache::find_blob(entry);
3792 if (cb) {
3793 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3794 if (call == nullptr) {
3795 ciEnv::current()->record_failure("CodeCache is full");
3796 return;
3797 }
3798 __ post_call_nop();
3799 } else {
3800 Label retaddr;
3801 // Make the anchor frame walkable
3802 __ adr(rscratch2, retaddr);
// Matches a long immediate (immL) assigned to dst and encodes it with
// aarch64_enc_mov_imm at a cost of one instruction.
6944 instruct loadConL(iRegLNoSp dst, immL src)
6945 %{
6946 match(Set dst src);
6947
6948 ins_cost(INSN_COST);
6949 format %{ "mov $dst, $src\t# long" %}
6950
6951 ins_encode( aarch64_enc_mov_imm(dst, src) );
6952
6953 ins_pipe(ialu_imm);
6954 %}
6955
6956 // Load Pointer Constant
6957
// Matches a pointer immediate (immP) assigned to dst and encodes it with
// aarch64_enc_mov_p. Costed at four instructions.
6958 instruct loadConP(iRegPNoSp dst, immP con)
6959 %{
6960 match(Set dst con);
6961
6962 ins_cost(INSN_COST * 4);
6963 format %{
6964 "mov $dst, $con\t# ptr"
6965 %}
6966
6967 ins_encode(aarch64_enc_mov_p(dst, con));
6968
6969 ins_pipe(ialu_imm);
6970 %}
6971
6972 // Load Null Pointer Constant
6973
// Matches the null pointer immediate (immP0) assigned to dst and encodes it
// with aarch64_enc_mov_p0 at a cost of one instruction.
6974 instruct loadConP0(iRegPNoSp dst, immP0 con)
6975 %{
6976 match(Set dst con);
6977
6978 ins_cost(INSN_COST);
6979 format %{ "mov $dst, $con\t# nullptr ptr" %}
6980
6981 ins_encode(aarch64_enc_mov_p0(dst, con));
6982
6983 ins_pipe(ialu_imm);
6984 %}
8137 %}
8138
8139 // ============================================================================
8140 // Cast/Convert Instructions
8141
// CastX2P: long register to pointer register. Encoded as a register move,
// which is omitted when dst and src were allocated the same register.
8142 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8143 match(Set dst (CastX2P src));
8144
8145 ins_cost(INSN_COST);
8146 format %{ "mov $dst, $src\t# long -> ptr" %}
8147
8148 ins_encode %{
8149 if ($dst$$reg != $src$$reg) {
8150 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8151 }
8152 %}
8153
8154 ins_pipe(ialu_reg);
8155 %}
8156
// CastI2N: int register to narrow pointer register. Encoded as a register
// move, which is omitted when dst and src were allocated the same register.
8157 instruct castI2N(iRegNNoSp dst, iRegI src) %{
8158 match(Set dst (CastI2N src));
8159
8160 ins_cost(INSN_COST);
8161 format %{ "mov $dst, $src\t# int -> narrow ptr" %}
8162
8163 ins_encode %{
8164 if ($dst$$reg != $src$$reg) {
8165 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8166 }
8167 %}
8168
8169 ins_pipe(ialu_reg);
8170 %}
8171
8172 instruct castN2X(iRegLNoSp dst, iRegN src) %{
8173 match(Set dst (CastP2X src));
8174
8175 ins_cost(INSN_COST);
8176 format %{ "mov $dst, $src\t# ptr -> long" %}
8177
8178 ins_encode %{
8179 if ($dst$$reg != $src$$reg) {
8180 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8181 }
8182 %}
8183
8184 ins_pipe(ialu_reg);
8185 %}
8186
// CastP2X: pointer register to long register. Encoded as a register move,
// which is omitted when dst and src were allocated the same register.
8187 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8188 match(Set dst (CastP2X src));
8189
8190 ins_cost(INSN_COST);
8191 format %{ "mov $dst, $src\t# ptr -> long" %}
8192
8193 ins_encode %{
8194 if ($dst$$reg != $src$$reg) {
8195 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8196 }
8197 %}
8198
8199 ins_pipe(ialu_reg);
8200 %}
8201
8202 // Convert oop into int for vectors alignment masking
8203 instruct convP2I(iRegINoSp dst, iRegP src) %{
8204 match(Set dst (ConvL2I (CastP2X src)));
8205
8206 ins_cost(INSN_COST);
15120
15121 match(Set dst (MoveL2D src));
15122
15123 effect(DEF dst, USE src);
15124
15125 ins_cost(INSN_COST);
15126
15127 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15128
15129 ins_encode %{
15130 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15131 %}
15132
15133 ins_pipe(fp_l2d);
15134
15135 %}
15136
15137 // ============================================================================
15138 // clearing of an array
15139
// ClearArray with the word count in a register and a constant zero
// (immL0) fill value. cnt is fixed to R11 and base to R10 by their operand
// classes; both are used and killed, and the flags are killed. The work is
// done by zero_words; a nullptr result is recorded as a compilation failure
// ("CodeCache is full").
15140 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15141 %{
15142 match(Set dummy (ClearArray (Binary cnt base) zero));
15143 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15144
15145 ins_cost(4 * INSN_COST);
15146 format %{ "ClearArray $cnt, $base" %}
15147
15148 ins_encode %{
15149 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15150 if (tpc == nullptr) {
15151 ciEnv::current()->record_failure("CodeCache is full");
15152 return;
15153 }
15154 %}
15155
15156 ins_pipe(pipe_class_memory);
15157 %}
15158
// ClearArray that fills with the value in a register. It is selected only
// when the ClearArrayNode reports word_copy_only(). cnt (R11) and base
// (R10) are used and killed, and the flags are killed. The fill is done by
// fill_words, whose result is not checked here.
15159 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15160 %{
15161 predicate(((ClearArrayNode*)n)->word_copy_only());
15162 match(Set dummy (ClearArray (Binary cnt base) val));
15163 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15164
15165 ins_cost(4 * INSN_COST);
15166 format %{ "ClearArray $cnt, $base, $val" %}
15167
15168 ins_encode %{
15169 __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15170 %}
15171
15172 ins_pipe(pipe_class_memory);
15173 %}
15174
// ClearArray with a constant word count. It is selected only when input 2
// of the node (presumably the count; confirm) is, as an unsigned value,
// below BlockZeroingLowLimit >> LogBytesPerWord and the ClearArrayNode is
// not word_copy_only(). base (R10) is used and killed, R11 is reserved as a
// temp, and the flags are killed. A nullptr result from zero_words is
// recorded as a compilation failure.
// NOTE(review): the match rule uses (ClearArray cnt base), while the two
// register variants use (ClearArray (Binary cnt base) ...). Confirm this is
// intended.
15175 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15176 %{
15177 predicate((uint64_t)n->in(2)->get_long()
15178 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15179 && !((ClearArrayNode*)n)->word_copy_only());
15180 match(Set dummy (ClearArray cnt base));
15181 effect(TEMP temp, USE_KILL base, KILL cr);
15182
15183 ins_cost(4 * INSN_COST);
15184 format %{ "ClearArray $cnt, $base" %}
15185
15186 ins_encode %{
15187 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15188 if (tpc == nullptr) {
15189 ciEnv::current()->record_failure("CodeCache is full");
15190 return;
15191 }
15192 %}
15193
15194 ins_pipe(pipe_class_memory);
15195 %}
15196
15197 // ============================================================================
15198 // Overflow Math Instructions
15199
16476 %}
16477
16478 // Call Runtime Instruction without safepoint and with vector arguments
// Matches CallLeafVector and emits the call with the shared
// aarch64_enc_java_to_runtime encoding.
16479 instruct CallLeafDirectVector(method meth)
16480 %{
16481 match(CallLeafVector);
16482
16483 effect(USE meth);
16484
16485 ins_cost(CALL_COST);
16486
16487 format %{ "CALL, runtime leaf vector $meth" %}
16488
16489 ins_encode(aarch64_enc_java_to_runtime(meth));
16490
16491 ins_pipe(pipe_class_call);
16492 %}
16493
16494 // Call Runtime Instruction
16495
16496 // entry point is null, target holds the address to call
// Matches CallLeafNoFP nodes whose entry point is null; the call target is
// taken from the register operand and called with a single blr.
16497 instruct CallLeafNoFPIndirect(iRegP target)
16498 %{
16499 predicate(n->as_Call()->entry_point() == nullptr);
16500
16501 match(CallLeafNoFP target);
16502
16503 ins_cost(CALL_COST);
16504
16505 format %{ "CALL, runtime leaf nofp indirect $target" %}
16506
16507 ins_encode %{
16508 __ blr($target$$Register);
16509 %}
16510
16511 ins_pipe(pipe_class_call);
16512 %}
16513
// Matches CallLeafNoFP nodes that have a non-null entry point and emits the
// call with the shared aarch64_enc_java_to_runtime encoding.
16514 instruct CallLeafNoFPDirect(method meth)
16515 %{
16516 predicate(n->as_Call()->entry_point() != nullptr);
16517
16518 match(CallLeafNoFP);
16519
16520 effect(USE meth);
16521
16522 ins_cost(CALL_COST);
16523
16524 format %{ "CALL, runtime leaf nofp $meth" %}
16525
16526 ins_encode( aarch64_enc_java_to_runtime(meth) );
16527
16528 ins_pipe(pipe_class_call);
16529 %}
16530
16531 // Tail Call; Jump from runtime stub to Java code.
16532 // Also known as an 'interprocedural jump'.
16533 // Target of jump will eventually return to caller.
16534 // TailJump below removes the return address.
16535 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16536 // emitted just above the TailCall which has reset rfp to the caller state.
16537 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
|