1637
1638 int MachCallDynamicJavaNode::ret_addr_offset()
1639 {
1640 return 16; // 4 instructions * 4 bytes: movz, movk, movk, bl
1641 }
1642
1643 int MachCallRuntimeNode::ret_addr_offset() {
1644 // for generated stubs the call will be
1645 // bl(addr)
1646 // or with far branches
1647 // bl(trampoline_stub)
1648 // for real runtime callouts it will be six instructions
1649 // see aarch64_enc_java_to_runtime
1650 // adr(rscratch2, retaddr)
1651 // str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
1652 // lea(rscratch1, RuntimeAddress(addr))
1653 // blr(rscratch1)
1654 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1655 if (cb) {
1656 return 1 * NativeInstruction::instruction_size;
1657 } else {
1658 return 6 * NativeInstruction::instruction_size;
1659 }
1660 }
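// Editor's sketch of the arithmetic above (illustrative, not upstream code):
// NativeInstruction::instruction_size is 4 bytes on AArch64, so a call to a
// code-cache-resident stub is a single bl (return address 4 bytes past the
// call start), while the out-of-line runtime sequence
// adr + str + lea (movz/movk/movk) + blr spans 6 * 4 == 24 bytes.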
1661
1662 //=============================================================================
1663
1664 #ifndef PRODUCT
1665 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1666 st->print("BREAKPOINT");
1667 }
1668 #endif
1669
1670 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1671 __ brk(0);
1672 }
1673
1674 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1675 return MachNode::size(ra_);
1676 }
1745 if (C->stub_function() == nullptr) {
1746 st->print("\n\t");
1747 st->print("ldr rscratch1, [guard]\n\t");
1748 st->print("dmb ishld\n\t");
1749 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1750 st->print("cmp rscratch1, rscratch2\n\t");
1751 st->print("b.eq skip");
1752 st->print("\n\t");
1753 st->print("blr #nmethod_entry_barrier_stub\n\t");
1754 st->print("b skip\n\t");
1755 st->print("guard: int\n\t");
1756 st->print("\n\t");
1757 st->print("skip:\n\t");
1758 }
1759 }
1760 #endif
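// Editor's restatement of the printed sequence (illustrative pseudocode; the
// accessor name disarmed_guard_value() is hypothetical):
//   if (*guard != rthread->disarmed_guard_value()) {
//     nmethod_entry_barrier_stub();  // out-of-line slow path
//   }
//   skip: ...
// where the dmb ishld orders the guard load before the thread-local load.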
1761
1762 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1763 Compile* C = ra_->C;
1764
1765 // n.b. frame size includes space for return pc and rfp
1766 const int framesize = C->output()->frame_size_in_bytes();
1767
1768 if (C->clinit_barrier_on_entry()) {
1769 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1770
1771 Label L_skip_barrier;
1772
1773 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1774 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1775 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1776 __ bind(L_skip_barrier);
1777 }
1778
1779 if (C->max_vector_size() > 0) {
1780 __ reinitialize_ptrue();
1781 }
1782
1783 int bangsize = C->output()->bang_size_in_bytes();
1784 if (C->output()->need_stack_bang(bangsize))
1785 __ generate_stack_overflow_check(bangsize);
1786
1787 __ build_frame(framesize);
1788
1789 if (C->stub_function() == nullptr) {
1790 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1791 // Dummy labels used just for measuring the code size
1792 Label dummy_slow_path;
1793 Label dummy_continuation;
1794 Label dummy_guard;
1795 Label* slow_path = &dummy_slow_path;
1796 Label* continuation = &dummy_continuation;
1797 Label* guard = &dummy_guard;
1798 if (!Compile::current()->output()->in_scratch_emit_size()) {
1799 // Use real labels from the actual stub when not emitting code just to measure its size
1800 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
1801 Compile::current()->output()->add_stub(stub);
1802 slow_path = &stub->entry();
1803 continuation = &stub->continuation();
1804 guard = &stub->guard();
1805 }
1806 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
1807 bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
1808 }
1809
1810 if (VerifyStackAtCalls) {
1811 Unimplemented();
1812 }
1813
1814 C->output()->set_frame_complete(__ offset());
1815
1816 if (C->has_mach_constant_base_node()) {
1817 // NOTE: We set the table base offset here because users of the constant
1818 // table might be emitted before MachConstantBaseNode.
1819 ConstantTable& constant_table = C->output()->constant_table();
1820 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1821 }
1822 }
1823
1824 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1825 {
1826 return MachNode::size(ra_); // too many variables; just compute it
1827 // the hard way
1828 }
1829
1830 int MachPrologNode::reloc() const
1831 {
1832 return 0;
1833 }
1834
1835 //=============================================================================
1836
1837 #ifndef PRODUCT
1838 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1839 Compile* C = ra_->C;
1840 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1841
1842 st->print("# pop frame %d\n\t",framesize);
1843
1844 if (framesize == 0) {
1845 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1846 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1847 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1848 st->print("add sp, sp, #%d\n\t", framesize);
1849 } else {
1852 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1853 }
1854 if (VM_Version::use_rop_protection()) {
1855 st->print("autiaz\n\t");
1856 st->print("ldr zr, [lr]\n\t");
1857 }
1858
1859 if (do_polling() && C->is_method_compilation()) {
1860 st->print("# test polling word\n\t");
1861 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1862 st->print("cmp sp, rscratch1\n\t");
1863 st->print("bhi #slow_path");
1864 }
1865 }
1866 #endif
1867
1868 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1869 Compile* C = ra_->C;
1870 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1871
1872 __ remove_frame(framesize);
1873
1874 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1875 __ reserved_stack_check();
1876 }
1877
1878 if (do_polling() && C->is_method_compilation()) {
1879 Label dummy_label;
1880 Label* code_stub = &dummy_label;
1881 if (!C->output()->in_scratch_emit_size()) {
1882 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1883 C->output()->add_stub(stub);
1884 code_stub = &stub->entry();
1885 }
1886 __ relocate(relocInfo::poll_return_type);
1887 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1888 }
1889 }
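// Editor's note (cross-reference only): the emitted poll mirrors the
// format() pseudo-assembly above -- load the polling word from rthread,
// compare it against sp, and branch to the C2SafepointPollStub entry when
// sp is unsigned-higher, i.e. when the VM has armed the poll.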
1890
1891 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1892 // Variable size. Determine dynamically.
1893 return MachNode::size(ra_);
1894 }
1895
1896 int MachEpilogNode::reloc() const {
1897 // Return number of relocatable values contained in this instruction.
1898 return 1; // 1 for polling page.
1899 }
1900
1901 const Pipeline * MachEpilogNode::pipeline() const {
1902 return MachNode::pipeline_class();
1903 }
1904
1905 //=============================================================================
1906
1907 static enum RC rc_class(OptoReg::Name reg) {
1908
1909 if (reg == OptoReg::Bad) {
1910 return rc_bad;
1911 }
1912
1913 // we have 32 int registers * 2 halves
1914 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
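// Editor's worked example using the values named above: with 32
// general-purpose registers and 2 32-bit slots per 64-bit register,
// slots_of_int_registers == 64, so OptoReg names 0..63 denote the
// integer register file.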
1915
2171 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2172 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2173 int reg = ra_->get_encode(this);
2174
2175 // This add will handle any 24-bit signed offset. 24 bits allows an
2176 // 8 megabyte stack frame.
2177 __ add(as_Register(reg), sp, offset);
2178 }
2179
2180 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2181 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2182 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2183
2184 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2185 return NativeInstruction::instruction_size;
2186 } else {
2187 return 2 * NativeInstruction::instruction_size;
2188 }
2189 }
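// Editor's sketch of the size logic above (encoding details assumed from the
// AArch64 ADD-immediate format, which holds 12 bits optionally shifted left
// by 12): a frame offset is materialized as either
//   add xN, sp, #lo12                 // 1 instruction
// or
//   add xN, sp, #hi12, lsl #12
//   add xN, xN, #lo12                 // 2 instructions
// matching the 1-vs-2 * instruction_size returned by size().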
2190
2191 //=============================================================================
2192
2193 #ifndef PRODUCT
2194 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2195 {
2196 st->print_cr("# MachUEPNode");
2197 if (UseCompressedClassPointers) {
2198 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2199 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2200 st->print_cr("\tcmpw rscratch1, r10");
2201 } else {
2202 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2203 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# klass");
2204 st->print_cr("\tcmp rscratch1, r10");
2205 }
2206 st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
2207 }
2208 #endif
2209
2210 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2211 {
2212 __ ic_check(InteriorEntryAlignment);
2213 }
2214
2215 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2216 {
2217 return MachNode::size(ra_);
2218 }
2219
2220 // REQUIRED EMIT CODE
2221
2222 //=============================================================================
2223
2224 // Emit exception handler code.
2225 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
2226 {
2227 // mov rscratch1 #exception_blob_entry_point
2228 // br rscratch1
2229 // Note that the code buffer's insts_mark is always relative to insts.
2230 // That's why we must use the macroassembler to generate a handler.
2231 address base = __ start_a_stub(size_exception_handler());
2232 if (base == nullptr) {
2233 ciEnv::current()->record_failure("CodeCache is full");
2234 return 0; // CodeBuffer::expand failed
2235 }
2236 int offset = __ offset();
2237 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2238 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2239 __ end_a_stub();
3692 %}
3693
3694 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3695 int method_index = resolved_method_index(masm);
3696 address call = __ ic_call((address)$meth$$method, method_index);
3697 if (call == nullptr) {
3698 ciEnv::current()->record_failure("CodeCache is full");
3699 return;
3700 }
3701 __ post_call_nop();
3702 if (Compile::current()->max_vector_size() > 0) {
3703 __ reinitialize_ptrue();
3704 }
3705 %}
3706
3707 enc_class aarch64_enc_call_epilog() %{
3708 if (VerifyStackAtCalls) {
3709 // Check that stack depth is unchanged: find majik cookie on stack
3710 __ call_Unimplemented();
3711 }
3712 %}
3713
3714 enc_class aarch64_enc_java_to_runtime(method meth) %{
3715 // some calls to generated routines (arraycopy code) are scheduled
3716 // by C2 as runtime calls. If so we can call them using a bl (they
3717 // will be in a reachable segment) otherwise we have to use a blr
3718 // which loads the absolute address into a register.
3719 address entry = (address)$meth$$method;
3720 CodeBlob *cb = CodeCache::find_blob(entry);
3721 if (cb) {
3722 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3723 if (call == nullptr) {
3724 ciEnv::current()->record_failure("CodeCache is full");
3725 return;
3726 }
3727 __ post_call_nop();
3728 } else {
3729 Label retaddr;
3730 // Make the anchor frame walkable
3731 __ adr(rscratch2, retaddr);
6801 instruct loadConL(iRegLNoSp dst, immL src)
6802 %{
6803 match(Set dst src);
6804
6805 ins_cost(INSN_COST);
6806 format %{ "mov $dst, $src\t# long" %}
6807
6808 ins_encode( aarch64_enc_mov_imm(dst, src) );
6809
6810 ins_pipe(ialu_imm);
6811 %}
6812
6813 // Load Pointer Constant
6814
6815 instruct loadConP(iRegPNoSp dst, immP con)
6816 %{
6817 match(Set dst con);
6818
6819 ins_cost(INSN_COST * 4);
6820 format %{
6821 "mov $dst, $con\t# ptr\n\t"
6822 %}
6823
6824 ins_encode(aarch64_enc_mov_p(dst, con));
6825
6826 ins_pipe(ialu_imm);
6827 %}
6828
6829 // Load Null Pointer Constant
6830
6831 instruct loadConP0(iRegPNoSp dst, immP0 con)
6832 %{
6833 match(Set dst con);
6834
6835 ins_cost(INSN_COST);
6836 format %{ "mov $dst, $con\t# nullptr ptr" %}
6837
6838 ins_encode(aarch64_enc_mov_p0(dst, con));
6839
6840 ins_pipe(ialu_imm);
6841 %}
8010 %}
8011
8012 // ============================================================================
8013 // Cast/Convert Instructions
8014
8015 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8016 match(Set dst (CastX2P src));
8017
8018 ins_cost(INSN_COST);
8019 format %{ "mov $dst, $src\t# long -> ptr" %}
8020
8021 ins_encode %{
8022 if ($dst$$reg != $src$$reg) {
8023 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8024 }
8025 %}
8026
8027 ins_pipe(ialu_reg);
8028 %}
8029
8030 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8031 match(Set dst (CastP2X src));
8032
8033 ins_cost(INSN_COST);
8034 format %{ "mov $dst, $src\t# ptr -> long" %}
8035
8036 ins_encode %{
8037 if ($dst$$reg != $src$$reg) {
8038 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8039 }
8040 %}
8041
8042 ins_pipe(ialu_reg);
8043 %}
8044
8045 // Convert oop into int for vectors alignment masking
8046 instruct convP2I(iRegINoSp dst, iRegP src) %{
8047 match(Set dst (ConvL2I (CastP2X src)));
8048
8049 ins_cost(INSN_COST);
14963
14964 match(Set dst (MoveL2D src));
14965
14966 effect(DEF dst, USE src);
14967
14968 ins_cost(INSN_COST);
14969
14970 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14971
14972 ins_encode %{
14973 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14974 %}
14975
14976 ins_pipe(fp_l2d);
14977
14978 %}
14979
14980 // ============================================================================
14981 // clearing of an array
14982
14983 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14984 %{
14985 match(Set dummy (ClearArray cnt base));
14986 effect(USE_KILL cnt, USE_KILL base, KILL cr);
14987
14988 ins_cost(4 * INSN_COST);
14989 format %{ "ClearArray $cnt, $base" %}
14990
14991 ins_encode %{
14992 address tpc = __ zero_words($base$$Register, $cnt$$Register);
14993 if (tpc == nullptr) {
14994 ciEnv::current()->record_failure("CodeCache is full");
14995 return;
14996 }
14997 %}
14998
14999 ins_pipe(pipe_class_memory);
15000 %}
15001
15002 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15003 %{
15004 predicate((uint64_t)n->in(2)->get_long()
15005 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
15006 match(Set dummy (ClearArray cnt base));
15007 effect(TEMP temp, USE_KILL base, KILL cr);
15008
15009 ins_cost(4 * INSN_COST);
15010 format %{ "ClearArray $cnt, $base" %}
15011
15012 ins_encode %{
15013 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15014 if (tpc == nullptr) {
15015 ciEnv::current()->record_failure("CodeCache is full");
15016 return;
15017 }
15018 %}
15019
15020 ins_pipe(pipe_class_memory);
15021 %}
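// Editor's worked example for the predicate above (the default of 256 bytes
// for BlockZeroingLowLimit is an assumption): with LogBytesPerWord == 3,
// 256 >> 3 == 32, so only constant lengths below 32 words take this
// immediate variant; longer arrays use the register form above, which can
// reach the block-zeroing path inside zero_words.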
15022
15023 // ============================================================================
15024 // Overflow Math Instructions
15025
16336 %}
16337
16338 // Call Runtime Instruction without safepoint and with vector arguments
16339 instruct CallLeafDirectVector(method meth)
16340 %{
16341 match(CallLeafVector);
16342
16343 effect(USE meth);
16344
16345 ins_cost(CALL_COST);
16346
16347 format %{ "CALL, runtime leaf vector $meth" %}
16348
16349 ins_encode(aarch64_enc_java_to_runtime(meth));
16350
16351 ins_pipe(pipe_class_call);
16352 %}
16353
16354 // Call Runtime Instruction
16355
16356 instruct CallLeafNoFPDirect(method meth)
16357 %{
16358 match(CallLeafNoFP);
16359
16360 effect(USE meth);
16361
16362 ins_cost(CALL_COST);
16363
16364 format %{ "CALL, runtime leaf nofp $meth" %}
16365
16366 ins_encode( aarch64_enc_java_to_runtime(meth) );
16367
16368 ins_pipe(pipe_class_call);
16369 %}
16370
16371 // Tail Call; Jump from runtime stub to Java code.
16372 // Also known as an 'interprocedural jump'.
16373 // Target of jump will eventually return to caller.
16374 // TailJump below removes the return address.
16375 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16376 // emitted just above the TailCall which has reset rfp to the caller state.
16377 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
|
1637
1638 int MachCallDynamicJavaNode::ret_addr_offset()
1639 {
1640 return 16; // 4 instructions * 4 bytes: movz, movk, movk, bl
1641 }
1642
1643 int MachCallRuntimeNode::ret_addr_offset() {
1644 // for generated stubs the call will be
1645 // bl(addr)
1646 // or with far branches
1647 // bl(trampoline_stub)
1648 // for real runtime callouts it will be six instructions
1649 // see aarch64_enc_java_to_runtime
1650 // adr(rscratch2, retaddr)
1651 // str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
1652 // lea(rscratch1, RuntimeAddress(addr))
1653 // blr(rscratch1)
1654 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1655 if (cb) {
1656 return 1 * NativeInstruction::instruction_size;
1657 } else if (_entry_point == nullptr) {
1658 // See CallLeafNoFPIndirect
1659 return 1 * NativeInstruction::instruction_size;
1660 } else {
1661 return 6 * NativeInstruction::instruction_size;
1662 }
1663 }
1664
1665 //=============================================================================
1666
1667 #ifndef PRODUCT
1668 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1669 st->print("BREAKPOINT");
1670 }
1671 #endif
1672
1673 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1674 __ brk(0);
1675 }
1676
1677 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1678 return MachNode::size(ra_);
1679 }
1748 if (C->stub_function() == nullptr) {
1749 st->print("\n\t");
1750 st->print("ldr rscratch1, [guard]\n\t");
1751 st->print("dmb ishld\n\t");
1752 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1753 st->print("cmp rscratch1, rscratch2\n\t");
1754 st->print("b.eq skip");
1755 st->print("\n\t");
1756 st->print("blr #nmethod_entry_barrier_stub\n\t");
1757 st->print("b skip\n\t");
1758 st->print("guard: int\n\t");
1759 st->print("\n\t");
1760 st->print("skip:\n\t");
1761 }
1762 }
1763 #endif
1764
1765 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1766 Compile* C = ra_->C;
1767
1768
1769 __ verified_entry(C, 0);
1770
1771 if (C->stub_function() == nullptr) {
1772 __ entry_barrier();
1773 }
1774
1775 if (!Compile::current()->output()->in_scratch_emit_size()) {
1776 __ bind(*_verified_entry);
1777 }
1778
1779 if (VerifyStackAtCalls) {
1780 Unimplemented();
1781 }
1782
1783 C->output()->set_frame_complete(__ offset());
1784
1785 if (C->has_mach_constant_base_node()) {
1786 // NOTE: We set the table base offset here because users of the constant
1787 // table might be emitted before MachConstantBaseNode.
1788 ConstantTable& constant_table = C->output()->constant_table();
1789 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1790 }
1791 }
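// Editor's note (cross-reference only): frame construction is delegated to
// verified_entry(C, 0) above; MachVEPNode::emit below passes the non-zero
// sp_inc returned by unpack_inline_args to the same helper, and that
// extension is undone by remove_frame(framesize, needs_stack_repair) in
// MachEpilogNode::emit.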
1792
1793 int MachPrologNode::reloc() const
1794 {
1795 return 0;
1796 }
1797
1798 //=============================================================================
1799
1800 #ifndef PRODUCT
1801 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1802 Compile* C = ra_->C;
1803 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1804
1805 st->print("# pop frame %d\n\t",framesize);
1806
1807 if (framesize == 0) {
1808 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1809 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1810 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1811 st->print("add sp, sp, #%d\n\t", framesize);
1812 } else {
1815 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1816 }
1817 if (VM_Version::use_rop_protection()) {
1818 st->print("autiaz\n\t");
1819 st->print("ldr zr, [lr]\n\t");
1820 }
1821
1822 if (do_polling() && C->is_method_compilation()) {
1823 st->print("# test polling word\n\t");
1824 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1825 st->print("cmp sp, rscratch1\n\t");
1826 st->print("bhi #slow_path");
1827 }
1828 }
1829 #endif
1830
1831 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1832 Compile* C = ra_->C;
1833 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1834
1835 __ remove_frame(framesize, C->needs_stack_repair());
1836
1837 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1838 __ reserved_stack_check();
1839 }
1840
1841 if (do_polling() && C->is_method_compilation()) {
1842 Label dummy_label;
1843 Label* code_stub = &dummy_label;
1844 if (!C->output()->in_scratch_emit_size()) {
1845 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1846 C->output()->add_stub(stub);
1847 code_stub = &stub->entry();
1848 }
1849 __ relocate(relocInfo::poll_return_type);
1850 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1851 }
1852 }
1853
1854 int MachEpilogNode::reloc() const {
1855 // Return number of relocatable values contained in this instruction.
1856 return 1; // 1 for polling page.
1857 }
1858
1859 const Pipeline * MachEpilogNode::pipeline() const {
1860 return MachNode::pipeline_class();
1861 }
1862
1863 //=============================================================================
1864
1865 static enum RC rc_class(OptoReg::Name reg) {
1866
1867 if (reg == OptoReg::Bad) {
1868 return rc_bad;
1869 }
1870
1871 // we have 32 int registers * 2 halves
1872 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1873
2129 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2130 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2131 int reg = ra_->get_encode(this);
2132
2133 // This add will handle any 24-bit signed offset. 24 bits allows an
2134 // 8 megabyte stack frame.
2135 __ add(as_Register(reg), sp, offset);
2136 }
2137
2138 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2139 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2140 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2141
2142 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2143 return NativeInstruction::instruction_size;
2144 } else {
2145 return 2 * NativeInstruction::instruction_size;
2146 }
2147 }
2148
2149 //=============================================================================
2150 #ifndef PRODUCT
2151 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2152 {
2153 st->print_cr("# MachVEPNode");
2154 if (!_verified) {
2155 st->print_cr("\t load_class");
2156 } else {
2157 st->print_cr("\t unpack_inline_arg");
2158 }
2159 }
2160 #endif
2161
2162 void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
2163 {
2164 if (!_verified) {
2165 __ ic_check(1);
2166 } else {
2167 // TODO 8284443 Avoid creation of temporary frame
2168 if (ra_->C->stub_function() == nullptr) {
2169 __ verified_entry(ra_->C, 0);
2170 __ entry_barrier();
2171 int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
2172 __ remove_frame(framesize, false);
2173 }
2174 // Unpack inline type args passed as oop and then jump to
2175 // the verified entry point (skipping the unverified entry).
2176 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2177 // Emit code for verified entry and save increment for stack repair on return
2178 __ verified_entry(ra_->C, sp_inc);
2179 if (Compile::current()->output()->in_scratch_emit_size()) {
2180 Label dummy_verified_entry;
2181 __ b(dummy_verified_entry);
2182 } else {
2183 __ b(*_verified_entry);
2184 }
2185 }
2186 }
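// Editor's note (cross-reference only): for an unverified inline-type entry
// this builds a temporary frame, runs the entry barrier, tears the frame
// down, unpacks the scalarized arguments, and then branches to
// *_verified_entry, which MachPrologNode::emit binds after its own
// verified_entry/entry_barrier sequence.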
2187
2188 //=============================================================================
2189 #ifndef PRODUCT
2190 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2191 {
2192 st->print_cr("# MachUEPNode");
2193 if (UseCompressedClassPointers) {
2194 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2195 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2196 st->print_cr("\tcmpw rscratch1, r10");
2197 } else {
2198 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2199 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# klass");
2200 st->print_cr("\tcmp rscratch1, r10");
2201 }
2202 st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
2203 }
2204 #endif
2205
2206 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2207 {
2208 __ ic_check(InteriorEntryAlignment);
2209 }
2210
2211 // REQUIRED EMIT CODE
2212
2213 //=============================================================================
2214
2215 // Emit exception handler code.
2216 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
2217 {
2218 // mov rscratch1 #exception_blob_entry_point
2219 // br rscratch1
2220 // Note that the code buffer's insts_mark is always relative to insts.
2221 // That's why we must use the macroassembler to generate a handler.
2222 address base = __ start_a_stub(size_exception_handler());
2223 if (base == nullptr) {
2224 ciEnv::current()->record_failure("CodeCache is full");
2225 return 0; // CodeBuffer::expand failed
2226 }
2227 int offset = __ offset();
2228 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2229 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2230 __ end_a_stub();
3683 %}
3684
3685 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3686 int method_index = resolved_method_index(masm);
3687 address call = __ ic_call((address)$meth$$method, method_index);
3688 if (call == nullptr) {
3689 ciEnv::current()->record_failure("CodeCache is full");
3690 return;
3691 }
3692 __ post_call_nop();
3693 if (Compile::current()->max_vector_size() > 0) {
3694 __ reinitialize_ptrue();
3695 }
3696 %}
3697
3698 enc_class aarch64_enc_call_epilog() %{
3699 if (VerifyStackAtCalls) {
3700 // Check that stack depth is unchanged: find majik cookie on stack
3701 __ call_Unimplemented();
3702 }
3703 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
3704 // The last return value is not set by the callee but is used to pass the null marker to compiled code.
3705 // Search for the corresponding projection, get the register, and emit code that initializes it.
3706 uint con = (tf()->range_cc()->cnt() - 1);
3707 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3708 ProjNode* proj = fast_out(i)->as_Proj();
3709 if (proj->_con == con) {
3710 // Set null marker if r0 is non-null (a non-null value is returned buffered or scalarized)
3711 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3712 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
3713 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3714 __ cmp(r0, zr);
3715 __ cset(toReg, Assembler::NE);
3716 if (reg->is_stack()) {
3717 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3718 __ str(toReg, Address(sp, st_off));
3719 }
3720 break;
3721 }
3722 }
3723 if (return_value_is_used()) {
3724 // An inline type is returned as fields in multiple registers.
3725 // R0 either contains an oop if the inline type is buffered or a pointer
3726 // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
3727 // if the lowest bit is set to allow C2 to use the oop after null checking.
3728 // r0 &= (r0 & 1) - 1
3729 __ andr(rscratch1, r0, 0x1);
3730 __ sub(rscratch1, rscratch1, 0x1);
3731 __ andr(r0, r0, rscratch1);
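// Editor's worked example (illustrative values): a tagged InlineKlass*
// such as 0x...01 has the low bit set, so rscratch1 = 1 - 1 = 0 and r0 is
// cleared to nullptr; a buffered oop such as 0x...00 has the low bit
// clear, so rscratch1 = 0 - 1 = ~0 and r0 passes through unchanged.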
3732 }
3733 }
3734 %}
3735
3736 enc_class aarch64_enc_java_to_runtime(method meth) %{
3737 // some calls to generated routines (arraycopy code) are scheduled
3738 // by C2 as runtime calls. If so we can call them using a bl (they
3739 // will be in a reachable segment) otherwise we have to use a blr
3740 // which loads the absolute address into a register.
3741 address entry = (address)$meth$$method;
3742 CodeBlob *cb = CodeCache::find_blob(entry);
3743 if (cb) {
3744 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3745 if (call == nullptr) {
3746 ciEnv::current()->record_failure("CodeCache is full");
3747 return;
3748 }
3749 __ post_call_nop();
3750 } else {
3751 Label retaddr;
3752 // Make the anchor frame walkable
3753 __ adr(rscratch2, retaddr);
6823 instruct loadConL(iRegLNoSp dst, immL src)
6824 %{
6825 match(Set dst src);
6826
6827 ins_cost(INSN_COST);
6828 format %{ "mov $dst, $src\t# long" %}
6829
6830 ins_encode( aarch64_enc_mov_imm(dst, src) );
6831
6832 ins_pipe(ialu_imm);
6833 %}
6834
6835 // Load Pointer Constant
6836
6837 instruct loadConP(iRegPNoSp dst, immP con)
6838 %{
6839 match(Set dst con);
6840
6841 ins_cost(INSN_COST * 4);
6842 format %{
6843 "mov $dst, $con\t# ptr"
6844 %}
6845
6846 ins_encode(aarch64_enc_mov_p(dst, con));
6847
6848 ins_pipe(ialu_imm);
6849 %}
6850
6851 // Load Null Pointer Constant
6852
6853 instruct loadConP0(iRegPNoSp dst, immP0 con)
6854 %{
6855 match(Set dst con);
6856
6857 ins_cost(INSN_COST);
6858 format %{ "mov $dst, $con\t# nullptr ptr" %}
6859
6860 ins_encode(aarch64_enc_mov_p0(dst, con));
6861
6862 ins_pipe(ialu_imm);
6863 %}
8032 %}
8033
8034 // ============================================================================
8035 // Cast/Convert Instructions
8036
8037 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8038 match(Set dst (CastX2P src));
8039
8040 ins_cost(INSN_COST);
8041 format %{ "mov $dst, $src\t# long -> ptr" %}
8042
8043 ins_encode %{
8044 if ($dst$$reg != $src$$reg) {
8045 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8046 }
8047 %}
8048
8049 ins_pipe(ialu_reg);
8050 %}
8051
8052 instruct castI2N(iRegNNoSp dst, iRegI src) %{
8053 match(Set dst (CastI2N src));
8054
8055 ins_cost(INSN_COST);
8056 format %{ "mov $dst, $src\t# int -> narrow ptr" %}
8057
8058 ins_encode %{
8059 if ($dst$$reg != $src$$reg) {
8060 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8061 }
8062 %}
8063
8064 ins_pipe(ialu_reg);
8065 %}
8066
8067 instruct castN2X(iRegLNoSp dst, iRegN src) %{
8068 match(Set dst (CastP2X src));
8069
8070 ins_cost(INSN_COST);
8071 format %{ "mov $dst, $src\t# narrow ptr -> long" %}
8072
8073 ins_encode %{
8074 if ($dst$$reg != $src$$reg) {
8075 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8076 }
8077 %}
8078
8079 ins_pipe(ialu_reg);
8080 %}
8081
8082 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8083 match(Set dst (CastP2X src));
8084
8085 ins_cost(INSN_COST);
8086 format %{ "mov $dst, $src\t# ptr -> long" %}
8087
8088 ins_encode %{
8089 if ($dst$$reg != $src$$reg) {
8090 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8091 }
8092 %}
8093
8094 ins_pipe(ialu_reg);
8095 %}
8096
8097 // Convert oop into int for vectors alignment masking
8098 instruct convP2I(iRegINoSp dst, iRegP src) %{
8099 match(Set dst (ConvL2I (CastP2X src)));
8100
8101 ins_cost(INSN_COST);
15015
15016 match(Set dst (MoveL2D src));
15017
15018 effect(DEF dst, USE src);
15019
15020 ins_cost(INSN_COST);
15021
15022 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15023
15024 ins_encode %{
15025 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15026 %}
15027
15028 ins_pipe(fp_l2d);
15029
15030 %}
15031
15032 // ============================================================================
15033 // clearing of an array
15034
15035 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15036 %{
15037 match(Set dummy (ClearArray (Binary cnt base) zero));
15038 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15039
15040 ins_cost(4 * INSN_COST);
15041 format %{ "ClearArray $cnt, $base" %}
15042
15043 ins_encode %{
15044 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15045 if (tpc == nullptr) {
15046 ciEnv::current()->record_failure("CodeCache is full");
15047 return;
15048 }
15049 %}
15050
15051 ins_pipe(pipe_class_memory);
15052 %}
15053
15054 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15055 %{
15056 predicate(((ClearArrayNode*)n)->word_copy_only());
15057 match(Set dummy (ClearArray (Binary cnt base) val));
15058 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15059
15060 ins_cost(4 * INSN_COST);
15061 format %{ "ClearArray $cnt, $base, $val" %}
15062
15063 ins_encode %{
15064 __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15065 %}
15066
15067 ins_pipe(pipe_class_memory);
15068 %}
15069
15070 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15071 %{
15072 predicate((uint64_t)n->in(2)->get_long()
15073 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15074 && !((ClearArrayNode*)n)->word_copy_only());
15075 match(Set dummy (ClearArray cnt base));
15076 effect(TEMP temp, USE_KILL base, KILL cr);
15077
15078 ins_cost(4 * INSN_COST);
15079 format %{ "ClearArray $cnt, $base" %}
15080
15081 ins_encode %{
15082 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15083 if (tpc == nullptr) {
15084 ciEnv::current()->record_failure("CodeCache is full");
15085 return;
15086 }
15087 %}
15088
15089 ins_pipe(pipe_class_memory);
15090 %}
15091
15092 // ============================================================================
15093 // Overflow Math Instructions
15094
16405 %}
16406
16407 // Call Runtime Instruction without safepoint and with vector arguments
16408 instruct CallLeafDirectVector(method meth)
16409 %{
16410 match(CallLeafVector);
16411
16412 effect(USE meth);
16413
16414 ins_cost(CALL_COST);
16415
16416 format %{ "CALL, runtime leaf vector $meth" %}
16417
16418 ins_encode(aarch64_enc_java_to_runtime(meth));
16419
16420 ins_pipe(pipe_class_call);
16421 %}
16422
16423 // Call Runtime Instruction
16424
16425 // The entry point is null; the target register holds the address to call.
16426 instruct CallLeafNoFPIndirect(iRegP target)
16427 %{
16428 predicate(n->as_Call()->entry_point() == nullptr);
16429
16430 match(CallLeafNoFP target);
16431
16432 ins_cost(CALL_COST);
16433
16434 format %{ "CALL, runtime leaf nofp indirect $target" %}
16435
16436 ins_encode %{
16437 __ blr($target$$Register);
16438 %}
16439
16440 ins_pipe(pipe_class_call);
16441 %}
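// Editor's note (cross-reference only): the single blr above is why
// MachCallRuntimeNode::ret_addr_offset() returns
// 1 * NativeInstruction::instruction_size when _entry_point is null (the
// "See CallLeafNoFPIndirect" branch earlier in this file).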
16442
16443 instruct CallLeafNoFPDirect(method meth)
16444 %{
16445 predicate(n->as_Call()->entry_point() != nullptr);
16446
16447 match(CallLeafNoFP);
16448
16449 effect(USE meth);
16450
16451 ins_cost(CALL_COST);
16452
16453 format %{ "CALL, runtime leaf nofp $meth" %}
16454
16455 ins_encode( aarch64_enc_java_to_runtime(meth) );
16456
16457 ins_pipe(pipe_class_call);
16458 %}
16459
16460 // Tail Call; Jump from runtime stub to Java code.
16461 // Also known as an 'interprocedural jump'.
16462 // Target of jump will eventually return to caller.
16463 // TailJump below removes the return address.
16464 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16465 // emitted just above the TailCall which has reset rfp to the caller state.
16466 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
|