1673
// Offset in bytes from the start of a dynamic Java call sequence to the
// return address, i.e. the size of the emitted call sequence.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}

// Offset in bytes from the start of a runtime call sequence to the
// return address; depends on whether the target is inside the code cache.
int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   bl(addr)
  // or with far branches
  //   bl(trampoline_stub)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
  //   lea(rscratch1, RuntimeAddress(addr))
  //   blr(rscratch1)
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    // Target is a blob in the code cache: a single (possibly trampolined) bl.
    return 1 * NativeInstruction::instruction_size;
  } else {
    // Out-of-range runtime callout: six instructions (see comment above).
    return 6 * NativeInstruction::instruction_size;
  }
}
1697
1698 //=============================================================================
1699
#ifndef PRODUCT
// Debug-only pretty printer for the breakpoint pseudo-instruction.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// Emit a breakpoint as a single AArch64 brk #0 instruction.
void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // Let the shared MachNode machinery measure the emitted size.
  return MachNode::size(ra_);
}
1781 if (C->stub_function() == nullptr) {
1782 st->print("\n\t");
1783 st->print("ldr rscratch1, [guard]\n\t");
1784 st->print("dmb ishld\n\t");
1785 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1786 st->print("cmp rscratch1, rscratch2\n\t");
1787 st->print("b.eq skip");
1788 st->print("\n\t");
1789 st->print("blr #nmethod_entry_barrier_stub\n\t");
1790 st->print("b skip\n\t");
1791 st->print("guard: int\n\t");
1792 st->print("\n\t");
1793 st->print("skip:\n\t");
1794 }
1795 }
1796 #endif
1797
// Emit the method prolog: optional clinit barrier, stack-overflow bang,
// frame construction, and the nmethod entry barrier.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // n.b. frame size includes space for return pc and rfp
  const int framesize = C->output()->frame_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Class-initialization barrier: fall through only if the holder class
    // is initialized, otherwise tail-call the wrong-method stub.
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;

    __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
    __ bind(L_skip_barrier);
  }

  if (C->max_vector_size() > 0) {
    // Re-establish the all-true SVE predicate register on entry.
    __ reinitialize_ptrue();
  }

  // Bang the stack pages of the new frame up front so an overflow is
  // raised here rather than at an arbitrary point inside the method.
  int bangsize = C->output()->bang_size_in_bytes();
  if (C->output()->need_stack_bang(bangsize))
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (C->stub_function() == nullptr) {
    // nmethod entry barrier (not needed for stubs).
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    // Dummy labels for just measuring the code size
    Label dummy_slow_path;
    Label dummy_continuation;
    Label dummy_guard;
    Label* slow_path = &dummy_slow_path;
    Label* continuation = &dummy_continuation;
    Label* guard = &dummy_guard;
    if (!Compile::current()->output()->in_scratch_emit_size()) {
      // Use real labels from actual stub when not emitting code for the purpose of measuring its size
      C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
      Compile::current()->output()->add_stub(stub);
      slow_path = &stub->entry();
      continuation = &stub->continuation();
      guard = &stub->guard();
    }
    // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
    bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // The frame is fully built past this offset (needed for stack walking).
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  // The prolog contributes no relocatable values of its own.
  return 0;
}
1870
1871 //=============================================================================
1872
#ifndef PRODUCT
// Debug-only listing of the epilog: frame pop, optional ROP-protection
// authentication, and the return safepoint poll.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // Frame small enough for an immediate-offset ldp plus add.
    st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add sp, sp, #%d\n\t", framesize);
  } else {
    // NOTE(review): large-frame case — the listing of the sp adjustment
    // appears to be elided here; confirm against the emitted remove_frame.
    st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }
  if (VM_Version::use_rop_protection()) {
    // ROP protection: authenticate the return address before returning.
    st->print("autiaz\n\t");
    st->print("ldr zr, [lr]\n\t");
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# test polling word\n\t");
    st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
    st->print("cmp sp, rscratch1\n\t");
    st->print("bhi #slow_path");
  }
}
#endif

// Emit the method epilog: tear down the frame, check reserved stack
// access, and perform the return safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real out-of-line slow-path stub only when actually emitting
      // (a dummy label suffices while measuring code size).
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
1940
1941 //=============================================================================
1942
1943 static enum RC rc_class(OptoReg::Name reg) {
1944
1945 if (reg == OptoReg::Bad) {
1946 return rc_bad;
1947 }
1948
1949 // we have 32 int registers * 2 halves
1950 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1951
// Materialize the stack address of the lock box: reg = sp + offset.
void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  // This add will handle any 24-bit signed offset. 24 bits allows an
  // 8 megabyte stack frame.
  __ add(as_Register(reg), sp, offset);
}
2218
2219 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2220 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2221 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2222
2223 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2224 return NativeInstruction::instruction_size;
2225 } else {
2226 return 2 * NativeInstruction::instruction_size;
2227 }
2228 }
2229
2230 //=============================================================================
2231
#ifndef PRODUCT
// Debug-only listing of the unverified entry point: compare the receiver's
// compressed klass against the inline-cache data, branch to the IC miss
// stub on mismatch.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
  st->print_cr("\tcmpw rscratch1, r10");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
#endif

// Emit the inline-cache check for the unverified entry point.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  // Variable size; let the shared MachNode machinery measure it.
  return MachNode::size(ra_);
}
2252
2253 // REQUIRED EMIT CODE
2254
2255 //=============================================================================
2256
2257 // Emit deopt handler code.
2258 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
2259 {
2260 // Note that the code buffer's insts_mark is always relative to insts.
2261 // That's why we must use the macroassembler to generate a handler.
2262 address base = __ start_a_stub(size_deopt_handler());
2263 if (base == nullptr) {
2264 ciEnv::current()->record_failure("CodeCache is full");
2265 return 0; // CodeBuffer::expand failed
2266 }
2267
2268 int offset = __ offset();
2269 Label start;
2270 __ bind(start);
2271 __ far_call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2272
3649 %}
3650
  // Emit an inline-cache call to a dynamically dispatched Java method.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    int method_index = resolved_method_index(masm);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == nullptr) {
      // Emission failed: bail out of this compile.
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
    __ post_call_nop();
    if (Compile::current()->max_vector_size() > 0) {
      // The callee may have clobbered the SVE predicate register.
      __ reinitialize_ptrue();
    }
  %}

  // Post-call verification hook; a no-op unless VerifyStackAtCalls is set.
  enc_class aarch64_enc_call_epilog() %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3670
3671 enc_class aarch64_enc_java_to_runtime(method meth) %{
3672 // some calls to generated routines (arraycopy code) are scheduled
3673 // by C2 as runtime calls. if so we can call them using a br (they
3674 // will be in a reachable segment) otherwise we have to use a blr
3675 // which loads the absolute address into a register.
3676 address entry = (address)$meth$$method;
3677 CodeBlob *cb = CodeCache::find_blob(entry);
3678 if (cb) {
3679 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3680 if (call == nullptr) {
3681 ciEnv::current()->record_failure("CodeCache is full");
3682 return;
3683 }
3684 __ post_call_nop();
3685 } else {
3686 Label retaddr;
3687 // Make the anchor frame walkable
3688 __ adr(rscratch2, retaddr);
// Integer constant strictly greater than one.
operand immI_gt_1()
%{
  predicate(n->get_int() > 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant less than or equal to four.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The integer constant 16.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The integer constant 24.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3977
// Load a 64-bit integer constant into a register.
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
6848
6849 // Load Pointer Constant
6850
// Load a pointer constant into a register.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  // Note: no trailing "\n\t" — this is the only line of the debug format,
  // so a continuation separator would just garble the listing (matches the
  // other definition of this instruct in this file).
  format %{
    "mov $dst, $con\t# ptr"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}
6864
6865 // Load Null Pointer Constant
6866
// Load the null pointer constant (zero) into a register.
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $con\t# nullptr ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8073 %}
8074
8075 // ============================================================================
8076 // Cast/Convert Instructions
8077
// Reinterpret a long as a pointer; a plain register move (or nothing
// when the allocator assigned the same register).
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    // Move only if the register allocator did not coalesce dst and src.
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret a pointer as a long; the mirror image of castX2P.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    // Move only if the register allocator did not coalesce dst and src.
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8107
8108 // Convert oop into int for vectors alignment masking
8109 instruct convP2I(iRegINoSp dst, iRegP src) %{
8110 match(Set dst (ConvL2I (CastP2X src)));
8111
8112 ins_cost(INSN_COST);
14061
14062 match(Set dst (MoveL2D src));
14063
14064 effect(DEF dst, USE src);
14065
14066 ins_cost(INSN_COST);
14067
14068 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14069
14070 ins_encode %{
14071 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14072 %}
14073
14074 ins_pipe(fp_l2d);
14075
14076 %}
14077
14078 // ============================================================================
14079 // clearing of an array
14080
// Zero an array given a variable word count in r11 and base in r10.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    // zero_words may emit a trampoline call; nullptr signals emission failure.
    address tpc = __ zero_words($base$$Register, $cnt$$Register);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}

// Zero an array with a small constant word count (below the block-zeroing
// threshold, per the predicate); the count is folded into the code.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
  predicate((uint64_t)n->in(2)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
14120
14121 // ============================================================================
14122 // Overflow Math Instructions
14123
15400 %}
15401
15402 // Call Runtime Instruction without safepoint and with vector arguments
// Call Runtime Instruction without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf vector $meth" %}

  ins_encode(aarch64_enc_java_to_runtime(meth));

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

// Leaf runtime call that does not use or preserve FP state.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15434
15435 // Tail Call; Jump from runtime stub to Java code.
15436 // Also known as an 'interprocedural jump'.
15437 // Target of jump will eventually return to caller.
15438 // TailJump below removes the return address.
15439 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
15440 // emitted just above the TailCall which has reset rfp to the caller state.
15441 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
|
1673
// Offset in bytes from the start of a dynamic Java call sequence to the
// return address, i.e. the size of the emitted call sequence.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}

// Offset in bytes from the start of a runtime call sequence to the
// return address; depends on whether the target is inside the code cache.
int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   bl(addr)
  // or with far branches
  //   bl(trampoline_stub)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
  //   lea(rscratch1, RuntimeAddress(addr))
  //   blr(rscratch1)
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    // Target is a blob in the code cache: a single (possibly trampolined) bl.
    return 1 * NativeInstruction::instruction_size;
  } else if (_entry_point == nullptr) {
    // See CallLeafNoFPIndirect
    // (indirect leaf call through a register: a single blr).
    return 1 * NativeInstruction::instruction_size;
  } else {
    // Out-of-range runtime callout: six instructions (see comment above).
    return 6 * NativeInstruction::instruction_size;
  }
}
1700
1701 //=============================================================================
1702
1703 #ifndef PRODUCT
1704 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1705 st->print("BREAKPOINT");
1706 }
1707 #endif
1708
1709 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1710 __ brk(0);
1711 }
1712
1713 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1714 return MachNode::size(ra_);
1715 }
1784 if (C->stub_function() == nullptr) {
1785 st->print("\n\t");
1786 st->print("ldr rscratch1, [guard]\n\t");
1787 st->print("dmb ishld\n\t");
1788 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1789 st->print("cmp rscratch1, rscratch2\n\t");
1790 st->print("b.eq skip");
1791 st->print("\n\t");
1792 st->print("blr #nmethod_entry_barrier_stub\n\t");
1793 st->print("b skip\n\t");
1794 st->print("guard: int\n\t");
1795 st->print("\n\t");
1796 st->print("skip:\n\t");
1797 }
1798 }
1799 #endif
1800
// Emit the method prolog: verified entry (frame build), nmethod entry
// barrier, and the verified-entry label used by MachVEPNode.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Build the frame; the 0 means no extra sp increment here — presumably
  // the increment is only needed for scalarized inline-type entries
  // (see MachVEPNode::emit, which passes sp_inc) — TODO confirm.
  __ verified_entry(C, 0);

  if (C->stub_function() == nullptr) {
    // nmethod entry barrier (not needed for stubs).
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the real verified-entry label only when actually emitting.
    __ bind(*_verified_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // The frame is fully built past this offset (needed for stack walking).
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

int MachPrologNode::reloc() const
{
  // The prolog contributes no relocatable values of its own.
  return 0;
}
1833
1834 //=============================================================================
1835
#ifndef PRODUCT
// Debug-only listing of the epilog: frame pop, optional ROP-protection
// authentication, and the return safepoint poll.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // Frame small enough for an immediate-offset ldp plus add.
    st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add sp, sp, #%d\n\t", framesize);
  } else {
    // NOTE(review): large-frame case — the listing of the sp adjustment
    // appears to be elided here; confirm against the emitted remove_frame.
    st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }
  if (VM_Version::use_rop_protection()) {
    // ROP protection: authenticate the return address before returning.
    st->print("autiaz\n\t");
    st->print("ldr zr, [lr]\n\t");
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# test polling word\n\t");
    st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
    st->print("cmp sp, rscratch1\n\t");
    st->print("bhi #slow_path");
  }
}
#endif

// Emit the method epilog: tear down the frame (undoing any stack-repair
// extension), check reserved stack access, and perform the return poll.
void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real out-of-line slow-path stub only when actually emitting
      // (a dummy label suffices while measuring code size).
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
1898
1899 //=============================================================================
1900
1901 static enum RC rc_class(OptoReg::Name reg) {
1902
1903 if (reg == OptoReg::Bad) {
1904 return rc_bad;
1905 }
1906
1907 // we have 32 int registers * 2 halves
1908 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1909
// Materialize the stack address of the lock box: reg = sp + offset.
void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  // This add will handle any 24-bit signed offset. 24 bits allows an
  // 8 megabyte stack frame.
  __ add(as_Register(reg), sp, offset);
}
2176
2177 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2178 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2179 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2180
2181 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2182 return NativeInstruction::instruction_size;
2183 } else {
2184 return 2 * NativeInstruction::instruction_size;
2185 }
2186 }
2187
//=============================================================================
#ifndef PRODUCT
// Debug-only listing of the inline-type (value) entry point.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachVEPNode");
  if (!_verified) {
    st->print_cr("\t load_class");
  } else {
    st->print_cr("\t unpack_inline_arg");
  }
}
#endif

// Emit the inline-type entry point. The unverified flavor is just the
// inline-cache check; the verified flavor unpacks scalarized inline-type
// arguments and branches to the verified entry bound by MachPrologNode.
void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
{
  if (!_verified) {
    __ ic_check(1);
  } else {
    if (ra_->C->stub_function() == nullptr) {
      // Emit the entry barrier in a temporary frame before unpacking because
      // it can deopt, which would require packing the scalarized args again.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
      __ remove_frame(framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Size-measurement pass: branch to a same-sized dummy label.
      Label dummy_verified_entry;
      __ b(dummy_verified_entry);
    } else {
      __ b(*_verified_entry);
    }
  }
}
2227
2228 //=============================================================================
#ifndef PRODUCT
// Debug-only listing of the unverified entry point: compare the receiver's
// compressed klass against the inline-cache data, branch to the IC miss
// stub on mismatch.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
  st->print_cr("\tcmpw rscratch1, r10");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
#endif

// Emit the inline-cache check for the unverified entry point.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2244
2245 // REQUIRED EMIT CODE
2246
2247 //=============================================================================
2248
2249 // Emit deopt handler code.
2250 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
2251 {
2252 // Note that the code buffer's insts_mark is always relative to insts.
2253 // That's why we must use the macroassembler to generate a handler.
2254 address base = __ start_a_stub(size_deopt_handler());
2255 if (base == nullptr) {
2256 ciEnv::current()->record_failure("CodeCache is full");
2257 return 0; // CodeBuffer::expand failed
2258 }
2259
2260 int offset = __ offset();
2261 Label start;
2262 __ bind(start);
2263 __ far_call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2264
3641 %}
3642
  // Emit an inline-cache call to a dynamically dispatched Java method.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    int method_index = resolved_method_index(masm);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == nullptr) {
      // Emission failed: bail out of this compile.
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
    __ post_call_nop();
    if (Compile::current()->max_vector_size() > 0) {
      // The callee may have clobbered the SVE predicate register.
      __ reinitialize_ptrue();
    }
  %}

  // Post-call fixups: optional stack verification, and null-marker /
  // oop-vs-InlineKlass handling for inline types returned as fields.
  enc_class aarch64_enc_call_epilog() %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initializes it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if r0 is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ cmp(r0, zr);
          __ cset(toReg, Assembler::NE);
          if (reg->is_stack()) {
            // The marker's home is a stack slot: spill it there.
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ str(toReg, Address(sp, st_off));
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // R0 either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // r0 &= (r0 & 1) - 1
        __ andr(rscratch1, r0, 0x1);
        __ sub(rscratch1, rscratch1, 0x1);
        __ andr(r0, r0, rscratch1);
      }
    }
  %}
3693
3694 enc_class aarch64_enc_java_to_runtime(method meth) %{
3695 // some calls to generated routines (arraycopy code) are scheduled
3696 // by C2 as runtime calls. if so we can call them using a br (they
3697 // will be in a reachable segment) otherwise we have to use a blr
3698 // which loads the absolute address into a register.
3699 address entry = (address)$meth$$method;
3700 CodeBlob *cb = CodeCache::find_blob(entry);
3701 if (cb) {
3702 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3703 if (call == nullptr) {
3704 ciEnv::current()->record_failure("CodeCache is full");
3705 return;
3706 }
3707 __ post_call_nop();
3708 } else {
3709 Label retaddr;
3710 // Make the anchor frame walkable
3711 __ adr(rscratch2, retaddr);
// Integer constant strictly greater than one.
operand immI_gt_1()
%{
  predicate(n->get_int() > 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant less than or equal to four.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The integer constant 4.
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The integer constant 16.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The integer constant 24.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4010
// Load a 64-bit integer constant into a register.
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov $dst, $con\t# ptr"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $con\t# nullptr ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8106 %}
8107
8108 // ============================================================================
8109 // Cast/Convert Instructions
8110
// Reinterpret a long as a pointer; a plain register move (or nothing
// when the allocator assigned the same register).
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    // Move only if the register allocator did not coalesce dst and src.
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret an int as a narrow pointer; a plain register move.
instruct castI2N(iRegNNoSp dst, iRegI src) %{
  match(Set dst (CastI2N src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int -> narrow ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret a narrow pointer as a long.
// Note: matches the CastP2X ideal node with a narrow-oop source.
instruct castN2X(iRegLNoSp dst, iRegN src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret a pointer as a long; the mirror image of castX2P.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8170
8171 // Convert oop into int for vectors alignment masking
8172 instruct convP2I(iRegINoSp dst, iRegP src) %{
8173 match(Set dst (ConvL2I (CastP2X src)));
8174
8175 ins_cost(INSN_COST);
14124
14125 match(Set dst (MoveL2D src));
14126
14127 effect(DEF dst, USE src);
14128
14129 ins_cost(INSN_COST);
14130
14131 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14132
14133 ins_encode %{
14134 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14135 %}
14136
14137 ins_pipe(fp_l2d);
14138
14139 %}
14140
14141 // ============================================================================
14142 // clearing of an array
14143
// Zero an array given a variable word count in r11 and base in r10;
// matched only when the fill value is the constant zero.
instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray (Binary cnt base) zero));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    // zero_words may emit a trampoline call; nullptr signals emission failure.
    address tpc = __ zero_words($base$$Register, $cnt$$Register);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}

// Fill an array with an arbitrary word value held in a register.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base, $val" %}

  ins_encode %{
    __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Zero an array with a small constant word count (below the block-zeroing
// threshold, per the predicate); the count is folded into the code.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
  predicate((uint64_t)n->in(2)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
            && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
14200
14201 // ============================================================================
14202 // Overflow Math Instructions
14203
15480 %}
15481
15482 // Call Runtime Instruction without safepoint and with vector arguments
// Call Runtime Instruction without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf vector $meth" %}

  ins_encode(aarch64_enc_java_to_runtime(meth));

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

// entry point is null, target holds the address to call
// (indirect leaf call through a register, emitted as a single blr;
// see MachCallRuntimeNode::ret_addr_offset).
instruct CallLeafNoFPIndirect(iRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);

  match(CallLeafNoFP target);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp indirect $target" %}

  ins_encode %{
    __ blr($target$$Register);
  %}

  ins_pipe(pipe_class_call);
%}

// Direct leaf runtime call (known entry point), no FP state involved.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);

  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15534
15535 // Tail Call; Jump from runtime stub to Java code.
15536 // Also known as an 'interprocedural jump'.
15537 // Target of jump will eventually return to caller.
15538 // TailJump below removes the return address.
15539 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
15540 // emitted just above the TailCall which has reset rfp to the caller state.
15541 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
|