1667
1668 int MachCallDynamicJavaNode::ret_addr_offset()
1669 {
1670 return 16; // movz, movk, movk, bl
1671 }
1672
1673 int MachCallRuntimeNode::ret_addr_offset() {
1674 // for generated stubs the call will be
1675 // bl(addr)
1676 // or with far branches
1677 // bl(trampoline_stub)
1678 // for real runtime callouts it will be six instructions
1679 // see aarch64_enc_java_to_runtime
1680 // adr(rscratch2, retaddr)
1681 // str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
1682 // lea(rscratch1, RuntimeAddress(addr)
1683 // blr(rscratch1)
1684 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1685 if (cb) {
1686 return 1 * NativeInstruction::instruction_size;
1687 } else {
1688 return 6 * NativeInstruction::instruction_size;
1689 }
1690 }
1691
1692 //=============================================================================
1693
1694 #ifndef PRODUCT
1695 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1696 st->print("BREAKPOINT");
1697 }
1698 #endif
1699
1700 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1701 __ brk(0);
1702 }
1703
1704 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1705 return MachNode::size(ra_);
1706 }
1775 if (C->stub_function() == nullptr) {
1776 st->print("\n\t");
1777 st->print("ldr rscratch1, [guard]\n\t");
1778 st->print("dmb ishld\n\t");
1779 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1780 st->print("cmp rscratch1, rscratch2\n\t");
1781 st->print("b.eq skip");
1782 st->print("\n\t");
1783 st->print("blr #nmethod_entry_barrier_stub\n\t");
1784 st->print("b skip\n\t");
1785 st->print("guard: int\n\t");
1786 st->print("\n\t");
1787 st->print("skip:\n\t");
1788 }
1789 }
1790 #endif
1791
1792 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1793 Compile* C = ra_->C;
1794
1795 // n.b. frame size includes space for return pc and rfp
1796 const int framesize = C->output()->frame_size_in_bytes();
1797
1798 if (C->clinit_barrier_on_entry()) {
1799 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1800
1801 Label L_skip_barrier;
1802
1803 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1804 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1805 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1806 __ bind(L_skip_barrier);
1807 }
1808
1809 if (C->max_vector_size() > 0) {
1810 __ reinitialize_ptrue();
1811 }
1812
1813 int bangsize = C->output()->bang_size_in_bytes();
1814 if (C->output()->need_stack_bang(bangsize))
1815 __ generate_stack_overflow_check(bangsize);
1816
1817 __ build_frame(framesize);
1818
1819 if (C->stub_function() == nullptr) {
1820 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1821 // Dummy labels for just measuring the code size
1822 Label dummy_slow_path;
1823 Label dummy_continuation;
1824 Label dummy_guard;
1825 Label* slow_path = &dummy_slow_path;
1826 Label* continuation = &dummy_continuation;
1827 Label* guard = &dummy_guard;
1828 if (!Compile::current()->output()->in_scratch_emit_size()) {
1829 // Use real labels from actual stub when not emitting code for the purpose of measuring its size
1830 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
1831 Compile::current()->output()->add_stub(stub);
1832 slow_path = &stub->entry();
1833 continuation = &stub->continuation();
1834 guard = &stub->guard();
1835 }
1836 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
1837 bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
1838 }
1839
1840 if (VerifyStackAtCalls) {
1841 Unimplemented();
1842 }
1843
1844 C->output()->set_frame_complete(__ offset());
1845
1846 if (C->has_mach_constant_base_node()) {
1847 // NOTE: We set the table base offset here because users might be
1848 // emitted before MachConstantBaseNode.
1849 ConstantTable& constant_table = C->output()->constant_table();
1850 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1851 }
1852 }
1853
1854 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1855 {
1856 return MachNode::size(ra_); // too many variables; just compute it
1857 // the hard way
1858 }
1859
1860 int MachPrologNode::reloc() const
1861 {
1862 return 0;
1863 }
1864
1865 //=============================================================================
1866
1867 #ifndef PRODUCT
1868 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1869 Compile* C = ra_->C;
1870 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1871
1872 st->print("# pop frame %d\n\t",framesize);
1873
1874 if (framesize == 0) {
1875 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1876 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1877 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1878 st->print("add sp, sp, #%d\n\t", framesize);
1879 } else {
1882 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1883 }
1884 if (VM_Version::use_rop_protection()) {
1885 st->print("autiaz\n\t");
1886 st->print("ldr zr, [lr]\n\t");
1887 }
1888
1889 if (do_polling() && C->is_method_compilation()) {
1890 st->print("# test polling word\n\t");
1891 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1892 st->print("cmp sp, rscratch1\n\t");
1893 st->print("bhi #slow_path");
1894 }
1895 }
1896 #endif
1897
1898 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1899 Compile* C = ra_->C;
1900 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1901
1902 __ remove_frame(framesize);
1903
1904 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1905 __ reserved_stack_check();
1906 }
1907
1908 if (do_polling() && C->is_method_compilation()) {
1909 Label dummy_label;
1910 Label* code_stub = &dummy_label;
1911 if (!C->output()->in_scratch_emit_size()) {
1912 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1913 C->output()->add_stub(stub);
1914 code_stub = &stub->entry();
1915 }
1916 __ relocate(relocInfo::poll_return_type);
1917 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1918 }
1919 }
1920
1921 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1922 // Variable size. Determine dynamically.
1923 return MachNode::size(ra_);
1924 }
1925
1926 int MachEpilogNode::reloc() const {
1927 // Return number of relocatable values contained in this instruction.
1928 return 1; // 1 for polling page.
1929 }
1930
1931 const Pipeline * MachEpilogNode::pipeline() const {
1932 return MachNode::pipeline_class();
1933 }
1934
1935 //=============================================================================
1936
1937 static enum RC rc_class(OptoReg::Name reg) {
1938
1939 if (reg == OptoReg::Bad) {
1940 return rc_bad;
1941 }
1942
1943 // we have 32 int registers * 2 halves
1944 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1945
2204 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2205 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2206 int reg = ra_->get_encode(this);
2207
2208 // This add will handle any 24-bit signed offset. 24 bits allows an
2209 // 8 megabyte stack frame.
2210 __ add(as_Register(reg), sp, offset);
2211 }
2212
2213 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2214 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2215 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2216
2217 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2218 return NativeInstruction::instruction_size;
2219 } else {
2220 return 2 * NativeInstruction::instruction_size;
2221 }
2222 }
2223
2224 //=============================================================================
2225
2226 #ifndef PRODUCT
2227 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2228 {
2229 st->print_cr("# MachUEPNode");
2230 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2231 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2232 st->print_cr("\tcmpw rscratch1, r10");
2233 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2234 }
2235 #endif
2236
2237 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2238 {
2239 __ ic_check(InteriorEntryAlignment);
2240 }
2241
2242 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2243 {
2244 return MachNode::size(ra_);
2245 }
2246
2247 // REQUIRED EMIT CODE
2248
2249 //=============================================================================
2250
2251 // Emit deopt handler code.
2252 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
2253 {
2254 // Note that the code buffer's insts_mark is always relative to insts.
2255 // That's why we must use the macroassembler to generate a handler.
2256 address base = __ start_a_stub(size_deopt_handler());
2257 if (base == nullptr) {
2258 ciEnv::current()->record_failure("CodeCache is full");
2259 return 0; // CodeBuffer::expand failed
2260 }
2261
2262 int offset = __ offset();
2263 Label start;
2264 __ bind(start);
2265 __ far_call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2266
3670 %}
3671
3672 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3673 int method_index = resolved_method_index(masm);
3674 address call = __ ic_call((address)$meth$$method, method_index);
3675 if (call == nullptr) {
3676 ciEnv::current()->record_failure("CodeCache is full");
3677 return;
3678 }
3679 __ post_call_nop();
3680 if (Compile::current()->max_vector_size() > 0) {
3681 __ reinitialize_ptrue();
3682 }
3683 %}
3684
3685 enc_class aarch64_enc_call_epilog() %{
3686 if (VerifyStackAtCalls) {
3687 // Check that stack depth is unchanged: find majik cookie on stack
3688 __ call_Unimplemented();
3689 }
3690 %}
3691
3692 enc_class aarch64_enc_java_to_runtime(method meth) %{
3693 // some calls to generated routines (arraycopy code) are scheduled
3694 // by C2 as runtime calls. if so we can call them using a br (they
3695 // will be in a reachable segment) otherwise we have to use a blr
3696 // which loads the absolute address into a register.
3697 address entry = (address)$meth$$method;
3698 CodeBlob *cb = CodeCache::find_blob(entry);
3699 if (cb) {
3700 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3701 if (call == nullptr) {
3702 ciEnv::current()->record_failure("CodeCache is full");
3703 return;
3704 }
3705 __ post_call_nop();
3706 } else {
3707 Label retaddr;
3708 // Make the anchor frame walkable
3709 __ adr(rscratch2, retaddr);
// Int constant operand: matches a ConI whose value is greater than 1.
operand immI_gt_1()
%{
  predicate(n->get_int() > 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3968
// Int constant operand: matches a ConI whose value is less than or equal to 4.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3978
// Int constant operand: matches a ConI whose value is exactly 16.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3988
// Int constant operand: matches a ConI whose value is exactly 24.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3998
8095 %}
8096
8097 // ============================================================================
8098 // Cast/Convert Instructions
8099
// CastX2P (long -> ptr): a register move that is elided when source and
// destination were assigned the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    const int dst_enc = $dst$$reg;
    const int src_enc = $src$$reg;
    if (dst_enc != src_enc) {
      __ mov(as_Register(dst_enc), as_Register(src_enc));
    }
  %}

  ins_pipe(ialu_reg);
%}
8114
// CastP2X (ptr -> long): a register move that is elided when source and
// destination were assigned the same register.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    const int dst_enc = $dst$$reg;
    const int src_enc = $src$$reg;
    if (dst_enc != src_enc) {
      __ mov(as_Register(dst_enc), as_Register(src_enc));
    }
  %}

  ins_pipe(ialu_reg);
%}
8129
8130 // Convert oop into int for vectors alignment masking
8131 instruct convP2I(iRegINoSp dst, iRegP src) %{
8132 match(Set dst (ConvL2I (CastP2X src)));
8133
8134 ins_cost(INSN_COST);
14083
14084 match(Set dst (MoveL2D src));
14085
14086 effect(DEF dst, USE src);
14087
14088 ins_cost(INSN_COST);
14089
14090 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14091
14092 ins_encode %{
14093 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14094 %}
14095
14096 ins_pipe(fp_l2d);
14097
14098 %}
14099
14100 // ============================================================================
14101 // clearing of an array
14102
// ClearArray with count and base both in registers. Both inputs and the
// flags are killed. A null result from zero_words is recorded as a
// compilation failure.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address zeroing_result = __ zero_words($base$$Register, $cnt$$Register);
    if (zeroing_result == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
14121
// ClearArray with a constant count. Selected only when the count, compared
// as unsigned, is below BlockZeroingLowLimit >> LogBytesPerWord. A null
// result from zero_words is recorded as a compilation failure.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
  predicate((uint64_t)n->in(2)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address zeroing_result = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
    if (zeroing_result == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
14142
14143 // ============================================================================
14144 // Overflow Math Instructions
14145
14146 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15422 %}
15423
15424 // Call Runtime Instruction without safepoint and with vector arguments
// Matches CallLeafVector and encodes it with aarch64_enc_java_to_runtime.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf vector $meth" %}

  ins_encode(aarch64_enc_java_to_runtime(meth));

  ins_pipe(pipe_class_call);
%}
15439
15440 // Call Runtime Instruction
15441
// Matches CallLeafNoFP and encodes it with aarch64_enc_java_to_runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15456
15457 // Tail Call; Jump from runtime stub to Java code.
15458 // Also known as an 'interprocedural jump'.
15459 // Target of jump will eventually return to caller.
15460 // TailJump below removes the return address.
15461 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
15462 // emitted just above the TailCall which has reset rfp to the caller state.
15463 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
|
1667
1668 int MachCallDynamicJavaNode::ret_addr_offset()
1669 {
1670 return 16; // movz, movk, movk, bl
1671 }
1672
1673 int MachCallRuntimeNode::ret_addr_offset() {
1674 // for generated stubs the call will be
1675 // bl(addr)
1676 // or with far branches
1677 // bl(trampoline_stub)
1678 // for real runtime callouts it will be six instructions
1679 // see aarch64_enc_java_to_runtime
1680 // adr(rscratch2, retaddr)
1681 // str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
1682 // lea(rscratch1, RuntimeAddress(addr)
1683 // blr(rscratch1)
1684 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1685 if (cb) {
1686 return 1 * NativeInstruction::instruction_size;
1687 } else if (_entry_point == nullptr) {
1688 // See CallLeafNoFPIndirect
1689 return 1 * NativeInstruction::instruction_size;
1690 } else {
1691 return 6 * NativeInstruction::instruction_size;
1692 }
1693 }
1694
1695 //=============================================================================
1696
1697 #ifndef PRODUCT
1698 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1699 st->print("BREAKPOINT");
1700 }
1701 #endif
1702
1703 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1704 __ brk(0);
1705 }
1706
1707 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1708 return MachNode::size(ra_);
1709 }
1778 if (C->stub_function() == nullptr) {
1779 st->print("\n\t");
1780 st->print("ldr rscratch1, [guard]\n\t");
1781 st->print("dmb ishld\n\t");
1782 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1783 st->print("cmp rscratch1, rscratch2\n\t");
1784 st->print("b.eq skip");
1785 st->print("\n\t");
1786 st->print("blr #nmethod_entry_barrier_stub\n\t");
1787 st->print("b skip\n\t");
1788 st->print("guard: int\n\t");
1789 st->print("\n\t");
1790 st->print("skip:\n\t");
1791 }
1792 }
1793 #endif
1794
1795 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1796 Compile* C = ra_->C;
1797
1798
1799 __ verified_entry(C, 0);
1800
1801 if (C->stub_function() == nullptr) {
1802 __ entry_barrier();
1803 }
1804
1805 if (!Compile::current()->output()->in_scratch_emit_size()) {
1806 __ bind(*_verified_entry);
1807 }
1808
1809 if (VerifyStackAtCalls) {
1810 Unimplemented();
1811 }
1812
1813 C->output()->set_frame_complete(__ offset());
1814
1815 if (C->has_mach_constant_base_node()) {
1816 // NOTE: We set the table base offset here because users might be
1817 // emitted before MachConstantBaseNode.
1818 ConstantTable& constant_table = C->output()->constant_table();
1819 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1820 }
1821 }
1822
1823 int MachPrologNode::reloc() const
1824 {
1825 return 0;
1826 }
1827
1828 //=============================================================================
1829
1830 #ifndef PRODUCT
1831 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1832 Compile* C = ra_->C;
1833 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1834
1835 st->print("# pop frame %d\n\t",framesize);
1836
1837 if (framesize == 0) {
1838 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1839 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1840 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1841 st->print("add sp, sp, #%d\n\t", framesize);
1842 } else {
1845 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1846 }
1847 if (VM_Version::use_rop_protection()) {
1848 st->print("autiaz\n\t");
1849 st->print("ldr zr, [lr]\n\t");
1850 }
1851
1852 if (do_polling() && C->is_method_compilation()) {
1853 st->print("# test polling word\n\t");
1854 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1855 st->print("cmp sp, rscratch1\n\t");
1856 st->print("bhi #slow_path");
1857 }
1858 }
1859 #endif
1860
1861 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1862 Compile* C = ra_->C;
1863 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1864
1865 __ remove_frame(framesize, C->needs_stack_repair());
1866
1867 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1868 __ reserved_stack_check();
1869 }
1870
1871 if (do_polling() && C->is_method_compilation()) {
1872 Label dummy_label;
1873 Label* code_stub = &dummy_label;
1874 if (!C->output()->in_scratch_emit_size()) {
1875 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1876 C->output()->add_stub(stub);
1877 code_stub = &stub->entry();
1878 }
1879 __ relocate(relocInfo::poll_return_type);
1880 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1881 }
1882 }
1883
1884 int MachEpilogNode::reloc() const {
1885 // Return number of relocatable values contained in this instruction.
1886 return 1; // 1 for polling page.
1887 }
1888
1889 const Pipeline * MachEpilogNode::pipeline() const {
1890 return MachNode::pipeline_class();
1891 }
1892
1893 //=============================================================================
1894
1895 static enum RC rc_class(OptoReg::Name reg) {
1896
1897 if (reg == OptoReg::Bad) {
1898 return rc_bad;
1899 }
1900
1901 // we have 32 int registers * 2 halves
1902 int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
1903
2162 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2163 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2164 int reg = ra_->get_encode(this);
2165
2166 // This add will handle any 24-bit signed offset. 24 bits allows an
2167 // 8 megabyte stack frame.
2168 __ add(as_Register(reg), sp, offset);
2169 }
2170
2171 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2172 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2173 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2174
2175 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2176 return NativeInstruction::instruction_size;
2177 } else {
2178 return 2 * NativeInstruction::instruction_size;
2179 }
2180 }
2181
2182 ///=============================================================================
2183 #ifndef PRODUCT
2184 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2185 {
2186 st->print_cr("# MachVEPNode");
2187 if (!_verified) {
2188 st->print_cr("\t load_class");
2189 } else {
2190 st->print_cr("\t unpack_inline_arg");
2191 }
2192 }
2193 #endif
2194
2195 void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
2196 {
2197 if (!_verified) {
2198 __ ic_check(1);
2199 } else {
2200 if (ra_->C->stub_function() == nullptr) {
2201 // Emit the entry barrier in a temporary frame before unpacking because
2202 // it can deopt, which would require packing the scalarized args again.
2203 __ verified_entry(ra_->C, 0);
2204 __ entry_barrier();
2205 int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
2206 __ remove_frame(framesize, false);
2207 }
2208 // Unpack inline type args passed as oop and then jump to
2209 // the verified entry point (skipping the unverified entry).
2210 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2211 // Emit code for verified entry and save increment for stack repair on return
2212 __ verified_entry(ra_->C, sp_inc);
2213 if (Compile::current()->output()->in_scratch_emit_size()) {
2214 Label dummy_verified_entry;
2215 __ b(dummy_verified_entry);
2216 } else {
2217 __ b(*_verified_entry);
2218 }
2219 }
2220 }
2221
2222 //=============================================================================
2223 #ifndef PRODUCT
2224 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2225 {
2226 st->print_cr("# MachUEPNode");
2227 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2228 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2229 st->print_cr("\tcmpw rscratch1, r10");
2230 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2231 }
2232 #endif
2233
2234 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2235 {
2236 __ ic_check(InteriorEntryAlignment);
2237 }
2238
2239 // REQUIRED EMIT CODE
2240
2241 //=============================================================================
2242
2243 // Emit deopt handler code.
2244 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
2245 {
2246 // Note that the code buffer's insts_mark is always relative to insts.
2247 // That's why we must use the macroassembler to generate a handler.
2248 address base = __ start_a_stub(size_deopt_handler());
2249 if (base == nullptr) {
2250 ciEnv::current()->record_failure("CodeCache is full");
2251 return 0; // CodeBuffer::expand failed
2252 }
2253
2254 int offset = __ offset();
2255 Label start;
2256 __ bind(start);
2257 __ far_call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2258
3662 %}
3663
3664 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3665 int method_index = resolved_method_index(masm);
3666 address call = __ ic_call((address)$meth$$method, method_index);
3667 if (call == nullptr) {
3668 ciEnv::current()->record_failure("CodeCache is full");
3669 return;
3670 }
3671 __ post_call_nop();
3672 if (Compile::current()->max_vector_size() > 0) {
3673 __ reinitialize_ptrue();
3674 }
3675 %}
3676
3677 enc_class aarch64_enc_call_epilog() %{
3678 if (VerifyStackAtCalls) {
3679 // Check that stack depth is unchanged: find majik cookie on stack
3680 __ call_Unimplemented();
3681 }
3682 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
3683 // The last return value is not set by the callee but used to pass the null marker to compiled code.
3684 // Search for the corresponding projection, get the register and emit code that initializes it.
3685 uint con = (tf()->range_cc()->cnt() - 1);
3686 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3687 ProjNode* proj = fast_out(i)->as_Proj();
3688 if (proj->_con == con) {
3689 // Set null marker if r0 is non-null (a non-null value is returned buffered or scalarized)
3690 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3691 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
3692 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3693 __ cmp(r0, zr);
3694 __ cset(toReg, Assembler::NE);
3695 if (reg->is_stack()) {
3696 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3697 __ str(toReg, Address(sp, st_off));
3698 }
3699 break;
3700 }
3701 }
3702 if (return_value_is_used()) {
3703 // An inline type is returned as fields in multiple registers.
3704 // R0 either contains an oop if the inline type is buffered or a pointer
3705 // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
3706 // if the lowest bit is set to allow C2 to use the oop after null checking.
3707 // r0 &= (r0 & 1) - 1
3708 __ andr(rscratch1, r0, 0x1);
3709 __ sub(rscratch1, rscratch1, 0x1);
3710 __ andr(r0, r0, rscratch1);
3711 }
3712 }
3713 %}
3714
3715 enc_class aarch64_enc_java_to_runtime(method meth) %{
3716 // some calls to generated routines (arraycopy code) are scheduled
3717 // by C2 as runtime calls. if so we can call them using a br (they
3718 // will be in a reachable segment) otherwise we have to use a blr
3719 // which loads the absolute address into a register.
3720 address entry = (address)$meth$$method;
3721 CodeBlob *cb = CodeCache::find_blob(entry);
3722 if (cb) {
3723 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3724 if (call == nullptr) {
3725 ciEnv::current()->record_failure("CodeCache is full");
3726 return;
3727 }
3728 __ post_call_nop();
3729 } else {
3730 Label retaddr;
3731 // Make the anchor frame walkable
3732 __ adr(rscratch2, retaddr);
// Int constant operand: matches a ConI whose value is greater than 1.
operand immI_gt_1()
%{
  predicate(n->get_int() > 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3991
// Int constant operand: matches a ConI whose value is less than or equal to 4.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4001
// Int constant operand: matches a ConI whose value is exactly 4.
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4011
// Int constant operand: matches a ConI whose value is exactly 16.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4021
// Int constant operand: matches a ConI whose value is exactly 24.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4031
8128 %}
8129
8130 // ============================================================================
8131 // Cast/Convert Instructions
8132
// CastX2P (long -> ptr): a register move that is elided when source and
// destination were assigned the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    const int dst_enc = $dst$$reg;
    const int src_enc = $src$$reg;
    if (dst_enc != src_enc) {
      __ mov(as_Register(dst_enc), as_Register(src_enc));
    }
  %}

  ins_pipe(ialu_reg);
%}
8147
// CastI2N (int -> narrow ptr): a register move that is elided when source
// and destination were assigned the same register.
instruct castI2N(iRegNNoSp dst, iRegI src) %{
  match(Set dst (CastI2N src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int -> narrow ptr" %}

  ins_encode %{
    const int dst_enc = $dst$$reg;
    const int src_enc = $src$$reg;
    if (dst_enc != src_enc) {
      __ mov(as_Register(dst_enc), as_Register(src_enc));
    }
  %}

  ins_pipe(ialu_reg);
%}
8162
// CastP2X applied to a narrow-pointer register operand: a register move that
// is elided when source and destination were assigned the same register.
// NOTE(review): the format text reads "ptr -> long" although src is an iRegN
// operand -- confirm whether it should say "narrow ptr".
instruct castN2X(iRegLNoSp dst, iRegN src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    const int dst_enc = $dst$$reg;
    const int src_enc = $src$$reg;
    if (dst_enc != src_enc) {
      __ mov(as_Register(dst_enc), as_Register(src_enc));
    }
  %}

  ins_pipe(ialu_reg);
%}
8177
// CastP2X (ptr -> long): a register move that is elided when source and
// destination were assigned the same register.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    const int dst_enc = $dst$$reg;
    const int src_enc = $src$$reg;
    if (dst_enc != src_enc) {
      __ mov(as_Register(dst_enc), as_Register(src_enc));
    }
  %}

  ins_pipe(ialu_reg);
%}
8192
8193 // Convert oop into int for vectors alignment masking
8194 instruct convP2I(iRegINoSp dst, iRegP src) %{
8195 match(Set dst (ConvL2I (CastP2X src)));
8196
8197 ins_cost(INSN_COST);
14146
14147 match(Set dst (MoveL2D src));
14148
14149 effect(DEF dst, USE src);
14150
14151 ins_cost(INSN_COST);
14152
14153 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14154
14155 ins_encode %{
14156 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14157 %}
14158
14159 ins_pipe(fp_l2d);
14160
14161 %}
14162
14163 // ============================================================================
14164 // clearing of an array
14165
// ClearArray with register count/base and a zero fill value (immL0). Both
// inputs and the flags are killed. A null result from zero_words is recorded
// as a compilation failure.
instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray (Binary cnt base) zero));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address zeroing_result = __ zero_words($base$$Register, $cnt$$Register);
    if (zeroing_result == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
14184
// ClearArray with a register fill value. Selected only when the ClearArray
// node reports word_copy_only(). Fills via fill_words; count, base and the
// flags are killed.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base, $val" %}

  ins_encode %{
    __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
14200
// ClearArray with a constant count and zero fill value. Selected only when
// the count, compared as unsigned, is below
// BlockZeroingLowLimit >> LogBytesPerWord and the node is not word_copy_only().
// A null result from zero_words is recorded as a compilation failure.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, immL0 zero, Universe dummy, rFlagsReg cr)
%{
  predicate((uint64_t)n->in(2)->in(1)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
            && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) zero));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address zeroing_result = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
    if (zeroing_result == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
14222
14223 // ============================================================================
14224 // Overflow Math Instructions
14225
14226 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15502 %}
15503
15504 // Call Runtime Instruction without safepoint and with vector arguments
// Matches CallLeafVector and encodes it with aarch64_enc_java_to_runtime.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf vector $meth" %}

  ins_encode(aarch64_enc_java_to_runtime(meth));

  ins_pipe(pipe_class_call);
%}
15519
15520 // Call Runtime Instruction
15521
15522 // entry point is null, target holds the address to call
// Indirect form of CallLeafNoFP: selected when the call node's entry point is
// null, in which case the address to call is taken from the target register
// and the call is a single blr.
instruct CallLeafNoFPIndirect(iRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);

  match(CallLeafNoFP target);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp indirect $target" %}

  ins_encode %{
    __ blr($target$$Register);
  %}

  ins_pipe(pipe_class_call);
%}
15539
// Direct form of CallLeafNoFP: selected when the call node has a non-null
// entry point; encoded with aarch64_enc_java_to_runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);

  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15556
15557 // Tail Call; Jump from runtime stub to Java code.
15558 // Also known as an 'interprocedural jump'.
15559 // Target of jump will eventually return to caller.
15560 // TailJump below removes the return address.
15561 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
15562 // emitted just above the TailCall which has reset rfp to the caller state.
15563 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
|