1627
// Offset from the start of a dynamic (inline-cache) Java call to the
// return address, i.e. the size of the emitted call sequence.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // 4 instructions * 4 bytes: movz, movk, movk (IC holder load), bl
  return 16; // movz, movk, movk, bl
}
1632
1633 int MachCallRuntimeNode::ret_addr_offset() {
1634 // for generated stubs the call will be
1635 // bl(addr)
1636 // or with far branches
1637 // bl(trampoline_stub)
1638 // for real runtime callouts it will be six instructions
1639 // see aarch64_enc_java_to_runtime
1640 // adr(rscratch2, retaddr)
1641 // lea(rscratch1, RuntimeAddress(addr)
1642 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1643 // blr(rscratch1)
1644 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1645 if (cb) {
1646 return 1 * NativeInstruction::instruction_size;
1647 } else {
1648 return 6 * NativeInstruction::instruction_size;
1649 }
1650 }
1651
1652 //=============================================================================
1653
1654 #ifndef PRODUCT
// Debug-only pretty printer: lists a breakpoint node in the ideal-graph dump.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
1658 #endif
1659
// Emit a breakpoint: a single brk #0 instruction that traps into the debugger.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  __ brk(0);
}
1664
1665 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1666 return MachNode::size(ra_);
1738 st->print("\n\t");
1739 st->print("ldr rscratch1, [guard]\n\t");
1740 st->print("dmb ishld\n\t");
1741 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1742 st->print("cmp rscratch1, rscratch2\n\t");
1743 st->print("b.eq skip");
1744 st->print("\n\t");
1745 st->print("blr #nmethod_entry_barrier_stub\n\t");
1746 st->print("b skip\n\t");
1747 st->print("guard: int\n\t");
1748 st->print("\n\t");
1749 st->print("skip:\n\t");
1750 }
1751 }
1752 #endif
1753
// Emit the method prologue: patchable nop, optional class-init barrier,
// SVE predicate reinitialization, stack bang, frame build, and the
// nmethod entry barrier. Order of emission is significant.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const int framesize = C->output()->frame_size_in_bytes();

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->clinit_barrier_on_entry()) {
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;

    // If the holder class is not yet fully initialized for the current
    // thread, bail out to the wrong-method stub.
    __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
    __ bind(L_skip_barrier);
  }

  if (C->max_vector_size() > 0) {
    // Restore the all-true SVE predicate register clobbered by callers.
    __ reinitialize_ptrue();
  }

  // Touch pages beyond the current stack limit so overflow is detected
  // eagerly, before the frame is built.
  int bangsize = C->output()->bang_size_in_bytes();
  if (C->output()->need_stack_bang(bangsize))
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (C->stub_function() == nullptr) {
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    if (BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
      // Dummy labels for just measuring the code size
      Label dummy_slow_path;
      Label dummy_continuation;
      Label dummy_guard;
      Label* slow_path = &dummy_slow_path;
      Label* continuation = &dummy_continuation;
      Label* guard = &dummy_guard;
      if (!Compile::current()->output()->in_scratch_emit_size()) {
        // Use real labels from actual stub when not emitting code for the purpose of measuring its size
        C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
        Compile::current()->output()->add_stub(stub);
        slow_path = &stub->entry();
        continuation = &stub->continuation();
        guard = &stub->guard();
      }
      // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
      bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
    }
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // Frame is now fully built; record the offset for deopt/stack walking.
  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1822
// Size in bytes of the emitted prologue; delegates to the generic
// emit-and-measure path since the length depends on many flags.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1828
// Number of relocation entries contributed by the prologue (none).
int MachPrologNode::reloc() const
{
  return 0;
}
1833
1834 //=============================================================================
1835
1836 #ifndef PRODUCT
1837 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1838 Compile* C = ra_->C;
1839 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1840
1841 st->print("# pop frame %d\n\t",framesize);
1842
1843 if (framesize == 0) {
1844 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1845 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1846 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1847 st->print("add sp, sp, #%d\n\t", framesize);
1848 } else {
1852 }
1853 if (VM_Version::use_rop_protection()) {
1854 st->print("autiaz\n\t");
1855 st->print("ldr zr, [lr]\n\t");
1856 }
1857
1858 if (do_polling() && C->is_method_compilation()) {
1859 st->print("# test polling word\n\t");
1860 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1861 st->print("cmp sp, rscratch1\n\t");
1862 st->print("bhi #slow_path");
1863 }
1864 }
1865 #endif
1866
// Emit the method epilogue: tear down the frame, optionally check the
// reserved stack area, and perform the return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real slow-path stub only when actually emitting (not when sizing).
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
  }
}
1890
// Size in bytes of the emitted epilogue; measured dynamically since it
// depends on frame size and polling configuration.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
1895
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
1900
// Scheduling class for the epilogue: use the generic pipeline description.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
1904
1905 //=============================================================================
1906
1907 // Figure out which register class each belongs in: rc_int, rc_float or
1908 // rc_stack.
1909 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
1910
1911 static enum RC rc_class(OptoReg::Name reg) {
1912
1913 if (reg == OptoReg::Bad) {
1914 return rc_bad;
1915 }
2181
2182 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2183 int reg = ra_->get_encode(this);
2184
2185 // This add will handle any 24-bit signed offset. 24 bits allows an
2186 // 8 megabyte stack frame.
2187 __ add(as_Register(reg), sp, offset);
2188 }
2189
2190 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2191 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2192 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2193
2194 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2195 return NativeInstruction::instruction_size;
2196 } else {
2197 return 2 * NativeInstruction::instruction_size;
2198 }
2199 }
2200
2201 //=============================================================================
2202
2203 #ifndef PRODUCT
2204 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2205 {
2206 st->print_cr("# MachUEPNode");
2207 if (UseCompressedClassPointers) {
2208 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2209 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2210 st->print_cr("\tcmpw rscratch1, r10");
2211 } else {
2212 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2213 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2214 st->print_cr("\tcmp rscratch1, r10");
2215 }
2216 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2217 }
2218 #endif
2219
// Emit the unverified entry point: an inline-cache klass check that
// branches to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  C2_MacroAssembler _masm(&cbuf);
  __ ic_check(InteriorEntryAlignment);
}
2226
// Size in bytes of the unverified entry point; measured dynamically.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
2231
2232 // REQUIRED EMIT CODE
2233
2234 //=============================================================================
2235
2236 // Emit exception handler code.
2237 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2238 {
2239 // mov rscratch1 #exception_blob_entry_point
2240 // br rscratch1
2241 // Note that the code buffer's insts_mark is always relative to insts.
2242 // That's why we must use the macroassembler to generate a handler.
2243 C2_MacroAssembler _masm(&cbuf);
2244 address base = __ start_a_stub(size_exception_handler());
2245 if (base == nullptr) {
2246 ciEnv::current()->record_failure("CodeCache is full");
2247 return 0; // CodeBuffer::expand failed
2248 }
2249 int offset = __ offset();
2250 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2251 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  // Encoding for a dynamic (inline-cache) Java call; bails out if the
  // code cache is exhausted while emitting the call site.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    C2_MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
    __ post_call_nop();
    if (Compile::current()->max_vector_size() > 0) {
      // Callee may have clobbered the all-true SVE predicate; restore it.
      __ reinitialize_ptrue();
    }
  %}
3740
  // Post-call bookkeeping; only active under -XX:+VerifyStackAtCalls
  // (currently unimplemented on this platform).
  enc_class aarch64_enc_call_epilog() %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3748
3749 enc_class aarch64_enc_java_to_runtime(method meth) %{
3750 C2_MacroAssembler _masm(&cbuf);
3751
3752 // some calls to generated routines (arraycopy code) are scheduled
3753 // by C2 as runtime calls. if so we can call them using a br (they
3754 // will be in a reachable segment) otherwise we have to use a blr
3755 // which loads the absolute address into a register.
3756 address entry = (address)$meth$$method;
3757 CodeBlob *cb = CodeCache::find_blob(entry);
3758 if (cb) {
3759 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3760 if (call == nullptr) {
3761 ciEnv::current()->record_failure("CodeCache is full");
3762 return;
3763 }
3764 __ post_call_nop();
3765 } else {
3766 Label retaddr;
// Load a 64-bit long constant into a register.
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
7191
7192 // Load Pointer Constant
7193
// Load a pointer constant (may need relocation, hence the higher cost).
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}
7207
7208 // Load Null Pointer Constant
7209
// Load the null pointer constant (zero), cheaper than a general pointer load.
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $con\t# null pointer" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8401 %}
8402
8403 // ============================================================================
8404 // Cast/Convert Instructions
8405
// Reinterpret a long as a pointer; a plain register move (elided when
// source and destination registers coincide).
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8420
// Reinterpret a pointer as a long; a plain register move (elided when
// source and destination registers coincide).
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8435
8436 // Convert oop into int for vectors alignment masking
8437 instruct convP2I(iRegINoSp dst, iRegP src) %{
8438 match(Set dst (ConvL2I (CastP2X src)));
8439
8440 ins_cost(INSN_COST);
15222
15223 match(Set dst (MoveL2D src));
15224
15225 effect(DEF dst, USE src);
15226
15227 ins_cost(INSN_COST);
15228
15229 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15230
15231 ins_encode %{
15232 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15233 %}
15234
15235 ins_pipe(fp_l2d);
15236
15237 %}
15238
15239 // ============================================================================
15240 // clearing of an array
15241
// Zero an array region: base pointer in r10, word count in r11.
// Fails the compile if zero_words cannot get a trampoline (code cache full).
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address tpc = __ zero_words($base$$Register, $cnt$$Register);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
15260
// Zero an array region of a small constant word count (below the block-
// zeroing threshold); allows inline expansion without a loop counter.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
  predicate((uint64_t)n->in(2)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
15281
15282 // ============================================================================
15283 // Overflow Math Instructions
15284
16544
16545 // Call Runtime Instruction
16546
// Direct call to a leaf runtime routine (no Java frame state required).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16561
16562 // Call Runtime Instruction
16563
// Direct call to a leaf runtime routine that does not use/kill FP state.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16578
16579 // Tail Call; Jump from runtime stub to Java code.
16580 // Also known as an 'interprocedural jump'.
16581 // Target of jump will eventually return to caller.
16582 // TailJump below removes the return address.
16583 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16584 %{
16585 match(TailCall jump_target method_ptr);
|
1627
// Offset from the start of a dynamic (inline-cache) Java call to the
// return address, i.e. the size of the emitted call sequence.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // 4 instructions * 4 bytes: movz, movk, movk (IC holder load), bl
  return 16; // movz, movk, movk, bl
}
1632
1633 int MachCallRuntimeNode::ret_addr_offset() {
1634 // for generated stubs the call will be
1635 // bl(addr)
1636 // or with far branches
1637 // bl(trampoline_stub)
1638 // for real runtime callouts it will be six instructions
1639 // see aarch64_enc_java_to_runtime
1640 // adr(rscratch2, retaddr)
1641 // lea(rscratch1, RuntimeAddress(addr)
1642 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1643 // blr(rscratch1)
1644 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1645 if (cb) {
1646 return 1 * NativeInstruction::instruction_size;
1647 } else if (_entry_point == nullptr) {
1648 // See CallLeafNoFPIndirect
1649 return 1 * NativeInstruction::instruction_size;
1650 } else {
1651 return 6 * NativeInstruction::instruction_size;
1652 }
1653 }
1654
1655 //=============================================================================
1656
1657 #ifndef PRODUCT
// Debug-only pretty printer: lists a breakpoint node in the ideal-graph dump.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
1661 #endif
1662
// Emit a breakpoint: a single brk #0 instruction that traps into the debugger.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  __ brk(0);
}
1667
1668 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1669 return MachNode::size(ra_);
1741 st->print("\n\t");
1742 st->print("ldr rscratch1, [guard]\n\t");
1743 st->print("dmb ishld\n\t");
1744 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1745 st->print("cmp rscratch1, rscratch2\n\t");
1746 st->print("b.eq skip");
1747 st->print("\n\t");
1748 st->print("blr #nmethod_entry_barrier_stub\n\t");
1749 st->print("b skip\n\t");
1750 st->print("guard: int\n\t");
1751 st->print("\n\t");
1752 st->print("skip:\n\t");
1753 }
1754 }
1755 #endif
1756
// Emit the method prologue: patchable nop, verified entry (frame build),
// nmethod entry barrier, then bind the verified-entry label used by the
// inline-type entry path (MachVEPNode). Emission order is significant.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  // Builds the frame; the 0 sp_inc means no extension for scalarized args here.
  __ verified_entry(C, 0);

  if (C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the label MachVEPNode branches to after unpacking inline-type args.
    __ bind(*_verified_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // Frame is now fully built; record the offset for deopt/stack walking.
  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1788
// Number of relocation entries contributed by the prologue (none).
int MachPrologNode::reloc() const
{
  return 0;
}
1793
1794 //=============================================================================
1795
1796 #ifndef PRODUCT
1797 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1798 Compile* C = ra_->C;
1799 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1800
1801 st->print("# pop frame %d\n\t",framesize);
1802
1803 if (framesize == 0) {
1804 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1805 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1806 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1807 st->print("add sp, sp, #%d\n\t", framesize);
1808 } else {
1812 }
1813 if (VM_Version::use_rop_protection()) {
1814 st->print("autiaz\n\t");
1815 st->print("ldr zr, [lr]\n\t");
1816 }
1817
1818 if (do_polling() && C->is_method_compilation()) {
1819 st->print("# test polling word\n\t");
1820 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1821 st->print("cmp sp, rscratch1\n\t");
1822 st->print("bhi #slow_path");
1823 }
1824 }
1825 #endif
1826
// Emit the method epilogue: tear down the frame (with stack repair for
// scalarized inline-type calling convention when needed), optionally check
// the reserved stack area, and perform the return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real slow-path stub only when actually emitting (not when sizing).
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
  }
}
1850
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
1855
// Scheduling class for the epilogue: use the generic pipeline description.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
1859
1860 //=============================================================================
1861
1862 // Figure out which register class each belongs in: rc_int, rc_float or
1863 // rc_stack.
1864 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
1865
1866 static enum RC rc_class(OptoReg::Name reg) {
1867
1868 if (reg == OptoReg::Bad) {
1869 return rc_bad;
1870 }
2136
2137 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2138 int reg = ra_->get_encode(this);
2139
2140 // This add will handle any 24-bit signed offset. 24 bits allows an
2141 // 8 megabyte stack frame.
2142 __ add(as_Register(reg), sp, offset);
2143 }
2144
2145 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2146 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2147 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2148
2149 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2150 return NativeInstruction::instruction_size;
2151 } else {
2152 return 2 * NativeInstruction::instruction_size;
2153 }
2154 }
2155
//=============================================================================
2157 #ifndef PRODUCT
// Debug-only pretty printer for the inline-type entry point: either the
// unverified klass-check entry or the scalarized-argument unpack entry.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachVEPNode");
  if (!_verified) {
    st->print_cr("\t load_class");
  } else {
    st->print_cr("\t unpack_inline_arg");
  }
}
2167 #endif
2168
// Emit an inline-type entry point. For the unverified variant, perform the
// inline-cache klass check; for the verified variant, unpack scalarized
// inline-type arguments and branch to the verified entry bound by
// MachPrologNode::emit.
void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  C2_MacroAssembler _masm(&cbuf);

  if (!_verified) {
    // Inline-cache check: compare receiver klass against the speculated
    // klass and fall through on match, else jump to the IC-miss stub.
    Label skip;
    __ cmp_klass(j_rarg0, rscratch2, rscratch1);
    __ br(Assembler::EQ, skip);
    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
    __ bind(skip);

  } else {
    // insert a nop at the start of the prolog so we can patch in a
    // branch if we need to invalidate the method later
    __ nop();

    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build (and immediately tear down) a temporary frame so the entry
      // barrier can run before unpacking arguments.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
      __ remove_frame(framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // When only sizing, branch to a dummy label of equivalent encoding.
      Label dummy_verified_entry;
      __ b(dummy_verified_entry);
    } else {
      __ b(*_verified_entry);
    }
  }
}
2205
2206 //=============================================================================
2207 #ifndef PRODUCT
2208 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2209 {
2210 st->print_cr("# MachUEPNode");
2211 if (UseCompressedClassPointers) {
2212 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2213 st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2214 st->print_cr("\tcmpw rscratch1, r10");
2215 } else {
2216 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2217 st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
2218 st->print_cr("\tcmp rscratch1, r10");
2219 }
2220 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2221 }
2222 #endif
2223
// Emit the unverified entry point: an inline-cache klass check that
// branches to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  C2_MacroAssembler _masm(&cbuf);
  __ ic_check(InteriorEntryAlignment);
}
2230
2231 // REQUIRED EMIT CODE
2232
2233 //=============================================================================
2234
2235 // Emit exception handler code.
2236 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2237 {
2238 // mov rscratch1 #exception_blob_entry_point
2239 // br rscratch1
2240 // Note that the code buffer's insts_mark is always relative to insts.
2241 // That's why we must use the macroassembler to generate a handler.
2242 C2_MacroAssembler _masm(&cbuf);
2243 address base = __ start_a_stub(size_exception_handler());
2244 if (base == nullptr) {
2245 ciEnv::current()->record_failure("CodeCache is full");
2246 return 0; // CodeBuffer::expand failed
2247 }
2248 int offset = __ offset();
2249 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2250 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  // Encoding for a dynamic (inline-cache) Java call; bails out if the
  // code cache is exhausted while emitting the call site.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    C2_MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
    __ post_call_nop();
    if (Compile::current()->max_vector_size() > 0) {
      // Callee may have clobbered the all-true SVE predicate; restore it.
      __ reinitialize_ptrue();
    }
  %}
3739
  // Post-call bookkeeping. Besides the (unimplemented) stack-depth check,
  // this handles the scalarized inline-type return convention: it
  // materializes the IsInit flag projection and normalizes r0 when the
  // callee returned an InlineKlass tag instead of an oop.
  enc_class aarch64_enc_call_epilog() %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
      // The last return value is not set by the callee but used to pass IsInit information to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          // Use rscratch1 as a staging register when the projection was
          // allocated to a stack slot rather than a register.
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ cmp(r0, zr);
          __ cset(toReg, Assembler::NE);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ str(toReg, Address(sp, st_off));
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // R0 either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // r0 &= (r0 & 1) - 1
        __ andr(rscratch1, r0, 0x1);
        __ sub(rscratch1, rscratch1, 0x1);
        __ andr(r0, r0, rscratch1);
      }
    }
  %}
3778
3779 enc_class aarch64_enc_java_to_runtime(method meth) %{
3780 C2_MacroAssembler _masm(&cbuf);
3781
3782 // some calls to generated routines (arraycopy code) are scheduled
3783 // by C2 as runtime calls. if so we can call them using a br (they
3784 // will be in a reachable segment) otherwise we have to use a blr
3785 // which loads the absolute address into a register.
3786 address entry = (address)$meth$$method;
3787 CodeBlob *cb = CodeCache::find_blob(entry);
3788 if (cb) {
3789 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3790 if (call == nullptr) {
3791 ciEnv::current()->record_failure("CodeCache is full");
3792 return;
3793 }
3794 __ post_call_nop();
3795 } else {
3796 Label retaddr;
// Load a 64-bit long constant into a register.
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
7221
7222 // Load Pointer Constant
7223
// Load a pointer constant (may need relocation, hence the higher cost).
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov $dst, $con\t# ptr"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}
7237
7238 // Load Null Pointer Constant
7239
// Load the null pointer constant (zero), cheaper than a general pointer load.
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $con\t# null pointer" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8431 %}
8432
8433 // ============================================================================
8434 // Cast/Convert Instructions
8435
// Reinterpret a long as a pointer; a plain register move (elided when
// source and destination registers coincide).
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8450
// Reinterpret a narrow (compressed) oop register as a long; a plain
// register move. NOTE(review): matches the same CastP2X ideal node as
// castP2X below, distinguished only by the iRegN operand class.
instruct castN2X(iRegLNoSp dst, iRegN src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8465
// Reinterpret a pointer as a long; a plain register move (elided when
// source and destination registers coincide).
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8480
8481 // Convert oop into int for vectors alignment masking
8482 instruct convP2I(iRegINoSp dst, iRegP src) %{
8483 match(Set dst (ConvL2I (CastP2X src)));
8484
8485 ins_cost(INSN_COST);
15267
15268 match(Set dst (MoveL2D src));
15269
15270 effect(DEF dst, USE src);
15271
15272 ins_cost(INSN_COST);
15273
15274 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15275
15276 ins_encode %{
15277 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15278 %}
15279
15280 ins_pipe(fp_l2d);
15281
15282 %}
15283
15284 // ============================================================================
15285 // clearing of an array
15286
// Zero an array region (fill value is the constant 0): base pointer in
// r10, word count in r11. Fails the compile if zero_words cannot get a
// trampoline (code cache full).
instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray (Binary cnt base) zero));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address tpc = __ zero_words($base$$Register, $cnt$$Register);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
15305
// Fill an array region with a non-constant value (used for flat arrays
// whose "clear" pattern is a whole word); only when the node is marked
// word-copy-only.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base, $val" %}

  ins_encode %{
    __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
15321
// Zero an array region of a small constant word count (below the block-
// zeroing threshold); excluded for word-copy-only (non-zero fill) nodes.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
  predicate((uint64_t)n->in(2)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
            && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
15343
15344 // ============================================================================
15345 // Overflow Math Instructions
15346
16606
16607 // Call Runtime Instruction
16608
// Direct call to a leaf runtime routine (no Java frame state required).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16623
16624 // Call Runtime Instruction
16625
16626 // entry point is null, target holds the address to call
16627 instruct CallLeafNoFPIndirect(iRegP target)
16628 %{
16629 predicate(n->as_Call()->entry_point() == nullptr);
16630
16631 match(CallLeafNoFP target);
16632
16633 ins_cost(CALL_COST);
16634
16635 format %{ "CALL, runtime leaf nofp indirect $target" %}
16636
16637 ins_encode %{
16638 __ blr($target$$Register);
16639 %}
16640
16641 ins_pipe(pipe_class_call);
16642 %}
16643
// Direct call to a leaf runtime routine that does not use/kill FP state;
// only when the call has a static entry point (else CallLeafNoFPIndirect).
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);

  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16660
16661 // Tail Call; Jump from runtime stub to Java code.
16662 // Also known as an 'interprocedural jump'.
16663 // Target of jump will eventually return to caller.
16664 // TailJump below removes the return address.
16665 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16666 %{
16667 match(TailCall jump_target method_ptr);
|