< prev index next >

src/hotspot/cpu/aarch64/aarch64.ad

Print this page

 1637 
// Byte distance from the start of the dynamic-call instruction sequence to
// the return address: four 4-byte instructions (movz/movk/movk to
// materialize the inline-cache data, then bl).
 1638 int MachCallDynamicJavaNode::ret_addr_offset()
 1639 {
 1640   return 16; // movz, movk, movk, bl
 1641 }
 1642 
// Byte distance from the start of a runtime-call sequence to the return
// address; depends on whether the callee lives inside the code cache.
 1643 int MachCallRuntimeNode::ret_addr_offset() {
 1644   // for generated stubs the call will be
 1645   //   bl(addr)
 1646   // or with far branches
 1647   //   bl(trampoline_stub)
 1648   // for real runtime callouts it will be six instructions
 1649   // see aarch64_enc_java_to_runtime
 1650   //   adr(rscratch2, retaddr)
 1651   //   str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
 1652   //   lea(rscratch1, RuntimeAddress(addr)
 1653   //   blr(rscratch1)
 1654   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1655   if (cb) {
      // In-code-cache target: a single bl (possibly routed via a trampoline
      // stub), so the return address follows one instruction.
 1656     return 1 * NativeInstruction::instruction_size;



 1657   } else {
      // Out-of-cache runtime callout: the six-instruction sequence
      // documented above.
 1658     return 6 * NativeInstruction::instruction_size;
 1659   }
 1660 }
 1661 
 1662 //=============================================================================
 1663 
 1664 #ifndef PRODUCT
// Debug-only listing of the breakpoint pseudo-instruction.
 1665 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1666   st->print("BREAKPOINT");
 1667 }
 1668 #endif
 1669 
// Emit a brk #0, trapping to the debugger when executed.
 1670 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1671   __ brk(0);
 1672 }
 1673 
// Code size of this node; computed by the shared MachNode::size machinery.
 1674 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1675   return MachNode::size(ra_);
 1676 }

 1745   if (C->stub_function() == nullptr) {
 1746     st->print("\n\t");
 1747     st->print("ldr  rscratch1, [guard]\n\t");
 1748     st->print("dmb ishld\n\t");
 1749     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1750     st->print("cmp  rscratch1, rscratch2\n\t");
 1751     st->print("b.eq skip");
 1752     st->print("\n\t");
 1753     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1754     st->print("b skip\n\t");
 1755     st->print("guard: int\n\t");
 1756     st->print("\n\t");
 1757     st->print("skip:\n\t");
 1758   }
 1759 }
 1760 #endif
 1761 
// Emit the method prologue: optional class-initialization barrier, SVE
// predicate reinitialization, stack-overflow bang, frame construction and
// the nmethod entry barrier; finally marks the frame complete.
 1762 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1763   Compile* C = ra_->C;
 1764 
 1765   // n.b. frame size includes space for return pc and rfp
 1766   const int framesize = C->output()->frame_size_in_bytes();
 1767 
 1768   if (C->clinit_barrier_on_entry()) {
 1769     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1770 
 1771     Label L_skip_barrier;
 1772 
      // Class-initialization barrier: load the holder klass and divert to
      // the wrong-method stub unless clinit_barrier lets this thread pass.
 1773     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1774     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1775     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1776     __ bind(L_skip_barrier);
 1777   }
 1778 
 1779   if (C->max_vector_size() > 0) {
      // NOTE(review): presumably re-establishes the all-true SVE predicate
      // register used by vectorized code — confirm against reinitialize_ptrue.
 1780     __ reinitialize_ptrue();
 1781   }
 1782 
      // Touch pages below sp so a stack overflow is detected eagerly.
 1783   int bangsize = C->output()->bang_size_in_bytes();
 1784   if (C->output()->need_stack_bang(bangsize))
 1785     __ generate_stack_overflow_check(bangsize);
 1786 
 1787   __ build_frame(framesize);
 1788 
 1789   if (C->stub_function() == nullptr) {
 1790     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1791     // Dummy labels for just measuring the code size
 1792     Label dummy_slow_path;
 1793     Label dummy_continuation;
 1794     Label dummy_guard;
 1795     Label* slow_path = &dummy_slow_path;
 1796     Label* continuation = &dummy_continuation;
 1797     Label* guard = &dummy_guard;
 1798     if (!Compile::current()->output()->in_scratch_emit_size()) {
 1799       // Use real labels from actual stub when not emitting code for the purpose of measuring its size
 1800       C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
 1801       Compile::current()->output()->add_stub(stub);
 1802       slow_path = &stub->entry();
 1803       continuation = &stub->continuation();
 1804       guard = &stub->guard();
 1805     }
 1806     // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
 1807     bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
 1808   }
 1809 
 1810   if (VerifyStackAtCalls) {
 1811     Unimplemented();
 1812   }
 1813 
      // Everything up to here is the not-yet-walkable frame setup.
 1814   C->output()->set_frame_complete(__ offset());
 1815 
 1816   if (C->has_mach_constant_base_node()) {
 1817     // NOTE: We set the table base offset here because users might be
 1818     // emitted before MachConstantBaseNode.
 1819     ConstantTable& constant_table = C->output()->constant_table();
 1820     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1821   }
 1822 }
 1823 
// Prologue size depends on too many configuration knobs (barriers, bang,
// frame size), so it is measured by emission rather than computed.
 1824 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1825 {
 1826   return MachNode::size(ra_); // too many variables; just compute it
 1827                               // the hard way
 1828 }
 1829 
// Number of relocation entries contributed by the prologue: none.
 1830 int MachPrologNode::reloc() const
 1831 {
 1832   return 0;
 1833 }
 1834 
 1835 //=============================================================================
 1836 
 1837 #ifndef PRODUCT
// Debug-only listing of the epilogue: frame pop (three frame-size cases),
// optional ROP-protection authentication, and the return safepoint poll.
 1838 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1839   Compile* C = ra_->C;
 1840   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1841 
 1842   st->print("# pop frame %d\n\t",framesize);
 1843 
 1844   if (framesize == 0) {
 1845     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1846   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1847     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1848     st->print("add  sp, sp, #%d\n\t", framesize);
 1849   } else {
      // NOTE(review): the listing for the large-frame case (original lines
      // 1850-1851) is elided in this view — confirm against the full source.

 1852     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1853   }
 1854   if (VM_Version::use_rop_protection()) {
 1855     st->print("autiaz\n\t");
 1856     st->print("ldr  zr, [lr]\n\t");
 1857   }
 1858 
 1859   if (do_polling() && C->is_method_compilation()) {
 1860     st->print("# test polling word\n\t");
 1861     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1862     st->print("cmp  sp, rscratch1\n\t");
 1863     st->print("bhi #slow_path");
 1864   }
 1865 }
 1866 #endif
 1867 
// Emit the method epilogue: tear down the frame, optionally check the
// reserved stack area, and perform the return-time safepoint poll.
 1868 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1869   Compile* C = ra_->C;
 1870   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1871 
 1872   __ remove_frame(framesize);
 1873 
 1874   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1875     __ reserved_stack_check();
 1876   }
 1877 
 1878   if (do_polling() && C->is_method_compilation()) {
      // Use a dummy label while only measuring size; otherwise register a
      // real out-of-line safepoint-poll stub and branch to its entry.
 1879     Label dummy_label;
 1880     Label* code_stub = &dummy_label;
 1881     if (!C->output()->in_scratch_emit_size()) {
 1882       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1883       C->output()->add_stub(stub);
 1884       code_stub = &stub->entry();
 1885     }
      // Mark the poll site so the VM can find return polls via relocations.
 1886     __ relocate(relocInfo::poll_return_type);
 1887     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1888   }
 1889 }
 1890 
// Epilogue size varies with polling/frame configuration; measure it.
 1891 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1892   // Variable size. Determine dynamically.
 1893   return MachNode::size(ra_);
 1894 }
 1895 
 1896 int MachEpilogNode::reloc() const {
 1897   // Return number of relocatable values contained in this instruction.
 1898   return 1; // 1 for polling page.
 1899 }
 1900 
// Scheduling class: use the generic pipeline description.
 1901 const Pipeline * MachEpilogNode::pipeline() const {
 1902   return MachNode::pipeline_class();
 1903 }
 1904 
 1905 //=============================================================================
 1906 
 1907 static enum RC rc_class(OptoReg::Name reg) {
 1908 
 1909   if (reg == OptoReg::Bad) {
 1910     return rc_bad;
 1911   }
 1912 
 1913   // we have 32 int registers * 2 halves
 1914   int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
 1915 

// Materialize the stack address of this node's box/monitor slot into its
// assigned register (sp + frame offset).
 2171 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2172   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2173   int reg    = ra_->get_encode(this);
 2174 
 2175   // This add will handle any 24-bit signed offset. 24 bits allows an
 2176   // 8 megabyte stack frame.
 2177   __ add(as_Register(reg), sp, offset);
 2178 }
 2179 
// Size of the add above: one instruction when the offset fits an add/sub
// immediate, otherwise two (the macro add expands the large offset).
 2180 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2181   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2182   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2183 
 2184   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2185     return NativeInstruction::instruction_size;
 2186   } else {
 2187     return 2 * NativeInstruction::instruction_size;
 2188   }
 2189 }
 2190 
 2191 //=============================================================================











 2192 



























 2193 #ifndef PRODUCT
// Debug-only listing of the unverified entry point: load the receiver
// klass, compare against the inline-cache's speculated klass, and branch
// to the IC-miss stub on mismatch.
 2194 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2195 {
 2196   st->print_cr("# MachUEPNode");
 2197   if (UseCompressedClassPointers) {
 2198     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2199     st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2200     st->print_cr("\tcmpw rscratch1, r10");
 2201   } else {
      // NOTE(review): the "# compressed klass" annotation below is also
      // printed on this uncompressed path — looks like a copy-paste
      // leftover in the debug listing; confirm upstream.
 2202     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2203     st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2204     st->print_cr("\tcmp rscratch1, r10");
 2205   }
 2206   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2207 }
 2208 #endif
 2209 
// Emit the inline-cache check, aligned for an interior entry point.
 2210 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2211 {
 2212   __ ic_check(InteriorEntryAlignment);
 2213 }
 2214 
// Size varies with alignment padding; measure via the generic machinery.
 2215 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2216 {
 2217   return MachNode::size(ra_);
 2218 }
 2219 
 2220 // REQUIRED EMIT CODE
 2221 
 2222 //=============================================================================
 2223 
 2224 // Emit exception handler code.
 2225 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 2226 {
 2227   // mov rscratch1 #exception_blob_entry_point
 2228   // br rscratch1
 2229   // Note that the code buffer's insts_mark is always relative to insts.
 2230   // That's why we must use the macroassembler to generate a handler.
 2231   address base = __ start_a_stub(size_exception_handler());
 2232   if (base == nullptr) {
 2233     ciEnv::current()->record_failure("CodeCache is full");
 2234     return 0;  // CodeBuffer::expand failed
 2235   }
 2236   int offset = __ offset();
 2237   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2238   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2239   __ end_a_stub();

 3692   %}
 3693 
  // Emit an inline-cache call to a dynamically dispatched Java method.
  // Records a bailout (instead of emitting) when the code cache is full.
  3694   enc_class aarch64_enc_java_dynamic_call(method meth) %{
  3695     int method_index = resolved_method_index(masm);
  3696     address call = __ ic_call((address)$meth$$method, method_index);
  3697     if (call == nullptr) {
  3698       ciEnv::current()->record_failure("CodeCache is full");
  3699       return;
  3700     }
  3701     __ post_call_nop();
       // NOTE(review): presumably the callee may clobber the SVE all-true
       // predicate, hence the reinitialization — confirm.
  3702     if (Compile::current()->max_vector_size() > 0) {
  3703       __ reinitialize_ptrue();
  3704     }
  3705   %}
  3706 
  // Post-call bookkeeping; the stack-depth check is not implemented on
  // this platform and traps if VerifyStackAtCalls is enabled.
  3707   enc_class aarch64_enc_call_epilog() %{
  3708     if (VerifyStackAtCalls) {
  3709       // Check that stack depth is unchanged: find majik cookie on stack
  3710       __ call_Unimplemented();
  3711     }






























  3712   %}
 3713 
 3714   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3715     // some calls to generated routines (arraycopy code) are scheduled
 3716     // by C2 as runtime calls. if so we can call them using a br (they
 3717     // will be in a reachable segment) otherwise we have to use a blr
 3718     // which loads the absolute address into a register.
 3719     address entry = (address)$meth$$method;
 3720     CodeBlob *cb = CodeCache::find_blob(entry);
 3721     if (cb) {
 3722       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3723       if (call == nullptr) {
 3724         ciEnv::current()->record_failure("CodeCache is full");
 3725         return;
 3726       }
 3727       __ post_call_nop();
 3728     } else {
 3729       Label retaddr;
 3730       // Make the anchor frame walkable
 3731       __ adr(rscratch2, retaddr);

// Load a 64-bit immediate into a general-purpose register.
 6801 instruct loadConL(iRegLNoSp dst, immL src)
 6802 %{
 6803   match(Set dst src);
 6804 
 6805   ins_cost(INSN_COST);
 6806   format %{ "mov $dst, $src\t# long" %}
 6807 
 6808   ins_encode( aarch64_enc_mov_imm(dst, src) );
 6809 
 6810   ins_pipe(ialu_imm);
 6811 %}
 6812 
// Load Pointer Constant
 6813 
// Materialize a pointer constant (may need several instructions, hence
// the higher cost).
 6814 
 6815 instruct loadConP(iRegPNoSp dst, immP con)
 6816 %{
 6817   match(Set dst con);
 6818 
 6819   ins_cost(INSN_COST * 4);
 6820   format %{
 6821     "mov  $dst, $con\t# ptr\n\t"
 6822   %}
 6823 
 6824   ins_encode(aarch64_enc_mov_p(dst, con));
 6825 
 6826   ins_pipe(ialu_imm);
 6827 %}
 6828 
// Load Null Pointer Constant
 6829 
// Load the null pointer constant (single cheap instruction).
 6830 
 6831 instruct loadConP0(iRegPNoSp dst, immP0 con)
 6832 %{
 6833   match(Set dst con);
 6834 
 6835   ins_cost(INSN_COST);
 6836   format %{ "mov  $dst, $con\t# nullptr ptr" %}
 6837 
 6838   ins_encode(aarch64_enc_mov_p0(dst, con));
 6839 
 6840   ins_pipe(ialu_imm);
 6841 %}

 8010 %}
 8011 
 8012 // ============================================================================
 8013 // Cast/Convert Instructions
 8014 
// Reinterpret a long as a pointer. No code is emitted when source and
// destination were allocated to the same register.
 8015 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8016   match(Set dst (CastX2P src));
 8017 
 8018   ins_cost(INSN_COST);
 8019   format %{ "mov $dst, $src\t# long -> ptr" %}
 8020 
 8021   ins_encode %{
 8022     if ($dst$$reg != $src$$reg) {
 8023       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8024     }
 8025   %}
 8026 
 8027   ins_pipe(ialu_reg);
 8028 %}
 8029 






























// Reinterpret a pointer as a long; same register-move elision as above.
 8030 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8031   match(Set dst (CastP2X src));
 8032 
 8033   ins_cost(INSN_COST);
 8034   format %{ "mov $dst, $src\t# ptr -> long" %}
 8035 
 8036   ins_encode %{
 8037     if ($dst$$reg != $src$$reg) {
 8038       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8039     }
 8040   %}
 8041 
 8042   ins_pipe(ialu_reg);
 8043 %}
 8044 
 8045 // Convert oop into int for vectors alignment masking
 8046 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8047   match(Set dst (ConvL2I (CastP2X src)));
 8048 
 8049   ins_cost(INSN_COST);

14963 
14964   match(Set dst (MoveL2D src));
14965 
14966   effect(DEF dst, USE src);
14967 
14968   ins_cost(INSN_COST);
14969 
14970   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14971 
14972   ins_encode %{
14973     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14974   %}
14975 
14976   ins_pipe(fp_l2d);
14977 
14978 %}
14979 
14980 // ============================================================================
14981 // clearing of an array
14982 
// Zero an array given base (r10) and count (r11); both registers and the
// flags are clobbered. zero_words may emit a call that can fail when the
// code cache is full, in which case the compile is bailed out.
14983 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14984 %{
14985   match(Set dummy (ClearArray cnt base));
14986   effect(USE_KILL cnt, USE_KILL base, KILL cr);
14987 
14988   ins_cost(4 * INSN_COST);
14989   format %{ "ClearArray $cnt, $base" %}
14990 
14991   ins_encode %{
14992     address tpc = __ zero_words($base$$Register, $cnt$$Register);
14993     if (tpc == nullptr) {
14994       ciEnv::current()->record_failure("CodeCache is full");
14995       return;
14996     }
14997   %}
14998 
14999   ins_pipe(pipe_class_memory);
15000 %}
15001 
















// Constant-length variant, used only below the block-zeroing threshold.
// NOTE(review): the predicate compares against BlockZeroingLowLimit
// shifted by LogBytesPerWord, so cnt is presumably a word count — confirm.
15002 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15003 %{
15004   predicate((uint64_t)n->in(2)->get_long()
15005             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));

15006   match(Set dummy (ClearArray cnt base));
15007   effect(TEMP temp, USE_KILL base, KILL cr);
15008 
15009   ins_cost(4 * INSN_COST);
15010   format %{ "ClearArray $cnt, $base" %}
15011 
15012   ins_encode %{
15013     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15014     if (tpc == nullptr) {
15015       ciEnv::current()->record_failure("CodeCache is full");
15016       return;
15017     }
15018   %}
15019 
15020   ins_pipe(pipe_class_memory);
15021 %}
15022 
15023 // ============================================================================
15024 // Overflow Math Instructions
15025 

16336 %}
16337 
16338 // Call Runtime Instruction without safepoint and with vector arguments
// Leaf runtime call (no safepoint) whose arguments include vectors;
// reuses the generic java-to-runtime call encoding.
16339 instruct CallLeafDirectVector(method meth)
16340 %{
16341   match(CallLeafVector);
16342 
16343   effect(USE meth);
16344 
16345   ins_cost(CALL_COST);
16346 
16347   format %{ "CALL, runtime leaf vector $meth" %}
16348 
16349   ins_encode(aarch64_enc_java_to_runtime(meth));
16350 
16351   ins_pipe(pipe_class_call);
16352 %}
16353 
16354 // Call Runtime Instruction
16355 


















// Leaf runtime call with no FP arguments and no safepoint.
16356 instruct CallLeafNoFPDirect(method meth)
16357 %{


16358   match(CallLeafNoFP);
16359 
16360   effect(USE meth);
16361 
16362   ins_cost(CALL_COST);
16363 
16364   format %{ "CALL, runtime leaf nofp $meth" %}
16365 
16366   ins_encode( aarch64_enc_java_to_runtime(meth) );
16367 
16368   ins_pipe(pipe_class_call);
16369 %}
16370 
16371 // Tail Call; Jump from runtime stub to Java code.
16372 // Also known as an 'interprocedural jump'.
16373 // Target of jump will eventually return to caller.
16374 // TailJump below removes the return address.
16375 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16376 // emitted just above the TailCall which has reset rfp to the caller state.
16377 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)

 1637 
// Byte distance from the start of the dynamic-call instruction sequence to
// the return address: four 4-byte instructions (movz/movk/movk, then bl).
 1638 int MachCallDynamicJavaNode::ret_addr_offset()
 1639 {
 1640   return 16; // movz, movk, movk, bl
 1641 }
 1642 
// Byte distance from the start of a runtime-call sequence to the return
// address; depends on the callee's location and on indirect leaf calls.
 1643 int MachCallRuntimeNode::ret_addr_offset() {
 1644   // for generated stubs the call will be
 1645   //   bl(addr)
 1646   // or with far branches
 1647   //   bl(trampoline_stub)
 1648   // for real runtime callouts it will be six instructions
 1649   // see aarch64_enc_java_to_runtime
 1650   //   adr(rscratch2, retaddr)
 1651   //   str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
 1652   //   lea(rscratch1, RuntimeAddress(addr)
 1653   //   blr(rscratch1)
 1654   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1655   if (cb) {
      // In-code-cache target: a single bl (possibly via trampoline).
 1656     return 1 * NativeInstruction::instruction_size;
 1657   } else if (_entry_point == nullptr) {
      // Indirect leaf call through a register: one blr instruction.
 1658     // See CallLeafNoFPIndirect
 1659     return 1 * NativeInstruction::instruction_size;
 1660   } else {
      // Out-of-cache callout: six-instruction sequence documented above.
 1661     return 6 * NativeInstruction::instruction_size;
 1662   }
 1663 }
 1664 
 1665 //=============================================================================
 1666 
 1667 #ifndef PRODUCT
 1668 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1669   st->print("BREAKPOINT");
 1670 }
 1671 #endif
 1672 
 1673 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1674   __ brk(0);
 1675 }
 1676 
 1677 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1678   return MachNode::size(ra_);
 1679 }

 1748   if (C->stub_function() == nullptr) {
 1749     st->print("\n\t");
 1750     st->print("ldr  rscratch1, [guard]\n\t");
 1751     st->print("dmb ishld\n\t");
 1752     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1753     st->print("cmp  rscratch1, rscratch2\n\t");
 1754     st->print("b.eq skip");
 1755     st->print("\n\t");
 1756     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1757     st->print("b skip\n\t");
 1758     st->print("guard: int\n\t");
 1759     st->print("\n\t");
 1760     st->print("skip:\n\t");
 1761   }
 1762 }
 1763 #endif
 1764 
// Emit the method prologue. Frame construction is delegated to
// verified_entry (sp increment 0 here; MachVEPNode passes a nonzero
// increment for scalarized inline-type entries), followed by the nmethod
// entry barrier and publication of the verified-entry label.
 1765 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1766   Compile* C = ra_->C;
 1767 







 1768 
 1769   __ verified_entry(C, 0);




 1770 
 1771   if (C->stub_function() == nullptr) {
      // Guard compiled-code entry against concurrently disarmed nmethods.
 1772     __ entry_barrier();
 1773   }
 1774 
 1775   if (!Compile::current()->output()->in_scratch_emit_size()) {
      // Bind the label MachVEPNode branches to after unpacking inline-type
      // args; skipped while only measuring code size (label not real then).
 1776     __ bind(*_verified_entry);























 1777   }
 1778 
 1779   if (VerifyStackAtCalls) {
 1780     Unimplemented();
 1781   }
 1782 
 1783   C->output()->set_frame_complete(__ offset());
 1784 
 1785   if (C->has_mach_constant_base_node()) {
 1786     // NOTE: We set the table base offset here because users might be
 1787     // emitted before MachConstantBaseNode.
 1788     ConstantTable& constant_table = C->output()->constant_table();
 1789     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1790   }
 1791 }
 1792 






// Number of relocation entries contributed by the prologue: none.
 1793 int MachPrologNode::reloc() const
 1794 {
 1795   return 0;
 1796 }
 1797 
 1798 //=============================================================================
 1799 
 1800 #ifndef PRODUCT
 1801 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1802   Compile* C = ra_->C;
 1803   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1804 
 1805   st->print("# pop frame %d\n\t",framesize);
 1806 
 1807   if (framesize == 0) {
 1808     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1809   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1810     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1811     st->print("add  sp, sp, #%d\n\t", framesize);
 1812   } else {

 1815     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1816   }
 1817   if (VM_Version::use_rop_protection()) {
 1818     st->print("autiaz\n\t");
 1819     st->print("ldr  zr, [lr]\n\t");
 1820   }
 1821 
 1822   if (do_polling() && C->is_method_compilation()) {
 1823     st->print("# test polling word\n\t");
 1824     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1825     st->print("cmp  sp, rscratch1\n\t");
 1826     st->print("bhi #slow_path");
 1827   }
 1828 }
 1829 #endif
 1830 
// Emit the method epilogue: tear down the frame (repairing a dynamically
// extended stack when needs_stack_repair() — presumably set for scalarized
// inline-type calling conventions; confirm), optional reserved-stack
// check, and the return-time safepoint poll.
 1831 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1832   Compile* C = ra_->C;
 1833   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1834 
 1835   __ remove_frame(framesize, C->needs_stack_repair());
 1836 
 1837   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1838     __ reserved_stack_check();
 1839   }
 1840 
 1841   if (do_polling() && C->is_method_compilation()) {
      // Dummy label while measuring size; otherwise register a real
      // out-of-line safepoint-poll stub and branch to its entry.
 1842     Label dummy_label;
 1843     Label* code_stub = &dummy_label;
 1844     if (!C->output()->in_scratch_emit_size()) {
 1845       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1846       C->output()->add_stub(stub);
 1847       code_stub = &stub->entry();
 1848     }
 1849     __ relocate(relocInfo::poll_return_type);
 1850     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1851   }
 1852 }
 1853 





 1854 int MachEpilogNode::reloc() const {
 1855   // Return number of relocatable values contained in this instruction.
 1856   return 1; // 1 for polling page.
 1857 }
 1858 
// Scheduling class: use the generic pipeline description.
 1859 const Pipeline * MachEpilogNode::pipeline() const {
 1860   return MachNode::pipeline_class();
 1861 }
 1862 
 1863 //=============================================================================
 1864 
 1865 static enum RC rc_class(OptoReg::Name reg) {
 1866 
 1867   if (reg == OptoReg::Bad) {
 1868     return rc_bad;
 1869   }
 1870 
 1871   // we have 32 int registers * 2 halves
 1872   int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
 1873 

 2129 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2130   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2131   int reg    = ra_->get_encode(this);
 2132 
 2133   // This add will handle any 24-bit signed offset. 24 bits allows an
 2134   // 8 megabyte stack frame.
 2135   __ add(as_Register(reg), sp, offset);
 2136 }
 2137 
 2138 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2139   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2140   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2141 
 2142   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2143     return NativeInstruction::instruction_size;
 2144   } else {
 2145     return 2 * NativeInstruction::instruction_size;
 2146   }
 2147 }
 2148 
 2149 //=============================================================================
 2150 #ifndef PRODUCT
// Debug-only listing of the value-type entry point: klass check for the
// unverified variant, inline-type argument unpacking for the verified one.
 2151 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2152 {
 2153   st->print_cr("# MachVEPNode");
 2154   if (!_verified) {
 2155     st->print_cr("\t load_class");
 2156   } else {
 2157     st->print_cr("\t unpack_inline_arg");
 2158   }
 2159 }
 2160 #endif
 2161 
// Emit the value-type entry point. Unverified: just the inline-cache
// check. Verified: run the entry barrier inside a temporary frame, unpack
// scalarized inline-type arguments, then jump to the verified entry
// (bound by MachPrologNode::emit).
 2162 void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
 2163 {
 2164   if (!_verified) {
 2165     __ ic_check(1);
 2166   } else {
 2167     // TODO 8284443 Avoid creation of temporary frame
 2168     if (ra_->C->stub_function() == nullptr) {
      // Build and immediately drop a frame solely so entry_barrier runs
      // with a walkable frame (see the TODO above).
 2169       __ verified_entry(ra_->C, 0);
 2170       __ entry_barrier();
 2171       int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
 2172       __ remove_frame(framesize, false);
 2173     }
 2174     // Unpack inline type args passed as oop and then jump to
 2175     // the verified entry point (skipping the unverified entry).
 2176     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2177     // Emit code for verified entry and save increment for stack repair on return
 2178     __ verified_entry(ra_->C, sp_inc);
 2179     if (Compile::current()->output()->in_scratch_emit_size()) {
      // Dummy branch target while only measuring code size.
 2180       Label dummy_verified_entry;
 2181       __ b(dummy_verified_entry);
 2182     } else {
 2183       __ b(*_verified_entry);
 2184     }
 2185   }
 2186 }
 2187 
 2188 //=============================================================================
 2189 #ifndef PRODUCT
 2190 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2191 {
 2192   st->print_cr("# MachUEPNode");
 2193   if (UseCompressedClassPointers) {
 2194     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2195     st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2196     st->print_cr("\tcmpw rscratch1, r10");
 2197   } else {
 2198     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2199     st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2200     st->print_cr("\tcmp rscratch1, r10");
 2201   }
 2202   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2203 }
 2204 #endif
 2205 
 2206 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2207 {
 2208   __ ic_check(InteriorEntryAlignment);
 2209 }
 2210 





 2211 // REQUIRED EMIT CODE
 2212 
 2213 //=============================================================================
 2214 
 2215 // Emit exception handler code.
 2216 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 2217 {
 2218   // mov rscratch1 #exception_blob_entry_point
 2219   // br rscratch1
 2220   // Note that the code buffer's insts_mark is always relative to insts.
 2221   // That's why we must use the macroassembler to generate a handler.
 2222   address base = __ start_a_stub(size_exception_handler());
 2223   if (base == nullptr) {
 2224     ciEnv::current()->record_failure("CodeCache is full");
 2225     return 0;  // CodeBuffer::expand failed
 2226   }
 2227   int offset = __ offset();
 2228   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2229   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2230   __ end_a_stub();

 3683   %}
 3684 
 3685   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3686     int method_index = resolved_method_index(masm);
 3687     address call = __ ic_call((address)$meth$$method, method_index);
 3688     if (call == nullptr) {
 3689       ciEnv::current()->record_failure("CodeCache is full");
 3690       return;
 3691     }
 3692     __ post_call_nop();
 3693     if (Compile::current()->max_vector_size() > 0) {
 3694       __ reinitialize_ptrue();
 3695     }
 3696   %}
 3697 
  // Post-call bookkeeping. For callees returning an inline type as
  // scalarized fields, initializes the null-marker projection register and
  // normalizes r0 so compiled code can null-check the returned oop.
  3698   enc_class aarch64_enc_call_epilog() %{
  3699     if (VerifyStackAtCalls) {
  3700       // Check that stack depth is unchanged: find majik cookie on stack
  3701       __ call_Unimplemented();
  3702     }
  3703     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
  3704       // The last return value is not set by the callee but used to pass the null marker to compiled code.
  3705       // Search for the corresponding projection, get the register and emit code that initialized it.
       // NOTE(review): con indexes the last slot of the call's return-value
       // range (range_cc) — confirm against the type-function layout.
  3706       uint con = (tf()->range_cc()->cnt() - 1);
  3707       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
  3708         ProjNode* proj = fast_out(i)->as_Proj();
  3709         if (proj->_con == con) {
  3710           // Set null marker if r0 is non-null (a non-null value is returned buffered or scalarized)
  3711           OptoReg::Name optoReg = ra_->get_reg_first(proj);
  3712           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
       // Spill through rscratch1 when the projection lives on the stack.
  3713           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
  3714           __ cmp(r0, zr);
  3715           __ cset(toReg, Assembler::NE);
  3716           if (reg->is_stack()) {
  3717             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
  3718             __ str(toReg, Address(sp, st_off));
  3719           }
  3720           break;
  3721         }
  3722       }
  3723       if (return_value_is_used()) {
  3724         // An inline type is returned as fields in multiple registers.
  3725         // R0 either contains an oop if the inline type is buffered or a pointer
  3726         // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
  3727         // if the lowest bit is set to allow C2 to use the oop after null checking.
  3728         // r0 &= (r0 & 1) - 1
  3729         __ andr(rscratch1, r0, 0x1);
  3730         __ sub(rscratch1, rscratch1, 0x1);
  3731         __ andr(r0, r0, rscratch1);
  3732       }
  3733     }
  3734   %}
 3735 
 3736   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3737     // some calls to generated routines (arraycopy code) are scheduled
 3738     // by C2 as runtime calls. if so we can call them using a br (they
 3739     // will be in a reachable segment) otherwise we have to use a blr
 3740     // which loads the absolute address into a register.
 3741     address entry = (address)$meth$$method;
 3742     CodeBlob *cb = CodeCache::find_blob(entry);
 3743     if (cb) {
 3744       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3745       if (call == nullptr) {
 3746         ciEnv::current()->record_failure("CodeCache is full");
 3747         return;
 3748       }
 3749       __ post_call_nop();
 3750     } else {
 3751       Label retaddr;
 3752       // Make the anchor frame walkable
 3753       __ adr(rscratch2, retaddr);

 6823 instruct loadConL(iRegLNoSp dst, immL src)
 6824 %{
 6825   match(Set dst src);
 6826 
 6827   ins_cost(INSN_COST);
 6828   format %{ "mov $dst, $src\t# long" %}
 6829 
 6830   ins_encode( aarch64_enc_mov_imm(dst, src) );
 6831 
 6832   ins_pipe(ialu_imm);
 6833 %}
 6834 
 6835 // Load Pointer Constant
 6836 
// Load a non-null pointer constant into a general-purpose register.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  // Costed as a multi-instruction sequence (a full 64-bit pointer
  // materialization), unlike the cheap zero case below.
  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr"
  %}

  // Pointer constants go through the pointer-specific enc_class
  // (aarch64_enc_mov_p) rather than the plain immediate mover.
  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}
 6850 
 6851 // Load Null Pointer Constant
 6852 
// Load the null pointer constant. Matched separately from loadConP so
// the zero case gets the cheaper single-instruction cost and encoding.
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# nullptr ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}

 8032 %}
 8033 
 8034 // ============================================================================
 8035 // Cast/Convert Instructions
 8036 
 8037 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8038   match(Set dst (CastX2P src));
 8039 
 8040   ins_cost(INSN_COST);
 8041   format %{ "mov $dst, $src\t# long -> ptr" %}
 8042 
 8043   ins_encode %{
 8044     if ($dst$$reg != $src$$reg) {
 8045       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8046     }
 8047   %}
 8048 
 8049   ins_pipe(ialu_reg);
 8050 %}
 8051 
 8052 instruct castI2N(iRegNNoSp dst, iRegI src) %{
 8053   match(Set dst (CastI2N src));
 8054 
 8055   ins_cost(INSN_COST);
 8056   format %{ "mov $dst, $src\t# int -> narrow ptr" %}
 8057 
 8058   ins_encode %{
 8059     if ($dst$$reg != $src$$reg) {
 8060       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8061     }
 8062   %}
 8063 
 8064   ins_pipe(ialu_reg);
 8065 %}
 8066 
// Convert a narrow (compressed) pointer to a long. NOTE(review): the
// match rule is CastP2X — the narrow variant is selected by the iRegN
// source operand, mirroring the full-width castP2X below.
instruct castN2X(iRegLNoSp dst, iRegN src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    // Bit-for-bit reinterpretation: only a move, and only if the
    // register allocator put src and dst in different registers.
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
 8081 
 8082 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8083   match(Set dst (CastP2X src));
 8084 
 8085   ins_cost(INSN_COST);
 8086   format %{ "mov $dst, $src\t# ptr -> long" %}
 8087 
 8088   ins_encode %{
 8089     if ($dst$$reg != $src$$reg) {
 8090       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8091     }
 8092   %}
 8093 
 8094   ins_pipe(ialu_reg);
 8095 %}
 8096 
 8097 // Convert oop into int for vectors alignment masking
 8098 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8099   match(Set dst (ConvL2I (CastP2X src)));
 8100 
 8101   ins_cost(INSN_COST);

15015 
15016   match(Set dst (MoveL2D src));
15017 
15018   effect(DEF dst, USE src);
15019 
15020   ins_cost(INSN_COST);
15021 
15022   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15023 
15024   ins_encode %{
15025     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15026   %}
15027 
15028   ins_pipe(fp_l2d);
15029 
15030 %}
15031 
15032 // ============================================================================
15033 // clearing of an array
15034 
// Clear (zero-fill) an array: variable word count in r11, base in r10.
// Both inputs are clobbered, hence USE_KILL.
instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray (Binary cnt base) zero));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    // zero_words returns nullptr when it could not emit the code it
    // needed (presumably an out-of-line stub/trampoline — the code
    // cache is full); bail out of the compilation in that case.
    address tpc = __ zero_words($base$$Register, $cnt$$Register);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
15053 
// Fill an array with a non-zero word value held in $val. Only selected
// when the node is restricted to whole-word copies (word_copy_only).
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base, $val" %}

  ins_encode %{
    // fill_words emits everything inline; unlike zero_words there is no
    // stub call here and therefore no code-cache-full failure path.
    __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
15069 
// Zero-fill with a compile-time-constant word count. Only used for
// small lengths (below BlockZeroingLowLimit, expressed in words) and
// when the node is not restricted to word-only copies.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
  predicate((uint64_t)n->in(2)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
            && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    // Constant-length variant of zero_words; nullptr signals that the
    // required code could not be emitted (code cache full) — abort the
    // compilation rather than produce broken code.
    address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
    if (tpc == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
15091 
15092 // ============================================================================
15093 // Overflow Math Instructions
15094 

16405 %}
16406 
16407 // Call Runtime Instruction without safepoint and with vector arguments
// Runtime leaf call (no safepoint) whose arguments include vectors.
// Shares the java-to-runtime call encoding with the other leaf calls.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf vector $meth" %}

  ins_encode(aarch64_enc_java_to_runtime(meth));

  ins_pipe(pipe_class_call);
%}
16422 
16423 // Call Runtime Instruction
16424 
16425 // entry point is null, target holds the address to call
// Indirect runtime leaf call (no FP save): selected when the call node
// carries no static entry point, so the target address is taken from a
// register and invoked with blr.
instruct CallLeafNoFPIndirect(iRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);

  match(CallLeafNoFP target);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp indirect $target" %}

  ins_encode %{
    __ blr($target$$Register);
  %}

  ins_pipe(pipe_class_call);
%}
16442 
// Direct runtime leaf call (no FP save): the complement of the indirect
// form above, used when a static entry point is known at compile time.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);

  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  // Same encoding as ordinary runtime calls; it chooses between a
  // trampoline call and an absolute-address blr sequence.
  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16459 
16460 // Tail Call; Jump from runtime stub to Java code.
16461 // Also known as an 'interprocedural jump'.
16462 // Target of jump will eventually return to caller.
16463 // TailJump below removes the return address.
16464 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16465 // emitted just above the TailCall which has reset rfp to the caller state.
16466 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
< prev index next >