< prev index next >

src/hotspot/cpu/aarch64/aarch64.ad

Print this page

 1637 
 1638 int MachCallDynamicJavaNode::ret_addr_offset()
 1639 {
        // Offset of the return address for a dynamic Java call: a constant 16,
        // which the trailing comment accounts for as four instructions.
 1640   return 16; // movz, movk, movk, bl
 1641 }
 1642 
 1643 int MachCallRuntimeNode::ret_addr_offset() {
        // Chooses the offset by looking _entry_point up in the code cache:
        // a target that lies inside a code blob is reached with a single
        // instruction, anything else uses the six-instruction sequence.
 1644   // for generated stubs the call will be
 1645   //   bl(addr)
 1646   // or with far branches
 1647   //   bl(trampoline_stub)
 1648   // for real runtime callouts it will be six instructions
 1649   // see aarch64_enc_java_to_runtime
 1650   //   adr(rscratch2, retaddr)
 1651   //   str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
 1652   //   lea(rscratch1, RuntimeAddress(addr)
 1653   //   blr(rscratch1)
        // NOTE(review): only four macro-assembler calls are listed above; the
        // count of six presumably reflects how they expand -- confirm against
        // aarch64_enc_java_to_runtime.
 1654   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1655   if (cb) {
 1656     return 1 * NativeInstruction::instruction_size;



 1657   } else {
 1658     return 6 * NativeInstruction::instruction_size;
 1659   }
 1660 }
 1661 
 1662 //=============================================================================
 1663 
 1664 #ifndef PRODUCT
 1665 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
        // Non-PRODUCT listing helper: writes the fixed text "BREAKPOINT" to st.
        // ra_ is not used.
 1666   st->print("BREAKPOINT");
 1667 }
 1668 #endif
 1669 
 1670 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits a brk with immediate 0 through the `__` assembler shorthand.
 1671   __ brk(0);
 1672 }
 1673 
 1674 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
        // No fixed size is hard-coded here; defers to MachNode::size(ra_).
 1675   return MachNode::size(ra_);
 1676 }

 1745   if (C->stub_function() == nullptr) {
 1746     st->print("\n\t");
 1747     st->print("ldr  rscratch1, [guard]\n\t");
 1748     st->print("dmb ishld\n\t");
 1749     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1750     st->print("cmp  rscratch1, rscratch2\n\t");
 1751     st->print("b.eq skip");
 1752     st->print("\n\t");
 1753     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1754     st->print("b skip\n\t");
 1755     st->print("guard: int\n\t");
 1756     st->print("\n\t");
 1757     st->print("skip:\n\t");
 1758   }
 1759 }
 1760 #endif
 1761 
 1762 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog in this order: patchable nop, optional class
        // initialization barrier, optional ptrue re-initialization, optional
        // stack bang, frame build, nmethod entry barrier (non-stub compilations
        // only). It then records the frame-complete offset and, if a constant
        // base node exists, fixes the constant table base offset.
 1763   Compile* C = ra_->C;
 1764 
 1765   // n.b. frame size includes space for return pc and rfp
 1766   const int framesize = C->output()->frame_size_in_bytes();
 1767 
 1768   // insert a nop at the start of the prolog so we can patch in a
 1769   // branch if we need to invalidate the method later
 1770   __ nop();
 1771 
 1772   if (C->clinit_barrier_on_entry()) {
 1773     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1774 
 1775     Label L_skip_barrier;
 1776 
          // clinit_barrier is handed L_skip_barrier; the code that falls
          // through to the far_jump goes to the handle-wrong-method stub.
 1777     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1778     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1779     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1780     __ bind(L_skip_barrier);
 1781   }
 1782 
        // Only when this compilation uses vectors (max_vector_size() > 0).
 1783   if (C->max_vector_size() > 0) {
 1784     __ reinitialize_ptrue();
 1785   }
 1786 
 1787   int bangsize = C->output()->bang_size_in_bytes();
 1788   if (C->output()->need_stack_bang(bangsize))
 1789     __ generate_stack_overflow_check(bangsize);
 1790 
 1791   __ build_frame(framesize);
 1792 
 1793   if (C->stub_function() == nullptr) {
 1794     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1795     // Dummy labels for just measuring the code size
 1796     Label dummy_slow_path;
 1797     Label dummy_continuation;
 1798     Label dummy_guard;
 1799     Label* slow_path = &dummy_slow_path;
 1800     Label* continuation = &dummy_continuation;
 1801     Label* guard = &dummy_guard;
 1802     if (!Compile::current()->output()->in_scratch_emit_size()) {
 1803       // Use real labels from actual stub when not emitting code for the purpose of measuring its size
 1804       C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
 1805       Compile::current()->output()->add_stub(stub);
 1806       slow_path = &stub->entry();
 1807       continuation = &stub->continuation();
 1808       guard = &stub->guard();
 1809     }
 1810     // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
 1811     bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
 1812   }
 1813 
        // Stack verification at calls is not supported on this port.
 1814   if (VerifyStackAtCalls) {
 1815     Unimplemented();
 1816   }
 1817 
 1818   C->output()->set_frame_complete(__ offset());
 1819 
 1820   if (C->has_mach_constant_base_node()) {
 1821     // NOTE: We set the table base offset here because users might be
 1822     // emitted before MachConstantBaseNode.
 1823     ConstantTable& constant_table = C->output()->constant_table();
 1824     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1825   }
 1826 }
 1827 
 1828 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1829 {
        // The prolog length depends on several runtime conditions, so it is
        // not precomputed; MachNode::size(ra_) works it out.
 1830   return MachNode::size(ra_); // too many variables; just compute it
 1831                               // the hard way
 1832 }
 1833 
 1834 int MachPrologNode::reloc() const
 1835 {
        // Reports zero relocatable values for the prolog.
 1836   return 0;
 1837 }
 1838 
 1839 //=============================================================================
 1840 
 1841 #ifndef PRODUCT
 1842 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
        // Non-PRODUCT listing of the epilog: prints a frame-pop sequence chosen
        // by framesize, the ROP-protection instructions when enabled, and the
        // return poll when this node polls in a method compilation.
 1843   Compile* C = ra_->C;
        // frame_slots() counts int-sized slots; the shift converts to bytes.
 1844   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1845 
 1846   st->print("# pop frame %d\n\t",framesize);
 1847 
 1848   if (framesize == 0) {
 1849     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1850   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1851     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1852     st->print("add  sp, sp, #%d\n\t", framesize);
 1853   } else {

 1856     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1857   }
 1858   if (VM_Version::use_rop_protection()) {
 1859     st->print("autiaz\n\t");
 1860     st->print("ldr  zr, [lr]\n\t");
 1861   }
 1862 
 1863   if (do_polling() && C->is_method_compilation()) {
 1864     st->print("# test polling word\n\t");
          // NOTE(review): the load below is printed in post-index form
          // ("[rthread],#off"); confirm this matches what safepoint_poll emits.
 1865     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1866     st->print("cmp  sp, rscratch1\n\t");
 1867     st->print("bhi #slow_path");
 1868   }
 1869 }
 1870 #endif
 1871 
 1872 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the epilog: removes the frame, runs the reserved-stack check
        // when applicable, and emits the return safepoint poll when this node
        // polls in a method compilation.
 1873   Compile* C = ra_->C;
        // frame_slots() counts int-sized slots; the shift converts to bytes.
 1874   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1875 
 1876   __ remove_frame(framesize);
 1877 
 1878   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1879     __ reserved_stack_check();
 1880   }
 1881 
 1882   if (do_polling() && C->is_method_compilation()) {
          // While only measuring code size (in_scratch_emit_size) no stub is
          // registered and the poll targets a throwaway local label.
 1883     Label dummy_label;
 1884     Label* code_stub = &dummy_label;
 1885     if (!C->output()->in_scratch_emit_size()) {
 1886       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1887       C->output()->add_stub(stub);
 1888       code_stub = &stub->entry();
 1889     }
 1890     __ relocate(relocInfo::poll_return_type);
 1891     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1892   }
 1893 }
 1894 
 1895 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
        // The epilog has no fixed length; MachNode::size(ra_) computes it.
 1896   // Variable size. Determine dynamically.
 1897   return MachNode::size(ra_);
 1898 }
 1899 
 1900 int MachEpilogNode::reloc() const {
        // Always reports one relocatable value, attributed to the polling page.
 1901   // Return number of relocatable values contained in this instruction.
 1902   return 1; // 1 for polling page.
 1903 }
 1904 
 1905 const Pipeline * MachEpilogNode::pipeline() const {
        // Uses the generic pipeline class supplied by MachNode.
 1906   return MachNode::pipeline_class();
 1907 }
 1908 
 1909 //=============================================================================
 1910 
 1911 static enum RC rc_class(OptoReg::Name reg) {
 1912 
 1913   if (reg == OptoReg::Bad) {
 1914     return rc_bad;
 1915   }
 1916 
 1917   // we have 32 int registers * 2 halves
 1918   int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
 1919 

 2175 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Computes sp + offset into the register assigned to this node, where
        // offset is reg2offset() of the first element of input register mask 0.
 2176   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2177   int reg    = ra_->get_encode(this);
 2178 
 2179   // This add will handle any 24-bit signed offset. 24 bits allows an
 2180   // 8 megabyte stack frame.
 2181   __ add(as_Register(reg), sp, offset);
 2182 }
 2183 
 2184 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
        // Returns one instruction's size when the offset is a valid add/sub
        // immediate, otherwise the size of two instructions.
 2185   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2186   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2187 
 2188   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2189     return NativeInstruction::instruction_size;
 2190   } else {
 2191     return 2 * NativeInstruction::instruction_size;
 2192   }
 2193 }
 2194 
 2195 //=============================================================================











 2196 































 2197 #ifndef PRODUCT
 2198 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2199 {
        // Non-PRODUCT listing of the unverified entry point check: load the
        // receiver's klass and the speculated klass, compare them, and branch
        // to the IC miss stub on mismatch. The 32-bit forms (ldrw/cmpw) are
        // printed when UseCompressedClassPointers is set, the 64-bit forms
        // otherwise; only the 32-bit loads are annotated as compressed.
 2200   st->print_cr("# MachUEPNode");
 2201   if (UseCompressedClassPointers) {
 2202     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2203     st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2204     st->print_cr("\tcmpw rscratch1, r10");
 2205   } else {
 2206     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2207     st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# klass");
 2208     st->print_cr("\tcmp rscratch1, r10");
 2209   }
 2210   st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
 2211 }
 2212 #endif
 2213 
 2214 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2215 {
        // The whole unverified-entry check is delegated to ic_check, which is
        // passed InteriorEntryAlignment.
 2216   __ ic_check(InteriorEntryAlignment);
 2217 }
 2218 
 2219 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2220 {
        // No fixed size is hard-coded here; defers to MachNode::size(ra_).
 2221   return MachNode::size(ra_);
 2222 }
 2223 
 2224 // REQUIRED EMIT CODE
 2225 
 2226 //=============================================================================
 2227 
 2228 // Emit exception handler code.
 2229 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 2230 {
 2231   // mov rscratch1 #exception_blob_entry_point
 2232   // br rscratch1
 2233   // Note that the code buffer's insts_mark is always relative to insts.
 2234   // That's why we must use the macroassembler to generate a handler.
 2235   address base = __ start_a_stub(size_exception_handler());
 2236   if (base == nullptr) {
 2237     ciEnv::current()->record_failure("CodeCache is full");
 2238     return 0;  // CodeBuffer::expand failed
 2239   }
 2240   int offset = __ offset();
 2241   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2242   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2243   __ end_a_stub();

 3696   %}
 3697 
 3698   enc_class aarch64_enc_java_dynamic_call(method meth) %{
          // Emits an inline-cache call to $meth. A null result from ic_call is
          // recorded as a "CodeCache is full" compilation failure and nothing
          // further is emitted. Otherwise a post-call nop follows, and ptrue is
          // re-initialized when the compilation uses vectors.
 3699     int method_index = resolved_method_index(masm);
 3700     address call = __ ic_call((address)$meth$$method, method_index);
 3701     if (call == nullptr) {
 3702       ciEnv::current()->record_failure("CodeCache is full");
 3703       return;
 3704     }
 3705     __ post_call_nop();
 3706     if (Compile::current()->max_vector_size() > 0) {
 3707       __ reinitialize_ptrue();
 3708     }
 3709   %}
 3710 
 3711   enc_class aarch64_enc_call_epilog() %{
          // Post-call encoding: emits code only when VerifyStackAtCalls is set,
          // and then just the call_Unimplemented() placeholder.
 3712     if (VerifyStackAtCalls) {
 3713       // Check that stack depth is unchanged: find magic cookie on stack
 3714       __ call_Unimplemented();
 3715     }































 3716   %}
 3717 
 3718   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3719     // some calls to generated routines (arraycopy code) are scheduled
 3720     // by C2 as runtime calls. if so we can call them using a br (they
 3721     // will be in a reachable segment) otherwise we have to use a blr
 3722     // which loads the absolute address into a register.
 3723     address entry = (address)$meth$$method;
 3724     CodeBlob *cb = CodeCache::find_blob(entry);
 3725     if (cb) {
 3726       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3727       if (call == nullptr) {
 3728         ciEnv::current()->record_failure("CodeCache is full");
 3729         return;
 3730       }
 3731       __ post_call_nop();
 3732     } else {
 3733       Label retaddr;
 3734       // Make the anchor frame walkable
 3735       __ adr(rscratch2, retaddr);

 6801 instruct loadConL(iRegLNoSp dst, immL src)
 6802 %{
        // Matches a long immediate (immL) being set into dst and encodes it
        // with aarch64_enc_mov_imm at the cost of one instruction.
 6803   match(Set dst src);
 6804 
 6805   ins_cost(INSN_COST);
 6806   format %{ "mov $dst, $src\t# long" %}
 6807 
 6808   ins_encode( aarch64_enc_mov_imm(dst, src) );
 6809 
 6810   ins_pipe(ialu_imm);
 6811 %}
 6812 
 6813 // Load Pointer Constant
 6814 
 6815 instruct loadConP(iRegPNoSp dst, immP con)
 6816 %{
        // Matches a pointer immediate (immP) being set into dst and encodes it
        // with aarch64_enc_mov_p; costed at four instructions. The format
        // string carries no trailing "\n\t", so the listing does not gain an
        // empty continuation line after this instruction.
 6817   match(Set dst con);
 6818 
 6819   ins_cost(INSN_COST * 4);
 6820   format %{
 6821     "mov  $dst, $con\t# ptr"
 6822   %}
 6823 
 6824   ins_encode(aarch64_enc_mov_p(dst, con));
 6825 
 6826   ins_pipe(ialu_imm);
 6827 %}
 6828 
 6829 // Load Null Pointer Constant
 6830 
 6831 instruct loadConP0(iRegPNoSp dst, immP0 con)
 6832 %{
        // Matches the immP0 operand (the null pointer constant, per the
        // preceding section comment) being set into dst and encodes it with
        // aarch64_enc_mov_p0 at the cost of one instruction.
 6833   match(Set dst con);
 6834 
 6835   ins_cost(INSN_COST);
 6836   format %{ "mov  $dst, $con\t# nullptr ptr" %}
 6837 
 6838   ins_encode(aarch64_enc_mov_p0(dst, con));
 6839 
 6840   ins_pipe(ialu_imm);
 6841 %}

 8012 %}
 8013 
 8014 // ============================================================================
 8015 // Cast/Convert Instructions
 8016 
 8017 instruct castX2P(iRegPNoSp dst, iRegL src) %{
        // Matches CastX2P (long to pointer). The cast is a plain register
        // move, and it is emitted only when dst and src were assigned
        // different registers.
 8018   match(Set dst (CastX2P src));
 8019 
 8020   ins_cost(INSN_COST);
 8021   format %{ "mov $dst, $src\t# long -> ptr" %}
 8022 
 8023   ins_encode %{
 8024     if ($dst$$reg != $src$$reg) {
 8025       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8026     }
 8027   %}
 8028 
 8029   ins_pipe(ialu_reg);
 8030 %}
 8031 






























 8032 instruct castP2X(iRegLNoSp dst, iRegP src) %{
        // Matches CastP2X (pointer to long). The cast is a plain register
        // move, and it is emitted only when dst and src were assigned
        // different registers.
 8033   match(Set dst (CastP2X src));
 8034 
 8035   ins_cost(INSN_COST);
 8036   format %{ "mov $dst, $src\t# ptr -> long" %}
 8037 
 8038   ins_encode %{
 8039     if ($dst$$reg != $src$$reg) {
 8040       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8041     }
 8042   %}
 8043 
 8044   ins_pipe(ialu_reg);
 8045 %}
 8046 
 8047 // Convert oop into int for vectors alignment masking
 8048 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8049   match(Set dst (ConvL2I (CastP2X src)));
 8050 
 8051   ins_cost(INSN_COST);

14955 
14956   match(Set dst (MoveL2D src));
14957 
14958   effect(DEF dst, USE src);
14959 
14960   ins_cost(INSN_COST);
14961 
14962   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14963 
14964   ins_encode %{
14965     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14966   %}
14967 
14968   ins_pipe(fp_l2d);
14969 
14970 %}
14971 
14972 // ============================================================================
14973 // clearing of an array
14974 
14975 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14976 %{
        // Matches ClearArray with the count and base in registers. Both
        // operands are used and killed, and the flags are killed. The encoding
        // calls zero_words; a null return is recorded as a "CodeCache is full"
        // compilation failure.
14977   match(Set dummy (ClearArray cnt base));
14978   effect(USE_KILL cnt, USE_KILL base, KILL cr);
14979 
14980   ins_cost(4 * INSN_COST);
14981   format %{ "ClearArray $cnt, $base" %}
14982 
14983   ins_encode %{
14984     address tpc = __ zero_words($base$$Register, $cnt$$Register);
14985     if (tpc == nullptr) {
14986       ciEnv::current()->record_failure("CodeCache is full");
14987       return;
14988     }
14989   %}
14990 
14991   ins_pipe(pipe_class_memory);
14992 %}
14993 
















14994 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
14995 %{
        // Matches ClearArray with a constant count. The predicate admits the
        // rule only when that constant, compared as unsigned, is below
        // BlockZeroingLowLimit >> LogBytesPerWord. base is used and killed,
        // temp is a scratch register and the flags are killed. A null return
        // from zero_words is recorded as a "CodeCache is full" failure.
14996   predicate((uint64_t)n->in(2)->get_long()
14997             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));

14998   match(Set dummy (ClearArray cnt base));
14999   effect(TEMP temp, USE_KILL base, KILL cr);
15000 
15001   ins_cost(4 * INSN_COST);
15002   format %{ "ClearArray $cnt, $base" %}
15003 
15004   ins_encode %{
15005     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15006     if (tpc == nullptr) {
15007       ciEnv::current()->record_failure("CodeCache is full");
15008       return;
15009     }
15010   %}
15011 
15012   ins_pipe(pipe_class_memory);
15013 %}
15014 
15015 // ============================================================================
15016 // Overflow Math Instructions
15017 

16328 %}
16329 
16330 // Call Runtime Instruction without safepoint and with vector arguments
16331 instruct CallLeafDirectVector(method meth)
16332 %{
        // Matches CallLeafVector and encodes the call to meth with
        // aarch64_enc_java_to_runtime, costed as CALL_COST.
16333   match(CallLeafVector);
16334 
16335   effect(USE meth);
16336 
16337   ins_cost(CALL_COST);
16338 
16339   format %{ "CALL, runtime leaf vector $meth" %}
16340 
16341   ins_encode(aarch64_enc_java_to_runtime(meth));
16342 
16343   ins_pipe(pipe_class_call);
16344 %}
16345 
16346 // Call Runtime Instruction
16347 


















16348 instruct CallLeafNoFPDirect(method meth)
16349 %{
        // Matches CallLeafNoFP and encodes the call to meth with
        // aarch64_enc_java_to_runtime, costed as CALL_COST.


16350   match(CallLeafNoFP);
16351 
16352   effect(USE meth);
16353 
16354   ins_cost(CALL_COST);
16355 
16356   format %{ "CALL, runtime leaf nofp $meth" %}
16357 
16358   ins_encode( aarch64_enc_java_to_runtime(meth) );
16359 
16360   ins_pipe(pipe_class_call);
16361 %}
16362 
16363 // Tail Call; Jump from runtime stub to Java code.
16364 // Also known as an 'interprocedural jump'.
16365 // Target of jump will eventually return to caller.
16366 // TailJump below removes the return address.
16367 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16368 // emitted just above the TailCall which has reset rfp to the caller state.
16369 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)

 1637 
 1638 int MachCallDynamicJavaNode::ret_addr_offset()
 1639 {
        // Offset of the return address for a dynamic Java call: a constant 16,
        // which the trailing comment accounts for as four instructions.
 1640   return 16; // movz, movk, movk, bl
 1641 }
 1642 
 1643 int MachCallRuntimeNode::ret_addr_offset() {
        // Chooses the offset from _entry_point: a target that lies inside a
        // code blob, or a null entry point, is reached with a single
        // instruction; anything else uses the six-instruction sequence.
 1644   // for generated stubs the call will be
 1645   //   bl(addr)
 1646   // or with far branches
 1647   //   bl(trampoline_stub)
 1648   // for real runtime callouts it will be six instructions
 1649   // see aarch64_enc_java_to_runtime
 1650   //   adr(rscratch2, retaddr)
 1651   //   str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
 1652   //   lea(rscratch1, RuntimeAddress(addr)
 1653   //   blr(rscratch1)
        // NOTE(review): only four macro-assembler calls are listed above; the
        // count of six presumably reflects how they expand -- confirm against
        // aarch64_enc_java_to_runtime.
 1654   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1655   if (cb) {
 1656     return 1 * NativeInstruction::instruction_size;
 1657   } else if (_entry_point == nullptr) {
 1658     // See CallLeafNoFPIndirect
 1659     return 1 * NativeInstruction::instruction_size;
 1660   } else {
 1661     return 6 * NativeInstruction::instruction_size;
 1662   }
 1663 }
 1664 
 1665 //=============================================================================
 1666 
 1667 #ifndef PRODUCT
 1668 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
        // Non-PRODUCT listing helper: writes the fixed text "BREAKPOINT" to st.
        // ra_ is not used.
 1669   st->print("BREAKPOINT");
 1670 }
 1671 #endif
 1672 
 1673 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits a brk with immediate 0 through the `__` assembler shorthand.
 1674   __ brk(0);
 1675 }
 1676 
 1677 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
        // No fixed size is hard-coded here; defers to MachNode::size(ra_).
 1678   return MachNode::size(ra_);
 1679 }

 1748   if (C->stub_function() == nullptr) {
 1749     st->print("\n\t");
 1750     st->print("ldr  rscratch1, [guard]\n\t");
 1751     st->print("dmb ishld\n\t");
 1752     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1753     st->print("cmp  rscratch1, rscratch2\n\t");
 1754     st->print("b.eq skip");
 1755     st->print("\n\t");
 1756     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1757     st->print("b skip\n\t");
 1758     st->print("guard: int\n\t");
 1759     st->print("\n\t");
 1760     st->print("skip:\n\t");
 1761   }
 1762 }
 1763 #endif
 1764 
 1765 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog in this order: patchable nop, verified_entry,
        // the entry barrier (non-stub compilations only), then binds the
        // verified-entry label. It then records the frame-complete offset and,
        // if a constant base node exists, fixes the constant table base offset.
 1766   Compile* C = ra_->C;
 1767 



 1768   // insert a nop at the start of the prolog so we can patch in a
 1769   // branch if we need to invalidate the method later
 1770   __ nop();
 1771 
 1772   __ verified_entry(C, 0);



 1773 
 1774   if (C->stub_function() == nullptr) {
 1775     __ entry_barrier();






 1776   }
 1777 
        // The label is bound only for real emission, not while the code is
        // being emitted just to measure its size.
 1778   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1779     __ bind(*_verified_entry);























 1780   }
 1781 
        // Stack verification at calls is not supported on this port.
 1782   if (VerifyStackAtCalls) {
 1783     Unimplemented();
 1784   }
 1785 
 1786   C->output()->set_frame_complete(__ offset());
 1787 
 1788   if (C->has_mach_constant_base_node()) {
 1789     // NOTE: We set the table base offset here because users might be
 1790     // emitted before MachConstantBaseNode.
 1791     ConstantTable& constant_table = C->output()->constant_table();
 1792     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1793   }
 1794 }
 1795 






 1796 int MachPrologNode::reloc() const
 1797 {
        // Reports zero relocatable values for the prolog.
 1798   return 0;
 1799 }
 1800 
 1801 //=============================================================================
 1802 
 1803 #ifndef PRODUCT
 1804 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
        // Non-PRODUCT listing of the epilog: prints a frame-pop sequence chosen
        // by framesize, the ROP-protection instructions when enabled, and the
        // return poll when this node polls in a method compilation.
 1805   Compile* C = ra_->C;
        // frame_slots() counts int-sized slots; the shift converts to bytes.
 1806   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1807 
 1808   st->print("# pop frame %d\n\t",framesize);
 1809 
 1810   if (framesize == 0) {
 1811     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1812   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1813     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1814     st->print("add  sp, sp, #%d\n\t", framesize);
 1815   } else {

 1818     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1819   }
 1820   if (VM_Version::use_rop_protection()) {
 1821     st->print("autiaz\n\t");
 1822     st->print("ldr  zr, [lr]\n\t");
 1823   }
 1824 
 1825   if (do_polling() && C->is_method_compilation()) {
 1826     st->print("# test polling word\n\t");
          // NOTE(review): the load below is printed in post-index form
          // ("[rthread],#off"); confirm this matches what safepoint_poll emits.
 1827     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1828     st->print("cmp  sp, rscratch1\n\t");
 1829     st->print("bhi #slow_path");
 1830   }
 1831 }
 1832 #endif
 1833 
 1834 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the epilog: removes the frame (telling remove_frame whether
        // the compilation needs stack repair), runs the reserved-stack check
        // when applicable, and emits the return safepoint poll when this node
        // polls in a method compilation.
 1835   Compile* C = ra_->C;
        // frame_slots() counts int-sized slots; the shift converts to bytes.
 1836   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1837 
 1838   __ remove_frame(framesize, C->needs_stack_repair());
 1839 
 1840   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1841     __ reserved_stack_check();
 1842   }
 1843 
 1844   if (do_polling() && C->is_method_compilation()) {
          // While only measuring code size (in_scratch_emit_size) no stub is
          // registered and the poll targets a throwaway local label.
 1845     Label dummy_label;
 1846     Label* code_stub = &dummy_label;
 1847     if (!C->output()->in_scratch_emit_size()) {
 1848       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1849       C->output()->add_stub(stub);
 1850       code_stub = &stub->entry();
 1851     }
 1852     __ relocate(relocInfo::poll_return_type);
 1853     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1854   }
 1855 }
 1856 





 1857 int MachEpilogNode::reloc() const {
        // Always reports one relocatable value, attributed to the polling page.
 1858   // Return number of relocatable values contained in this instruction.
 1859   return 1; // 1 for polling page.
 1860 }
 1861 
 1862 const Pipeline * MachEpilogNode::pipeline() const {
        // Uses the generic pipeline class supplied by MachNode.
 1863   return MachNode::pipeline_class();
 1864 }
 1865 
 1866 //=============================================================================
 1867 
 1868 static enum RC rc_class(OptoReg::Name reg) {
 1869 
 1870   if (reg == OptoReg::Bad) {
 1871     return rc_bad;
 1872   }
 1873 
 1874   // we have 32 int registers * 2 halves
 1875   int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
 1876 

 2132 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Computes sp + offset into the register assigned to this node, where
        // offset is reg2offset() of the first element of input register mask 0.
 2133   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2134   int reg    = ra_->get_encode(this);
 2135 
 2136   // This add will handle any 24-bit signed offset. 24 bits allows an
 2137   // 8 megabyte stack frame.
 2138   __ add(as_Register(reg), sp, offset);
 2139 }
 2140 
 2141 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
        // Returns one instruction's size when the offset is a valid add/sub
        // immediate, otherwise the size of two instructions.
 2142   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2143   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2144 
 2145   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2146     return NativeInstruction::instruction_size;
 2147   } else {
 2148     return 2 * NativeInstruction::instruction_size;
 2149   }
 2150 }
 2151 
 2152 //=============================================================================
 2153 #ifndef PRODUCT
 2154 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2155 {
        // Non-PRODUCT listing: prints the node name followed by one line that
        // depends on _verified ("load_class" when false, "unpack_inline_arg"
        // when true).
 2156   st->print_cr("# MachVEPNode");
 2157   if (!_verified) {
 2158     st->print_cr("\t load_class");
 2159   } else {
 2160     st->print_cr("\t unpack_inline_arg");
 2161   }
 2162 }
 2163 #endif
 2164 
 2165 void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
 2166 {
        // When _verified is false only ic_check(1) is emitted. Otherwise the
        // code unpacks inline-type arguments, emits the verified entry with
        // the resulting stack increment, and branches to *_verified_entry.
 2167   if (!_verified) {
 2168     __ ic_check(1);
 2169   } else {
 2170     // insert a nop at the start of the prolog so we can patch in a
 2171     // branch if we need to invalidate the method later
 2172     __ nop();
 2173 
 2174     // TODO 8284443 Avoid creation of temporary frame
          // For non-stub compilations a frame is built, the entry barrier
          // runs, and the frame is removed again before unpacking.
 2175     if (ra_->C->stub_function() == nullptr) {
 2176       __ verified_entry(ra_->C, 0);
 2177       __ entry_barrier();
 2178       int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
 2179       __ remove_frame(framesize, false);
 2180     }
 2181     // Unpack inline type args passed as oop and then jump to
 2182     // the verified entry point (skipping the unverified entry).
 2183     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2184     // Emit code for verified entry and save increment for stack repair on return
 2185     __ verified_entry(ra_->C, sp_inc);
          // While only measuring code size, branch to a throwaway local label
          // rather than the real verified-entry label.
 2186     if (Compile::current()->output()->in_scratch_emit_size()) {
 2187       Label dummy_verified_entry;
 2188       __ b(dummy_verified_entry);
 2189     } else {
 2190       __ b(*_verified_entry);
 2191     }
 2192   }
 2193 }
 2194 
 2195 //=============================================================================
 2196 #ifndef PRODUCT
 2197 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2198 {
        // Non-PRODUCT listing of the unverified entry point check: load the
        // receiver's klass and the speculated klass, compare them, and branch
        // to the IC miss stub on mismatch. The 32-bit forms (ldrw/cmpw) are
        // printed when UseCompressedClassPointers is set, the 64-bit forms
        // otherwise; only the 32-bit loads are annotated as compressed.
 2199   st->print_cr("# MachUEPNode");
 2200   if (UseCompressedClassPointers) {
 2201     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2202     st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2203     st->print_cr("\tcmpw rscratch1, r10");
 2204   } else {
 2205     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2206     st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# klass");
 2207     st->print_cr("\tcmp rscratch1, r10");
 2208   }
 2209   st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
 2210 }
 2211 #endif
 2212 
 2213 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2214 {
        // The whole unverified-entry check is delegated to ic_check, which is
        // passed InteriorEntryAlignment.
 2215   __ ic_check(InteriorEntryAlignment);
 2216 }
 2217 





 2218 // REQUIRED EMIT CODE
 2219 
 2220 //=============================================================================
 2221 
 2222 // Emit exception handler code.
 2223 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 2224 {
 2225   // mov rscratch1 #exception_blob_entry_point
 2226   // br rscratch1
 2227   // Note that the code buffer's insts_mark is always relative to insts.
 2228   // That's why we must use the macroassembler to generate a handler.
 2229   address base = __ start_a_stub(size_exception_handler());
 2230   if (base == nullptr) {
 2231     ciEnv::current()->record_failure("CodeCache is full");
 2232     return 0;  // CodeBuffer::expand failed
 2233   }
 2234   int offset = __ offset();
 2235   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2236   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2237   __ end_a_stub();

 3690   %}
 3691 
 3692   enc_class aarch64_enc_java_dynamic_call(method meth) %{
          // Emits an inline-cache call to $meth. A null result from ic_call is
          // recorded as a "CodeCache is full" compilation failure and nothing
          // further is emitted. Otherwise a post-call nop follows, and ptrue is
          // re-initialized when the compilation uses vectors.
 3693     int method_index = resolved_method_index(masm);
 3694     address call = __ ic_call((address)$meth$$method, method_index);
 3695     if (call == nullptr) {
 3696       ciEnv::current()->record_failure("CodeCache is full");
 3697       return;
 3698     }
 3699     __ post_call_nop();
 3700     if (Compile::current()->max_vector_size() > 0) {
 3701       __ reinitialize_ptrue();
 3702     }
 3703   %}
 3704 
 3705   enc_class aarch64_enc_call_epilog() %{
          // Post-call encoding. Besides the VerifyStackAtCalls placeholder, it
          // handles calls that return an inline type as fields: it sets the
          // null-marker projection from r0 and, when the return value is used,
          // zeroes r0 if its lowest bit is set.
 3706     if (VerifyStackAtCalls) {
 3707       // Check that stack depth is unchanged: find magic cookie on stack
 3708       __ call_Unimplemented();
 3709     }
 3710     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
 3711       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 3712       // Search for the corresponding projection, get the register and emit code that initialized it.
 3713       uint con = (tf()->range_cc()->cnt() - 1);
 3714       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 3715         ProjNode* proj = fast_out(i)->as_Proj();
 3716         if (proj->_con == con) {
 3717           // Set null marker if r0 is non-null (a non-null value is returned buffered or scalarized)
 3718           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 3719           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
                // A projection that lives on the stack is computed in
                // rscratch1 and then stored to its slot.
 3720           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 3721           __ cmp(r0, zr);
 3722           __ cset(toReg, Assembler::NE);
 3723           if (reg->is_stack()) {
 3724             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 3725             __ str(toReg, Address(sp, st_off));
 3726           }
 3727           break;
 3728         }
 3729       }
 3730       if (return_value_is_used()) {
 3731         // An inline type is returned as fields in multiple registers.
 3732         // R0 either contains an oop if the inline type is buffered or a pointer
 3733         // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
 3734         // if the lowest bit is set to allow C2 to use the oop after null checking.
 3735         // r0 &= (r0 & 1) - 1
 3736         __ andr(rscratch1, r0, 0x1);
 3737         __ sub(rscratch1, rscratch1, 0x1);
 3738         __ andr(r0, r0, rscratch1);
 3739       }
 3740     }
 3741   %}
 3742 
 3743   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3744     // some calls to generated routines (arraycopy code) are scheduled
 3745     // by C2 as runtime calls. if so we can call them using a br (they
 3746     // will be in a reachable segment) otherwise we have to use a blr
 3747     // which loads the absolute address into a register.
 3748     address entry = (address)$meth$$method;
 3749     CodeBlob *cb = CodeCache::find_blob(entry);
 3750     if (cb) {
 3751       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3752       if (call == nullptr) {
 3753         ciEnv::current()->record_failure("CodeCache is full");
 3754         return;
 3755       }
 3756       __ post_call_nop();
 3757     } else {
 3758       Label retaddr;
 3759       // Make the anchor frame walkable
 3760       __ adr(rscratch2, retaddr);

 6826 instruct loadConL(iRegLNoSp dst, immL src)
 6827 %{
        // Rule for placing a long immediate (immL operand) into the
        // iRegLNoSp destination register.
 6828   match(Set dst src);
 6829 
 6830   ins_cost(INSN_COST);
 6831   format %{ "mov $dst, $src\t# long" %}
 6832 
        // Emission is delegated to the aarch64_enc_mov_imm encoding, which is
        // defined outside this excerpt.
 6833   ins_encode( aarch64_enc_mov_imm(dst, src) );
 6834 
 6835   ins_pipe(ialu_imm);
 6836 %}
 6837 
 6838 // Load Pointer Constant
 6839 
 6840 instruct loadConP(iRegPNoSp dst, immP con)
 6841 %{
        // Rule for placing a pointer constant (immP operand) into the
        // iRegPNoSp destination register.
 6842   match(Set dst con);
 6843 
        // NOTE(review): costed at 4 * INSN_COST, presumably because the
        // pointer encoding can expand to several instructions -- confirm
        // against aarch64_enc_mov_p.
 6844   ins_cost(INSN_COST * 4);
 6845   format %{
 6846     "mov  $dst, $con\t# ptr"
 6847   %}
 6848 
        // Emission is delegated to the aarch64_enc_mov_p encoding, which is
        // defined outside this excerpt.
 6849   ins_encode(aarch64_enc_mov_p(dst, con));
 6850 
 6851   ins_pipe(ialu_imm);
 6852 %}
 6853 
 6854 // Load Null Pointer Constant
 6855 
 6856 instruct loadConP0(iRegPNoSp dst, immP0 con)
 6857 %{
        // Variant of the pointer-constant load that takes an immP0 operand
        // (described as the null pointer by the format string below).
 6858   match(Set dst con);
 6859 
 6860   ins_cost(INSN_COST);
 6861   format %{ "mov  $dst, $con\t# nullptr ptr" %}
 6862 
        // Emission is delegated to the aarch64_enc_mov_p0 encoding, which is
        // defined outside this excerpt.
 6863   ins_encode(aarch64_enc_mov_p0(dst, con));
 6864 
 6865   ins_pipe(ialu_imm);
 6866 %}

 8037 %}
 8038 
 8039 // ============================================================================
 8040 // Cast/Convert Instructions
 8041 
 8042 instruct castX2P(iRegPNoSp dst, iRegL src) %{
        // Matches CastX2P: the value in a long register (iRegL) is placed
        // in a pointer register (iRegPNoSp).
 8043   match(Set dst (CastX2P src));
 8044 
 8045   ins_cost(INSN_COST);
 8046   format %{ "mov $dst, $src\t# long -> ptr" %}
 8047 
 8048   ins_encode %{
          // Emit the register move only when source and destination were
          // assigned different registers; otherwise no code is generated.
 8049     if ($dst$$reg != $src$$reg) {
 8050       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8051     }
 8052   %}
 8053 
 8054   ins_pipe(ialu_reg);
 8055 %}
 8056 
 8057 instruct castI2N(iRegNNoSp dst, iRegI src) %{
        // Matches CastI2N: the value in an int register (iRegI) is placed
        // in a narrow-pointer register (iRegNNoSp).
 8058   match(Set dst (CastI2N src));
 8059 
 8060   ins_cost(INSN_COST);
 8061   format %{ "mov $dst, $src\t# int -> narrow ptr" %}
 8062 
 8063   ins_encode %{
          // Emit the register move only when source and destination were
          // assigned different registers; otherwise no code is generated.
 8064     if ($dst$$reg != $src$$reg) {
 8065       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8066     }
 8067   %}
 8068 
 8069   ins_pipe(ialu_reg);
 8070 %}
 8071 
 8072 instruct castN2X(iRegLNoSp dst, iRegN src) %{
        // Matches CastP2X when the source operand is a narrow-pointer
        // register (iRegN); the castP2X rule matches the same node for an
        // iRegP source.
 8073   match(Set dst (CastP2X src));
 8074 
 8075   ins_cost(INSN_COST);
        // NOTE(review): the format text says "ptr -> long" although the
        // source operand class is iRegN -- confirm whether the text should
        // mention a narrow pointer.
 8076   format %{ "mov $dst, $src\t# ptr -> long" %}
 8077 
 8078   ins_encode %{
          // Emit the register move only when source and destination were
          // assigned different registers; otherwise no code is generated.
 8079     if ($dst$$reg != $src$$reg) {
 8080       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8081     }
 8082   %}
 8083 
 8084   ins_pipe(ialu_reg);
 8085 %}
 8086 
 8087 instruct castP2X(iRegLNoSp dst, iRegP src) %{
        // Matches CastP2X: the value in a pointer register (iRegP) is placed
        // in a long register (iRegLNoSp).
 8088   match(Set dst (CastP2X src));
 8089 
 8090   ins_cost(INSN_COST);
 8091   format %{ "mov $dst, $src\t# ptr -> long" %}
 8092 
 8093   ins_encode %{
          // Emit the register move only when source and destination were
          // assigned different registers; otherwise no code is generated.
 8094     if ($dst$$reg != $src$$reg) {
 8095       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8096     }
 8097   %}
 8098 
 8099   ins_pipe(ialu_reg);
 8100 %}
 8101 
 8102 // Convert oop into int for vectors alignment masking
 8103 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8104   match(Set dst (ConvL2I (CastP2X src)));
 8105 
 8106   ins_cost(INSN_COST);

15010 
15011   match(Set dst (MoveL2D src));
15012 
15013   effect(DEF dst, USE src);
15014 
15015   ins_cost(INSN_COST);
15016 
15017   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15018 
15019   ins_encode %{
15020     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15021   %}
15022 
15023   ins_pipe(fp_l2d);
15024 
15025 %}
15026 
15027 // ============================================================================
15028 // clearing of an array
15029 
15030 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15031 %{
        // ClearArray rule for a register count and base where the fill value
        // is the immL0 operand.
15032   match(Set dummy (ClearArray (Binary cnt base) zero));
        // cnt and base are both read and clobbered; the flags register is
        // clobbered as well.
15033   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15034 
15035   ins_cost(4 * INSN_COST);
15036   format %{ "ClearArray $cnt, $base" %}
15037 
15038   ins_encode %{
          // zero_words() returns nullptr on failure; in that case the
          // compilation is marked as failed and emission stops here.
15039     address tpc = __ zero_words($base$$Register, $cnt$$Register);
15040     if (tpc == nullptr) {
15041       ciEnv::current()->record_failure("CodeCache is full");
15042       return;
15043     }
15044   %}
15045 
15046   ins_pipe(pipe_class_memory);
15047 %}
15048 
15049 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15050 %{
        // ClearArray rule that takes the fill value from a register (val).
        // Only selected when the ClearArrayNode reports word_copy_only().
15051   predicate(((ClearArrayNode*)n)->word_copy_only());
15052   match(Set dummy (ClearArray (Binary cnt base) val));
        // cnt and base are both read and clobbered; the flags register is
        // clobbered as well.
15053   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15054 
15055   ins_cost(4 * INSN_COST);
15056   format %{ "ClearArray $cnt, $base, $val" %}
15057 
15058   ins_encode %{
          // Unlike the zero_words()-based rules, no result is checked here.
15059     __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15060   %}
15061 
15062   ins_pipe(pipe_class_memory);
15063 %}
15064 
15065 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15066 %{
        // ClearArray rule for a constant count. Selected only when the long
        // value read from n->in(2), compared as unsigned, is below
        // BlockZeroingLowLimit >> LogBytesPerWord and the node is not
        // word_copy_only().
        // NOTE(review): the shift presumably converts a byte limit into a
        // word count -- confirm against the BlockZeroingLowLimit definition.
15067   predicate((uint64_t)n->in(2)->get_long()
15068             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15069             && !((ClearArrayNode*)n)->word_copy_only());
15070   match(Set dummy (ClearArray cnt base));
        // temp is reserved as a scratch register; base is read and
        // clobbered; the flags register is clobbered as well.
15071   effect(TEMP temp, USE_KILL base, KILL cr);
15072 
15073   ins_cost(4 * INSN_COST);
15074   format %{ "ClearArray $cnt, $base" %}
15075 
15076   ins_encode %{
          // The count is passed as an unsigned 64-bit constant. A nullptr
          // result marks the compilation as failed and stops emission.
15077     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15078     if (tpc == nullptr) {
15079       ciEnv::current()->record_failure("CodeCache is full");
15080       return;
15081     }
15082   %}
15083 
15084   ins_pipe(pipe_class_memory);
15085 %}
15086 
15087 // ============================================================================
15088 // Overflow Math Instructions
15089 

16400 %}
16401 
16402 // Call Runtime Instruction without safepoint and with vector arguments
16403 instruct CallLeafDirectVector(method meth)
16404 %{
        // Matches the CallLeafVector node; meth is the call target operand.
16405   match(CallLeafVector);
16406 
16407   effect(USE meth);
16408 
16409   ins_cost(CALL_COST);
16410 
16411   format %{ "CALL, runtime leaf vector $meth" %}
16412 
        // The call sequence comes from the aarch64_enc_java_to_runtime
        // enc_class, which looks the target up in the code cache to choose
        // how the call is emitted.
16413   ins_encode(aarch64_enc_java_to_runtime(meth));
16414 
16415   ins_pipe(pipe_class_call);
16416 %}
16417 
16418 // Call Runtime Instruction
16419 
16420 // entry point is null, target holds the address to call
16421 instruct CallLeafNoFPIndirect(iRegP target)
16422 %{
        // Selected only for CallLeafNoFP nodes whose entry point is null;
        // the direct variant (CallLeafNoFPDirect) covers the non-null case.
16423   predicate(n->as_Call()->entry_point() == nullptr);
16424 
16425   match(CallLeafNoFP target);
16426 
16427   ins_cost(CALL_COST);
16428 
16429   format %{ "CALL, runtime leaf nofp indirect $target" %}
16430 
16431   ins_encode %{
          // Branch-with-link to the address held in the target register.
16432     __ blr($target$$Register);
16433   %}
16434 
16435   ins_pipe(pipe_class_call);
16436 %}
16437 
16438 instruct CallLeafNoFPDirect(method meth)
16439 %{
        // Selected only for CallLeafNoFP nodes that carry a non-null entry
        // point; the indirect variant (CallLeafNoFPIndirect) covers the
        // null case.
16440   predicate(n->as_Call()->entry_point() != nullptr);
16441 
16442   match(CallLeafNoFP);
16443 
16444   effect(USE meth);
16445 
16446   ins_cost(CALL_COST);
16447 
16448   format %{ "CALL, runtime leaf nofp $meth" %}
16449 
        // The call sequence comes from the aarch64_enc_java_to_runtime
        // enc_class, which looks the target up in the code cache to choose
        // how the call is emitted.
16450   ins_encode( aarch64_enc_java_to_runtime(meth) );
16451 
16452   ins_pipe(pipe_class_call);
16453 %}
16454 
16455 // Tail Call; Jump from runtime stub to Java code.
16456 // Also known as an 'interprocedural jump'.
16457 // Target of jump will eventually return to caller.
16458 // TailJump below removes the return address.
16459 // Don't use rfp for 'jump_target' because a MachEpilogNode has already been
16460 // emitted just above the TailCall which has reset rfp to the caller state.
16461 instruct TailCalljmpInd(iRegPNoSpNoRfp jump_target, inline_cache_RegP method_ptr)
< prev index next >