< prev index next >

src/hotspot/cpu/aarch64/aarch64.ad

Print this page

 1631 
 // Distance in bytes from the start of a dynamic (inline-cache) Java call
 // site to its return address: four fixed-width A64 instructions.
 1632 int MachCallDynamicJavaNode::ret_addr_offset()
 1633 {
 1634   return 16; // movz, movk, movk, bl
 1635 }
 1636 
 // Distance in bytes from the start of a runtime call site to its return
 // address. A target that is already a CodeBlob in the code cache is reached
 // with a single bl; any other address needs the six-instruction far-call
 // sequence (the lea below expands to multiple mov/movk instructions).
 1637 int MachCallRuntimeNode::ret_addr_offset() {
 1638   // for generated stubs the call will be
 1639   //   bl(addr)
 1640   // or with far branches
 1641   //   bl(trampoline_stub)
 1642   // for real runtime callouts it will be six instructions
 1643   // see aarch64_enc_java_to_runtime
 1644   //   adr(rscratch2, retaddr)
 1645   //   lea(rscratch1, RuntimeAddress(addr))
 1646   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1647   //   blr(rscratch1)
 1648   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1649   if (cb) {
 1650     return 1 * NativeInstruction::instruction_size;



 1651   } else {
 1652     return 6 * NativeInstruction::instruction_size;
 1653   }
 1654 }
 1655 
 1656 //=============================================================================
 1657 
 1658 #ifndef PRODUCT
 // PrintOptoAssembly representation of the breakpoint node.
 1659 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1660   st->print("BREAKPOINT");
 1661 }
 1662 #endif
 1663 
 // Emit an A64 software breakpoint instruction (brk #0).
 1664 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1665   __ brk(0);
 1666 }
 1667 
 // Size is determined by the shared MachNode::size machinery.
 1668 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1669   return MachNode::size(ra_);
 1670 }

 1739   if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
 1740     st->print("\n\t");
 1741     st->print("ldr  rscratch1, [guard]\n\t");
 1742     st->print("dmb ishld\n\t");
 1743     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1744     st->print("cmp  rscratch1, rscratch2\n\t");
 1745     st->print("b.eq skip");
 1746     st->print("\n\t");
 1747     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1748     st->print("b skip\n\t");
 1749     st->print("guard: int\n\t");
 1750     st->print("\n\t");
 1751     st->print("skip:\n\t");
 1752   }
 1753 }
 1754 #endif
 1755 
 // Emit the C2 method prologue: a patchable leading nop, an optional
 // class-initialization barrier, SVE ptrue reinitialization when vectors are
 // used, the stack-overflow bang, the frame build, and (for non-stub
 // compilations) the nmethod entry barrier with its out-of-line stub.
 1756 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1757   Compile* C = ra_->C;
 1758 
 1759   // n.b. frame size includes space for return pc and rfp
 1760   const int framesize = C->output()->frame_size_in_bytes();
 1761 
 1762   // insert a nop at the start of the prolog so we can patch in a
 1763   // branch if we need to invalidate the method later
 1764   __ nop();
 1765 
 1766   if (C->clinit_barrier_on_entry()) {
 1767     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1768 
 1769     Label L_skip_barrier;
 1770 
 1771     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1772     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1773     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1774     __ bind(L_skip_barrier);
 1775   }
 1776 
 1777   if (C->max_vector_size() > 0) {
 1778     __ reinitialize_ptrue();
 1779   }
 1780 
 1781   int bangsize = C->output()->bang_size_in_bytes();
 1782   if (C->output()->need_stack_bang(bangsize))
 1783     __ generate_stack_overflow_check(bangsize);
 1784 
 1785   __ build_frame(framesize);
 1786 
 1787   if (C->stub_function() == nullptr) {
 1788     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1789     if (BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
 1790       // Dummy labels for just measuring the code size
 1791       Label dummy_slow_path;
 1792       Label dummy_continuation;
 1793       Label dummy_guard;
 1794       Label* slow_path = &dummy_slow_path;
 1795       Label* continuation = &dummy_continuation;
 1796       Label* guard = &dummy_guard;
 1797       if (!Compile::current()->output()->in_scratch_emit_size()) {
 1798         // Use real labels from actual stub when not emitting code for the purpose of measuring its size
 1799         C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
 1800         Compile::current()->output()->add_stub(stub);
 1801         slow_path = &stub->entry();
 1802         continuation = &stub->continuation();
 1803         guard = &stub->guard();
 1804       }
 1805       // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
 1806       bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
 1807     }
 1808   }
 1809 
 1810   if (VerifyStackAtCalls) {
 1811     Unimplemented();
 1812   }
 1813 
 1814   C->output()->set_frame_complete(__ offset());
 1815 
 1816   if (C->has_mach_constant_base_node()) {
 1817     // NOTE: We set the table base offset here because users might be
 1818     // emitted before MachConstantBaseNode.
 1819     ConstantTable& constant_table = C->output()->constant_table();
 1820     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1821   }
 1822 }
 1823 
 // The prologue length varies with several flags checked in emit(), so it is
 // measured generically rather than computed.
 1824 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1825 {
 1826   return MachNode::size(ra_); // too many variables; just compute it
 1827                               // the hard way
 1828 }
 1829 
 // The prologue contributes no relocation entries of its own.
 1830 int MachPrologNode::reloc() const
 1831 {
 1832   return 0;
 1833 }
 1834 
 1835 //=============================================================================
 1836 
 1837 #ifndef PRODUCT
 1838 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1839   Compile* C = ra_->C;
 1840   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1841 
 1842   st->print("# pop frame %d\n\t",framesize);
 1843 
 1844   if (framesize == 0) {
 1845     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1846   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1847     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1848     st->print("add  sp, sp, #%d\n\t", framesize);
 1849   } else {

 1852     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1853   }
 1854   if (VM_Version::use_rop_protection()) {
 1855     st->print("autiaz\n\t");
 1856     st->print("ldr  zr, [lr]\n\t");
 1857   }
 1858 
 1859   if (do_polling() && C->is_method_compilation()) {
 1860     st->print("# test polling word\n\t");
 1861     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1862     st->print("cmp  sp, rscratch1\n\t");
 1863     st->print("bhi #slow_path");
 1864   }
 1865 }
 1866 #endif
 1867 
 // Emit the C2 method epilogue: tear down the frame, optionally check the
 // reserved stack area, and emit the return safepoint poll (with its slow
 // path in an out-of-line C2SafepointPollStub).
 1868 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1869   Compile* C = ra_->C;
 1870   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1871 
 1872   __ remove_frame(framesize);
 1873 
 1874   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1875     __ reserved_stack_check();
 1876   }
 1877 
 1878   if (do_polling() && C->is_method_compilation()) {
 1879     Label dummy_label;
 1880     Label* code_stub = &dummy_label;
 1881     if (!C->output()->in_scratch_emit_size()) {
 1882       // Real stub only when actually emitting; a dummy label suffices for
 1883       // the size-measuring pass.
 1884       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1883       C->output()->add_stub(stub);
 1884       code_stub = &stub->entry();
 1885     }
 1886     __ relocate(relocInfo::poll_return_type);
 1887     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1888   }
 1889 }
 1890 
 1891 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1892   // Variable size. Determine dynamically.
 1893   return MachNode::size(ra_);
 1894 }
 1895 
 1896 int MachEpilogNode::reloc() const {
 1897   // Return number of relocatable values contained in this instruction.
 1898   return 1; // 1 for polling page.
 1899 }
 1900 
 // Use the default pipeline class for scheduling purposes.
 1901 const Pipeline * MachEpilogNode::pipeline() const {
 1902   return MachNode::pipeline_class();
 1903 }
 1904 
 1905 //=============================================================================
 1906 
 1907 static enum RC rc_class(OptoReg::Name reg) {
 1908 
 1909   if (reg == OptoReg::Bad) {
 1910     return rc_bad;
 1911   }
 1912 
 1913   // we have 32 int registers * 2 halves
 1914   int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
 1915 

 // Materialize the stack address of the monitor box slot into the node's
 // assigned register.
 2171 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2172   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2173   int reg    = ra_->get_encode(this);
 2174 
 2175   // This add will handle any 24-bit signed offset. 24 bits allows an
 2176   // 8 megabyte stack frame.
 2177   __ add(as_Register(reg), sp, offset);
 2178 }
 2179 
 // One instruction when the offset fits an add/sub immediate, otherwise the
 // macro assembler needs a second instruction to materialize the offset.
 2180 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2181   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2182   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2183 
 2184   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2185     return NativeInstruction::instruction_size;
 2186   } else {
 2187     return 2 * NativeInstruction::instruction_size;
 2188   }
 2189 }
 2190 
 2191 //=============================================================================









































 2192 

 2193 #ifndef PRODUCT
 // Pseudo-assembly for the unverified entry point's inline-cache check.
 // NOTE(review): the "# compressed klass" annotation also appears in the
 // uncompressed (else) branch; the emitted text is misleading there.
 2194 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2195 {
 2196   st->print_cr("# MachUEPNode");
 2197   if (UseCompressedClassPointers) {
 2198     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2199     st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2200     st->print_cr("\tcmpw rscratch1, r10");
 2201   } else {
 2202     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2203     st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2204     st->print_cr("\tcmp rscratch1, r10");
 2205   }
 2206   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2207 }
 2208 #endif
 2209 
 // Unverified entry point: emit the inline-cache check, aligned per
 // InteriorEntryAlignment.
 2210 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2211 {
 2212   __ ic_check(InteriorEntryAlignment);
 2213 }
 2214 
 // Size is determined by the shared MachNode::size machinery.
 2215 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2216 {
 2217   return MachNode::size(ra_);
 2218 }
 2219 
 2220 // REQUIRED EMIT CODE
 2221 
 2222 //=============================================================================
 2223 
 2224 // Emit exception handler code.
 2225 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 2226 {
 2227   // mov rscratch1 #exception_blob_entry_point
 2228   // br rscratch1
 2229   // Note that the code buffer's insts_mark is always relative to insts.
 2230   // That's why we must use the macroassembler to generate a handler.
 2231   address base = __ start_a_stub(size_exception_handler());
 2232   if (base == nullptr) {
 2233     ciEnv::current()->record_failure("CodeCache is full");
 2234     return 0;  // CodeBuffer::expand failed
 2235   }
 2236   int offset = __ offset();
 2237   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2238   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2239   __ end_a_stub();

 3653   %}
 3654 
   // Emit an inline-cache call to $meth; bail out if the code cache is full,
   // then add the post-call nop and restore the SVE ptrue predicate if this
   // compilation uses vectors.
 3655   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3656     int method_index = resolved_method_index(masm);
 3657     address call = __ ic_call((address)$meth$$method, method_index);
 3658     if (call == nullptr) {
 3659       ciEnv::current()->record_failure("CodeCache is full");
 3660       return;
 3661     }
 3662     __ post_call_nop();
 3663     if (Compile::current()->max_vector_size() > 0) {
 3664       __ reinitialize_ptrue();
 3665     }
 3666   %}
 3667 
   // Post-call stack check; only active under -XX:+VerifyStackAtCalls and
   // currently unimplemented on this port.
 3668   enc_class aarch64_enc_call_epilog() %{
 3669     if (VerifyStackAtCalls) {
 3670       // Check that stack depth is unchanged: find majik cookie on stack
 3671       __ call_Unimplemented();
 3672     }































 3673   %}
 3674 
 3675   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3676     // some calls to generated routines (arraycopy code) are scheduled
 3677     // by C2 as runtime calls. if so we can call them using a br (they
 3678     // will be in a reachable segment) otherwise we have to use a blr
 3679     // which loads the absolute address into a register.
 3680     address entry = (address)$meth$$method;
 3681     CodeBlob *cb = CodeCache::find_blob(entry);
 3682     if (cb) {
 3683       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3684       if (call == nullptr) {
 3685         ciEnv::current()->record_failure("CodeCache is full");
 3686         return;
 3687       }
 3688       __ post_call_nop();
 3689     } else {
 3690       Label retaddr;
 3691       __ adr(rscratch2, retaddr);
 3692       __ lea(rscratch1, RuntimeAddress(entry));

 // Load a 64-bit immediate into a general-purpose register.
 6660 instruct loadConL(iRegLNoSp dst, immL src)
 6661 %{
 6662   match(Set dst src);
 6663 
 6664   ins_cost(INSN_COST);
 6665   format %{ "mov $dst, $src\t# long" %}
 6666 
 6667   ins_encode( aarch64_enc_mov_imm(dst, src) );
 6668 
 6669   ins_pipe(ialu_imm);
 6670 %}
 6671 
 6672 // Load Pointer Constant
 6673 
 // Load a pointer constant into a general-purpose register.
 6674 instruct loadConP(iRegPNoSp dst, immP con)
 6675 %{
 6676   match(Set dst con);
 6677 
 6678   ins_cost(INSN_COST * 4);
 6679   format %{
 6680     "mov  $dst, $con\t# ptr\n\t"
 6681   %}
 6682 
 6683   ins_encode(aarch64_enc_mov_p(dst, con));
 6684 
 6685   ins_pipe(ialu_imm);
 6686 %}
 6687 
 6688 // Load Null Pointer Constant
 6689 
 // Load the null pointer constant (cheaper dedicated encoding).
 6690 instruct loadConP0(iRegPNoSp dst, immP0 con)
 6691 %{
 6692   match(Set dst con);
 6693 
 6694   ins_cost(INSN_COST);
 6695   format %{ "mov  $dst, $con\t# nullptr ptr" %}
 6696 
 6697   ins_encode(aarch64_enc_mov_p0(dst, con));
 6698 
 6699   ins_pipe(ialu_imm);
 6700 %}

 7881 %}
 7882 
 7883 // ============================================================================
 7884 // Cast/Convert Instructions
 7885 
 // Reinterpret a long as a pointer; emits nothing when source and
 // destination registers coincide.
 7886 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 7887   match(Set dst (CastX2P src));
 7888 
 7889   ins_cost(INSN_COST);
 7890   format %{ "mov $dst, $src\t# long -> ptr" %}
 7891 
 7892   ins_encode %{
 7893     if ($dst$$reg != $src$$reg) {
 7894       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 7895     }
 7896   %}
 7897 
 7898   ins_pipe(ialu_reg);
 7899 %}
 7900 















 // Reinterpret a pointer as a long; emits nothing when source and
 // destination registers coincide.
 7901 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 7902   match(Set dst (CastP2X src));
 7903 
 7904   ins_cost(INSN_COST);
 7905   format %{ "mov $dst, $src\t# ptr -> long" %}
 7906 
 7907   ins_encode %{
 7908     if ($dst$$reg != $src$$reg) {
 7909       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 7910     }
 7911   %}
 7912 
 7913   ins_pipe(ialu_reg);
 7914 %}
 7915 
 7916 // Convert oop into int for vectors alignment masking
 7917 instruct convP2I(iRegINoSp dst, iRegP src) %{
 7918   match(Set dst (ConvL2I (CastP2X src)));
 7919 
 7920   ins_cost(INSN_COST);

14702 
14703   match(Set dst (MoveL2D src));
14704 
14705   effect(DEF dst, USE src);
14706 
14707   ins_cost(INSN_COST);
14708 
14709   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14710 
14711   ins_encode %{
14712     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14713   %}
14714 
14715   ins_pipe(fp_l2d);
14716 
14717 %}
14718 
14719 // ============================================================================
14720 // clearing of an array
14721 
// Zero an array given base (r10) and word count (r11); both inputs are
// clobbered. Fails the compilation if the zeroing stub cannot be reached.
14722 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14723 %{
14724   match(Set dummy (ClearArray cnt base));
14725   effect(USE_KILL cnt, USE_KILL base, KILL cr);
14726 
14727   ins_cost(4 * INSN_COST);
14728   format %{ "ClearArray $cnt, $base" %}
14729 
14730   ins_encode %{
14731     address tpc = __ zero_words($base$$Register, $cnt$$Register);
14732     if (tpc == nullptr) {
14733       ciEnv::current()->record_failure("CodeCache is full");
14734       return;
14735     }
14736   %}
14737 
14738   ins_pipe(pipe_class_memory);
14739 %}
14740 
















// Zero an array with a small constant word count (below the block-zeroing
// threshold); uses r11 as scratch and kills the base register.
14741 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
14742 %{
14743   predicate((uint64_t)n->in(2)->get_long()
14744             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));

14745   match(Set dummy (ClearArray cnt base));
14746   effect(TEMP temp, USE_KILL base, KILL cr);
14747 
14748   ins_cost(4 * INSN_COST);
14749   format %{ "ClearArray $cnt, $base" %}
14750 
14751   ins_encode %{
14752     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
14753     if (tpc == nullptr) {
14754       ciEnv::current()->record_failure("CodeCache is full");
14755       return;
14756     }
14757   %}
14758 
14759   ins_pipe(pipe_class_memory);
14760 %}
14761 
14762 // ============================================================================
14763 // Overflow Math Instructions
14764 

16055 
16056 // Call Runtime Instruction
16057 
// Direct call to a runtime leaf routine (no Java frame state needed).
16058 instruct CallLeafDirect(method meth)
16059 %{
16060   match(CallLeaf);
16061 
16062   effect(USE meth);
16063 
16064   ins_cost(CALL_COST);
16065 
16066   format %{ "CALL, runtime leaf $meth" %}
16067 
16068   ins_encode( aarch64_enc_java_to_runtime(meth) );
16069 
16070   ins_pipe(pipe_class_call);
16071 %}
16072 
16073 // Call Runtime Instruction
16074 


















// Direct call to a runtime leaf routine that uses no floating-point state.
16075 instruct CallLeafNoFPDirect(method meth)
16076 %{


16077   match(CallLeafNoFP);
16078 
16079   effect(USE meth);
16080 
16081   ins_cost(CALL_COST);
16082 
16083   format %{ "CALL, runtime leaf nofp $meth" %}
16084 
16085   ins_encode( aarch64_enc_java_to_runtime(meth) );
16086 
16087   ins_pipe(pipe_class_call);
16088 %}
16089 
16090 // Tail Call; Jump from runtime stub to Java code.
16091 // Also known as an 'interprocedural jump'.
16092 // Target of jump will eventually return to caller.
16093 // TailJump below removes the return address.
16094 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16095 %{
16096   match(TailCall jump_target method_ptr);

 1631 
 // Distance in bytes from the start of a dynamic (inline-cache) Java call
 // site to its return address: four fixed-width A64 instructions.
 1632 int MachCallDynamicJavaNode::ret_addr_offset()
 1633 {
 1634   return 16; // movz, movk, movk, bl
 1635 }
 1636 
 // Distance in bytes from the start of a runtime call site to its return
 // address. A CodeBlob target or a null entry point (register-indirect leaf
 // call, see CallLeafNoFPIndirect) is a single instruction; any other
 // address needs the six-instruction far-call sequence (the lea below
 // expands to multiple mov/movk instructions).
 1637 int MachCallRuntimeNode::ret_addr_offset() {
 1638   // for generated stubs the call will be
 1639   //   bl(addr)
 1640   // or with far branches
 1641   //   bl(trampoline_stub)
 1642   // for real runtime callouts it will be six instructions
 1643   // see aarch64_enc_java_to_runtime
 1644   //   adr(rscratch2, retaddr)
 1645   //   lea(rscratch1, RuntimeAddress(addr))
 1646   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1647   //   blr(rscratch1)
 1648   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1649   if (cb) {
 1650     return 1 * NativeInstruction::instruction_size;
 1651   } else if (_entry_point == nullptr) {
 1652     // See CallLeafNoFPIndirect
 1653     return 1 * NativeInstruction::instruction_size;
 1654   } else {
 1655     return 6 * NativeInstruction::instruction_size;
 1656   }
 1657 }
 1658 
 1659 //=============================================================================
 1660 
 1661 #ifndef PRODUCT
 // PrintOptoAssembly representation of the breakpoint node.
 1662 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1663   st->print("BREAKPOINT");
 1664 }
 1665 #endif
 1666 
 // Emit an A64 software breakpoint instruction (brk #0).
 1667 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1668   __ brk(0);
 1669 }
 1670 
 // Size is determined by the shared MachNode::size machinery.
 1671 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1672   return MachNode::size(ra_);
 1673 }

 1742   if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
 1743     st->print("\n\t");
 1744     st->print("ldr  rscratch1, [guard]\n\t");
 1745     st->print("dmb ishld\n\t");
 1746     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1747     st->print("cmp  rscratch1, rscratch2\n\t");
 1748     st->print("b.eq skip");
 1749     st->print("\n\t");
 1750     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1751     st->print("b skip\n\t");
 1752     st->print("guard: int\n\t");
 1753     st->print("\n\t");
 1754     st->print("skip:\n\t");
 1755   }
 1756 }
 1757 #endif
 1758 
 // Emit the C2 method prologue (inline-type aware): a patchable leading nop,
 // verified_entry (which builds the frame; the 0 argument is the extra
 // stack increment), the nmethod entry barrier for non-stub compilations,
 // and binding of the _verified_entry label that MachVEPNode branches to
 // after unpacking scalarized inline-type arguments.
 1759 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1760   Compile* C = ra_->C;
 1761 



 1762   // insert a nop at the start of the prolog so we can patch in a
 1763   // branch if we need to invalidate the method later
 1764   __ nop();
 1765 
 1766   __ verified_entry(C, 0);









 1767 
 1768   if (C->stub_function() == nullptr) {
 1769     __ entry_barrier();
 1770   }
 1771 
 1772   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1773     __ bind(*_verified_entry);

























 1774   }
 1775 
 1776   if (VerifyStackAtCalls) {
 1777     Unimplemented();
 1778   }
 1779 
 1780   C->output()->set_frame_complete(__ offset());
 1781 
 1782   if (C->has_mach_constant_base_node()) {
 1783     // NOTE: We set the table base offset here because users might be
 1784     // emitted before MachConstantBaseNode.
 1785     ConstantTable& constant_table = C->output()->constant_table();
 1786     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1787   }
 1788 }
 1789 






 // The prologue contributes no relocation entries of its own.
 1790 int MachPrologNode::reloc() const
 1791 {
 1792   return 0;
 1793 }
 1794 
 1795 //=============================================================================
 1796 
 1797 #ifndef PRODUCT
 1798 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1799   Compile* C = ra_->C;
 1800   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1801 
 1802   st->print("# pop frame %d\n\t",framesize);
 1803 
 1804   if (framesize == 0) {
 1805     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1806   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1807     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1808     st->print("add  sp, sp, #%d\n\t", framesize);
 1809   } else {

 1812     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1813   }
 1814   if (VM_Version::use_rop_protection()) {
 1815     st->print("autiaz\n\t");
 1816     st->print("ldr  zr, [lr]\n\t");
 1817   }
 1818 
 1819   if (do_polling() && C->is_method_compilation()) {
 1820     st->print("# test polling word\n\t");
 1821     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1822     st->print("cmp  sp, rscratch1\n\t");
 1823     st->print("bhi #slow_path");
 1824   }
 1825 }
 1826 #endif
 1827 
 // Emit the C2 method epilogue: tear down the frame (honoring stack repair
 // for scalarized inline-type calling convention), optionally check the
 // reserved stack area, and emit the return safepoint poll (slow path in an
 // out-of-line C2SafepointPollStub).
 1828 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1829   Compile* C = ra_->C;
 1830   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1831 
 1832   __ remove_frame(framesize, C->needs_stack_repair());
 1833 
 1834   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1835     __ reserved_stack_check();
 1836   }
 1837 
 1838   if (do_polling() && C->is_method_compilation()) {
 1839     Label dummy_label;
 1840     Label* code_stub = &dummy_label;
 1841     if (!C->output()->in_scratch_emit_size()) {
 1842       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1843       C->output()->add_stub(stub);
 1844       code_stub = &stub->entry();
 1845     }
 1846     __ relocate(relocInfo::poll_return_type);
 1847     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1848   }
 1849 }
 1850 





 1851 int MachEpilogNode::reloc() const {
 1852   // Return number of relocatable values contained in this instruction.
 1853   return 1; // 1 for polling page.
 1854 }
 1855 
 // Use the default pipeline class for scheduling purposes.
 1856 const Pipeline * MachEpilogNode::pipeline() const {
 1857   return MachNode::pipeline_class();
 1858 }
 1859 
 1860 //=============================================================================
 1861 
 1862 static enum RC rc_class(OptoReg::Name reg) {
 1863 
 1864   if (reg == OptoReg::Bad) {
 1865     return rc_bad;
 1866   }
 1867 
 1868   // we have 32 int registers * 2 halves
 1869   int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;
 1870 

 // Materialize the stack address of the monitor box slot into the node's
 // assigned register.
 2126 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2127   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2128   int reg    = ra_->get_encode(this);
 2129 
 2130   // This add will handle any 24-bit signed offset. 24 bits allows an
 2131   // 8 megabyte stack frame.
 2132   __ add(as_Register(reg), sp, offset);
 2133 }
 2134 
 // One instruction when the offset fits an add/sub immediate, otherwise the
 // macro assembler needs a second instruction to materialize the offset.
 2135 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2136   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2137   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2138 
 2139   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2140     return NativeInstruction::instruction_size;
 2141   } else {
 2142     return 2 * NativeInstruction::instruction_size;
 2143   }
 2144 }
 2145 
 2146 ///=============================================================================
 2147 #ifndef PRODUCT
 // Pseudo-assembly for the inline-type (value) entry point: unverified
 // variant does the class check, verified variant unpacks inline-type args.
 2148 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2149 {
 2150   st->print_cr("# MachVEPNode");
 2151   if (!_verified) {
 2152     st->print_cr("\t load_class");
 2153   } else {
 2154     st->print_cr("\t unpack_inline_arg");
 2155   }
 2156 }
 2157 #endif
 2158 
 // Inline-type entry point. The unverified variant emits only the
 // inline-cache check. The verified variant unpacks scalarized inline-type
 // arguments — creating and tearing down a temporary frame first (TODO
 // below) so the entry barrier runs — then re-enters via verified_entry
 // with the stack increment needed for stack repair, and branches to the
 // verified entry label bound by MachPrologNode::emit.
 2159 void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const
 2160 {
 2161   if (!_verified) {
 2162     __ ic_check(1);
 2163   } else {
 2164     // insert a nop at the start of the prolog so we can patch in a
 2165     // branch if we need to invalidate the method later
 2166     __ nop();
 2167 
 2168     // TODO 8284443 Avoid creation of temporary frame
 2169     if (ra_->C->stub_function() == nullptr) {
 2170       __ verified_entry(ra_->C, 0);
 2171       __ entry_barrier();
 2172       int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
 2173       __ remove_frame(framesize, false);
 2174     }
 2175     // Unpack inline type args passed as oop and then jump to
 2176     // the verified entry point (skipping the unverified entry).
 2177     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2178     // Emit code for verified entry and save increment for stack repair on return
 2179     __ verified_entry(ra_->C, sp_inc);
 2180     if (Compile::current()->output()->in_scratch_emit_size()) {
 2181       // Dummy branch target during the size-measuring pass.
 2182       Label dummy_verified_entry;
 2182       __ b(dummy_verified_entry);
 2183     } else {
 2184       __ b(*_verified_entry);
 2185     }
 2186   }
 2187 }
 2188 
 2189 //=============================================================================
 2190 #ifndef PRODUCT
 // Pseudo-assembly for the unverified entry point's inline-cache check.
 // NOTE(review): the "# compressed klass" annotation also appears in the
 // uncompressed (else) branch; the emitted text is misleading there.
 2191 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2192 {
 2193   st->print_cr("# MachUEPNode");
 2194   if (UseCompressedClassPointers) {
 2195     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2196     st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2197     st->print_cr("\tcmpw rscratch1, r10");
 2198   } else {
 2199     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2200     st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
 2201     st->print_cr("\tcmp rscratch1, r10");
 2202   }
 2203   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2204 }
 2205 #endif
 2206 
 // Unverified entry point: emit the inline-cache check, aligned per
 // InteriorEntryAlignment.
 2207 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2208 {
 2209   __ ic_check(InteriorEntryAlignment);
 2210 }
 2211 





 2212 // REQUIRED EMIT CODE
 2213 
 2214 //=============================================================================
 2215 
 2216 // Emit exception handler code.
 2217 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 2218 {
 2219   // mov rscratch1 #exception_blob_entry_point
 2220   // br rscratch1
 2221   // Note that the code buffer's insts_mark is always relative to insts.
 2222   // That's why we must use the macroassembler to generate a handler.
 2223   address base = __ start_a_stub(size_exception_handler());
 2224   if (base == nullptr) {
 2225     ciEnv::current()->record_failure("CodeCache is full");
 2226     return 0;  // CodeBuffer::expand failed
 2227   }
 2228   int offset = __ offset();
 2229   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2230   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2231   __ end_a_stub();

 3645   %}
 3646 
   // Emit an inline-cache call to $meth; bail out if the code cache is full,
   // then add the post-call nop and restore the SVE ptrue predicate if this
   // compilation uses vectors.
 3647   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3648     int method_index = resolved_method_index(masm);
 3649     address call = __ ic_call((address)$meth$$method, method_index);
 3650     if (call == nullptr) {
 3651       ciEnv::current()->record_failure("CodeCache is full");
 3652       return;
 3653     }
 3654     __ post_call_nop();
 3655     if (Compile::current()->max_vector_size() > 0) {
 3656       __ reinitialize_ptrue();
 3657     }
 3658   %}
 3659 
   // Post-call fixups. Besides the (unimplemented) VerifyStackAtCalls check,
   // for calls returning an inline type as fields this sets the IsInit
   // projection from r0's null-ness and normalizes r0 so callers can
   // null-check it (clearing it when the low tag bit is set).
 3660   enc_class aarch64_enc_call_epilog() %{
 3661     if (VerifyStackAtCalls) {
 3662       // Check that stack depth is unchanged: find majik cookie on stack
 3663       __ call_Unimplemented();
 3664     }
 3665     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
 3666       // The last return value is not set by the callee but used to pass IsInit information to compiled code.
 3667       // Search for the corresponding projection, get the register and emit code that initializes it.
 3668       uint con = (tf()->range_cc()->cnt() - 1);
 3669       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 3670         ProjNode* proj = fast_out(i)->as_Proj();
 3671         if (proj->_con == con) {
 3672           // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
 3673           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 3674           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 3675           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 3676           __ cmp(r0, zr);
 3677           __ cset(toReg, Assembler::NE);
 3678           if (reg->is_stack()) {
 3679             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 3680             __ str(toReg, Address(sp, st_off));
 3681           }
 3682           break;
 3683         }
 3684       }
 3685       if (return_value_is_used()) {
 3686         // An inline type is returned as fields in multiple registers.
 3687         // R0 either contains an oop if the inline type is buffered or a pointer
 3688         // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
 3689         // if the lowest bit is set to allow C2 to use the oop after null checking.
 3690         // r0 &= (r0 & 1) - 1
 3691         __ andr(rscratch1, r0, 0x1);
 3692         __ sub(rscratch1, rscratch1, 0x1);
 3693         __ andr(r0, r0, rscratch1);
 3694       }
 3695     }
 3696   %}
 3697 
 3698   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3699     // some calls to generated routines (arraycopy code) are scheduled
 3700     // by C2 as runtime calls. if so we can call them using a br (they
 3701     // will be in a reachable segment) otherwise we have to use a blr
 3702     // which loads the absolute address into a register.
 3703     address entry = (address)$meth$$method;
 3704     CodeBlob *cb = CodeCache::find_blob(entry);
 3705     if (cb) {
 3706       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3707       if (call == nullptr) {
 3708         ciEnv::current()->record_failure("CodeCache is full");
 3709         return;
 3710       }
 3711       __ post_call_nop();
 3712     } else {
 3713       Label retaddr;
 3714       __ adr(rscratch2, retaddr);
 3715       __ lea(rscratch1, RuntimeAddress(entry));

 // Load a 64-bit integer constant into a general-purpose register.
 // Encoding is shared with other immediate moves via aarch64_enc_mov_imm
 // (defined elsewhere in this file) -- presumably selects the shortest
 // movz/movn/movk sequence for the value; TODO confirm against the
 // enc_class definition.
 6683 instruct loadConL(iRegLNoSp dst, immL src)
 6684 %{
 6685   match(Set dst src);
 6686 
 6687   ins_cost(INSN_COST);
 6688   format %{ "mov $dst, $src\t# long" %}
 6689 
 6690   ins_encode( aarch64_enc_mov_imm(dst, src) );
 6691 
 6692   ins_pipe(ialu_imm);
 6693 %}
 6694 
 6695 // Load Pointer Constant
 6696 
 // Load a pointer constant into a general-purpose register.
 // Cost is 4x a plain immediate move: pointer constants generally need a
 // multi-instruction materialization and/or relocation record -- see
 // aarch64_enc_mov_p (defined elsewhere in this file) for the actual
 // sequence; TODO confirm.
 6697 instruct loadConP(iRegPNoSp dst, immP con)
 6698 %{
 6699   match(Set dst con);
 6700 
 6701   ins_cost(INSN_COST * 4);
 6702   format %{
 6703     "mov  $dst, $con\t# ptr"
 6704   %}
 6705 
 6706   ins_encode(aarch64_enc_mov_p(dst, con));
 6707 
 6708   ins_pipe(ialu_imm);
 6709 %}
 6710 
 6711 // Load Null Pointer Constant
 6712 
 // Load the null pointer constant. Matched only for immP0 (the null
 // pointer immediate), so it can use the cheap single-cost encoding in
 // aarch64_enc_mov_p0 (defined elsewhere in this file) -- presumably a
 // single move from zr; TODO confirm.
 6713 instruct loadConP0(iRegPNoSp dst, immP0 con)
 6714 %{
 6715   match(Set dst con);
 6716 
 6717   ins_cost(INSN_COST);
 6718   format %{ "mov  $dst, $con\t# nullptr ptr" %}
 6719 
 6720   ins_encode(aarch64_enc_mov_p0(dst, con));
 6721 
 6722   ins_pipe(ialu_imm);
 6723 %}

 7904 %}
 7905 
 7906 // ============================================================================
 7907 // Cast/Convert Instructions
 7908 
 // Reinterpret a 64-bit integer as a pointer (CastX2P). This is a pure
 // bit-for-bit register copy; the move is elided entirely when the
 // register allocator assigns dst and src to the same register.
 7909 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 7910   match(Set dst (CastX2P src));
 7911 
 7912   ins_cost(INSN_COST);
 7913   format %{ "mov $dst, $src\t# long -> ptr" %}
 7914 
 7915   ins_encode %{
 7916     // No-op when the allocator already placed src in dst.
 7917     if ($dst$$reg != $src$$reg) {
 7918       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 7919     }
 7920   %}
 7921 
 7922   ins_pipe(ialu_reg);
 7923 %}
 7923 
 // CastP2X with a narrow-oop (compressed) source operand: copy the raw
 // register bits into a long register. NOTE(review): the value moved is
 // the compressed-oop bit pattern as-is, with no decode -- presumably
 // callers of this match only need the raw bits; TODO confirm.
 7924 instruct castN2X(iRegLNoSp dst, iRegN src) %{
 7925   match(Set dst (CastP2X src));
 7926 
 7927   ins_cost(INSN_COST);
 7928   format %{ "mov $dst, $src\t# ptr -> long" %}
 7929 
 7930   ins_encode %{
 7931     // No-op when the allocator already placed src in dst.
 7932     if ($dst$$reg != $src$$reg) {
 7933       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 7934     }
 7935   %}
 7936 
 7937   ins_pipe(ialu_reg);
 7938 %}
 7938 
 // Reinterpret a pointer as a 64-bit integer (CastP2X). Pure register
 // copy; elided when dst and src end up in the same register.
 7939 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 7940   match(Set dst (CastP2X src));
 7941 
 7942   ins_cost(INSN_COST);
 7943   format %{ "mov $dst, $src\t# ptr -> long" %}
 7944 
 7945   ins_encode %{
 7946     // No-op when the allocator already placed src in dst.
 7947     if ($dst$$reg != $src$$reg) {
 7948       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 7949     }
 7950   %}
 7951 
 7952   ins_pipe(ialu_reg);
 7953 %}
 7953 
 7954 // Convert oop into int for vectors alignment masking
 7955 instruct convP2I(iRegINoSp dst, iRegP src) %{
 7956   match(Set dst (ConvL2I (CastP2X src)));
 7957 
 7958   ins_cost(INSN_COST);

14740 
14741   match(Set dst (MoveL2D src));
14742 
14743   effect(DEF dst, USE src);
14744 
14745   ins_cost(INSN_COST);
14746 
14747   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14748 
14749   ins_encode %{
14750     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14751   %}
14752 
14753   ins_pipe(fp_l2d);
14754 
14755 %}
14756 
14757 // ============================================================================
14758 // clearing of an array
14759 
// Zero-fill an array: cnt words starting at base, fill value pinned to
// the immediate 0 (immL0). The operands are fixed to r11/r10 and both
// are clobbered (USE_KILL), as is the flags register.
// zero_words may emit a call to a shared stub and returns nullptr when
// that stub/trampoline cannot be placed -- in that case the compilation
// is abandoned with a CodeCache-full bailout rather than emitting
// broken code.
14760 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
14761 %{
14762   match(Set dummy (ClearArray (Binary cnt base) zero));
14763   effect(USE_KILL cnt, USE_KILL base, KILL cr);
14764 
14765   ins_cost(4 * INSN_COST);
14766   format %{ "ClearArray $cnt, $base" %}
14767 
14768   ins_encode %{
14769     address tpc = __ zero_words($base$$Register, $cnt$$Register);
14770     if (tpc == nullptr) {
14771       ciEnv::current()->record_failure("CodeCache is full");
14772       return;
14773     }
14774   %}
14775 
14776   ins_pipe(pipe_class_memory);
14777 %}
14778 
// Fill an array with an arbitrary word value held in a register: cnt
// words at base are set to val via fill_words. Selected only when the
// ClearArrayNode is marked word_copy_only() -- i.e. the node requires a
// plain word-store fill rather than the (possibly stub-based) zeroing
// path above. cnt/base are fixed to r11/r10 and clobbered.
14779 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
14780 %{
14781   predicate(((ClearArrayNode*)n)->word_copy_only());
14782   match(Set dummy (ClearArray (Binary cnt base) val));
14783   effect(USE_KILL cnt, USE_KILL base, KILL cr);
14784 
14785   ins_cost(4 * INSN_COST);
14786   format %{ "ClearArray $cnt, $base, $val" %}
14787 
14788   ins_encode %{
14789     __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
14790   %}
14791 
14792   ins_pipe(pipe_class_memory);
14793 %}
14794 
// Zero-fill an array whose word count is a compile-time constant.
// Selected only when the constant count (node input 2) is below the
// BlockZeroingLowLimit threshold converted from bytes to words, and the
// node does not demand a word-copy-only fill. A small constant count
// lets zero_words emit a short inline sequence; r11 is reserved as a
// temp and base (r10) is clobbered. As with the register-count variant,
// a nullptr return from zero_words means a required stub could not be
// placed, so the compilation bails out with CodeCache-full.
14795 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
14796 %{
14797   predicate((uint64_t)n->in(2)->get_long()
14798             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
14799             && !((ClearArrayNode*)n)->word_copy_only());
14800   match(Set dummy (ClearArray cnt base));
14801   effect(TEMP temp, USE_KILL base, KILL cr);
14802 
14803   ins_cost(4 * INSN_COST);
14804   format %{ "ClearArray $cnt, $base" %}
14805 
14806   ins_encode %{
14807     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
14808     if (tpc == nullptr) {
14809       ciEnv::current()->record_failure("CodeCache is full");
14810       return;
14811     }
14812   %}
14813 
14814   ins_pipe(pipe_class_memory);
14815 %}
14816 
14817 // ============================================================================
14818 // Overflow Math Instructions
14819 

16110 
16111 // Call Runtime Instruction
16112 
// Direct call to a leaf runtime routine (no Java frame anchoring by the
// callee). Uses the shared aarch64_enc_java_to_runtime encoding, which
// (per its definition earlier in this file) emits a trampoline call
// when the target lives in the code cache and otherwise a far-call
// sequence through rscratch1/rscratch2.
16113 instruct CallLeafDirect(method meth)
16114 %{
16115   match(CallLeaf);
16116 
16117   effect(USE meth);
16118 
16119   ins_cost(CALL_COST);
16120 
16121   format %{ "CALL, runtime leaf $meth" %}
16122 
16123   ins_encode( aarch64_enc_java_to_runtime(meth) );
16124 
16125   ins_pipe(pipe_class_call);
16126 %}
16127 
16128 // Call Runtime Instruction
16129 
16130 // entry point is null, target holds the address to call
// Indirect leaf call (no FP state saved): chosen when the call node has
// no static entry point -- the target address is instead supplied at
// runtime in a register, so a single blr suffices.
16131 instruct CallLeafNoFPIndirect(iRegP target)
16132 %{
16133   predicate(n->as_Call()->entry_point() == nullptr);
16134 
16135   match(CallLeafNoFP target);
16136 
16137   ins_cost(CALL_COST);
16138 
16139   format %{ "CALL, runtime leaf nofp indirect $target" %}
16140 
16141   ins_encode %{
16142     __ blr($target$$Register);
16143   %}
16144 
16145   ins_pipe(pipe_class_call);
16146 %}
16147 
// Direct leaf call (no FP state saved): the complement of the indirect
// form above, chosen when the call node DOES carry a static entry
// point. Shares aarch64_enc_java_to_runtime with CallLeafDirect
// (trampoline call for in-code-cache targets, far-call sequence
// otherwise).
16148 instruct CallLeafNoFPDirect(method meth)
16149 %{
16150   predicate(n->as_Call()->entry_point() != nullptr);
16151 
16152   match(CallLeafNoFP);
16153 
16154   effect(USE meth);
16155 
16156   ins_cost(CALL_COST);
16157 
16158   format %{ "CALL, runtime leaf nofp $meth" %}
16159 
16160   ins_encode( aarch64_enc_java_to_runtime(meth) );
16161 
16162   ins_pipe(pipe_class_call);
16163 %}
16164 
16165 // Tail Call; Jump from runtime stub to Java code.
16166 // Also known as an 'interprocedural jump'.
16167 // Target of jump will eventually return to caller.
16168 // TailJump below removes the return address.
16169 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16170 %{
16171   match(TailCall jump_target method_ptr);
< prev index next >