< prev index next >

src/hotspot/cpu/aarch64/aarch64.ad

Print this page

 1627 
 // Distance in bytes from the start of a dynamic Java call sequence to
 // its return address: four 4-byte instructions (movz, movk, movk, bl).
 1628 int MachCallDynamicJavaNode::ret_addr_offset()
 1629 {
 1630   return 16; // movz, movk, movk, bl
 1631 }
 1632 
 // Distance in bytes from the start of a runtime call sequence to its
 // return address.  A target resident in the code cache is reached with a
 // single bl (possibly via a trampoline stub); other runtime callouts use
 // the six-instruction sequence of aarch64_enc_java_to_runtime.
 1633 int MachCallRuntimeNode::ret_addr_offset() {
 1634   // for generated stubs the call will be
 1635   //   bl(addr)
 1636   // or with far branches
 1637   //   bl(trampoline_stub)
 1638   // for real runtime callouts it will be six instructions
 1639   // see aarch64_enc_java_to_runtime
 1640   //   adr(rscratch2, retaddr)
 1641   //   lea(rscratch1, RuntimeAddress(addr)
 1642   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1643   //   blr(rscratch1)
 1644   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1645   if (cb) {
 1646     return 1 * NativeInstruction::instruction_size;



 1647   } else {
 1648     return 6 * NativeInstruction::instruction_size;
 1649   }
 1650 }
 1651 
 1652 //=============================================================================
 1653 
 1654 #ifndef PRODUCT
 // Debug-only textual form of a breakpoint node in the generated listing.
 1655 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1656   st->print("BREAKPOINT");
 1657 }
 1658 #endif
 1659 
 // Emit a breakpoint node as a single brk #0 instruction.
 1660 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1661   C2_MacroAssembler _masm(&cbuf);
 1662   __ brk(0);
 1663 }
 1664 
 1665 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1666   return MachNode::size(ra_);

 1738     st->print("\n\t");
 1739     st->print("ldr  rscratch1, [guard]\n\t");
 1740     st->print("dmb ishld\n\t");
 1741     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1742     st->print("cmp  rscratch1, rscratch2\n\t");
 1743     st->print("b.eq skip");
 1744     st->print("\n\t");
 1745     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1746     st->print("b skip\n\t");
 1747     st->print("guard: int\n\t");
 1748     st->print("\n\t");
 1749     st->print("skip:\n\t");
 1750   }
 1751 }
 1752 #endif
 1753 
 // Emit the method prologue: a patchable nop, an optional class-init
 // barrier, the stack-overflow bang, the frame build and (for normal
 // method compilations) the nmethod entry barrier.  Finally records the
 // frame-complete offset and, if present, the constant-table base offset.
 1754 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1755   Compile* C = ra_->C;
 1756   C2_MacroAssembler _masm(&cbuf);
 1757 
 1758   // n.b. frame size includes space for return pc and rfp
 1759   const int framesize = C->output()->frame_size_in_bytes();
 1760 
 1761   // insert a nop at the start of the prolog so we can patch in a
 1762   // branch if we need to invalidate the method later
 1763   __ nop();
 1764 
       // Fast-path the already-initialized case; otherwise tail-jump to the
       // wrong-method stub so the caller re-resolves.
 1765   if (C->clinit_barrier_on_entry()) {
 1766     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1767 
 1768     Label L_skip_barrier;
 1769 
 1770     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1771     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1772     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1773     __ bind(L_skip_barrier);
 1774   }
 1775 
       // Re-establish the ptrue predicate register when vector code may run.
 1776   if (C->max_vector_size() > 0) {
 1777     __ reinitialize_ptrue();
 1778   }
 1779 
 1780   int bangsize = C->output()->bang_size_in_bytes();
 1781   if (C->output()->need_stack_bang(bangsize))
 1782     __ generate_stack_overflow_check(bangsize);
 1783 
 1784   __ build_frame(framesize);
 1785 
 1786   if (C->stub_function() == nullptr) {
 1787     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1788     if (BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
 1789       // Dummy labels for just measuring the code size
 1790       Label dummy_slow_path;
 1791       Label dummy_continuation;
 1792       Label dummy_guard;
 1793       Label* slow_path = &dummy_slow_path;
 1794       Label* continuation = &dummy_continuation;
 1795       Label* guard = &dummy_guard;
 1796       if (!Compile::current()->output()->in_scratch_emit_size()) {
 1797         // Use real labels from actual stub when not emitting code for the purpose of measuring its size
 1798         C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
 1799         Compile::current()->output()->add_stub(stub);
 1800         slow_path = &stub->entry();
 1801         continuation = &stub->continuation();
 1802         guard = &stub->guard();
 1803       }
 1804       // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
 1805       bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
 1806     }
 1807   }
 1808 
 1809   if (VerifyStackAtCalls) {
 1810     Unimplemented();
 1811   }
 1812 
 1813   C->output()->set_frame_complete(cbuf.insts_size());
 1814 
 1815   if (C->has_mach_constant_base_node()) {
 1816     // NOTE: We set the table base offset here because users might be
 1817     // emitted before MachConstantBaseNode.
 1818     ConstantTable& constant_table = C->output()->constant_table();
 1819     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1820   }
 1821 }
 1822 
 // Prologue size depends on too many settings to compute statically;
 // defer to the generic emit-and-measure MachNode::size.
 1823 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1824 {
 1825   return MachNode::size(ra_); // too many variables; just compute it
 1826                               // the hard way
 1827 }
 1828 
 // The prologue itself embeds no relocatable values.
 1829 int MachPrologNode::reloc() const
 1830 {
 1831   return 0;
 1832 }
 1833 
 1834 //=============================================================================
 1835 
 1836 #ifndef PRODUCT
 1837 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1838   Compile* C = ra_->C;
 1839   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1840 
 1841   st->print("# pop frame %d\n\t",framesize);
 1842 
 1843   if (framesize == 0) {
 1844     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1845   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1846     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1847     st->print("add  sp, sp, #%d\n\t", framesize);
 1848   } else {

 1852   }
 1853   if (VM_Version::use_rop_protection()) {
 1854     st->print("autiaz\n\t");
 1855     st->print("ldr  zr, [lr]\n\t");
 1856   }
 1857 
 1858   if (do_polling() && C->is_method_compilation()) {
 1859     st->print("# test polling word\n\t");
 1860     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1861     st->print("cmp  sp, rscratch1\n\t");
 1862     st->print("bhi #slow_path");
 1863   }
 1864 }
 1865 #endif
 1866 
 // Emit the method epilogue: tear down the frame, optionally check the
 // reserved stack area, and perform the return safepoint poll.
 1867 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1868   Compile* C = ra_->C;
 1869   C2_MacroAssembler _masm(&cbuf);
 1870   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1871 
 1872   __ remove_frame(framesize);
 1873 
 1874   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1875     __ reserved_stack_check();
 1876   }
 1877 
 1878   if (do_polling() && C->is_method_compilation()) {
       // A real slow-path stub is only allocated for real emission; during
       // scratch (size-measuring) emission a dummy label suffices.
 1879     Label dummy_label;
 1880     Label* code_stub = &dummy_label;
 1881     if (!C->output()->in_scratch_emit_size()) {
 1882       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1883       C->output()->add_stub(stub);
 1884       code_stub = &stub->entry();
 1885     }
 1886     __ relocate(relocInfo::poll_return_type);
 1887     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1888   }
 1889 }
 1890 
 // Epilogue size varies; measure via the generic MachNode::size.
 1891 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1892   // Variable size. Determine dynamically.
 1893   return MachNode::size(ra_);
 1894 }
 1895 
 // One relocatable value: the polling-page reference emitted by the poll.
 1896 int MachEpilogNode::reloc() const {
 1897   // Return number of relocatable values contained in this instruction.
 1898   return 1; // 1 for polling page.
 1899 }
 1900 
 // The epilogue uses the default pipeline class.
 1901 const Pipeline * MachEpilogNode::pipeline() const {
 1902   return MachNode::pipeline_class();
 1903 }
 1904 
 1905 //=============================================================================
 1906 
 1907 // Figure out which register class each belongs in: rc_int, rc_float or
 1908 // rc_stack.
 1909 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 1910 
 1911 static enum RC rc_class(OptoReg::Name reg) {
 1912 
 1913   if (reg == OptoReg::Bad) {
 1914     return rc_bad;
 1915   }

 2181 
 2182   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2183   int reg    = ra_->get_encode(this);
 2184 
 2185   // This add will handle any 24-bit signed offset. 24 bits allows an
 2186   // 8 megabyte stack frame.
 2187   __ add(as_Register(reg), sp, offset);
 2188 }
 2189 
 // Size of the BoxLock address computation: one instruction when the
 // stack offset fits an add/sub immediate, otherwise two.
 2190 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2191   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2192   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2193 
 2194   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2195     return NativeInstruction::instruction_size;
 2196   } else {
 2197     return 2 * NativeInstruction::instruction_size;
 2198   }
 2199 }
 2200 
 2201 //=============================================================================






















 2202 



























 2203 #ifndef PRODUCT
 // Debug-only listing of the unverified entry point: load the receiver's
 // klass, compare with the inline-cache klass, branch to the miss stub.
 2204 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2205 {
 2206   st->print_cr("# MachUEPNode");
 2207   if (UseCompressedClassPointers) {
 2208     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2209     if (CompressedKlassPointers::shift() != 0) {
 2210       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2211     }
 2212   } else {
 2213    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2214   }
 2215   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2216   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2217 }
 2218 #endif
 2219 
 // Emit the unverified entry point: inline-cache klass check against the
 // receiver in j_rarg0; on mismatch, far-jump to the IC miss stub.
 2220 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2221 {
 2222   // This is the unverified entry point.
 2223   C2_MacroAssembler _masm(&cbuf);

 2224 

 2225   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2226   Label skip;
 2227   // TODO
 2228   // can we avoid this skip and still use a reloc?
 2229   __ br(Assembler::EQ, skip);
 2230   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2231   __ bind(skip);
 2232 }
 2233 
 // UEP size varies with compressed-klass configuration; measure it.
 2234 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2235 {
 2236   return MachNode::size(ra_);
 2237 }
 2238 
 2239 // REQUIRED EMIT CODE
 2240 
 2241 //=============================================================================
 2242 
 2243 // Emit exception handler code.
 2244 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2245 {
 2246   // mov rscratch1 #exception_blob_entry_point
 2247   // br rscratch1
 2248   // Note that the code buffer's insts_mark is always relative to insts.
 2249   // That's why we must use the macroassembler to generate a handler.
 2250   C2_MacroAssembler _masm(&cbuf);
 2251   address base = __ start_a_stub(size_exception_handler());
 2252   if (base == nullptr) {
 2253     ciEnv::current()->record_failure("CodeCache is full");
 2254     return 0;  // CodeBuffer::expand failed
 2255   }
 2256   int offset = __ offset();
 2257   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2258   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

   // Emit an inline-cache call for a dynamic Java call; records a
   // compilation failure when the code cache is full, and re-initializes
   // the ptrue predicate afterwards when vector code may run.
 3734   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3735     C2_MacroAssembler _masm(&cbuf);
 3736     int method_index = resolved_method_index(cbuf);
 3737     address call = __ ic_call((address)$meth$$method, method_index);
 3738     if (call == nullptr) {
 3739       ciEnv::current()->record_failure("CodeCache is full");
 3740       return;
 3741     }
 3742     __ post_call_nop();
 3743     if (Compile::current()->max_vector_size() > 0) {
 3744       __ reinitialize_ptrue();
 3745     }
 3746   %}
 3747 
   // Post-call epilogue encoding; only emits code under VerifyStackAtCalls
   // (currently unimplemented on this port).
 3748   enc_class aarch64_enc_call_epilog() %{
 3749     C2_MacroAssembler _masm(&cbuf);
 3750     if (VerifyStackAtCalls) {
 3751       // Check that stack depth is unchanged: find majik cookie on stack
 3752       __ call_Unimplemented();
 3753     }






























 3754   %}
 3755 
 3756   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3757     C2_MacroAssembler _masm(&cbuf);
 3758 
 3759     // some calls to generated routines (arraycopy code) are scheduled
 3760     // by C2 as runtime calls. if so we can call them using a br (they
 3761     // will be in a reachable segment) otherwise we have to use a blr
 3762     // which loads the absolute address into a register.
 3763     address entry = (address)$meth$$method;
 3764     CodeBlob *cb = CodeCache::find_blob(entry);
 3765     if (cb) {
 3766       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3767       if (call == nullptr) {
 3768         ciEnv::current()->record_failure("CodeCache is full");
 3769         return;
 3770       }
 3771       __ post_call_nop();
 3772     } else {
 3773       Label retaddr;

 // Load a long immediate into a general-purpose register.
 7187 instruct loadConL(iRegLNoSp dst, immL src)
 7188 %{
 7189   match(Set dst src);
 7190 
 7191   ins_cost(INSN_COST);
 7192   format %{ "mov $dst, $src\t# long" %}
 7193 
 7194   ins_encode( aarch64_enc_mov_imm(dst, src) );
 7195 
 7196   ins_pipe(ialu_imm);
 7197 %}
 7198 
 7199 // Load Pointer Constant
 7200 
 // Load a pointer constant into a general-purpose register.
 7201 instruct loadConP(iRegPNoSp dst, immP con)
 7202 %{
 7203   match(Set dst con);
 7204 
 7205   ins_cost(INSN_COST * 4);
 7206   format %{
 7207     "mov  $dst, $con\t# ptr\n\t"
 7208   %}
 7209 
 7210   ins_encode(aarch64_enc_mov_p(dst, con));
 7211 
 7212   ins_pipe(ialu_imm);
 7213 %}
 7214 
 7215 // Load Null Pointer Constant
 7216 
 // Load the null pointer constant into a general-purpose register.
 7217 instruct loadConP0(iRegPNoSp dst, immP0 con)
 7218 %{
 7219   match(Set dst con);
 7220 
 7221   ins_cost(INSN_COST);
 7222   format %{ "mov  $dst, $con\t# null pointer" %}
 7223 
 7224   ins_encode(aarch64_enc_mov_p0(dst, con));
 7225 
 7226   ins_pipe(ialu_imm);
 7227 %}

 8409 %}
 8410 
 8411 // ============================================================================
 8412 // Cast/Convert Instructions
 8413 
 // Reinterpret a long value as a pointer; emits a register move only when
 // source and destination differ.
 8414 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8415   match(Set dst (CastX2P src));
 8416 
 8417   ins_cost(INSN_COST);
 8418   format %{ "mov $dst, $src\t# long -> ptr" %}
 8419 
 8420   ins_encode %{
 8421     if ($dst$$reg != $src$$reg) {
 8422       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8423     }
 8424   %}
 8425 
 8426   ins_pipe(ialu_reg);
 8427 %}
 8428 















 // Reinterpret a pointer as a long value; emits a register move only when
 // source and destination differ.
 8429 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8430   match(Set dst (CastP2X src));
 8431 
 8432   ins_cost(INSN_COST);
 8433   format %{ "mov $dst, $src\t# ptr -> long" %}
 8434 
 8435   ins_encode %{
 8436     if ($dst$$reg != $src$$reg) {
 8437       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8438     }
 8439   %}
 8440 
 8441   ins_pipe(ialu_reg);
 8442 %}
 8443 
 8444 // Convert oop into int for vectors alignment masking
 8445 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8446   match(Set dst (ConvL2I (CastP2X src)));
 8447 
 8448   ins_cost(INSN_COST);

15230 
15231   match(Set dst (MoveL2D src));
15232 
15233   effect(DEF dst, USE src);
15234 
15235   ins_cost(INSN_COST);
15236 
15237   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15238 
15239   ins_encode %{
15240     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15241   %}
15242 
15243   ins_pipe(fp_l2d);
15244 
15245 %}
15246 
15247 // ============================================================================
15248 // clearing of an array
15249 
15250 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
15251 %{
15252   match(Set dummy (ClearArray cnt base));
15253   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15254 
15255   ins_cost(4 * INSN_COST);
15256   format %{ "ClearArray $cnt, $base" %}
15257 
15258   ins_encode %{
15259     address tpc = __ zero_words($base$$Register, $cnt$$Register);
15260     if (tpc == nullptr) {
15261       ciEnv::current()->record_failure("CodeCache is full");
15262       return;
15263     }
15264   %}
15265 
15266   ins_pipe(pipe_class_memory);
15267 %}
15268 
















// Constant-count variant: matches only when the word count is below
// BlockZeroingLowLimit (scaled to words), so the immediate form is used
// for short clears.
15269 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15270 %{
15271   predicate((uint64_t)n->in(2)->get_long()
15272             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));

15273   match(Set dummy (ClearArray cnt base));
15274   effect(TEMP temp, USE_KILL base, KILL cr);
15275 
15276   ins_cost(4 * INSN_COST);
15277   format %{ "ClearArray $cnt, $base" %}
15278 
15279   ins_encode %{
15280     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15281     if (tpc == nullptr) {
15282       ciEnv::current()->record_failure("CodeCache is full");
15283       return;
15284     }
15285   %}
15286 
15287   ins_pipe(pipe_class_memory);
15288 %}
15289 
15290 // ============================================================================
15291 // Overflow Math Instructions
15292 

16552 
16553 // Call Runtime Instruction
16554 
// Leaf runtime call, encoded via aarch64_enc_java_to_runtime.
16555 instruct CallLeafDirect(method meth)
16556 %{
16557   match(CallLeaf);
16558 
16559   effect(USE meth);
16560 
16561   ins_cost(CALL_COST);
16562 
16563   format %{ "CALL, runtime leaf $meth" %}
16564 
16565   ins_encode( aarch64_enc_java_to_runtime(meth) );
16566 
16567   ins_pipe(pipe_class_call);
16568 %}
16569 
16570 // Call Runtime Instruction
16571 


















// Leaf runtime call that does not use/preserve FP state; same encoding
// as CallLeafDirect.
16572 instruct CallLeafNoFPDirect(method meth)
16573 %{


16574   match(CallLeafNoFP);
16575 
16576   effect(USE meth);
16577 
16578   ins_cost(CALL_COST);
16579 
16580   format %{ "CALL, runtime leaf nofp $meth" %}
16581 
16582   ins_encode( aarch64_enc_java_to_runtime(meth) );
16583 
16584   ins_pipe(pipe_class_call);
16585 %}
16586 
16587 // Tail Call; Jump from runtime stub to Java code.
16588 // Also known as an 'interprocedural jump'.
16589 // Target of jump will eventually return to caller.
16590 // TailJump below removes the return address.
16591 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16592 %{
16593   match(TailCall jump_target method_ptr);

 1627 
 // Distance in bytes from the start of a dynamic Java call sequence to
 // its return address: four 4-byte instructions (movz, movk, movk, bl).
 1628 int MachCallDynamicJavaNode::ret_addr_offset()
 1629 {
 1630   return 16; // movz, movk, movk, bl
 1631 }
 1632 
 // Distance in bytes from the start of a runtime call sequence to its
 // return address.  A code-cache-resident target is a single bl; a null
 // entry point denotes an indirect leaf call (see CallLeafNoFPIndirect),
 // also one instruction; other callouts use the six-instruction sequence
 // of aarch64_enc_java_to_runtime.
 1633 int MachCallRuntimeNode::ret_addr_offset() {
 1634   // for generated stubs the call will be
 1635   //   bl(addr)
 1636   // or with far branches
 1637   //   bl(trampoline_stub)
 1638   // for real runtime callouts it will be six instructions
 1639   // see aarch64_enc_java_to_runtime
 1640   //   adr(rscratch2, retaddr)
 1641   //   lea(rscratch1, RuntimeAddress(addr)
 1642   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1643   //   blr(rscratch1)
 1644   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1645   if (cb) {
 1646     return 1 * NativeInstruction::instruction_size;
 1647   } else if (_entry_point == nullptr) {
 1648     // See CallLeafNoFPIndirect
 1649     return 1 * NativeInstruction::instruction_size;
 1650   } else {
 1651     return 6 * NativeInstruction::instruction_size;
 1652   }
 1653 }
 1654 
 1655 //=============================================================================
 1656 
 1657 #ifndef PRODUCT
 // Debug-only textual form of a breakpoint node in the generated listing.
 1658 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1659   st->print("BREAKPOINT");
 1660 }
 1661 #endif
 1662 
 // Emit a breakpoint node as a single brk #0 instruction.
 1663 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1664   C2_MacroAssembler _masm(&cbuf);
 1665   __ brk(0);
 1666 }
 1667 
 1668 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1669   return MachNode::size(ra_);

 1741     st->print("\n\t");
 1742     st->print("ldr  rscratch1, [guard]\n\t");
 1743     st->print("dmb ishld\n\t");
 1744     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1745     st->print("cmp  rscratch1, rscratch2\n\t");
 1746     st->print("b.eq skip");
 1747     st->print("\n\t");
 1748     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1749     st->print("b skip\n\t");
 1750     st->print("guard: int\n\t");
 1751     st->print("\n\t");
 1752     st->print("skip:\n\t");
 1753   }
 1754 }
 1755 #endif
 1756 
 // Emit the method prologue (inline-type-aware variant): a patchable nop,
 // then verified_entry(C, 0) which builds the frame; normal method
 // compilations also get an nmethod entry barrier.  Finally records the
 // frame-complete offset and, if present, the constant-table base offset.
 // NOTE(review): _verified_entry is presumably bound here so that
 // MachVEPNode::emit can branch to it after unpacking inline-type args —
 // confirm against MachVEPNode below.
 1757 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1758   Compile* C = ra_->C;
 1759   C2_MacroAssembler _masm(&cbuf);
 1760 



 1761   // insert a nop at the start of the prolog so we can patch in a
 1762   // branch if we need to invalidate the method later
 1763   __ nop();
 1764 
 1765   __ verified_entry(C, 0);



 1766 
 1767   if (C->stub_function() == nullptr) {
 1768     __ entry_barrier();






 1769   }
 1770 
       // Only bind the real label during actual emission, not while
       // measuring code size in the scratch buffer.
 1771   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1772     __ bind(*_verified_entry);

























 1773   }
 1774 
 1775   if (VerifyStackAtCalls) {
 1776     Unimplemented();
 1777   }
 1778 
 1779   C->output()->set_frame_complete(cbuf.insts_size());
 1780 
 1781   if (C->has_mach_constant_base_node()) {
 1782     // NOTE: We set the table base offset here because users might be
 1783     // emitted before MachConstantBaseNode.
 1784     ConstantTable& constant_table = C->output()->constant_table();
 1785     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1786   }
 1787 }
 1788 






 // The prologue itself embeds no relocatable values.
 1789 int MachPrologNode::reloc() const
 1790 {
 1791   return 0;
 1792 }
 1793 
 1794 //=============================================================================
 1795 
 1796 #ifndef PRODUCT
 1797 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1798   Compile* C = ra_->C;
 1799   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1800 
 1801   st->print("# pop frame %d\n\t",framesize);
 1802 
 1803   if (framesize == 0) {
 1804     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1805   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1806     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1807     st->print("add  sp, sp, #%d\n\t", framesize);
 1808   } else {

 1812   }
 1813   if (VM_Version::use_rop_protection()) {
 1814     st->print("autiaz\n\t");
 1815     st->print("ldr  zr, [lr]\n\t");
 1816   }
 1817 
 1818   if (do_polling() && C->is_method_compilation()) {
 1819     st->print("# test polling word\n\t");
 1820     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1821     st->print("cmp  sp, rscratch1\n\t");
 1822     st->print("bhi #slow_path");
 1823   }
 1824 }
 1825 #endif
 1826 
 // Emit the method epilogue: tear down the frame (with stack-repair
 // support when the compilation needs it), optionally check the reserved
 // stack area, and perform the return safepoint poll.
 1827 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1828   Compile* C = ra_->C;
 1829   C2_MacroAssembler _masm(&cbuf);
 1830   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1831 
 1832   __ remove_frame(framesize, C->needs_stack_repair());
 1833 
 1834   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1835     __ reserved_stack_check();
 1836   }
 1837 
 1838   if (do_polling() && C->is_method_compilation()) {
       // A real slow-path stub is only allocated for real emission; during
       // scratch (size-measuring) emission a dummy label suffices.
 1839     Label dummy_label;
 1840     Label* code_stub = &dummy_label;
 1841     if (!C->output()->in_scratch_emit_size()) {
 1842       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1843       C->output()->add_stub(stub);
 1844       code_stub = &stub->entry();
 1845     }
 1846     __ relocate(relocInfo::poll_return_type);
 1847     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1848   }
 1849 }
 1850 





 // One relocatable value: the polling-page reference emitted by the poll.
 1851 int MachEpilogNode::reloc() const {
 1852   // Return number of relocatable values contained in this instruction.
 1853   return 1; // 1 for polling page.
 1854 }
 1855 
 // The epilogue uses the default pipeline class.
 1856 const Pipeline * MachEpilogNode::pipeline() const {
 1857   return MachNode::pipeline_class();
 1858 }
 1859 
 1860 //=============================================================================
 1861 
 1862 // Figure out which register class each belongs in: rc_int, rc_float or
 1863 // rc_stack.
 1864 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 1865 
 1866 static enum RC rc_class(OptoReg::Name reg) {
 1867 
 1868   if (reg == OptoReg::Bad) {
 1869     return rc_bad;
 1870   }

 2136 
 2137   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2138   int reg    = ra_->get_encode(this);
 2139 
 2140   // This add will handle any 24-bit signed offset. 24 bits allows an
 2141   // 8 megabyte stack frame.
 2142   __ add(as_Register(reg), sp, offset);
 2143 }
 2144 
 // Size of the BoxLock address computation: one instruction when the
 // stack offset fits an add/sub immediate, otherwise two.
 2145 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2146   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2147   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2148 
 2149   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2150     return NativeInstruction::instruction_size;
 2151   } else {
 2152     return 2 * NativeInstruction::instruction_size;
 2153   }
 2154 }
 2155 
 2156 //=============================================================================
 2157 #ifndef PRODUCT
 // Debug-only listing of the value-type entry point: the unverified form
 // performs a klass load/check, the verified form unpacks inline args.
 2158 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2159 {
 2160   st->print_cr("# MachVEPNode");
 2161   if (!_verified) {
 2162     st->print_cr("\t load_class");
 2163   } else {
 2164     st->print_cr("\t unpack_inline_arg");
 2165   }
 2166 }
 2167 #endif
 2168 
 // Emit the value-type entry point.  The unverified form performs the
 // inline-cache klass check (miss -> IC miss stub).  The verified form
 // builds a temporary frame (with entry barrier) when not a stub,
 // unpacks inline-type arguments passed as oops, re-enters via
 // verified_entry with the resulting sp increment (recorded for stack
 // repair on return), then branches to the method's _verified_entry.
 2169 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2170 {
 2171   C2_MacroAssembler _masm(&cbuf);
 2172 
 2173   if (!_verified) {
 2174     Label skip;
 2175     __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2176     __ br(Assembler::EQ, skip);
 2177       __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2178     __ bind(skip);
 2179 
 2180   } else {
 2181     // insert a nop at the start of the prolog so we can patch in a
 2182     // branch if we need to invalidate the method later
 2183     __ nop();
 2184 
 2185     // TODO 8284443 Avoid creation of temporary frame
 2186     if (ra_->C->stub_function() == nullptr) {
 2187       __ verified_entry(ra_->C, 0);
 2188       __ entry_barrier();
 2189       int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
 2190       __ remove_frame(framesize, false);
 2191     }
 2192     // Unpack inline type args passed as oop and then jump to
 2193     // the verified entry point (skipping the unverified entry).
 2194     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2195     // Emit code for verified entry and save increment for stack repair on return
 2196     __ verified_entry(ra_->C, sp_inc);
       // During scratch (size-measuring) emission the real _verified_entry
       // label is not available; branch to a local dummy instead.
 2197     if (Compile::current()->output()->in_scratch_emit_size()) {
 2198       Label dummy_verified_entry;
 2199       __ b(dummy_verified_entry);
 2200     } else {
 2201       __ b(*_verified_entry);
 2202     }
 2203   }
 2204 }
 2205 
 2206 //=============================================================================
 2207 #ifndef PRODUCT
 // Debug-only listing of the unverified entry point: load the receiver's
 // klass, compare with the inline-cache klass, branch to the miss stub.
 2208 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2209 {
 2210   st->print_cr("# MachUEPNode");
 2211   if (UseCompressedClassPointers) {
 2212     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2213     if (CompressedKlassPointers::shift() != 0) {
 2214       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2215     }
 2216   } else {
 2217    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2218   }
 2219   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2220   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2221 }
 2222 #endif
 2223 
 // Emit the unverified entry point: inline-cache klass check against the
 // receiver in j_rarg0; on mismatch, far-jump to the IC miss stub.
 2224 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2225 {
 2226   // This is the unverified entry point.
 2227   C2_MacroAssembler _masm(&cbuf);
 2228   Label skip;
 2229 
 2230   // UseCompressedClassPointers logic are inside cmp_klass
 2231   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2232 
 2233   // TODO
 2234   // can we avoid this skip and still use a reloc?
 2235   __ br(Assembler::EQ, skip);
 2236   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2237   __ bind(skip);
 2238 }
 2239 





 2240 // REQUIRED EMIT CODE
 2241 
 2242 //=============================================================================
 2243 
 2244 // Emit exception handler code.
 2245 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2246 {
 2247   // mov rscratch1 #exception_blob_entry_point
 2248   // br rscratch1
 2249   // Note that the code buffer's insts_mark is always relative to insts.
 2250   // That's why we must use the macroassembler to generate a handler.
 2251   C2_MacroAssembler _masm(&cbuf);
 2252   address base = __ start_a_stub(size_exception_handler());
 2253   if (base == nullptr) {
 2254     ciEnv::current()->record_failure("CodeCache is full");
 2255     return 0;  // CodeBuffer::expand failed
 2256   }
 2257   int offset = __ offset();
 2258   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2259   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

   // Emit an inline-cache call for a dynamic Java call; records a
   // compilation failure when the code cache is full, and re-initializes
   // the ptrue predicate afterwards when vector code may run.
 3735   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3736     C2_MacroAssembler _masm(&cbuf);
 3737     int method_index = resolved_method_index(cbuf);
 3738     address call = __ ic_call((address)$meth$$method, method_index);
 3739     if (call == nullptr) {
 3740       ciEnv::current()->record_failure("CodeCache is full");
 3741       return;
 3742     }
 3743     __ post_call_nop();
 3744     if (Compile::current()->max_vector_size() > 0) {
 3745       __ reinitialize_ptrue();
 3746     }
 3747   %}
 3748 
   // Post-call epilogue encoding.  For calls returning an inline type as
   // fields, materializes the trailing IsInit projection from r0's
   // null-ness and, when the return value is used, clears r0 if its low
   // tag bit is set so the oop can be used after null checking.
 3749   enc_class aarch64_enc_call_epilog() %{
 3750     C2_MacroAssembler _masm(&cbuf);
 3751     if (VerifyStackAtCalls) {
 3752       // Check that stack depth is unchanged: find majik cookie on stack
 3753       __ call_Unimplemented();
 3754     }
 3755     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
 3756       // The last return value is not set by the callee but used to pass IsInit information to compiled code.
 3757       // Search for the corresponding projection, get the register and emit code that initialized it.
 3758       uint con = (tf()->range_cc()->cnt() - 1);
 3759       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 3760         ProjNode* proj = fast_out(i)->as_Proj();
 3761         if (proj->_con == con) {
 3762           // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
 3763           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 3764           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 3765           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 3766           __ cmp(r0, zr);
 3767           __ cset(toReg, Assembler::NE);
           // If the projection lives on the stack, spill the flag there.
 3768           if (reg->is_stack()) {
 3769             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 3770             __ str(toReg, Address(sp, st_off));
 3771           }
 3772           break;
 3773         }
 3774       }
 3775       if (return_value_is_used()) {
 3776         // An inline type is returned as fields in multiple registers.
 3777         // R0 either contains an oop if the inline type is buffered or a pointer
 3778         // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
 3779         // if the lowest bit is set to allow C2 to use the oop after null checking.
 3780         // r0 &= (r0 & 1) - 1
 3781         __ andr(rscratch1, r0, 0x1);
 3782         __ sub(rscratch1, rscratch1, 0x1);
 3783         __ andr(r0, r0, rscratch1);
 3784       }
 3785     }
 3786   %}
 3787 
 3788   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3789     C2_MacroAssembler _masm(&cbuf);
 3790 
 3791     // some calls to generated routines (arraycopy code) are scheduled
 3792     // by C2 as runtime calls. if so we can call them using a br (they
 3793     // will be in a reachable segment) otherwise we have to use a blr
 3794     // which loads the absolute address into a register.
 3795     address entry = (address)$meth$$method;
 3796     CodeBlob *cb = CodeCache::find_blob(entry);
 3797     if (cb) {
 3798       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3799       if (call == nullptr) {
 3800         ciEnv::current()->record_failure("CodeCache is full");
 3801         return;
 3802       }
 3803       __ post_call_nop();
 3804     } else {
 3805       Label retaddr;

 7219 instruct loadConL(iRegLNoSp dst, immL src)
 7220 %{
        // Load Long Constant: materialize an arbitrary 64-bit immediate
        // into a general-purpose register.
 7221   match(Set dst src);
 7222 
 7223   ins_cost(INSN_COST);
 7224   format %{ "mov $dst, $src\t# long" %}
 7225 
        // aarch64_enc_mov_imm (defined elsewhere in this file) emits the
        // mov-immediate sequence for the value; presumably a movz/movn/movk
        // expansion chosen by the macro assembler — encoder body not visible here.
 7226   ins_encode( aarch64_enc_mov_imm(dst, src) );
 7227 
 7228   ins_pipe(ialu_imm);
 7229 %}
 7230 
 7231 // Load Pointer Constant
 7232 
 7233 instruct loadConP(iRegPNoSp dst, immP con)
 7234 %{
        // Load Pointer Constant: materialize a non-null pointer immediate.
 7235   match(Set dst con);
 7236 
        // Cost reflects a multi-instruction materialization (up to four
        // instructions, per the 4 * INSN_COST weighting below).
 7237   ins_cost(INSN_COST * 4);
 7238   format %{
 7239     "mov  $dst, $con\t# ptr"
 7240   %}
 7241 
        // aarch64_enc_mov_p (defined elsewhere in this file) emits the
        // pointer-constant load, including any relocation required.
 7242   ins_encode(aarch64_enc_mov_p(dst, con));
 7243 
 7244   ins_pipe(ialu_imm);
 7245 %}
 7246 
 7247 // Load Null Pointer Constant
 7248 
 7249 instruct loadConP0(iRegPNoSp dst, immP0 con)
 7250 %{
        // Load Null Pointer Constant: cheaper single-instruction form for
        // the null pointer (immP0), separate from the general loadConP above.
 7251   match(Set dst con);
 7252 
 7253   ins_cost(INSN_COST);
 7254   format %{ "mov  $dst, $con\t# null pointer" %}
 7255 
        // aarch64_enc_mov_p0 (defined elsewhere in this file) emits the
        // null load; presumably a mov from zr — encoder body not visible here.
 7256   ins_encode(aarch64_enc_mov_p0(dst, con));
 7257 
 7258   ins_pipe(ialu_imm);
 7259 %}

 8441 %}
 8442 
 8443 // ============================================================================
 8444 // Cast/Convert Instructions
 8445 
 8446 instruct castX2P(iRegPNoSp dst, iRegL src) %{
        // Reinterpret a long value as a pointer (CastX2P). On AArch64 this
        // is a plain register move; no bits change.
 8447   match(Set dst (CastX2P src));
 8448 
 8449   ins_cost(INSN_COST);
 8450   format %{ "mov $dst, $src\t# long -> ptr" %}
 8451 
 8452   ins_encode %{
          // Elide the move entirely when the allocator assigned the same
          // register to source and destination.
 8453     if ($dst$$reg != $src$$reg) {
 8454       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8455     }
 8456   %}
 8457 
 8458   ins_pipe(ialu_reg);
 8459 %}
 8460 
 8461 instruct castN2X(iRegLNoSp dst, iRegN src) %{
        // Variant of castP2X (below) where the source is a compressed
        // (narrow) oop register. Note it still matches the CastP2X node;
        // the operand type iRegN is what selects this rule.
 8462   match(Set dst (CastP2X src));
 8463 
 8464   ins_cost(INSN_COST);
 8465   format %{ "mov $dst, $src\t# ptr -> long" %}
 8466 
 8467   ins_encode %{
          // Elide the move when source and destination share a register.
 8468     if ($dst$$reg != $src$$reg) {
 8469       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8470     }
 8471   %}
 8472 
 8473   ins_pipe(ialu_reg);
 8474 %}
 8475 
 8476 instruct castP2X(iRegLNoSp dst, iRegP src) %{
        // Reinterpret a pointer value as a long (CastP2X). Plain register
        // move; no bits change.
 8477   match(Set dst (CastP2X src));
 8478 
 8479   ins_cost(INSN_COST);
 8480   format %{ "mov $dst, $src\t# ptr -> long" %}
 8481 
 8482   ins_encode %{
          // Elide the move when source and destination share a register.
 8483     if ($dst$$reg != $src$$reg) {
 8484       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8485     }
 8486   %}
 8487 
 8488   ins_pipe(ialu_reg);
 8489 %}
 8490 
 8491 // Convert oop into int for vectors alignment masking
 8492 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8493   match(Set dst (ConvL2I (CastP2X src)));
 8494 
 8495   ins_cost(INSN_COST);

15277 
15278   match(Set dst (MoveL2D src));
15279 
15280   effect(DEF dst, USE src);
15281 
15282   ins_cost(INSN_COST);
15283 
15284   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15285 
15286   ins_encode %{
15287     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15288   %}
15289 
15290   ins_pipe(fp_l2d);
15291 
15292 %}
15293 
15294 // ============================================================================
15295 // clearing of an array
15296 
15297 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15298 %{
        // Zero-fill: clear $cnt words starting at $base with the constant 0
        // (immL0). The count and base are pinned to r11/r10 and both are
        // clobbered by the fill loop (USE_KILL), as are the flags.
15299   match(Set dummy (ClearArray (Binary cnt base) zero));
15300   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15301 
15302   ins_cost(4 * INSN_COST);
15303   format %{ "ClearArray $cnt, $base" %}
15304 
15305   ins_encode %{
          // zero_words returns nullptr when it could not emit the required
          // stub/trampoline code; bail out of the compile in that case.
15306     address tpc = __ zero_words($base$$Register, $cnt$$Register);
15307     if (tpc == nullptr) {
15308       ciEnv::current()->record_failure("CodeCache is full");
15309       return;
15310     }
15311   %}
15312 
15313   ins_pipe(pipe_class_memory);
15314 %}
15315 
15316 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15317 %{
        // General fill: store $val into $cnt words starting at $base.
        // Only selected when the node is restricted to whole-word copies
        // (word_copy_only), since fill_words writes word-sized stores.
15318   predicate(((ClearArrayNode*)n)->word_copy_only());
15319   match(Set dummy (ClearArray (Binary cnt base) val));
        // cnt (r11) and base (r10) are consumed and clobbered; flags killed.
15320   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15321 
15322   ins_cost(4 * INSN_COST);
15323   format %{ "ClearArray $cnt, $base, $val" %}
15324 
15325   ins_encode %{
15326     __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15327   %}
15328 
15329   ins_pipe(pipe_class_memory);
15330 %}
15331 
15332 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15333 %{
        // Constant-length zero-fill: selected only when the word count is a
        // compile-time constant below the block-zeroing threshold (expressed
        // in words, hence the shift by LogBytesPerWord) and the node is not
        // restricted to whole-word copies.
15334   predicate((uint64_t)n->in(2)->get_long()
15335             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15336             && !((ClearArrayNode*)n)->word_copy_only());
15337   match(Set dummy (ClearArray cnt base))
15338   effect(TEMP temp, USE_KILL base, KILL cr);
15339 
15340   ins_cost(4 * INSN_COST);
15341   format %{ "ClearArray $cnt, $base" %}
15342 
15343   ins_encode %{
          // zero_words returns nullptr when it could not emit the required
          // stub/trampoline code; bail out of the compile in that case.
15344     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15345     if (tpc == nullptr) {
15346       ciEnv::current()->record_failure("CodeCache is full");
15347       return;
15348     }
15349   %}
15350 
15351   ins_pipe(pipe_class_memory);
15352 %}
15353 
15354 // ============================================================================
15355 // Overflow Math Instructions
15356 

16616 
16617 // Call Runtime Instruction
16618 
16619 instruct CallLeafDirect(method meth)
16620 %{
        // Direct call to a runtime leaf routine (no safepoint/oop-map
        // bookkeeping beyond what the encoder emits).
16621   match(CallLeaf);
16622 
16623   effect(USE meth);
16624 
16625   ins_cost(CALL_COST);
16626 
16627   format %{ "CALL, runtime leaf $meth" %}
16628 
        // aarch64_enc_java_to_runtime (earlier in this file) uses a
        // trampoline call when the target lives in the code cache and a
        // far (register-indirect) call sequence otherwise.
16629   ins_encode( aarch64_enc_java_to_runtime(meth) );
16630 
16631   ins_pipe(pipe_class_call);
16632 %}
16633 
16634 // Call Runtime Instruction
16635 
16636 // entry point is null, target holds the address to call
16637 instruct CallLeafNoFPIndirect(iRegP target)
16638 %{
        // Indirect leaf call: the call node carries no static entry point
        // (entry_point() == nullptr), so the destination address arrives in
        // a register and we branch-and-link through it.
16639   predicate(n->as_Call()->entry_point() == nullptr);
16640 
16641   match(CallLeafNoFP target);
16642 
16643   ins_cost(CALL_COST);
16644 
16645   format %{ "CALL, runtime leaf nofp indirect $target" %}
16646 
16647   ins_encode %{
16648     __ blr($target$$Register);
16649   %}
16650 
16651   ins_pipe(pipe_class_call);
16652 %}
16653 
16654 instruct CallLeafNoFPDirect(method meth)
16655 %{
        // Direct leaf call (no FP arguments/results): counterpart of
        // CallLeafNoFPIndirect above, selected when the call node has a
        // known static entry point.
16656   predicate(n->as_Call()->entry_point() != nullptr);
16657 
16658   match(CallLeafNoFP);
16659 
16660   effect(USE meth);
16661 
16662   ins_cost(CALL_COST);
16663 
16664   format %{ "CALL, runtime leaf nofp $meth" %}
16665 
        // Shares the trampoline-or-far-call encoding used by CallLeafDirect.
16666   ins_encode( aarch64_enc_java_to_runtime(meth) );
16667 
16668   ins_pipe(pipe_class_call);
16669 %}
16670 
16671 // Tail Call; Jump from runtime stub to Java code.
16672 // Also known as an 'interprocedural jump'.
16673 // Target of jump will eventually return to caller.
16674 // TailJump below removes the return address.
16675 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16676 %{
16677   match(TailCall jump_target method_ptr);
< prev index next >