src/hotspot/cpu/aarch64/aarch64.ad

 1220     // registers conditionally reserved.
 1221 
 1222     _ANY_REG32_mask = _ALL_REG32_mask;
 1223     _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
 1224 
 1225     _ANY_REG_mask = _ALL_REG_mask;
 1226 
 1227     _PTR_REG_mask = _ALL_REG_mask;
 1228 
 1229     _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
 1230     _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
 1231 
 1232     _NO_SPECIAL_REG_mask = _ALL_REG_mask;
 1233     _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
 1234 
 1235     _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
 1236     _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
 1237 
 1238     // r27 is not allocatable when compressed oops is on and heapbase is not
 1239     // zero; compressed klass pointers don't use r27 after JDK-8234794
 1240     if (UseCompressedOops && (CompressedOops::ptrs_base() != NULL)) {
 1241       _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
 1242       _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
 1243       _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
 1244     }
 1245 
 1246     // r29 is not allocatable when PreserveFramePointer is on
 1247     if (PreserveFramePointer) {
 1248       _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
 1249       _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
 1250       _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
 1251     }
 1252   }
 1253 
 1254   // Optimization of volatile gets and puts
 1255   // -------------------------------------
 1256   //
 1257   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
 1258   // use to implement volatile reads and writes. For a volatile read
 1259   // we simply need
 1260   //

 1564   bool release = mbvol->trailing_store();
 1565   assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
 1566 #ifdef ASSERT
 1567   if (release) {
 1568     Node* leading = mbvol->leading_membar();
 1569     assert(leading->Opcode() == Op_MemBarRelease, "");
 1570     assert(leading->as_MemBar()->leading_store(), "");
 1571     assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
 1572   }
 1573 #endif
 1574 
 1575   return release;
 1576 }
 1577 
 1578 // predicates controlling emit of str<x>/stlr<x>
 1579 
 1580 bool needs_releasing_store(const Node *n)
 1581 {
 1582   // assert n->is_Store();
 1583   StoreNode *st = n->as_Store();
 1584   return st->trailing_membar() != NULL;
 1585 }
 1586 
 1587 // predicate controlling translation of CAS
 1588 //
 1589 // returns true if CAS needs to use an acquiring load otherwise false
 1590 
 1591 bool needs_acquiring_load_exclusive(const Node *n)
 1592 {
 1593   assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
 1594   LoadStoreNode* ldst = n->as_LoadStore();
 1595   if (is_CAS(n->Opcode(), false)) {
 1596     assert(ldst->trailing_membar() != NULL, "expected trailing membar");
 1597   } else {
 1598     return ldst->trailing_membar() != NULL;
 1599   }
 1600 
 1601   // so we can just return true here
 1602   return true;
 1603 }
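// Editor's sketch (not part of the original source): when this predicate
// returns true, the matcher is expected to select a CAS expansion whose
// load-exclusive is acquiring, so the trailing membar can be elided.
// Assuming the classic LL/SC form (no LSE), the difference is ldaxr vs
// ldxr on the load side:
//
//   retry:
//     ldaxr  x8, [x_addr]        // acquiring load-exclusive
//     cmp    x8, x_expected
//     b.ne   done
//     stlxr  w9, x_new, [x_addr] // releasing store-exclusive
//     cbnz   w9, retry
//   done:
//
// The register names (x8, w9, x_addr, ...) are placeholders for
// illustration only.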
 1604 
 1605 #define __ _masm.
 1606 
 1607 // advance declarations for helper functions to convert register
 1608 // indices to register objects
 1609 
 1610 // the ad file has to provide implementations of certain methods
 1611 // expected by the generic code
 1612 //
 1613 // REQUIRED FUNCTIONALITY
 1614 
 1615 //=============================================================================
 1616 
 1617 // !!!!! Special hack to get all types of calls to specify the byte offset
 1618 //       from the start of the call to the point where the return address

 1627 
 1628 int MachCallDynamicJavaNode::ret_addr_offset()
 1629 {
 1630   return 16; // movz, movk, movk, bl
 1631 }
 1632 
 1633 int MachCallRuntimeNode::ret_addr_offset() {
 1634   // for generated stubs the call will be
 1635   //   bl(addr)
 1636   // or with far branches
 1637   //   bl(trampoline_stub)
 1638   // for real runtime callouts it will be six instructions
 1639   // see aarch64_enc_java_to_runtime
 1640   //   adr(rscratch2, retaddr)
 1641   //   lea(rscratch1, RuntimeAddress(addr))
 1642   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1643   //   blr(rscratch1)
 1644   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1645   if (cb) {
 1646     return 1 * NativeInstruction::instruction_size;



 1647   } else {
 1648     return 6 * NativeInstruction::instruction_size;
 1649   }
 1650 }
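// Editor's note (illustration, not part of the original source): the
// six-instruction count above assumes the worst case for the callout
// sequence listed in the comment, where lea(rscratch1, RuntimeAddress(addr))
// expands to a full immediate move:
//
//   adr   rscratch2, retaddr            // 1 instruction
//   movz  rscratch1, #imm16_0           // lea worst case: movz + 2 x movk
//   movk  rscratch1, #imm16_1, lsl #16
//   movk  rscratch1, #imm16_2, lsl #32
//   stp   zr, rscratch2, [sp, #-16]!    // 1 instruction
//   blr   rscratch1                     // 1 instruction
//
// This is consistent with MachCallDynamicJavaNode::ret_addr_offset()
// above, which counts 16 bytes for its movz, movk, movk, bl sequence.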
 1651 
 1652 //=============================================================================
 1653 
 1654 #ifndef PRODUCT
 1655 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1656   st->print("BREAKPOINT");
 1657 }
 1658 #endif
 1659 
 1660 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1661   C2_MacroAssembler _masm(&cbuf);
 1662   __ brk(0);
 1663 }
 1664 
 1665 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1666   return MachNode::size(ra_);

 1717 
 1718   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1719 
 1720   if (C->output()->need_stack_bang(framesize))
 1721     st->print("# stack bang size=%d\n\t", framesize);
 1722 
 1723   if (VM_Version::use_rop_protection()) {
 1724     st->print("ldr  zr, [lr]\n\t");
 1725     st->print("paciaz\n\t");
 1726   }
 1727   if (framesize < ((1 << 9) + 2 * wordSize)) {
 1728     st->print("sub  sp, sp, #%d\n\t", framesize);
 1729     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
 1730     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
 1731   } else {
 1732     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
 1733     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
 1734     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
 1735     st->print("sub  sp, sp, rscratch1");
 1736   }
 1737   if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
 1738     st->print("\n\t");
 1739     st->print("ldr  rscratch1, [guard]\n\t");
 1740     st->print("dmb ishld\n\t");
 1741     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1742     st->print("cmp  rscratch1, rscratch2\n\t");
 1743     st->print("b.eq skip");
 1744     st->print("\n\t");
 1745     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1746     st->print("b skip\n\t");
 1747     st->print("guard: int\n\t");
 1748     st->print("\n\t");
 1749     st->print("skip:\n\t");
 1750   }
 1751 }
 1752 #endif
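// Editor's note (worked arithmetic, not part of the original source): the
// threshold (1 << 9) + 2 * wordSize used above is 512 + 16 = 528 bytes.
// The assumption is that for frames below this size the offset passed to
// stp (framesize - 2 * wordSize, so at most 512 minus alignment) still
// fits the scaled, signed 7-bit stp immediate, allowing the short
// sub-then-stp form; larger frames use the pre-indexed stp and adjust sp
// via rscratch1 instead.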
 1753 
 1754 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1755   Compile* C = ra_->C;
 1756   C2_MacroAssembler _masm(&cbuf);
 1757 
 1758   // n.b. frame size includes space for return pc and rfp
 1759   const int framesize = C->output()->frame_size_in_bytes();
 1760 
 1761   // insert a nop at the start of the prolog so we can patch in a
 1762   // branch if we need to invalidate the method later
 1763   __ nop();
 1764 
 1765   if (C->clinit_barrier_on_entry()) {
 1766     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1767 
 1768     Label L_skip_barrier;
 1769 
 1770     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1771     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1772     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1773     __ bind(L_skip_barrier);
 1774   }
 1775 
 1776   if (C->max_vector_size() > 0) {
 1777     __ reinitialize_ptrue();
 1778   }
 1779 
 1780   int bangsize = C->output()->bang_size_in_bytes();
 1781   if (C->output()->need_stack_bang(bangsize))
 1782     __ generate_stack_overflow_check(bangsize);
 1783 
 1784   __ build_frame(framesize);
 1785 
 1786   if (C->stub_function() == NULL) {
 1787     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1788     if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
 1789       // Dummy labels for just measuring the code size
 1790       Label dummy_slow_path;
 1791       Label dummy_continuation;
 1792       Label dummy_guard;
 1793       Label* slow_path = &dummy_slow_path;
 1794       Label* continuation = &dummy_continuation;
 1795       Label* guard = &dummy_guard;
 1796       if (!Compile::current()->output()->in_scratch_emit_size()) {
 1797         // Use real labels from actual stub when not emitting code for the purpose of measuring its size
 1798         C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
 1799         Compile::current()->output()->add_stub(stub);
 1800         slow_path = &stub->entry();
 1801         continuation = &stub->continuation();
 1802         guard = &stub->guard();
 1803       }
 1804       // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
 1805       bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
 1806     }
 1807   }
 1808 
 1809   if (VerifyStackAtCalls) {
 1810     Unimplemented();
 1811   }
 1812 
 1813   C->output()->set_frame_complete(cbuf.insts_size());
 1814 
 1815   if (C->has_mach_constant_base_node()) {
 1816     // NOTE: We set the table base offset here because users might be
 1817     // emitted before MachConstantBaseNode.
 1818     ConstantTable& constant_table = C->output()->constant_table();
 1819     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1820   }
 1821 }
 1822 
 1823 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1824 {
 1825   return MachNode::size(ra_); // too many variables; just compute it
 1826                               // the hard way
 1827 }
 1828 
 1829 int MachPrologNode::reloc() const
 1830 {
 1831   return 0;
 1832 }
 1833 
 1834 //=============================================================================
 1835 
 1836 #ifndef PRODUCT
 1837 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1838   Compile* C = ra_->C;
 1839   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1840 
 1841   st->print("# pop frame %d\n\t",framesize);
 1842 
 1843   if (framesize == 0) {
 1844     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1845   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1846     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1847     st->print("add  sp, sp, #%d\n\t", framesize);
 1848   } else {

 1852   }
 1853   if (VM_Version::use_rop_protection()) {
 1854     st->print("autiaz\n\t");
 1855     st->print("ldr  zr, [lr]\n\t");
 1856   }
 1857 
 1858   if (do_polling() && C->is_method_compilation()) {
 1859     st->print("# test polling word\n\t");
 1860     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1861     st->print("cmp  sp, rscratch1\n\t");
 1862     st->print("bhi #slow_path");
 1863   }
 1864 }
 1865 #endif
 1866 
 1867 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1868   Compile* C = ra_->C;
 1869   C2_MacroAssembler _masm(&cbuf);
 1870   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1871 
 1872   __ remove_frame(framesize);
 1873 
 1874   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1875     __ reserved_stack_check();
 1876   }
 1877 
 1878   if (do_polling() && C->is_method_compilation()) {
 1879     Label dummy_label;
 1880     Label* code_stub = &dummy_label;
 1881     if (!C->output()->in_scratch_emit_size()) {
 1882       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1883       C->output()->add_stub(stub);
 1884       code_stub = &stub->entry();
 1885     }
 1886     __ relocate(relocInfo::poll_return_type);
 1887     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1888   }
 1889 }
 1890 
 1891 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1892   // Variable size. Determine dynamically.
 1893   return MachNode::size(ra_);
 1894 }
 1895 
 1896 int MachEpilogNode::reloc() const {
 1897   // Return number of relocatable values contained in this instruction.
 1898   return 1; // 1 for polling page.
 1899 }
 1900 
 1901 const Pipeline * MachEpilogNode::pipeline() const {
 1902   return MachNode::pipeline_class();
 1903 }
 1904 
 1905 //=============================================================================
 1906 
 1907 // Figure out which register class each belongs in: rc_int, rc_float or
 1908 // rc_stack.
 1909 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 1910 
 1911 static enum RC rc_class(OptoReg::Name reg) {
 1912 
 1913   if (reg == OptoReg::Bad) {
 1914     return rc_bad;
 1915   }

 2136       }
 2137       st->print("\t# vector spill size = %d", vsize);
 2138     } else if (ideal_reg() == Op_RegVectMask) {
 2139       assert(Matcher::supports_scalable_vector(), "bad register type for spill");
 2140       int vsize = Matcher::scalable_predicate_reg_slots() * 32;
 2141       st->print("\t# predicate spill size = %d", vsize);
 2142     } else {
 2143       st->print("\t# spill size = %d", is64 ? 64 : 32);
 2144     }
 2145   }
 2146 
 2147   return 0;
 2148 
 2149 }
 2150 
 2151 #ifndef PRODUCT
 2152 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 2153   if (!ra_)
 2154     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
 2155   else
 2156     implementation(NULL, ra_, false, st);
 2157 }
 2158 #endif
 2159 
 2160 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 2161   implementation(&cbuf, ra_, false, NULL);
 2162 }
 2163 
 2164 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2165   return MachNode::size(ra_);
 2166 }
 2167 
 2168 //=============================================================================
 2169 
 2170 #ifndef PRODUCT
 2171 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 2172   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2173   int reg = ra_->get_reg_first(this);
 2174   st->print("add %s, sp, #%d\t# box lock",
 2175             Matcher::regName[reg], offset);
 2176 }
 2177 #endif
 2178 
 2179 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 2180   C2_MacroAssembler _masm(&cbuf);
 2181 
 2182   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2183   int reg    = ra_->get_encode(this);
 2184 
 2185   // This add will handle any 24-bit signed offset. 24 bits allows an
 2186   // 8 megabyte stack frame.
 2187   __ add(as_Register(reg), sp, offset);
 2188 }
 2189 
 2190 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2191   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2192   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2193 
 2194   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2195     return NativeInstruction::instruction_size;
 2196   } else {
 2197     return 2 * NativeInstruction::instruction_size;
 2198   }
 2199 }
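// Editor's sketch (not part of the original source): the 24-bit claim in
// the comment above matches the two-instruction fallback in size(). When
// the offset is not a valid 12-bit (optionally shifted) add/sub immediate,
// one plausible expansion splits it into two shifted 12-bit chunks, e.g.
// for offset = 0x123456:
//
//   add  xN, sp, #0x123, lsl #12   // high 12 bits
//   add  xN, xN, #0x456            // low 12 bits
//
// Otherwise a single add xN, sp, #offset suffices, giving the
// 1 * NativeInstruction::instruction_size case.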
 2200 
 2201 //=============================================================================











 2202 






































 2203 #ifndef PRODUCT
 2204 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2205 {
 2206   st->print_cr("# MachUEPNode");
 2207   if (UseCompressedClassPointers) {
 2208     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2209     if (CompressedKlassPointers::shift() != 0) {
 2210       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2211     }
 2212   } else {
 2213     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2214   }
 2215   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2216   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2217 }
 2218 #endif
 2219 
 2220 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2221 {
 2222   // This is the unverified entry point.
 2223   C2_MacroAssembler _masm(&cbuf);

 2224 

 2225   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2226   Label skip;
 2227   // TODO
 2228   // can we avoid this skip and still use a reloc?
 2229   __ br(Assembler::EQ, skip);
 2230   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2231   __ bind(skip);
 2232 }
 2233 
 2234 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2235 {
 2236   return MachNode::size(ra_);
 2237 }
 2238 
 2239 // REQUIRED EMIT CODE
 2240 
 2241 //=============================================================================
 2242 
 2243 // Emit exception handler code.
 2244 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2245 {
 2246   // mov rscratch1 #exception_blob_entry_point
 2247   // br rscratch1
 2248   // Note that the code buffer's insts_mark is always relative to insts.
 2249   // That's why we must use the macroassembler to generate a handler.
 2250   C2_MacroAssembler _masm(&cbuf);
 2251   address base = __ start_a_stub(size_exception_handler());
 2252   if (base == NULL) {
 2253     ciEnv::current()->record_failure("CodeCache is full");
 2254     return 0;  // CodeBuffer::expand failed
 2255   }
 2256   int offset = __ offset();
 2257   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2258   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2259   __ end_a_stub();
 2260   return offset;
 2261 }
 2262 
 2263 // Emit deopt handler code.
 2264 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
 2265 {
 2266   // Note that the code buffer's insts_mark is always relative to insts.
 2267   // That's why we must use the macroassembler to generate a handler.
 2268   C2_MacroAssembler _masm(&cbuf);
 2269   address base = __ start_a_stub(size_deopt_handler());
 2270   if (base == NULL) {
 2271     ciEnv::current()->record_failure("CodeCache is full");
 2272     return 0;  // CodeBuffer::expand failed
 2273   }
 2274   int offset = __ offset();
 2275 
 2276   __ adr(lr, __ pc());
 2277   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2278 
 2279   assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
 2280   __ end_a_stub();
 2281   return offset;
 2282 }
 2283 
 2284 // REQUIRED MATCHER CODE
 2285 
 2286 //=============================================================================
 2287 
 2288 bool Matcher::match_rule_supported(int opcode) {
 2289   if (!has_match_rule(opcode))
 2290     return false;

 2393   }
 2394   switch(len) {
 2395     // For 16-bit/32-bit mask vector, reuse VecD.
 2396     case  2:
 2397     case  4:
 2398     case  8: return Op_VecD;
 2399     case 16: return Op_VecX;
 2400   }
 2401   ShouldNotReachHere();
 2402   return 0;
 2403 }
 2404 
 2405 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 2406   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 2407   switch (ideal_reg) {
 2408     case Op_VecA: return new vecAOper();
 2409     case Op_VecD: return new vecDOper();
 2410     case Op_VecX: return new vecXOper();
 2411   }
 2412   ShouldNotReachHere();
 2413   return NULL;
 2414 }
 2415 
 2416 bool Matcher::is_reg2reg_move(MachNode* m) {
 2417   return false;
 2418 }
 2419 
 2420 bool Matcher::is_generic_vector(MachOper* opnd)  {
 2421   return opnd->opcode() == VREG;
 2422 }
 2423 
 2424 // Return whether or not this register is ever used as an argument.
 2425 // This function is used on startup to build the trampoline stubs in
 2426 // generateOptoStub.  Registers not mentioned will be killed by the VM
 2428 // call in the trampoline, and arguments in those registers will not be
 2428 // available to the callee.
 2429 bool Matcher::can_be_java_arg(int reg)
 2430 {
 2431   return
 2432     reg ==  R0_num || reg == R0_H_num ||
 2433     reg ==  R1_num || reg == R1_H_num ||

 2566       result = Assembler::VS; break;
 2567     case BoolTest::no_overflow:
 2568       result = Assembler::VC; break;
 2569     default:
 2570       ShouldNotReachHere();
 2571       return Assembler::Condition(-1);
 2572   }
 2573 
 2574   // Check conversion
 2575   if (cond & BoolTest::unsigned_compare) {
 2576     assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == result, "Invalid conversion");
 2577   } else {
 2578     assert(cmpOpOper(cond).ccode() == result, "Invalid conversion");
 2579   }
 2580 
 2581   return result;
 2582 }
 2583 
 2584 // Binary src (Replicate con)
 2585 bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
 2586   if (n == NULL || m == NULL) {
 2587     return false;
 2588   }
 2589 
 2590   if (UseSVE == 0 || m->Opcode() != Op_Replicate) {
 2591     return false;
 2592   }
 2593 
 2594   Node* imm_node = m->in(1);
 2595   if (!imm_node->is_Con()) {
 2596     return false;
 2597   }
 2598 
 2599   const Type* t = imm_node->bottom_type();
 2600   if (!(t->isa_int() || t->isa_long())) {
 2601     return false;
 2602   }
 2603 
 2604   switch (n->Opcode()) {
 2605   case Op_AndV:
 2606   case Op_OrV:
 2607   case Op_XorV: {
 2608     Assembler::SIMD_RegVariant T = Assembler::elemType_to_regVariant(Matcher::vector_element_basic_type(n));
 2609     uint64_t value = t->isa_long() ? (uint64_t)imm_node->get_long() : (uint64_t)imm_node->get_int();
 2610     return Assembler::operand_valid_for_sve_logical_immediate(Assembler::regVariant_to_elemBits(T), value);
 2611   }
 2612   case Op_AddVB:
 2613     return (imm_node->get_int() <= 255 && imm_node->get_int() >= -255);
 2614   case Op_AddVS:
 2615   case Op_AddVI:
 2616     return Assembler::operand_valid_for_sve_add_sub_immediate((int64_t)imm_node->get_int());
 2617   case Op_AddVL:
 2618     return Assembler::operand_valid_for_sve_add_sub_immediate(imm_node->get_long());
 2619   default:
 2620     return false;
 2621   }
 2622 }
 2623 
 2624 // (XorV src (Replicate m1))
 2625 // (XorVMask src (MaskAll m1))
 2626 bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
 2627   if (n != NULL && m != NULL) {
 2628     return (n->Opcode() == Op_XorV || n->Opcode() == Op_XorVMask) &&
 2629            VectorNode::is_all_ones_vector(m);
 2630   }
 2631   return false;
 2632 }
 2633 
 2634 // Should the matcher clone input 'm' of node 'n'?
 2635 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 2636   if (is_vshift_con_pattern(n, m) ||
 2637       is_vector_bitwise_not_pattern(n, m) ||
 2638       is_valid_sve_arith_imm_pattern(n, m)) {
 2639     mstack.push(m, Visit);
 2640     return true;
 2641   }
 2642   return false;
 2643 }
 2644 
 2645 // Should the Matcher clone shifts on addressing modes, expecting them
 2646 // to be subsumed into complex addressing expressions or compute them
 2647 // into registers?

 3413     } else {
 3414       __ movw(dst_reg, con);
 3415     }
 3416   %}
 3417 
 3418   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
 3419     C2_MacroAssembler _masm(&cbuf);
 3420     Register dst_reg = as_Register($dst$$reg);
 3421     uint64_t con = (uint64_t)$src$$constant;
 3422     if (con == 0) {
 3423       __ mov(dst_reg, zr);
 3424     } else {
 3425       __ mov(dst_reg, con);
 3426     }
 3427   %}
 3428 
 3429   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
 3430     C2_MacroAssembler _masm(&cbuf);
 3431     Register dst_reg = as_Register($dst$$reg);
 3432     address con = (address)$src$$constant;
 3433     if (con == NULL || con == (address)1) {
 3434       ShouldNotReachHere();
 3435     } else {
 3436       relocInfo::relocType rtype = $src->constant_reloc();
 3437       if (rtype == relocInfo::oop_type) {
 3438         __ movoop(dst_reg, (jobject)con);
 3439       } else if (rtype == relocInfo::metadata_type) {
 3440         __ mov_metadata(dst_reg, (Metadata*)con);
 3441       } else {
 3442         assert(rtype == relocInfo::none, "unexpected reloc type");
 3443         if (! __ is_valid_AArch64_address(con) ||
 3444             con < (address)(uintptr_t)os::vm_page_size()) {
 3445           __ mov(dst_reg, con);
 3446         } else {
 3447           uint64_t offset;
 3448           __ adrp(dst_reg, con, offset);
 3449           __ add(dst_reg, dst_reg, offset);
 3450         }
 3451       }
 3452     }
 3453   %}

 3456     C2_MacroAssembler _masm(&cbuf);
 3457     Register dst_reg = as_Register($dst$$reg);
 3458     __ mov(dst_reg, zr);
 3459   %}
 3460 
 3461   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
 3462     C2_MacroAssembler _masm(&cbuf);
 3463     Register dst_reg = as_Register($dst$$reg);
 3464     __ mov(dst_reg, (uint64_t)1);
 3465   %}
 3466 
 3467   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
 3468     C2_MacroAssembler _masm(&cbuf);
 3469     __ load_byte_map_base($dst$$Register);
 3470   %}
 3471 
 3472   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
 3473     C2_MacroAssembler _masm(&cbuf);
 3474     Register dst_reg = as_Register($dst$$reg);
 3475     address con = (address)$src$$constant;
 3476     if (con == NULL) {
 3477       ShouldNotReachHere();
 3478     } else {
 3479       relocInfo::relocType rtype = $src->constant_reloc();
 3480       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
 3481       __ set_narrow_oop(dst_reg, (jobject)con);
 3482     }
 3483   %}
 3484 
 3485   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
 3486     C2_MacroAssembler _masm(&cbuf);
 3487     Register dst_reg = as_Register($dst$$reg);
 3488     __ mov(dst_reg, zr);
 3489   %}
 3490 
 3491   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
 3492     C2_MacroAssembler _masm(&cbuf);
 3493     Register dst_reg = as_Register($dst$$reg);
 3494     address con = (address)$src$$constant;
 3495     if (con == NULL) {
 3496       ShouldNotReachHere();
 3497     } else {
 3498       relocInfo::relocType rtype = $src->constant_reloc();
 3499       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
 3500       __ set_narrow_klass(dst_reg, (Klass *)con);
 3501     }
 3502   %}
 3503 
 3504   // arithmetic encodings
 3505 
 3506   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
 3507     C2_MacroAssembler _masm(&cbuf);
 3508     Register dst_reg = as_Register($dst$$reg);
 3509     Register src_reg = as_Register($src1$$reg);
 3510     int32_t con = (int32_t)$src2$$constant;
 3511     // add has primary == 0, subtract has primary == 1
 3512     if ($primary) { con = -con; }
 3513     if (con < 0) {
 3514       __ subw(dst_reg, src_reg, -con);
 3515     } else {

 3658     Label *L = $lbl$$label;
 3659     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
 3660   %}
 3661 
 3662   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
 3663     C2_MacroAssembler _masm(&cbuf);
 3664     Label *L = $lbl$$label;
 3665     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
 3666   %}
 3667 
 3668   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
 3669   %{
 3670      Register sub_reg = as_Register($sub$$reg);
 3671      Register super_reg = as_Register($super$$reg);
 3672      Register temp_reg = as_Register($temp$$reg);
 3673      Register result_reg = as_Register($result$$reg);
 3674 
 3675      Label miss;
 3676      C2_MacroAssembler _masm(&cbuf);
 3677      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
 3678                                      NULL, &miss,
 3679                                      /*set_cond_codes:*/ true);
 3680      if ($primary) {
 3681        __ mov(result_reg, zr);
 3682      }
 3683      __ bind(miss);
 3684   %}
 3685 
 3686   enc_class aarch64_enc_java_static_call(method meth) %{
 3687     C2_MacroAssembler _masm(&cbuf);
 3688 
 3689     address addr = (address)$meth$$method;
 3690     address call;
 3691     if (!_method) {
 3692       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3693       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type));
 3694       if (call == NULL) {
 3695         ciEnv::current()->record_failure("CodeCache is full");
 3696         return;
 3697       }
 3698     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 3699       // The NOP here is purely to ensure that eliding a call to
 3700       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 3701       __ nop();
 3702       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 3703     } else {
 3704       int method_index = resolved_method_index(cbuf);
 3705       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3706                                                   : static_call_Relocation::spec(method_index);
 3707       call = __ trampoline_call(Address(addr, rspec));
 3708       if (call == NULL) {
 3709         ciEnv::current()->record_failure("CodeCache is full");
 3710         return;
 3711       }
 3712       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 3713         // Calls of the same statically bound method can share
 3714         // a stub to the interpreter.
 3715         cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
 3716       } else {
 3717         // Emit stub for static call
 3718         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call);
 3719         if (stub == NULL) {
 3720           ciEnv::current()->record_failure("CodeCache is full");
 3721           return;
 3722         }
 3723       }
 3724     }
 3725 
 3726     __ post_call_nop();
 3727 
 3728     // Only non uncommon_trap calls need to reinitialize ptrue.
 3729     if (Compile::current()->max_vector_size() > 0 && uncommon_trap_request() == 0) {
 3730       __ reinitialize_ptrue();
 3731     }
 3732   %}
 3733 
 3734   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3735     C2_MacroAssembler _masm(&cbuf);
 3736     int method_index = resolved_method_index(cbuf);
 3737     address call = __ ic_call((address)$meth$$method, method_index);
 3738     if (call == NULL) {
 3739       ciEnv::current()->record_failure("CodeCache is full");
 3740       return;
 3741     }
 3742     __ post_call_nop();
 3743     if (Compile::current()->max_vector_size() > 0) {
 3744       __ reinitialize_ptrue();
 3745     }
 3746   %}
 3747 
 3748   enc_class aarch64_enc_call_epilog() %{
 3749     C2_MacroAssembler _masm(&cbuf);
 3750     if (VerifyStackAtCalls) {
 3751       // Check that stack depth is unchanged: find majik cookie on stack
 3752       __ call_Unimplemented();
 3753     }































 3754   %}
 3755 
 3756   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3757     C2_MacroAssembler _masm(&cbuf);
 3758 
 3759     // some calls to generated routines (arraycopy code) are scheduled
 3760     // by C2 as runtime calls. if so we can call them using a br (they
 3761     // will be in a reachable segment) otherwise we have to use a blr
 3762     // which loads the absolute address into a register.
 3763     address entry = (address)$meth$$method;
 3764     CodeBlob *cb = CodeCache::find_blob(entry);
 3765     if (cb) {
 3766       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3767       if (call == NULL) {
 3768         ciEnv::current()->record_failure("CodeCache is full");
 3769         return;
 3770       }
 3771       __ post_call_nop();
 3772     } else {
 3773       Label retaddr;
 3774       __ adr(rscratch2, retaddr);
 3775       __ lea(rscratch1, RuntimeAddress(entry));
 3776       // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
 3777       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
 3778       __ blr(rscratch1);
 3779       __ bind(retaddr);
 3780       __ post_call_nop();
 3781       __ add(sp, sp, 2 * wordSize);
 3782     }
 3783     if (Compile::current()->max_vector_size() > 0) {
 3784       __ reinitialize_ptrue();
 3785     }
 3786   %}
 3787 

 4646 operand immL_32bits()
 4647 %{
 4648   predicate(n->get_long() == 0xFFFFFFFFL);
 4649   match(ConL);
 4650   op_cost(0);
 4651   format %{ %}
 4652   interface(CONST_INTER);
 4653 %}
 4654 
 4655 // Pointer operands
 4656 // Pointer Immediate
 4657 operand immP()
 4658 %{
 4659   match(ConP);
 4660 
 4661   op_cost(0);
 4662   format %{ %}
 4663   interface(CONST_INTER);
 4664 %}
 4665 
 4666 // NULL Pointer Immediate
 4667 operand immP0()
 4668 %{
 4669   predicate(n->get_ptr() == 0);
 4670   match(ConP);
 4671 
 4672   op_cost(0);
 4673   format %{ %}
 4674   interface(CONST_INTER);
 4675 %}
 4676 
 4677 // Pointer Immediate One
 4678 // this is used in object initialization (initial object header)
 4679 operand immP_1()
 4680 %{
 4681   predicate(n->get_ptr() == 1);
 4682   match(ConP);
 4683 
 4684   op_cost(0);
 4685   format %{ %}
 4686   interface(CONST_INTER);

 4778 operand immFPacked()
 4779 %{
 4780   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
 4781   match(ConF);
 4782   op_cost(0);
 4783   format %{ %}
 4784   interface(CONST_INTER);
 4785 %}
 4786 
 4787 // Narrow pointer operands
 4788 // Narrow Pointer Immediate
 4789 operand immN()
 4790 %{
 4791   match(ConN);
 4792 
 4793   op_cost(0);
 4794   format %{ %}
 4795   interface(CONST_INTER);
 4796 %}
 4797 
 4798 // Narrow NULL Pointer Immediate
 4799 operand immN0()
 4800 %{
 4801   predicate(n->get_narrowcon() == 0);
 4802   match(ConN);
 4803 
 4804   op_cost(0);
 4805   format %{ %}
 4806   interface(CONST_INTER);
 4807 %}
 4808 
 4809 operand immNKlass()
 4810 %{
 4811   match(ConNKlass);
 4812 
 4813   op_cost(0);
 4814   format %{ %}
 4815   interface(CONST_INTER);
 4816 %}
 4817 
 4818 // Integer 32 bit Register Operands

 7187 instruct loadConL(iRegLNoSp dst, immL src)
 7188 %{
 7189   match(Set dst src);
 7190 
 7191   ins_cost(INSN_COST);
 7192   format %{ "mov $dst, $src\t# long" %}
 7193 
 7194   ins_encode( aarch64_enc_mov_imm(dst, src) );
 7195 
 7196   ins_pipe(ialu_imm);
 7197 %}
 7198 
 7199 // Load Pointer Constant
 7200 
 7201 instruct loadConP(iRegPNoSp dst, immP con)
 7202 %{
 7203   match(Set dst con);
 7204 
 7205   ins_cost(INSN_COST * 4);
 7206   format %{
 7207     "mov  $dst, $con\t# ptr\n\t"
 7208   %}
 7209 
 7210   ins_encode(aarch64_enc_mov_p(dst, con));
 7211 
 7212   ins_pipe(ialu_imm);
 7213 %}
 7214 
 7215 // Load Null Pointer Constant
 7216 
 7217 instruct loadConP0(iRegPNoSp dst, immP0 con)
 7218 %{
 7219   match(Set dst con);
 7220 
 7221   ins_cost(INSN_COST);
 7222   format %{ "mov  $dst, $con\t# NULL ptr" %}
 7223 
 7224   ins_encode(aarch64_enc_mov_p0(dst, con));
 7225 
 7226   ins_pipe(ialu_imm);
 7227 %}
 7228 
 7229 // Load Pointer Constant One
 7230 
 7231 instruct loadConP1(iRegPNoSp dst, immP_1 con)
 7232 %{
 7233   match(Set dst con);
 7234 
 7235   ins_cost(INSN_COST);
 7236   format %{ "mov  $dst, $con\t# ptr 1" %}
 7237 
 7238   ins_encode(aarch64_enc_mov_p1(dst, con));
 7239 
 7240   ins_pipe(ialu_imm);
 7241 %}
 7242 
 7243 // Load Byte Map Base Constant
 7244 
 7245 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
 7246 %{
 7247   match(Set dst con);
 7248 
 7249   ins_cost(INSN_COST);
 7250   format %{ "adr  $dst, $con\t# Byte Map Base" %}
 7251 
 7252   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
 7253 
 7254   ins_pipe(ialu_imm);
 7255 %}
 7256 

 7258 
 7259 instruct loadConN(iRegNNoSp dst, immN con)
 7260 %{
 7261   match(Set dst con);
 7262 
 7263   ins_cost(INSN_COST * 4);
 7264   format %{ "mov  $dst, $con\t# compressed ptr" %}
 7265 
 7266   ins_encode(aarch64_enc_mov_n(dst, con));
 7267 
 7268   ins_pipe(ialu_imm);
 7269 %}
 7270 
 7271 // Load Narrow Null Pointer Constant
 7272 
 7273 instruct loadConN0(iRegNNoSp dst, immN0 con)
 7274 %{
 7275   match(Set dst con);
 7276 
 7277   ins_cost(INSN_COST);
 7278   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
 7279 
 7280   ins_encode(aarch64_enc_mov_n0(dst, con));
 7281 
 7282   ins_pipe(ialu_imm);
 7283 %}
 7284 
 7285 // Load Narrow Klass Constant
 7286 
 7287 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
 7288 %{
 7289   match(Set dst con);
 7290 
 7291   ins_cost(INSN_COST);
 7292   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
 7293 
 7294   ins_encode(aarch64_enc_mov_nk(dst, con));
 7295 
 7296   ins_pipe(ialu_imm);
 7297 %}
 7298 

 8390 %}
 8391 
 8392 // ============================================================================
 8393 // Cast/Convert Instructions
 8394 
 8395 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8396   match(Set dst (CastX2P src));
 8397 
 8398   ins_cost(INSN_COST);
 8399   format %{ "mov $dst, $src\t# long -> ptr" %}
 8400 
 8401   ins_encode %{
 8402     if ($dst$$reg != $src$$reg) {
 8403       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8404     }
 8405   %}
 8406 
 8407   ins_pipe(ialu_reg);
 8408 %}
 8409 















 8410 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8411   match(Set dst (CastP2X src));
 8412 
 8413   ins_cost(INSN_COST);
 8414   format %{ "mov $dst, $src\t# ptr -> long" %}
 8415 
 8416   ins_encode %{
 8417     if ($dst$$reg != $src$$reg) {
 8418       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8419     }
 8420   %}
 8421 
 8422   ins_pipe(ialu_reg);
 8423 %}
 8424 
 8425 // Convert oop into int for vectors alignment masking
 8426 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8427   match(Set dst (ConvL2I (CastP2X src)));
 8428 
 8429   ins_cost(INSN_COST);

15211 
15212   match(Set dst (MoveL2D src));
15213 
15214   effect(DEF dst, USE src);
15215 
15216   ins_cost(INSN_COST);
15217 
15218   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15219 
15220   ins_encode %{
15221     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15222   %}
15223 
15224   ins_pipe(fp_l2d);
15225 
15226 %}
15227 
15228 // ============================================================================
15229 // clearing of an array
15230 
15231 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
15232 %{
15233   match(Set dummy (ClearArray cnt base));
15234   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15235 
15236   ins_cost(4 * INSN_COST);
15237   format %{ "ClearArray $cnt, $base" %}
15238 
15239   ins_encode %{
15240     address tpc = __ zero_words($base$$Register, $cnt$$Register);
15241     if (tpc == NULL) {
15242       ciEnv::current()->record_failure("CodeCache is full");
15243       return;
15244     }
15245   %}
15246 
15247   ins_pipe(pipe_class_memory);
15248 %}
15249 
















15250 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15251 %{
15252   predicate((uint64_t)n->in(2)->get_long()
15253             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));

15254   match(Set dummy (ClearArray cnt base));
15255   effect(TEMP temp, USE_KILL base, KILL cr);
15256 
15257   ins_cost(4 * INSN_COST);
15258   format %{ "ClearArray $cnt, $base" %}
15259 
15260   ins_encode %{
15261     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15262     if (tpc == NULL) {
15263       ciEnv::current()->record_failure("CodeCache is full");
15264       return;
15265     }
15266   %}
15267 
15268   ins_pipe(pipe_class_memory);
15269 %}
15270 
15271 // ============================================================================
15272 // Overflow Math Instructions
15273 
15274 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15275 %{
15276   match(Set cr (OverflowAddI op1 op2));
15277 
15278   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
15279   ins_cost(INSN_COST);
15280   ins_encode %{
15281     __ cmnw($op1$$Register, $op2$$Register);
15282   %}

16533 
16534 // Call Runtime Instruction
16535 
16536 instruct CallLeafDirect(method meth)
16537 %{
16538   match(CallLeaf);
16539 
16540   effect(USE meth);
16541 
16542   ins_cost(CALL_COST);
16543 
16544   format %{ "CALL, runtime leaf $meth" %}
16545 
16546   ins_encode( aarch64_enc_java_to_runtime(meth) );
16547 
16548   ins_pipe(pipe_class_call);
16549 %}
16550 
16551 // Call Runtime Instruction
16552 


















16553 instruct CallLeafNoFPDirect(method meth)
16554 %{


16555   match(CallLeafNoFP);
16556 
16557   effect(USE meth);
16558 
16559   ins_cost(CALL_COST);
16560 
16561   format %{ "CALL, runtime leaf nofp $meth" %}
16562 
16563   ins_encode( aarch64_enc_java_to_runtime(meth) );
16564 
16565   ins_pipe(pipe_class_call);
16566 %}
16567 
16568 // Tail Call; Jump from runtime stub to Java code.
16569 // Also known as an 'interprocedural jump'.
16570 // Target of jump will eventually return to caller.
16571 // TailJump below removes the return address.
16572 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16573 %{
16574   match(TailCall jump_target method_ptr);

17107   ins_pipe(pipe_class_memory);
17108 %}
17109 
17110 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17111                        iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17112                        vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17113                        vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17114                        iRegP_R10 tmp, rFlagsReg cr)
17115 %{
17116   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
17117   match(Set result (AryEq ary1 ary2));
17118   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17119          TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17120          TEMP vtmp6, TEMP vtmp7, KILL cr);
17121 
17122   format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17123   ins_encode %{
17124     address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17125                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17126                                    $result$$Register, $tmp$$Register, 1);
17127     if (tpc == NULL) {
17128       ciEnv::current()->record_failure("CodeCache is full");
17129       return;
17130     }
17131   %}
17132   ins_pipe(pipe_class_memory);
17133 %}
17134 
17135 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17136                        iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17137                        vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17138                        vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17139                        iRegP_R10 tmp, rFlagsReg cr)
17140 %{
17141   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
17142   match(Set result (AryEq ary1 ary2));
17143   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17144          TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17145          TEMP vtmp6, TEMP vtmp7, KILL cr);
17146 
17147   format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17148   ins_encode %{
17149     address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17150                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17151                                    $result$$Register, $tmp$$Register, 2);
17152     if (tpc == NULL) {
17153       ciEnv::current()->record_failure("CodeCache is full");
17154       return;
17155     }
17156   %}
17157   ins_pipe(pipe_class_memory);
17158 %}
17159 
17160 instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
17161 %{
17162   match(Set result (CountPositives ary1 len));
17163   effect(USE_KILL ary1, USE_KILL len, KILL cr);
17164   format %{ "count positives byte[] $ary1,$len -> $result" %}
17165   ins_encode %{
17166     address tpc = __ count_positives($ary1$$Register, $len$$Register, $result$$Register);
17167     if (tpc == NULL) {
17168       ciEnv::current()->record_failure("CodeCache is full");
17169       return;
17170     }
17171   %}
17172   ins_pipe( pipe_slow );
17173 %}
17174 
17175 // fast char[] to byte[] compression
17176 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17177                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17178                          vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17179                          iRegI_R0 result, rFlagsReg cr)
17180 %{
17181   match(Set result (StrCompressedCopy src (Binary dst len)));
17182   effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17183          USE_KILL src, USE_KILL dst, USE len, KILL cr);
17184 
17185   format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17186   ins_encode %{
17187     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,

17190                            $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
17191   %}
17192   ins_pipe(pipe_slow);
17193 %}
17194 
17195 // fast byte[] to char[] inflation
17196 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
17197                         vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17198                         vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
17199 %{
17200   match(Set dummy (StrInflatedCopy src (Binary dst len)));
17201   effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
17202          TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
17203          USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
17204 
17205   format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
17206   ins_encode %{
17207     address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
17208                                         $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
17209                                         $vtmp2$$FloatRegister, $tmp$$Register);
17210     if (tpc == NULL) {
17211       ciEnv::current()->record_failure("CodeCache is full");
17212       return;
17213     }
17214   %}
17215   ins_pipe(pipe_class_memory);
17216 %}
17217 
17218 // encode char[] to byte[] in ISO_8859_1
17219 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17220                           vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17221                           vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17222                           iRegI_R0 result, rFlagsReg cr)
17223 %{
17224   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
17225   match(Set result (EncodeISOArray src (Binary dst len)));
17226   effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
17227          KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
17228 
17229   format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17230   ins_encode %{

 1220     // registers conditionally reserved.
 1221 
 1222     _ANY_REG32_mask = _ALL_REG32_mask;
 1223     _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
 1224 
 1225     _ANY_REG_mask = _ALL_REG_mask;
 1226 
 1227     _PTR_REG_mask = _ALL_REG_mask;
 1228 
 1229     _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
 1230     _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
 1231 
 1232     _NO_SPECIAL_REG_mask = _ALL_REG_mask;
 1233     _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
 1234 
 1235     _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
 1236     _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
 1237 
 1238     // r27 is not allocatable when compressed oops is on and heapbase is not
 1239     // zero; compressed klass pointers don't use r27 after JDK-8234794
 1240     if (UseCompressedOops && (CompressedOops::ptrs_base() != nullptr)) {
 1241       _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
 1242       _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
 1243       _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
 1244     }
 1245 
 1246     // r29 is not allocatable when PreserveFramePointer is on
 1247     if (PreserveFramePointer) {
 1248       _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
 1249       _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
 1250       _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
 1251     }
 1252   }
 1253 
 1254   // Optimization of volatile gets and puts
 1255   // -------------------------------------
 1256   //
 1257   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
 1258   // use to implement volatile reads and writes. For a volatile read
 1259   // we simply need
 1260   //

 1564   bool release = mbvol->trailing_store();
 1565   assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
 1566 #ifdef ASSERT
 1567   if (release) {
 1568     Node* leading = mbvol->leading_membar();
 1569     assert(leading->Opcode() == Op_MemBarRelease, "");
 1570     assert(leading->as_MemBar()->leading_store(), "");
 1571     assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
 1572   }
 1573 #endif
 1574 
 1575   return release;
 1576 }
 1577 
 1578 // predicates controlling emit of str<x>/stlr<x>
 1579 
 1580 bool needs_releasing_store(const Node *n)
 1581 {
 1582   // assert n->is_Store();
 1583   StoreNode *st = n->as_Store();
 1584   return st->trailing_membar() != nullptr;
 1585 }
 1586 
 1587 // predicate controlling translation of CAS
 1588 //
 1589 // returns true if CAS needs to use an acquiring load otherwise false
 1590 
 1591 bool needs_acquiring_load_exclusive(const Node *n)
 1592 {
 1593   assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
 1594   LoadStoreNode* ldst = n->as_LoadStore();
 1595   if (is_CAS(n->Opcode(), false)) {
 1596     assert(ldst->trailing_membar() != nullptr, "expected trailing membar");
 1597   } else {
 1598     return ldst->trailing_membar() != nullptr;
 1599   }
 1600 
 1601   // so we can just return true here
 1602   return true;
 1603 }
 1604 
 1605 #define __ _masm.
 1606 
 1607 // advance declarations for helper functions to convert register
 1608 // indices to register objects
 1609 
 1610 // the ad file has to provide implementations of certain methods
 1611 // expected by the generic code
 1612 //
 1613 // REQUIRED FUNCTIONALITY
 1614 
 1615 //=============================================================================
 1616 
 1617 // !!!!! Special hack to get all types of calls to specify the byte offset
 1618 //       from the start of the call to the point where the return address

 1627 
 1628 int MachCallDynamicJavaNode::ret_addr_offset()
 1629 {
 1630   return 16; // movz, movk, movk, bl
 1631 }
 1632 
 1633 int MachCallRuntimeNode::ret_addr_offset() {
 1634   // for generated stubs the call will be
 1635   //   bl(addr)
 1636   // or with far branches
 1637   //   bl(trampoline_stub)
 1638   // for real runtime callouts it will be six instructions
 1639   // see aarch64_enc_java_to_runtime
 1640   //   adr(rscratch2, retaddr)
 1641   //   lea(rscratch1, RuntimeAddress(addr))
 1642   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1643   //   blr(rscratch1)
 1644   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1645   if (cb) {
 1646     return 1 * NativeInstruction::instruction_size;
 1647   } else if (_entry_point == nullptr) {
 1648     // See CallLeafNoFPIndirect
 1649     return 1 * NativeInstruction::instruction_size;
 1650   } else {
 1651     return 6 * NativeInstruction::instruction_size;
 1652   }
 1653 }
 1654 
 1655 //=============================================================================
 1656 
 1657 #ifndef PRODUCT
 1658 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1659   st->print("BREAKPOINT");
 1660 }
 1661 #endif
 1662 
 1663 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1664   C2_MacroAssembler _masm(&cbuf);
 1665   __ brk(0);
 1666 }
 1667 
 1668 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1669   return MachNode::size(ra_);

 1720 
 1721   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1722 
 1723   if (C->output()->need_stack_bang(framesize))
 1724     st->print("# stack bang size=%d\n\t", framesize);
 1725 
 1726   if (VM_Version::use_rop_protection()) {
 1727     st->print("ldr  zr, [lr]\n\t");
 1728     st->print("paciaz\n\t");
 1729   }
 1730   if (framesize < ((1 << 9) + 2 * wordSize)) {
 1731     st->print("sub  sp, sp, #%d\n\t", framesize);
 1732     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
 1733     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
 1734   } else {
 1735     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
 1736     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
 1737     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
 1738     st->print("sub  sp, sp, rscratch1");
 1739   }
 1740   if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
 1741     st->print("\n\t");
 1742     st->print("ldr  rscratch1, [guard]\n\t");
 1743     st->print("dmb ishld\n\t");
 1744     st->print("ldr  rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
 1745     st->print("cmp  rscratch1, rscratch2\n\t");
 1746     st->print("b.eq skip");
 1747     st->print("\n\t");
 1748     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1749     st->print("b skip\n\t");
 1750     st->print("guard: int\n\t");
 1751     st->print("\n\t");
 1752     st->print("skip:\n\t");
 1753   }
 1754 }
 1755 #endif
 1756 
 1757 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1758   Compile* C = ra_->C;
 1759   C2_MacroAssembler _masm(&cbuf);
 1760 



 1761   // insert a nop at the start of the prolog so we can patch in a
 1762   // branch if we need to invalidate the method later
 1763   __ nop();
 1764 
 1765   __ verified_entry(C, 0);



 1766 
 1767   if (C->stub_function() == nullptr) {
 1768     __ entry_barrier();


 1769   }
 1770 
 1771   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1772     __ bind(*_verified_entry);





























 1773   }
 1774 
 1775   if (VerifyStackAtCalls) {
 1776     Unimplemented();
 1777   }
 1778 
 1779   C->output()->set_frame_complete(cbuf.insts_size());
 1780 
 1781   if (C->has_mach_constant_base_node()) {
 1782     // NOTE: We set the table base offset here because users might be
 1783     // emitted before MachConstantBaseNode.
 1784     ConstantTable& constant_table = C->output()->constant_table();
 1785     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1786   }
 1787 }
 1788 






 1789 int MachPrologNode::reloc() const
 1790 {
 1791   return 0;
 1792 }
 1793 
 1794 //=============================================================================
 1795 
 1796 #ifndef PRODUCT
 1797 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1798   Compile* C = ra_->C;
 1799   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1800 
 1801   st->print("# pop frame %d\n\t",framesize);
 1802 
 1803   if (framesize == 0) {
 1804     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1805   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1806     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1807     st->print("add  sp, sp, #%d\n\t", framesize);
 1808   } else {

 1812   }
 1813   if (VM_Version::use_rop_protection()) {
 1814     st->print("autiaz\n\t");
 1815     st->print("ldr  zr, [lr]\n\t");
 1816   }
 1817 
 1818   if (do_polling() && C->is_method_compilation()) {
 1819     st->print("# test polling word\n\t");
 1820     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1821     st->print("cmp  sp, rscratch1\n\t");
 1822     st->print("bhi #slow_path");
 1823   }
 1824 }
 1825 #endif
 1826 
 1827 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1828   Compile* C = ra_->C;
 1829   C2_MacroAssembler _masm(&cbuf);
 1830   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1831 
 1832   __ remove_frame(framesize, C->needs_stack_repair());
 1833 
 1834   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1835     __ reserved_stack_check();
 1836   }
 1837 
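        // At return, poll for a pending safepoint/handshake: load the thread's
        // polling word, compare it against sp, and branch to the out-of-line
        // C2SafepointPollStub slow path if the poll fails.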
 1838   if (do_polling() && C->is_method_compilation()) {
 1839     Label dummy_label;
 1840     Label* code_stub = &dummy_label;
 1841     if (!C->output()->in_scratch_emit_size()) {
 1842       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1843       C->output()->add_stub(stub);
 1844       code_stub = &stub->entry();
 1845     }
 1846     __ relocate(relocInfo::poll_return_type);
 1847     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1848   }
 1849 }
 1850 
 1851 int MachEpilogNode::reloc() const {
 1852   // Return number of relocatable values contained in this instruction.
 1853   return 1; // 1 for polling page.
 1854 }
 1855 
 1856 const Pipeline * MachEpilogNode::pipeline() const {
 1857   return MachNode::pipeline_class();
 1858 }
 1859 
 1860 //=============================================================================
 1861 
 1862 // Figure out which register class each belongs in: rc_int, rc_float or
 1863 // rc_stack.
 1864 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 1865 
 1866 static enum RC rc_class(OptoReg::Name reg) {
 1867 
 1868   if (reg == OptoReg::Bad) {
 1869     return rc_bad;
 1870   }

 2091       }
 2092       st->print("\t# vector spill size = %d", vsize);
 2093     } else if (ideal_reg() == Op_RegVectMask) {
 2094       assert(Matcher::supports_scalable_vector(), "bad register type for spill");
 2095       int vsize = Matcher::scalable_predicate_reg_slots() * 32;
 2096       st->print("\t# predicate spill size = %d", vsize);
 2097     } else {
 2098       st->print("\t# spill size = %d", is64 ? 64 : 32);
 2099     }
 2100   }
 2101 
 2102   return 0;
 2103 
 2104 }
 2105 
 2106 #ifndef PRODUCT
 2107 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 2108   if (!ra_)
 2109     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
 2110   else
 2111     implementation(nullptr, ra_, false, st);
 2112 }
 2113 #endif
 2114 
 2115 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 2116   implementation(&cbuf, ra_, false, nullptr);
 2117 }
 2118 
 2119 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2120   return MachNode::size(ra_);
 2121 }
 2122 
 2123 //=============================================================================
 2124 
 2125 #ifndef PRODUCT
 2126 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 2127   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2128   int reg = ra_->get_reg_first(this);
 2129   st->print("add %s, sp, #%d\t# box lock",
 2130             Matcher::regName[reg], offset);
 2131 }
 2132 #endif
 2133 
 2134 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 2135   C2_MacroAssembler _masm(&cbuf);
 2136 
 2137   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2138   int reg    = ra_->get_encode(this);
 2139 
 2140   // This add will handle any 24-bit signed offset. 24 bits allows an
 2141   // 8 megabyte stack frame.
 2142   __ add(as_Register(reg), sp, offset);
 2143 }
 2144 
 2145 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2146   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2147   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2148 
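        // A small offset fits a single add immediate; a larger (up to 24-bit)
        // offset presumably needs a second instruction to materialize it, hence
        // the two instruction slots.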
 2149   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2150     return NativeInstruction::instruction_size;
 2151   } else {
 2152     return 2 * NativeInstruction::instruction_size;
 2153   }
 2154 }
 2155 
 2156 //=============================================================================
 2157 #ifndef PRODUCT
 2158 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2159 {
 2160   st->print_cr("# MachVEPNode");
 2161   if (!_verified) {
 2162     st->print_cr("\t load_class");
 2163   } else {
 2164     st->print_cr("\t unpack_inline_arg");
 2165   }
 2166 }
 2167 #endif
 2168 
 2169 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2170 {
 2171   C2_MacroAssembler _masm(&cbuf);
 2172 
 2173   if (!_verified) {
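            // Unverified entry point: check the receiver klass against the
            // expected klass (inline cache) and tail-call the IC miss stub on a
            // mismatch.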
 2174     Label skip;
 2175     __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2176     __ br(Assembler::EQ, skip);
 2177       __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2178     __ bind(skip);
 2179 
 2180   } else {
 2181     // insert a nop at the start of the prolog so we can patch in a
 2182     // branch if we need to invalidate the method later
 2183     __ nop();
 2184 
 2185     // TODO 8284443 Avoid creation of temporary frame
 2186     if (ra_->C->stub_function() == nullptr) {
 2187       __ verified_entry(ra_->C, 0);
 2188       __ entry_barrier();
 2189       int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
 2190       __ remove_frame(framesize, false);
 2191     }
 2192     // Unpack inline type args passed as oop and then jump to
 2193     // the verified entry point (skipping the unverified entry).
 2194     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2195     // Emit code for verified entry and save increment for stack repair on return
 2196     __ verified_entry(ra_->C, sp_inc);
 2197     if (Compile::current()->output()->in_scratch_emit_size()) {
 2198       Label dummy_verified_entry;
 2199       __ b(dummy_verified_entry);
 2200     } else {
 2201       __ b(*_verified_entry);
 2202     }
 2203   }
 2204 }
 2205 
 2206 //=============================================================================
 2207 #ifndef PRODUCT
 2208 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2209 {
 2210   st->print_cr("# MachUEPNode");
 2211   if (UseCompressedClassPointers) {
 2212     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2213     if (CompressedKlassPointers::shift() != 0) {
 2214       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2215     }
 2216   } else {
 2217     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2218   }
 2219   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2220   st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
 2221 }
 2222 #endif
 2223 
 2224 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2225 {
 2226   // This is the unverified entry point.
 2227   C2_MacroAssembler _masm(&cbuf);
 2228   Label skip;
 2229 
 2230   // The UseCompressedClassPointers logic is inside cmp_klass
 2231   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2232 
 2233   // TODO
 2234   // can we avoid this skip and still use a reloc?
 2235   __ br(Assembler::EQ, skip);
 2236   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2237   __ bind(skip);
 2238 }
 2239 
 2240 // REQUIRED EMIT CODE
 2241 
 2242 //=============================================================================
 2243 
 2244 // Emit exception handler code.
 2245 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2246 {
 2247   // mov rscratch1 #exception_blob_entry_point
 2248   // br rscratch1
 2249   // Note that the code buffer's insts_mark is always relative to insts.
 2250   // That's why we must use the macroassembler to generate a handler.
 2251   C2_MacroAssembler _masm(&cbuf);
 2252   address base = __ start_a_stub(size_exception_handler());
 2253   if (base == nullptr) {
 2254     ciEnv::current()->record_failure("CodeCache is full");
 2255     return 0;  // CodeBuffer::expand failed
 2256   }
 2257   int offset = __ offset();
 2258   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2259   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2260   __ end_a_stub();
 2261   return offset;
 2262 }
 2263 
 2264 // Emit deopt handler code.
 2265 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
 2266 {
 2267   // Note that the code buffer's insts_mark is always relative to insts.
 2268   // That's why we must use the macroassembler to generate a handler.
 2269   C2_MacroAssembler _masm(&cbuf);
 2270   address base = __ start_a_stub(size_deopt_handler());
 2271   if (base == nullptr) {
 2272     ciEnv::current()->record_failure("CodeCache is full");
 2273     return 0;  // CodeBuffer::expand failed
 2274   }
 2275   int offset = __ offset();
 2276 
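        // Point lr at this handler before jumping to the deopt blob, so the
        // unpack entry sees a return address inside this nmethod's deopt handler.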
 2277   __ adr(lr, __ pc());
 2278   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2279 
 2280   assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
 2281   __ end_a_stub();
 2282   return offset;
 2283 }
 2284 
 2285 // REQUIRED MATCHER CODE
 2286 
 2287 //=============================================================================
 2288 
 2289 bool Matcher::match_rule_supported(int opcode) {
 2290   if (!has_match_rule(opcode))
 2291     return false;

 2394   }
 2395   switch(len) {
 2396     // For 16-bit/32-bit mask vector, reuse VecD.
 2397     case  2:
 2398     case  4:
 2399     case  8: return Op_VecD;
 2400     case 16: return Op_VecX;
 2401   }
 2402   ShouldNotReachHere();
 2403   return 0;
 2404 }
 2405 
 2406 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 2407   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 2408   switch (ideal_reg) {
 2409     case Op_VecA: return new vecAOper();
 2410     case Op_VecD: return new vecDOper();
 2411     case Op_VecX: return new vecXOper();
 2412   }
 2413   ShouldNotReachHere();
 2414   return nullptr;
 2415 }
 2416 
 2417 bool Matcher::is_reg2reg_move(MachNode* m) {
 2418   return false;
 2419 }
 2420 
 2421 bool Matcher::is_generic_vector(MachOper* opnd)  {
 2422   return opnd->opcode() == VREG;
 2423 }
 2424 
 2425 // Return whether or not this register is ever used as an argument.
 2426 // This function is used on startup to build the trampoline stubs in
 2427 // generateOptoStub.  Registers not mentioned will be killed by the VM
 2428 // call in the trampoline, and arguments in those registers will not be
 2429 // available to the callee.
 2430 bool Matcher::can_be_java_arg(int reg)
 2431 {
 2432   return
 2433     reg ==  R0_num || reg == R0_H_num ||
 2434     reg ==  R1_num || reg == R1_H_num ||

 2567       result = Assembler::VS; break;
 2568     case BoolTest::no_overflow:
 2569       result = Assembler::VC; break;
 2570     default:
 2571       ShouldNotReachHere();
 2572       return Assembler::Condition(-1);
 2573   }
 2574 
 2575   // Check conversion
 2576   if (cond & BoolTest::unsigned_compare) {
 2577     assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == result, "Invalid conversion");
 2578   } else {
 2579     assert(cmpOpOper(cond).ccode() == result, "Invalid conversion");
 2580   }
 2581 
 2582   return result;
 2583 }
 2584 
 2585 // Binary src (Replicate con)
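      // Returns true when the replicated scalar constant can be encoded as an
      // SVE logical or add/sub immediate, so the matcher can fold the
      // (Replicate con) input into the vector instruction instead of
      // materializing it in a vector register.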
 2586 bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
 2587   if (n == nullptr || m == nullptr) {
 2588     return false;
 2589   }
 2590 
 2591   if (UseSVE == 0 || m->Opcode() != Op_Replicate) {
 2592     return false;
 2593   }
 2594 
 2595   Node* imm_node = m->in(1);
 2596   if (!imm_node->is_Con()) {
 2597     return false;
 2598   }
 2599 
 2600   const Type* t = imm_node->bottom_type();
 2601   if (!(t->isa_int() || t->isa_long())) {
 2602     return false;
 2603   }
 2604 
 2605   switch (n->Opcode()) {
 2606   case Op_AndV:
 2607   case Op_OrV:
 2608   case Op_XorV: {
 2609     Assembler::SIMD_RegVariant T = Assembler::elemType_to_regVariant(Matcher::vector_element_basic_type(n));
 2610     uint64_t value = t->isa_long() ? (uint64_t)imm_node->get_long() : (uint64_t)imm_node->get_int();
 2611     return Assembler::operand_valid_for_sve_logical_immediate(Assembler::regVariant_to_elemBits(T), value);
 2612   }
 2613   case Op_AddVB:
 2614     return (imm_node->get_int() <= 255 && imm_node->get_int() >= -255);
 2615   case Op_AddVS:
 2616   case Op_AddVI:
 2617     return Assembler::operand_valid_for_sve_add_sub_immediate((int64_t)imm_node->get_int());
 2618   case Op_AddVL:
 2619     return Assembler::operand_valid_for_sve_add_sub_immediate(imm_node->get_long());
 2620   default:
 2621     return false;
 2622   }
 2623 }
 2624 
 2625 // (XorV src (Replicate m1))
 2626 // (XorVMask src (MaskAll m1))
 2627 bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
 2628   if (n != nullptr && m != nullptr) {
 2629     return (n->Opcode() == Op_XorV || n->Opcode() == Op_XorVMask) &&
 2630            VectorNode::is_all_ones_vector(m);
 2631   }
 2632   return false;
 2633 }
 2634 
 2635 // Should the matcher clone input 'm' of node 'n'?
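      // Cloning pushes 'm' back on the matcher stack so it is matched as part
      // of each use (e.g. a shift count or an SVE-encodable immediate feeding a
      // vector op) rather than being matched once into its own register.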
 2636 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 2637   if (is_vshift_con_pattern(n, m) ||
 2638       is_vector_bitwise_not_pattern(n, m) ||
 2639       is_valid_sve_arith_imm_pattern(n, m)) {
 2640     mstack.push(m, Visit);
 2641     return true;
 2642   }
 2643   return false;
 2644 }
 2645 
 2646 // Should the Matcher clone shifts on addressing modes, expecting them
 2647 // to be subsumed into complex addressing expressions or compute them
 2648 // into registers?

 3414     } else {
 3415       __ movw(dst_reg, con);
 3416     }
 3417   %}
 3418 
 3419   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
 3420     C2_MacroAssembler _masm(&cbuf);
 3421     Register dst_reg = as_Register($dst$$reg);
 3422     uint64_t con = (uint64_t)$src$$constant;
 3423     if (con == 0) {
 3424       __ mov(dst_reg, zr);
 3425     } else {
 3426       __ mov(dst_reg, con);
 3427     }
 3428   %}
 3429 
 3430   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
 3431     C2_MacroAssembler _masm(&cbuf);
 3432     Register dst_reg = as_Register($dst$$reg);
 3433     address con = (address)$src$$constant;
 3434     if (con == nullptr || con == (address)1) {
 3435       ShouldNotReachHere();
 3436     } else {
 3437       relocInfo::relocType rtype = $src->constant_reloc();
 3438       if (rtype == relocInfo::oop_type) {
 3439         __ movoop(dst_reg, (jobject)con);
 3440       } else if (rtype == relocInfo::metadata_type) {
 3441         __ mov_metadata(dst_reg, (Metadata*)con);
 3442       } else {
 3443         assert(rtype == relocInfo::none, "unexpected reloc type");
 3444         if (! __ is_valid_AArch64_address(con) ||
 3445             con < (address)(uintptr_t)os::vm_page_size()) {
 3446           __ mov(dst_reg, con);
 3447         } else {
 3448           uint64_t offset;
 3449           __ adrp(dst_reg, con, offset);
 3450           __ add(dst_reg, dst_reg, offset);
 3451         }
 3452       }
 3453     }
 3454   %}

 3457     C2_MacroAssembler _masm(&cbuf);
 3458     Register dst_reg = as_Register($dst$$reg);
 3459     __ mov(dst_reg, zr);
 3460   %}
 3461 
 3462   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
 3463     C2_MacroAssembler _masm(&cbuf);
 3464     Register dst_reg = as_Register($dst$$reg);
 3465     __ mov(dst_reg, (uint64_t)1);
 3466   %}
 3467 
 3468   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
 3469     C2_MacroAssembler _masm(&cbuf);
 3470     __ load_byte_map_base($dst$$Register);
 3471   %}
 3472 
 3473   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
 3474     C2_MacroAssembler _masm(&cbuf);
 3475     Register dst_reg = as_Register($dst$$reg);
 3476     address con = (address)$src$$constant;
 3477     if (con == nullptr) {
 3478       ShouldNotReachHere();
 3479     } else {
 3480       relocInfo::relocType rtype = $src->constant_reloc();
 3481       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
 3482       __ set_narrow_oop(dst_reg, (jobject)con);
 3483     }
 3484   %}
 3485 
 3486   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
 3487     C2_MacroAssembler _masm(&cbuf);
 3488     Register dst_reg = as_Register($dst$$reg);
 3489     __ mov(dst_reg, zr);
 3490   %}
 3491 
 3492   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
 3493     C2_MacroAssembler _masm(&cbuf);
 3494     Register dst_reg = as_Register($dst$$reg);
 3495     address con = (address)$src$$constant;
 3496     if (con == nullptr) {
 3497       ShouldNotReachHere();
 3498     } else {
 3499       relocInfo::relocType rtype = $src->constant_reloc();
 3500       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
 3501       __ set_narrow_klass(dst_reg, (Klass *)con);
 3502     }
 3503   %}
 3504 
 3505   // arithmetic encodings
 3506 
 3507   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
 3508     C2_MacroAssembler _masm(&cbuf);
 3509     Register dst_reg = as_Register($dst$$reg);
 3510     Register src_reg = as_Register($src1$$reg);
 3511     int32_t con = (int32_t)$src2$$constant;
 3512     // add has primary == 0, subtract has primary == 1
 3513     if ($primary) { con = -con; }
 3514     if (con < 0) {
 3515       __ subw(dst_reg, src_reg, -con);
 3516     } else {

 3659     Label *L = $lbl$$label;
 3660     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
 3661   %}
 3662 
 3663   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
 3664     C2_MacroAssembler _masm(&cbuf);
 3665     Label *L = $lbl$$label;
 3666     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
 3667   %}
 3668 
 3669   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
 3670   %{
 3671      Register sub_reg = as_Register($sub$$reg);
 3672      Register super_reg = as_Register($super$$reg);
 3673      Register temp_reg = as_Register($temp$$reg);
 3674      Register result_reg = as_Register($result$$reg);
 3675 
 3676      Label miss;
 3677      C2_MacroAssembler _masm(&cbuf);
 3678      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
 3679                                      nullptr, &miss,
 3680                                      /*set_cond_codes:*/ true);
 3681      if ($primary) {
 3682        __ mov(result_reg, zr);
 3683      }
 3684      __ bind(miss);
 3685   %}
 3686 
 3687   enc_class aarch64_enc_java_static_call(method meth) %{
 3688     C2_MacroAssembler _masm(&cbuf);
 3689 
 3690     address addr = (address)$meth$$method;
 3691     address call;
 3692     if (!_method) {
 3693       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3694       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type));
 3695       if (call == nullptr) {
 3696         ciEnv::current()->record_failure("CodeCache is full");
 3697         return;
 3698       }
 3699     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 3700       // The NOP here is purely to ensure that eliding a call to
 3701       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 3702       __ nop();
 3703       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 3704     } else {
 3705       int method_index = resolved_method_index(cbuf);
 3706       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3707                                                   : static_call_Relocation::spec(method_index);
 3708       call = __ trampoline_call(Address(addr, rspec));
 3709       if (call == nullptr) {
 3710         ciEnv::current()->record_failure("CodeCache is full");
 3711         return;
 3712       }
 3713       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 3714         // Calls of the same statically bound method can share
 3715         // a stub to the interpreter.
 3716         cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
 3717       } else {
 3718         // Emit stub for static call
 3719         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call);
 3720         if (stub == nullptr) {
 3721           ciEnv::current()->record_failure("CodeCache is full");
 3722           return;
 3723         }
 3724       }
 3725     }
 3726 
 3727     __ post_call_nop();
 3728 
 3729     // Only non-uncommon_trap calls need to reinitialize ptrue.
 3730     if (Compile::current()->max_vector_size() > 0 && uncommon_trap_request() == 0) {
 3731       __ reinitialize_ptrue();
 3732     }
 3733   %}
 3734 
 3735   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3736     C2_MacroAssembler _masm(&cbuf);
 3737     int method_index = resolved_method_index(cbuf);
 3738     address call = __ ic_call((address)$meth$$method, method_index);
 3739     if (call == nullptr) {
 3740       ciEnv::current()->record_failure("CodeCache is full");
 3741       return;
 3742     }
 3743     __ post_call_nop();
 3744     if (Compile::current()->max_vector_size() > 0) {
 3745       __ reinitialize_ptrue();
 3746     }
 3747   %}
 3748 
 3749   enc_class aarch64_enc_call_epilog() %{
 3750     C2_MacroAssembler _masm(&cbuf);
 3751     if (VerifyStackAtCalls) {
 3752       // Check that stack depth is unchanged: find magic cookie on stack
 3753       __ call_Unimplemented();
 3754     }
 3755     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
 3756       // The last return value is not set by the callee but used to pass IsInit information to compiled code.
 3757       // Search for the corresponding projection, get the register and emit code that initializes it.
 3758       uint con = (tf()->range_cc()->cnt() - 1);
 3759       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 3760         ProjNode* proj = fast_out(i)->as_Proj();
 3761         if (proj->_con == con) {
 3762           // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
 3763           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 3764           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 3765           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 3766           __ cmp(r0, zr);
 3767           __ cset(toReg, Assembler::NE);
 3768           if (reg->is_stack()) {
 3769             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 3770             __ str(toReg, Address(sp, st_off));
 3771           }
 3772           break;
 3773         }
 3774       }
 3775       if (return_value_is_used()) {
 3776         // An inline type is returned as fields in multiple registers.
 3777         // R0 either contains an oop if the inline type is buffered or a pointer
 3778         // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
 3779         // if the lowest bit is set to allow C2 to use the oop after null checking.
 3780         // r0 &= (r0 & 1) - 1
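              // e.g. buffered oop (low bit 0):      (0 - 1) = all ones, r0 is unchanged
              //      InlineKlass* | 1 (low bit 1):  (1 - 1) = 0, r0 becomes null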
 3781         __ andr(rscratch1, r0, 0x1);
 3782         __ sub(rscratch1, rscratch1, 0x1);
 3783         __ andr(r0, r0, rscratch1);
 3784       }
 3785     }
 3786   %}
 3787 
 3788   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3789     C2_MacroAssembler _masm(&cbuf);
 3790 
 3791     // Some calls to generated routines (arraycopy code) are scheduled
 3792     // by C2 as runtime calls. If so, we can call them using a br (they
 3793     // will be in a reachable segment); otherwise we have to use a blr,
 3794     // which loads the absolute address into a register.
 3795     address entry = (address)$meth$$method;
 3796     CodeBlob *cb = CodeCache::find_blob(entry);
 3797     if (cb) {
 3798       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3799       if (call == nullptr) {
 3800         ciEnv::current()->record_failure("CodeCache is full");
 3801         return;
 3802       }
 3803       __ post_call_nop();
 3804     } else {
 3805       Label retaddr;
 3806       __ adr(rscratch2, retaddr);
 3807       __ lea(rscratch1, RuntimeAddress(entry));
 3808       // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
 3809       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
 3810       __ blr(rscratch1);
 3811       __ bind(retaddr);
 3812       __ post_call_nop();
 3813       __ add(sp, sp, 2 * wordSize);
 3814     }
 3815     if (Compile::current()->max_vector_size() > 0) {
 3816       __ reinitialize_ptrue();
 3817     }
 3818   %}
 3819 

 4678 operand immL_32bits()
 4679 %{
 4680   predicate(n->get_long() == 0xFFFFFFFFL);
 4681   match(ConL);
 4682   op_cost(0);
 4683   format %{ %}
 4684   interface(CONST_INTER);
 4685 %}
 4686 
 4687 // Pointer operands
 4688 // Pointer Immediate
 4689 operand immP()
 4690 %{
 4691   match(ConP);
 4692 
 4693   op_cost(0);
 4694   format %{ %}
 4695   interface(CONST_INTER);
 4696 %}
 4697 
 4698 // nullptr Pointer Immediate
 4699 operand immP0()
 4700 %{
 4701   predicate(n->get_ptr() == 0);
 4702   match(ConP);
 4703 
 4704   op_cost(0);
 4705   format %{ %}
 4706   interface(CONST_INTER);
 4707 %}
 4708 
 4709 // Pointer Immediate One
 4710 // this is used in object initialization (initial object header)
 4711 operand immP_1()
 4712 %{
 4713   predicate(n->get_ptr() == 1);
 4714   match(ConP);
 4715 
 4716   op_cost(0);
 4717   format %{ %}
 4718   interface(CONST_INTER);

 4810 operand immFPacked()
 4811 %{
 4812   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
 4813   match(ConF);
 4814   op_cost(0);
 4815   format %{ %}
 4816   interface(CONST_INTER);
 4817 %}
 4818 
 4819 // Narrow pointer operands
 4820 // Narrow Pointer Immediate
 4821 operand immN()
 4822 %{
 4823   match(ConN);
 4824 
 4825   op_cost(0);
 4826   format %{ %}
 4827   interface(CONST_INTER);
 4828 %}
 4829 
 4830 // Narrow nullptr Pointer Immediate
 4831 operand immN0()
 4832 %{
 4833   predicate(n->get_narrowcon() == 0);
 4834   match(ConN);
 4835 
 4836   op_cost(0);
 4837   format %{ %}
 4838   interface(CONST_INTER);
 4839 %}
 4840 
 4841 operand immNKlass()
 4842 %{
 4843   match(ConNKlass);
 4844 
 4845   op_cost(0);
 4846   format %{ %}
 4847   interface(CONST_INTER);
 4848 %}
 4849 
 4850 // Integer 32 bit Register Operands

 7219 instruct loadConL(iRegLNoSp dst, immL src)
 7220 %{
 7221   match(Set dst src);
 7222 
 7223   ins_cost(INSN_COST);
 7224   format %{ "mov $dst, $src\t# long" %}
 7225 
 7226   ins_encode( aarch64_enc_mov_imm(dst, src) );
 7227 
 7228   ins_pipe(ialu_imm);
 7229 %}
 7230 
 7231 // Load Pointer Constant
 7232 
 7233 instruct loadConP(iRegPNoSp dst, immP con)
 7234 %{
 7235   match(Set dst con);
 7236 
 7237   ins_cost(INSN_COST * 4);
 7238   format %{
 7239     "mov  $dst, $con\t# ptr"
 7240   %}
 7241 
 7242   ins_encode(aarch64_enc_mov_p(dst, con));
 7243 
 7244   ins_pipe(ialu_imm);
 7245 %}
 7246 
 7247 // Load Null Pointer Constant
 7248 
 7249 instruct loadConP0(iRegPNoSp dst, immP0 con)
 7250 %{
 7251   match(Set dst con);
 7252 
 7253   ins_cost(INSN_COST);
 7254   format %{ "mov  $dst, $con\t# nullptr ptr" %}
 7255 
 7256   ins_encode(aarch64_enc_mov_p0(dst, con));
 7257 
 7258   ins_pipe(ialu_imm);
 7259 %}
 7260 
 7261 // Load Pointer Constant One
 7262 
 7263 instruct loadConP1(iRegPNoSp dst, immP_1 con)
 7264 %{
 7265   match(Set dst con);
 7266 
 7267   ins_cost(INSN_COST);
 7268   format %{ "mov  $dst, $con\t# ptr 1" %}
 7269 
 7270   ins_encode(aarch64_enc_mov_p1(dst, con));
 7271 
 7272   ins_pipe(ialu_imm);
 7273 %}
 7274 
 7275 // Load Byte Map Base Constant
 7276 
 7277 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
 7278 %{
 7279   match(Set dst con);
 7280 
 7281   ins_cost(INSN_COST);
 7282   format %{ "adr  $dst, $con\t# Byte Map Base" %}
 7283 
 7284   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
 7285 
 7286   ins_pipe(ialu_imm);
 7287 %}
 7288 

 7290 
 7291 instruct loadConN(iRegNNoSp dst, immN con)
 7292 %{
 7293   match(Set dst con);
 7294 
 7295   ins_cost(INSN_COST * 4);
 7296   format %{ "mov  $dst, $con\t# compressed ptr" %}
 7297 
 7298   ins_encode(aarch64_enc_mov_n(dst, con));
 7299 
 7300   ins_pipe(ialu_imm);
 7301 %}
 7302 
 7303 // Load Narrow Null Pointer Constant
 7304 
 7305 instruct loadConN0(iRegNNoSp dst, immN0 con)
 7306 %{
 7307   match(Set dst con);
 7308 
 7309   ins_cost(INSN_COST);
 7310   format %{ "mov  $dst, $con\t# compressed nullptr ptr" %}
 7311 
 7312   ins_encode(aarch64_enc_mov_n0(dst, con));
 7313 
 7314   ins_pipe(ialu_imm);
 7315 %}
 7316 
 7317 // Load Narrow Klass Constant
 7318 
 7319 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
 7320 %{
 7321   match(Set dst con);
 7322 
 7323   ins_cost(INSN_COST);
 7324   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
 7325 
 7326   ins_encode(aarch64_enc_mov_nk(dst, con));
 7327 
 7328   ins_pipe(ialu_imm);
 7329 %}
 7330 

 8422 %}
 8423 
 8424 // ============================================================================
 8425 // Cast/Convert Instructions
 8426 
 8427 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8428   match(Set dst (CastX2P src));
 8429 
 8430   ins_cost(INSN_COST);
 8431   format %{ "mov $dst, $src\t# long -> ptr" %}
 8432 
 8433   ins_encode %{
 8434     if ($dst$$reg != $src$$reg) {
 8435       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8436     }
 8437   %}
 8438 
 8439   ins_pipe(ialu_reg);
 8440 %}
 8441 
 8442 instruct castN2X(iRegLNoSp dst, iRegN src) %{
 8443   match(Set dst (CastP2X src));
 8444 
 8445   ins_cost(INSN_COST);
 8446   format %{ "mov $dst, $src\t# ptr -> long" %}
 8447 
 8448   ins_encode %{
 8449     if ($dst$$reg != $src$$reg) {
 8450       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8451     }
 8452   %}
 8453 
 8454   ins_pipe(ialu_reg);
 8455 %}
 8456 
 8457 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8458   match(Set dst (CastP2X src));
 8459 
 8460   ins_cost(INSN_COST);
 8461   format %{ "mov $dst, $src\t# ptr -> long" %}
 8462 
 8463   ins_encode %{
 8464     if ($dst$$reg != $src$$reg) {
 8465       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8466     }
 8467   %}
 8468 
 8469   ins_pipe(ialu_reg);
 8470 %}
 8471 
 8472 // Convert oop into int for vectors alignment masking
 8473 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8474   match(Set dst (ConvL2I (CastP2X src)));
 8475 
 8476   ins_cost(INSN_COST);

15258 
15259   match(Set dst (MoveL2D src));
15260 
15261   effect(DEF dst, USE src);
15262 
15263   ins_cost(INSN_COST);
15264 
15265   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15266 
15267   ins_encode %{
15268     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15269   %}
15270 
15271   ins_pipe(fp_l2d);
15272 
15273 %}
15274 
15275 // ============================================================================
15276 // clearing of an array
15277 
15278 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15279 %{
15280   match(Set dummy (ClearArray (Binary cnt base) zero));
15281   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15282 
15283   ins_cost(4 * INSN_COST);
15284   format %{ "ClearArray $cnt, $base" %}
15285 
15286   ins_encode %{
15287     address tpc = __ zero_words($base$$Register, $cnt$$Register);
15288     if (tpc == nullptr) {
15289       ciEnv::current()->record_failure("CodeCache is full");
15290       return;
15291     }
15292   %}
15293 
15294   ins_pipe(pipe_class_memory);
15295 %}
15296 
15297 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15298 %{
15299   predicate(((ClearArrayNode*)n)->word_copy_only());
15300   match(Set dummy (ClearArray (Binary cnt base) val));
15301   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15302 
15303   ins_cost(4 * INSN_COST);
15304   format %{ "ClearArray $cnt, $base, $val" %}
15305 
15306   ins_encode %{
15307     __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15308   %}
15309 
15310   ins_pipe(pipe_class_memory);
15311 %}
15312 
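       // Constant-length variant: only matched when the constant word count is
       // below BlockZeroingLowLimit (a byte limit, converted here to words),
       // presumably so the clear is cheap to expand inline; longer clears use the
       // register forms above.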
15313 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15314 %{
15315   predicate((uint64_t)n->in(2)->get_long()
15316             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15317             && !((ClearArrayNode*)n)->word_copy_only());
15318   match(Set dummy (ClearArray cnt base));
15319   effect(TEMP temp, USE_KILL base, KILL cr);
15320 
15321   ins_cost(4 * INSN_COST);
15322   format %{ "ClearArray $cnt, $base" %}
15323 
15324   ins_encode %{
15325     address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15326     if (tpc == nullptr) {
15327       ciEnv::current()->record_failure("CodeCache is full");
15328       return;
15329     }
15330   %}
15331 
15332   ins_pipe(pipe_class_memory);
15333 %}
15334 
15335 // ============================================================================
15336 // Overflow Math Instructions
15337 
15338 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15339 %{
15340   match(Set cr (OverflowAddI op1 op2));
15341 
15342   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
15343   ins_cost(INSN_COST);
15344   ins_encode %{
15345     __ cmnw($op1$$Register, $op2$$Register);
15346   %}

16597 
16598 // Call Runtime Instruction
16599 
16600 instruct CallLeafDirect(method meth)
16601 %{
16602   match(CallLeaf);
16603 
16604   effect(USE meth);
16605 
16606   ins_cost(CALL_COST);
16607 
16608   format %{ "CALL, runtime leaf $meth" %}
16609 
16610   ins_encode( aarch64_enc_java_to_runtime(meth) );
16611 
16612   ins_pipe(pipe_class_call);
16613 %}
16614 
16615 // Call Runtime Instruction
16616 
16617 // entry point is null, target holds the address to call
16618 instruct CallLeafNoFPIndirect(iRegP target)
16619 %{
16620   predicate(n->as_Call()->entry_point() == nullptr);
16621 
16622   match(CallLeafNoFP target);
16623 
16624   ins_cost(CALL_COST);
16625 
16626   format %{ "CALL, runtime leaf nofp indirect $target" %}
16627 
16628   ins_encode %{
16629     __ blr($target$$Register);
16630   %}
16631 
16632   ins_pipe(pipe_class_call);
16633 %}
16634 
16635 instruct CallLeafNoFPDirect(method meth)
16636 %{
16637   predicate(n->as_Call()->entry_point() != nullptr);
16638 
16639   match(CallLeafNoFP);
16640 
16641   effect(USE meth);
16642 
16643   ins_cost(CALL_COST);
16644 
16645   format %{ "CALL, runtime leaf nofp $meth" %}
16646 
16647   ins_encode( aarch64_enc_java_to_runtime(meth) );
16648 
16649   ins_pipe(pipe_class_call);
16650 %}
16651 
16652 // Tail Call; Jump from runtime stub to Java code.
16653 // Also known as an 'interprocedural jump'.
16654 // Target of jump will eventually return to caller.
16655 // TailJump below removes the return address.
16656 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16657 %{
16658   match(TailCall jump_target method_ptr);

17191   ins_pipe(pipe_class_memory);
17192 %}
17193 
17194 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17195                        iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17196                        vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17197                        vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17198                        iRegP_R10 tmp, rFlagsReg cr)
17199 %{
17200   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
17201   match(Set result (AryEq ary1 ary2));
17202   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17203          TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17204          TEMP vtmp6, TEMP vtmp7, KILL cr);
17205 
17206   format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17207   ins_encode %{
17208     address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17209                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17210                                    $result$$Register, $tmp$$Register, 1);
17211     if (tpc == nullptr) {
17212       ciEnv::current()->record_failure("CodeCache is full");
17213       return;
17214     }
17215   %}
17216   ins_pipe(pipe_class_memory);
17217 %}
17218 
17219 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17220                        iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17221                        vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17222                        vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17223                        iRegP_R10 tmp, rFlagsReg cr)
17224 %{
17225   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
17226   match(Set result (AryEq ary1 ary2));
17227   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17228          TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17229          TEMP vtmp6, TEMP vtmp7, KILL cr);
17230 
17231   format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17232   ins_encode %{
17233     address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17234                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17235                                    $result$$Register, $tmp$$Register, 2);
17236     if (tpc == nullptr) {
17237       ciEnv::current()->record_failure("CodeCache is full");
17238       return;
17239     }
17240   %}
17241   ins_pipe(pipe_class_memory);
17242 %}
17243 
17244 instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
17245 %{
17246   match(Set result (CountPositives ary1 len));
17247   effect(USE_KILL ary1, USE_KILL len, KILL cr);
17248   format %{ "count positives byte[] $ary1,$len -> $result" %}
17249   ins_encode %{
17250     address tpc = __ count_positives($ary1$$Register, $len$$Register, $result$$Register);
17251     if (tpc == nullptr) {
17252       ciEnv::current()->record_failure("CodeCache is full");
17253       return;
17254     }
17255   %}
17256   ins_pipe( pipe_slow );
17257 %}
17258 
17259 // fast char[] to byte[] compression
17260 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17261                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17262                          vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17263                          iRegI_R0 result, rFlagsReg cr)
17264 %{
17265   match(Set result (StrCompressedCopy src (Binary dst len)));
17266   effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17267          USE_KILL src, USE_KILL dst, USE len, KILL cr);
17268 
17269   format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17270   ins_encode %{
17271     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,

17274                            $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
17275   %}
17276   ins_pipe(pipe_slow);
17277 %}
17278 
17279 // fast byte[] to char[] inflation
17280 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
17281                         vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17282                         vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
17283 %{
17284   match(Set dummy (StrInflatedCopy src (Binary dst len)));
17285   effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
17286          TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
17287          USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
17288 
17289   format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
17290   ins_encode %{
17291     address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
17292                                         $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
17293                                         $vtmp2$$FloatRegister, $tmp$$Register);
17294     if (tpc == nullptr) {
17295       ciEnv::current()->record_failure("CodeCache is full");
17296       return;
17297     }
17298   %}
17299   ins_pipe(pipe_class_memory);
17300 %}
17301 
17302 // encode char[] to byte[] in ISO_8859_1
17303 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17304                           vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17305                           vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17306                           iRegI_R0 result, rFlagsReg cr)
17307 %{
17308   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
17309   match(Set result (EncodeISOArray src (Binary dst len)));
17310   effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
17311          KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
17312 
17313   format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17314   ins_encode %{