1220 // registers conditionally reserved.
1221
1222 _ANY_REG32_mask = _ALL_REG32_mask;
1223 _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
1224
1225 _ANY_REG_mask = _ALL_REG_mask;
1226
1227 _PTR_REG_mask = _ALL_REG_mask;
1228
1229 _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
1230 _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
1231
1232 _NO_SPECIAL_REG_mask = _ALL_REG_mask;
1233 _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1234
1235 _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
1236 _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1237
1238 // r27 is not allocatable when compressed oops is on and the heap base is
1239 // not zero; compressed klass pointers don't use r27 after JDK-8234794
1240 if (UseCompressedOops && (CompressedOops::ptrs_base() != NULL)) {
1241 _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1242 _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1243 _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1244 }
1245
1246 // r29 is not allocatable when PreserveFramePointer is on
1247 if (PreserveFramePointer) {
1248 _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1249 _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1250 _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1251 }
1252 }
1253
1254 // Optimization of volatile gets and puts
1255 // -------------------------------------
1256 //
1257 // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1258 // use to implement volatile reads and writes. For a volatile read
1259 // we simply need
1260 //
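//   ldar<x>
//
// and for a volatile write we simply need
//
//   stlr<x>
//
// (a sketch of the elided discussion: C2 brackets such accesses with
// MemBarAcquire/MemBarRelease/MemBarVolatile nodes, and the predicates
// below recognize those graph shapes so the matcher can substitute the
// one-instruction forms and elide the explicit barriers)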
1564 bool release = mbvol->trailing_store();
1565 assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
1566 #ifdef ASSERT
1567 if (release) {
1568 Node* leading = mbvol->leading_membar();
1569 assert(leading->Opcode() == Op_MemBarRelease, "");
1570 assert(leading->as_MemBar()->leading_store(), "");
1571 assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
1572 }
1573 #endif
1574
1575 return release;
1576 }
1577
1578 // predicates controlling emit of str<x>/stlr<x>
1579
1580 bool needs_releasing_store(const Node *n)
1581 {
1582 // assert n->is_Store();
1583 StoreNode *st = n->as_Store();
1584 return st->trailing_membar() != NULL;
1585 }
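// A simplified sketch of how this predicate is typically used by the
// store rules in this file (rule and operand names abbreviated here;
// see the actual store instructs for the full definitions):
//
//   instruct storeI(iRegIorL2I src, memory4 mem) %{
//     predicate(!needs_releasing_store(n));   // plain str<x>
//     match(Set mem (StoreI mem src));
//     ...
//   %}
//
//   instruct storeI_volatile(iRegIorL2I src, indirect mem) %{
//     predicate(needs_releasing_store(n));    // stlr<x>
//     match(Set mem (StoreI mem src));
//     ...
//   %}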
1586
1587 // predicate controlling translation of CAS
1588 //
1589 // returns true if CAS needs to use an acquiring load, otherwise false
1590
1591 bool needs_acquiring_load_exclusive(const Node *n)
1592 {
1593 assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
1594 LoadStoreNode* ldst = n->as_LoadStore();
1595 if (is_CAS(n->Opcode(), false)) {
1596 assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1597 } else {
1598 return ldst->trailing_membar() != NULL;
1599 }
1600
1601 // a plain CAS is guaranteed a trailing membar (asserted above), so we can just return true here
1602 return true;
1603 }
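// When this returns true the matcher selects the acquiring form of the
// exclusive load in the CAS loop -- ldaxr rather than ldxr (or, with LSE,
// casal rather than cas). This is a sketch of the convention; the actual
// encodings live in the cmpxchg enc_classes.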
1604
1605 #define __ _masm.
1606
1607 // forward declarations for helper functions to convert register
1608 // indices to register objects
1609
1610 // the ad file has to provide implementations of certain methods
1611 // expected by the generic code
1612 //
1613 // REQUIRED FUNCTIONALITY
1614
1615 //=============================================================================
1616
1617 // !!!!! Special hack to get all types of calls to specify the byte offset
1618 // from the start of the call to the point where the return address points.
1627
1628 int MachCallDynamicJavaNode::ret_addr_offset()
1629 {
1630 return 16; // movz, movk, movk, bl
1631 }
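// i.e. four 4-byte instructions: movz/movk/movk materialize the inline
// cache word and bl makes the call (see aarch64_enc_java_dynamic_call
// below), so the return address lies 16 bytes past the start of the
// sequence.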
1632
1633 int MachCallRuntimeNode::ret_addr_offset() {
1634 // for generated stubs the call will be
1635 // bl(addr)
1636 // or with far branches
1637 // bl(trampoline_stub)
1638 // for real runtime callouts it will be six instructions
1639 // see aarch64_enc_java_to_runtime
1640 // adr(rscratch2, retaddr)
1641 // lea(rscratch1, RuntimeAddress(addr))
1642 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1643 // blr(rscratch1)
1644 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1645 if (cb) {
1646 return 1 * NativeInstruction::instruction_size;
1647 } else {
1648 return 6 * NativeInstruction::instruction_size;
1649 }
1650 }
1651
1652 //=============================================================================
1653
1654 #ifndef PRODUCT
1655 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1656 st->print("BREAKPOINT");
1657 }
1658 #endif
1659
1660 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1661 C2_MacroAssembler _masm(&cbuf);
1662 __ brk(0);
1663 }
1664
1665 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1666 return MachNode::size(ra_);
1717
1718 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1719
1720 if (C->output()->need_stack_bang(framesize))
1721 st->print("# stack bang size=%d\n\t", framesize);
1722
1723 if (VM_Version::use_rop_protection()) {
1724 st->print("ldr zr, [lr]\n\t");
1725 st->print("paciaz\n\t");
1726 }
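// n.b. the (1 << 9) bound below comes from the stp/ldp signed-offset
// form: a 7-bit immediate scaled by the access size gives a reach of
// -512..+504 bytes for 64-bit pairs, so any frame small enough to pass
// this test can save rfp/lr at [sp, #framesize - 16] directly.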
1727 if (framesize < ((1 << 9) + 2 * wordSize)) {
1728 st->print("sub sp, sp, #%d\n\t", framesize);
1729 st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
1730 if (PreserveFramePointer) st->print("\n\tadd rfp, sp, #%d", framesize - 2 * wordSize);
1731 } else {
1732 st->print("stp lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
1733 if (PreserveFramePointer) st->print("mov rfp, sp\n\t");
1734 st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
1735 st->print("sub sp, sp, rscratch1");
1736 }
1737 if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
1738 st->print("\n\t");
1739 st->print("ldr rscratch1, [guard]\n\t");
1740 st->print("dmb ishld\n\t");
1741 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1742 st->print("cmp rscratch1, rscratch2\n\t");
1743 st->print("b.eq skip");
1744 st->print("\n\t");
1745 st->print("blr #nmethod_entry_barrier_stub\n\t");
1746 st->print("b skip\n\t");
1747 st->print("guard: int\n\t");
1748 st->print("\n\t");
1749 st->print("skip:\n\t");
1750 }
1751 }
1752 #endif
1753
1754 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1755 Compile* C = ra_->C;
1756 C2_MacroAssembler _masm(&cbuf);
1757
1758 // n.b. frame size includes space for return pc and rfp
1759 const int framesize = C->output()->frame_size_in_bytes();
1760
1761 // insert a nop at the start of the prolog so we can patch in a
1762 // branch if we need to invalidate the method later
1763 __ nop();
1764
1765 if (C->clinit_barrier_on_entry()) {
1766 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1767
1768 Label L_skip_barrier;
1769
1770 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1771 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1772 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1773 __ bind(L_skip_barrier);
1774 }
1775
1776 if (C->max_vector_size() > 0) {
1777 __ reinitialize_ptrue();
1778 }
1779
1780 int bangsize = C->output()->bang_size_in_bytes();
1781 if (C->output()->need_stack_bang(bangsize))
1782 __ generate_stack_overflow_check(bangsize);
1783
1784 __ build_frame(framesize);
1785
1786 if (C->stub_function() == NULL) {
1787 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1788 if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
1789 // Dummy labels for just measuring the code size
1790 Label dummy_slow_path;
1791 Label dummy_continuation;
1792 Label dummy_guard;
1793 Label* slow_path = &dummy_slow_path;
1794 Label* continuation = &dummy_continuation;
1795 Label* guard = &dummy_guard;
1796 if (!Compile::current()->output()->in_scratch_emit_size()) {
1797 // Use real labels from actual stub when not emitting code for the purpose of measuring its size
1798 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
1799 Compile::current()->output()->add_stub(stub);
1800 slow_path = &stub->entry();
1801 continuation = &stub->continuation();
1802 guard = &stub->guard();
1803 }
1804 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
1805 bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
1806 }
1807 }
1808
1809 if (VerifyStackAtCalls) {
1810 Unimplemented();
1811 }
1812
1813 C->output()->set_frame_complete(cbuf.insts_size());
1814
1815 if (C->has_mach_constant_base_node()) {
1816 // NOTE: We set the table base offset here because users of the constant
1817 // table might be emitted before MachConstantBaseNode.
1818 ConstantTable& constant_table = C->output()->constant_table();
1819 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1820 }
1821 }
1822
1823 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1824 {
1825 return MachNode::size(ra_); // too many variables; just compute it
1826 // the hard way
1827 }
1828
1829 int MachPrologNode::reloc() const
1830 {
1831 return 0;
1832 }
1833
1834 //=============================================================================
1835
1836 #ifndef PRODUCT
1837 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1838 Compile* C = ra_->C;
1839 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1840
1841 st->print("# pop frame %d\n\t",framesize);
1842
1843 if (framesize == 0) {
1844 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1845 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1846 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1847 st->print("add sp, sp, #%d\n\t", framesize);
1848 } else {
1852 }
1853 if (VM_Version::use_rop_protection()) {
1854 st->print("autiaz\n\t");
1855 st->print("ldr zr, [lr]\n\t");
1856 }
1857
1858 if (do_polling() && C->is_method_compilation()) {
1859 st->print("# test polling word\n\t");
1860 st->print("ldr rscratch1, [rthread, #%d]\n\t", in_bytes(JavaThread::polling_word_offset()));
1861 st->print("cmp sp, rscratch1\n\t");
1862 st->print("bhi #slow_path");
1863 }
1864 }
1865 #endif
1866
1867 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1868 Compile* C = ra_->C;
1869 C2_MacroAssembler _masm(&cbuf);
1870 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1871
1872 __ remove_frame(framesize);
1873
1874 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1875 __ reserved_stack_check();
1876 }
1877
1878 if (do_polling() && C->is_method_compilation()) {
1879 Label dummy_label;
1880 Label* code_stub = &dummy_label;
1881 if (!C->output()->in_scratch_emit_size()) {
1882 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1883 C->output()->add_stub(stub);
1884 code_stub = &stub->entry();
1885 }
1886 __ relocate(relocInfo::poll_return_type);
1887 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1888 }
1889 }
1890
1891 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1892 // Variable size. Determine dynamically.
1893 return MachNode::size(ra_);
1894 }
1895
1896 int MachEpilogNode::reloc() const {
1897 // Return number of relocatable values contained in this instruction.
1898 return 1; // 1 for polling page.
1899 }
1900
1901 const Pipeline * MachEpilogNode::pipeline() const {
1902 return MachNode::pipeline_class();
1903 }
1904
1905 //=============================================================================
1906
1907 // Figure out which register class each belongs in: rc_int, rc_float,
1908 // rc_predicate or rc_stack.
1909 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
1910
1911 static enum RC rc_class(OptoReg::Name reg) {
1912
1913 if (reg == OptoReg::Bad) {
1914 return rc_bad;
1915 }
2136 }
2137 st->print("\t# vector spill size = %d", vsize);
2138 } else if (ideal_reg() == Op_RegVectMask) {
2139 assert(Matcher::supports_scalable_vector(), "bad register type for spill");
2140 int vsize = Matcher::scalable_predicate_reg_slots() * 32;
2141 st->print("\t# predicate spill size = %d", vsize);
2142 } else {
2143 st->print("\t# spill size = %d", is64 ? 64 : 32);
2144 }
2145 }
2146
2147 return 0;
2148
2149 }
2150
2151 #ifndef PRODUCT
2152 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2153 if (!ra_)
2154 st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
2155 else
2156 implementation(NULL, ra_, false, st);
2157 }
2158 #endif
2159
2160 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2161 implementation(&cbuf, ra_, false, NULL);
2162 }
2163
2164 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2165 return MachNode::size(ra_);
2166 }
2167
2168 //=============================================================================
2169
2170 #ifndef PRODUCT
2171 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2172 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2173 int reg = ra_->get_reg_first(this);
2174 st->print("add %s, sp, #%d\t# box lock",
2175 Matcher::regName[reg], offset);
2176 }
2177 #endif
2178
2179 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2180 C2_MacroAssembler _masm(&cbuf);
2181
2182 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2183 int reg = ra_->get_encode(this);
2184
2185 // This add will handle any 24-bit signed offset. 24 bits allows an
2186 // 8 megabyte stack frame.
2187 __ add(as_Register(reg), sp, offset);
2188 }
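// For illustration (a sketch of the expansion, not emitted verbatim):
// an offset too large for a single 12-bit immediate is split across the
// shifted and unshifted add forms, e.g.
//
//   add x10, sp, #0x123, lsl #12   // high 12 bits of the offset
//   add x10, x10, #0x456           // low 12 bits
//
// which is why size() below reports either one or two instructions.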
2189
2190 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2191 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2192 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2193
2194 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2195 return NativeInstruction::instruction_size;
2196 } else {
2197 return 2 * NativeInstruction::instruction_size;
2198 }
2199 }
2200
2201 //=============================================================================
2202
2203 #ifndef PRODUCT
2204 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2205 {
2206 st->print_cr("# MachUEPNode");
2207 if (UseCompressedClassPointers) {
2208 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2209 if (CompressedKlassPointers::shift() != 0) {
2210 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2211 }
2212 } else {
2213 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2214 }
2215 st->print_cr("\tcmp r0, rscratch1\t# Inline cache check");
2216 st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
2217 }
2218 #endif
2219
2220 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2221 {
2222 // This is the unverified entry point.
2223 C2_MacroAssembler _masm(&cbuf);
2224
2225 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2226 Label skip;
2227 // TODO
2228 // can we avoid this skip and still use a reloc?
2229 __ br(Assembler::EQ, skip);
2230 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2231 __ bind(skip);
2232 }
2233
2234 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2235 {
2236 return MachNode::size(ra_);
2237 }
2238
2239 // REQUIRED EMIT CODE
2240
2241 //=============================================================================
2242
2243 // Emit exception handler code.
2244 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2245 {
2246 // mov rscratch1 #exception_blob_entry_point
2247 // br rscratch1
2248 // Note that the code buffer's insts_mark is always relative to insts.
2249 // That's why we must use the macroassembler to generate a handler.
2250 C2_MacroAssembler _masm(&cbuf);
2251 address base = __ start_a_stub(size_exception_handler());
2252 if (base == NULL) {
2253 ciEnv::current()->record_failure("CodeCache is full");
2254 return 0; // CodeBuffer::expand failed
2255 }
2256 int offset = __ offset();
2257 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2258 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2259 __ end_a_stub();
2260 return offset;
2261 }
2262
2263 // Emit deopt handler code.
2264 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
2265 {
2266 // Note that the code buffer's insts_mark is always relative to insts.
2267 // That's why we must use the macroassembler to generate a handler.
2268 C2_MacroAssembler _masm(&cbuf);
2269 address base = __ start_a_stub(size_deopt_handler());
2270 if (base == NULL) {
2271 ciEnv::current()->record_failure("CodeCache is full");
2272 return 0; // CodeBuffer::expand failed
2273 }
2274 int offset = __ offset();
2275
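// adr(lr, pc()) materializes the address of the adr instruction itself
// into lr, so the deopt blob sees a "return address" that identifies this
// nmethod's deopt handler (a sketch of the convention; the details live
// in SharedRuntime::deopt_blob).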
2276 __ adr(lr, __ pc());
2277 __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2278
2279 assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
2280 __ end_a_stub();
2281 return offset;
2282 }
2283
2284 // REQUIRED MATCHER CODE
2285
2286 //=============================================================================
2287
2288 bool Matcher::match_rule_supported(int opcode) {
2289 if (!has_match_rule(opcode))
2290 return false;
2393 }
2394 switch(len) {
2395 // For 16-bit/32-bit mask vector, reuse VecD.
2396 case 2:
2397 case 4:
2398 case 8: return Op_VecD;
2399 case 16: return Op_VecX;
2400 }
2401 ShouldNotReachHere();
2402 return 0;
2403 }
2404
2405 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
2406 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
2407 switch (ideal_reg) {
2408 case Op_VecA: return new vecAOper();
2409 case Op_VecD: return new vecDOper();
2410 case Op_VecX: return new vecXOper();
2411 }
2412 ShouldNotReachHere();
2413 return NULL;
2414 }
2415
2416 bool Matcher::is_reg2reg_move(MachNode* m) {
2417 return false;
2418 }
2419
2420 bool Matcher::is_generic_vector(MachOper* opnd) {
2421 return opnd->opcode() == VREG;
2422 }
2423
2424 // Return whether or not this register is ever used as an argument.
2425 // This function is used on startup to build the trampoline stubs in
2426 // generateOptoStub. Registers not mentioned will be killed by the VM
2427 // call in the trampoline, and arguments in those registers will not be
2428 // available to the callee.
2429 bool Matcher::can_be_java_arg(int reg)
2430 {
2431 return
2432 reg == R0_num || reg == R0_H_num ||
2433 reg == R1_num || reg == R1_H_num ||
2566 result = Assembler::VS; break;
2567 case BoolTest::no_overflow:
2568 result = Assembler::VC; break;
2569 default:
2570 ShouldNotReachHere();
2571 return Assembler::Condition(-1);
2572 }
2573
2574 // Check conversion
2575 if (cond & BoolTest::unsigned_compare) {
2576 assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == result, "Invalid conversion");
2577 } else {
2578 assert(cmpOpOper(cond).ccode() == result, "Invalid conversion");
2579 }
2580
2581 return result;
2582 }
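// e.g. BoolTest::lt converts to Assembler::LT for a signed compare, but
// to Assembler::LO (unsigned lower) when BoolTest::unsigned_compare is
// set -- the cross-check above asserts that the cmpOp/cmpOpU operand
// tables agree with this mapping.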
2583
2584 // Binary src (Replicate con)
2585 bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
2586 if (n == NULL || m == NULL) {
2587 return false;
2588 }
2589
2590 if (UseSVE == 0 || m->Opcode() != Op_Replicate) {
2591 return false;
2592 }
2593
2594 Node* imm_node = m->in(1);
2595 if (!imm_node->is_Con()) {
2596 return false;
2597 }
2598
2599 const Type* t = imm_node->bottom_type();
2600 if (!(t->isa_int() || t->isa_long())) {
2601 return false;
2602 }
2603
2604 switch (n->Opcode()) {
2605 case Op_AndV:
2606 case Op_OrV:
2607 case Op_XorV: {
2608 Assembler::SIMD_RegVariant T = Assembler::elemType_to_regVariant(Matcher::vector_element_basic_type(n));
2609 uint64_t value = t->isa_long() ? (uint64_t)imm_node->get_long() : (uint64_t)imm_node->get_int();
2610 return Assembler::operand_valid_for_sve_logical_immediate(Assembler::regVariant_to_elemBits(T), value);
2611 }
2612 case Op_AddVB:
2613 return (imm_node->get_int() <= 255 && imm_node->get_int() >= -255);
2614 case Op_AddVS:
2615 case Op_AddVI:
2616 return Assembler::operand_valid_for_sve_add_sub_immediate((int64_t)imm_node->get_int());
2617 case Op_AddVL:
2618 return Assembler::operand_valid_for_sve_add_sub_immediate(imm_node->get_long());
2619 default:
2620 return false;
2621 }
2622 }
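// e.g. (AndV src (Replicate 0xff)) qualifies when 0xff is a valid SVE
// logical immediate for the vector's element size, and
// (AddVI src (Replicate 7)) qualifies because 7 fits the SVE add/sub
// immediate range; in both cases the constant can be encoded directly
// into the instruction instead of being materialized in a register.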
2623
2624 // (XorV src (Replicate m1))
2625 // (XorVMask src (MaskAll m1))
2626 bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
2627 if (n != NULL && m != NULL) {
2628 return (n->Opcode() == Op_XorV || n->Opcode() == Op_XorVMask) &&
2629 VectorNode::is_all_ones_vector(m);
2630 }
2631 return false;
2632 }
2633
2634 // Should the matcher clone input 'm' of node 'n'?
2635 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2636 if (is_vshift_con_pattern(n, m) ||
2637 is_vector_bitwise_not_pattern(n, m) ||
2638 is_valid_sve_arith_imm_pattern(n, m)) {
2639 mstack.push(m, Visit);
2640 return true;
2641 }
2642 return false;
2643 }
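// Cloning the (Replicate con) input lets each use fold the constant
// directly into its instruction encoding rather than keeping a vector
// register live for it across all uses.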
2644
2645 // Should the Matcher clone shifts on addressing modes, expecting them
2646 // to be subsumed into complex addressing expressions or compute them
2647 // into registers?
3413 } else {
3414 __ movw(dst_reg, con);
3415 }
3416 %}
3417
3418 enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
3419 C2_MacroAssembler _masm(&cbuf);
3420 Register dst_reg = as_Register($dst$$reg);
3421 uint64_t con = (uint64_t)$src$$constant;
3422 if (con == 0) {
3423 __ mov(dst_reg, zr);
3424 } else {
3425 __ mov(dst_reg, con);
3426 }
3427 %}
3428
3429 enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
3430 C2_MacroAssembler _masm(&cbuf);
3431 Register dst_reg = as_Register($dst$$reg);
3432 address con = (address)$src$$constant;
3433 if (con == NULL || con == (address)1) {
3434 ShouldNotReachHere();
3435 } else {
3436 relocInfo::relocType rtype = $src->constant_reloc();
3437 if (rtype == relocInfo::oop_type) {
3438 __ movoop(dst_reg, (jobject)con);
3439 } else if (rtype == relocInfo::metadata_type) {
3440 __ mov_metadata(dst_reg, (Metadata*)con);
3441 } else {
3442 assert(rtype == relocInfo::none, "unexpected reloc type");
3443 if (! __ is_valid_AArch64_address(con) ||
3444 con < (address)(uintptr_t)os::vm_page_size()) {
3445 __ mov(dst_reg, con);
3446 } else {
3447 uint64_t offset;
3448 __ adrp(dst_reg, con, offset);
3449 __ add(dst_reg, dst_reg, offset);
3450 }
3451 }
3452 }
3453 %}
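3454
3455 enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{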
3456 C2_MacroAssembler _masm(&cbuf);
3457 Register dst_reg = as_Register($dst$$reg);
3458 __ mov(dst_reg, zr);
3459 %}
3460
3461 enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
3462 C2_MacroAssembler _masm(&cbuf);
3463 Register dst_reg = as_Register($dst$$reg);
3464 __ mov(dst_reg, (uint64_t)1);
3465 %}
3466
3467 enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
3468 C2_MacroAssembler _masm(&cbuf);
3469 __ load_byte_map_base($dst$$Register);
3470 %}
3471
3472 enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
3473 C2_MacroAssembler _masm(&cbuf);
3474 Register dst_reg = as_Register($dst$$reg);
3475 address con = (address)$src$$constant;
3476 if (con == NULL) {
3477 ShouldNotReachHere();
3478 } else {
3479 relocInfo::relocType rtype = $src->constant_reloc();
3480 assert(rtype == relocInfo::oop_type, "unexpected reloc type");
3481 __ set_narrow_oop(dst_reg, (jobject)con);
3482 }
3483 %}
3484
3485 enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
3486 C2_MacroAssembler _masm(&cbuf);
3487 Register dst_reg = as_Register($dst$$reg);
3488 __ mov(dst_reg, zr);
3489 %}
3490
3491 enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
3492 C2_MacroAssembler _masm(&cbuf);
3493 Register dst_reg = as_Register($dst$$reg);
3494 address con = (address)$src$$constant;
3495 if (con == NULL) {
3496 ShouldNotReachHere();
3497 } else {
3498 relocInfo::relocType rtype = $src->constant_reloc();
3499 assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
3500 __ set_narrow_klass(dst_reg, (Klass *)con);
3501 }
3502 %}
3503
3504 // arithmetic encodings
3505
3506 enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
3507 C2_MacroAssembler _masm(&cbuf);
3508 Register dst_reg = as_Register($dst$$reg);
3509 Register src_reg = as_Register($src1$$reg);
3510 int32_t con = (int32_t)$src2$$constant;
3511 // add has primary == 0, subtract has primary == 1
3512 if ($primary) { con = -con; }
3513 if (con < 0) {
3514 __ subw(dst_reg, src_reg, -con);
3515 } else {
3658 Label *L = $lbl$$label;
3659 __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3660 %}
3661
3662 enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3663 C2_MacroAssembler _masm(&cbuf);
3664 Label *L = $lbl$$label;
3665 __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3666 %}
3667
3668 enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
3669 %{
3670 Register sub_reg = as_Register($sub$$reg);
3671 Register super_reg = as_Register($super$$reg);
3672 Register temp_reg = as_Register($temp$$reg);
3673 Register result_reg = as_Register($result$$reg);
3674
3675 Label miss;
3676 C2_MacroAssembler _masm(&cbuf);
3677 __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
3678 NULL, &miss,
3679 /*set_cond_codes:*/ true);
3680 if ($primary) {
3681 __ mov(result_reg, zr);
3682 }
3683 __ bind(miss);
3684 %}
3685
3686 enc_class aarch64_enc_java_static_call(method meth) %{
3687 C2_MacroAssembler _masm(&cbuf);
3688
3689 address addr = (address)$meth$$method;
3690 address call;
3691 if (!_method) {
3692 // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3693 call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type));
3694 if (call == NULL) {
3695 ciEnv::current()->record_failure("CodeCache is full");
3696 return;
3697 }
3698 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
3699 // The NOP here is purely to ensure that eliding a call to
3700 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
3701 __ nop();
3702 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
3703 } else {
3704 int method_index = resolved_method_index(cbuf);
3705 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
3706 : static_call_Relocation::spec(method_index);
3707 call = __ trampoline_call(Address(addr, rspec));
3708 if (call == NULL) {
3709 ciEnv::current()->record_failure("CodeCache is full");
3710 return;
3711 }
3712 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
3713 // Calls of the same statically bound method can share
3714 // a stub to the interpreter.
3715 cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
3716 } else {
3717 // Emit stub for static call
3718 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call);
3719 if (stub == NULL) {
3720 ciEnv::current()->record_failure("CodeCache is full");
3721 return;
3722 }
3723 }
3724 }
3725
3726 __ post_call_nop();
3727
3728 // Only non-uncommon_trap calls need to reinitialize ptrue.
3729 if (Compile::current()->max_vector_size() > 0 && uncommon_trap_request() == 0) {
3730 __ reinitialize_ptrue();
3731 }
3732 %}
3733
3734 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3735 C2_MacroAssembler _masm(&cbuf);
3736 int method_index = resolved_method_index(cbuf);
3737 address call = __ ic_call((address)$meth$$method, method_index);
3738 if (call == NULL) {
3739 ciEnv::current()->record_failure("CodeCache is full");
3740 return;
3741 }
3742 __ post_call_nop();
3743 if (Compile::current()->max_vector_size() > 0) {
3744 __ reinitialize_ptrue();
3745 }
3746 %}
3747
3748 enc_class aarch64_enc_call_epilog() %{
3749 C2_MacroAssembler _masm(&cbuf);
3750 if (VerifyStackAtCalls) {
3751 // Check that stack depth is unchanged: find majik cookie on stack
3752 __ call_Unimplemented();
3753 }
3754 %}
3755
3756 enc_class aarch64_enc_java_to_runtime(method meth) %{
3757 C2_MacroAssembler _masm(&cbuf);
3758
3759 // some calls to generated routines (arraycopy code) are scheduled
3760 // by C2 as runtime calls. If so we can call them using a br (they
3761 // will be in a reachable segment); otherwise we have to use a blr
3762 // which loads the absolute address into a register.
3763 address entry = (address)$meth$$method;
3764 CodeBlob *cb = CodeCache::find_blob(entry);
3765 if (cb) {
3766 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3767 if (call == NULL) {
3768 ciEnv::current()->record_failure("CodeCache is full");
3769 return;
3770 }
3771 __ post_call_nop();
3772 } else {
3773 Label retaddr;
3774 __ adr(rscratch2, retaddr);
3775 __ lea(rscratch1, RuntimeAddress(entry));
3776 // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
3777 __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
3778 __ blr(rscratch1);
3779 __ bind(retaddr);
3780 __ post_call_nop();
3781 __ add(sp, sp, 2 * wordSize);
3782 }
3783 if (Compile::current()->max_vector_size() > 0) {
3784 __ reinitialize_ptrue();
3785 }
3786 %}
3787
4646 operand immL_32bits()
4647 %{
4648 predicate(n->get_long() == 0xFFFFFFFFL);
4649 match(ConL);
4650 op_cost(0);
4651 format %{ %}
4652 interface(CONST_INTER);
4653 %}
4654
4655 // Pointer operands
4656 // Pointer Immediate
4657 operand immP()
4658 %{
4659 match(ConP);
4660
4661 op_cost(0);
4662 format %{ %}
4663 interface(CONST_INTER);
4664 %}
4665
4666 // NULL Pointer Immediate
4667 operand immP0()
4668 %{
4669 predicate(n->get_ptr() == 0);
4670 match(ConP);
4671
4672 op_cost(0);
4673 format %{ %}
4674 interface(CONST_INTER);
4675 %}
4676
4677 // Pointer Immediate One
4678 // this is used in object initialization (initial object header)
4679 operand immP_1()
4680 %{
4681 predicate(n->get_ptr() == 1);
4682 match(ConP);
4683
4684 op_cost(0);
4685 format %{ %}
4686 interface(CONST_INTER);
4778 operand immFPacked()
4779 %{
4780 predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
4781 match(ConF);
4782 op_cost(0);
4783 format %{ %}
4784 interface(CONST_INTER);
4785 %}
4786
4787 // Narrow pointer operands
4788 // Narrow Pointer Immediate
4789 operand immN()
4790 %{
4791 match(ConN);
4792
4793 op_cost(0);
4794 format %{ %}
4795 interface(CONST_INTER);
4796 %}
4797
4798 // Narrow NULL Pointer Immediate
4799 operand immN0()
4800 %{
4801 predicate(n->get_narrowcon() == 0);
4802 match(ConN);
4803
4804 op_cost(0);
4805 format %{ %}
4806 interface(CONST_INTER);
4807 %}
4808
4809 operand immNKlass()
4810 %{
4811 match(ConNKlass);
4812
4813 op_cost(0);
4814 format %{ %}
4815 interface(CONST_INTER);
4816 %}
4817
4818 // Integer 32 bit Register Operands
7187 instruct loadConL(iRegLNoSp dst, immL src)
7188 %{
7189 match(Set dst src);
7190
7191 ins_cost(INSN_COST);
7192 format %{ "mov $dst, $src\t# long" %}
7193
7194 ins_encode( aarch64_enc_mov_imm(dst, src) );
7195
7196 ins_pipe(ialu_imm);
7197 %}
7198
7199 // Load Pointer Constant
7200
7201 instruct loadConP(iRegPNoSp dst, immP con)
7202 %{
7203 match(Set dst con);
7204
7205 ins_cost(INSN_COST * 4);
7206 format %{
7207 "mov $dst, $con\t# ptr\n\t"
7208 %}
7209
7210 ins_encode(aarch64_enc_mov_p(dst, con));
7211
7212 ins_pipe(ialu_imm);
7213 %}
7214
7215 // Load Null Pointer Constant
7216
7217 instruct loadConP0(iRegPNoSp dst, immP0 con)
7218 %{
7219 match(Set dst con);
7220
7221 ins_cost(INSN_COST);
7222 format %{ "mov $dst, $con\t# NULL ptr" %}
7223
7224 ins_encode(aarch64_enc_mov_p0(dst, con));
7225
7226 ins_pipe(ialu_imm);
7227 %}
7228
7229 // Load Pointer Constant One
7230
7231 instruct loadConP1(iRegPNoSp dst, immP_1 con)
7232 %{
7233 match(Set dst con);
7234
7235 ins_cost(INSN_COST);
7236 format %{ "mov $dst, $con\t# ptr 1" %}
7237
7238 ins_encode(aarch64_enc_mov_p1(dst, con));
7239
7240 ins_pipe(ialu_imm);
7241 %}
7242
7243 // Load Byte Map Base Constant
7244
7245 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
7246 %{
7247 match(Set dst con);
7248
7249 ins_cost(INSN_COST);
7250 format %{ "adr $dst, $con\t# Byte Map Base" %}
7251
7252 ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
7253
7254 ins_pipe(ialu_imm);
7255 %}
7256
7258
7259 instruct loadConN(iRegNNoSp dst, immN con)
7260 %{
7261 match(Set dst con);
7262
7263 ins_cost(INSN_COST * 4);
7264 format %{ "mov $dst, $con\t# compressed ptr" %}
7265
7266 ins_encode(aarch64_enc_mov_n(dst, con));
7267
7268 ins_pipe(ialu_imm);
7269 %}
7270
7271 // Load Narrow Null Pointer Constant
7272
7273 instruct loadConN0(iRegNNoSp dst, immN0 con)
7274 %{
7275 match(Set dst con);
7276
7277 ins_cost(INSN_COST);
7278 format %{ "mov $dst, $con\t# compressed NULL ptr" %}
7279
7280 ins_encode(aarch64_enc_mov_n0(dst, con));
7281
7282 ins_pipe(ialu_imm);
7283 %}
7284
7285 // Load Narrow Klass Constant
7286
7287 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
7288 %{
7289 match(Set dst con);
7290
7291 ins_cost(INSN_COST);
7292 format %{ "mov $dst, $con\t# compressed klass ptr" %}
7293
7294 ins_encode(aarch64_enc_mov_nk(dst, con));
7295
7296 ins_pipe(ialu_imm);
7297 %}
7298
8390 %}
8391
8392 // ============================================================================
8393 // Cast/Convert Instructions
8394
8395 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8396 match(Set dst (CastX2P src));
8397
8398 ins_cost(INSN_COST);
8399 format %{ "mov $dst, $src\t# long -> ptr" %}
8400
8401 ins_encode %{
8402 if ($dst$$reg != $src$$reg) {
8403 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8404 }
8405 %}
8406
8407 ins_pipe(ialu_reg);
8408 %}
8409
8410 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8411 match(Set dst (CastP2X src));
8412
8413 ins_cost(INSN_COST);
8414 format %{ "mov $dst, $src\t# ptr -> long" %}
8415
8416 ins_encode %{
8417 if ($dst$$reg != $src$$reg) {
8418 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8419 }
8420 %}
8421
8422 ins_pipe(ialu_reg);
8423 %}
8424
8425 // Convert oop into int for vector alignment masking
8426 instruct convP2I(iRegINoSp dst, iRegP src) %{
8427 match(Set dst (ConvL2I (CastP2X src)));
8428
8429 ins_cost(INSN_COST);
15211
15212 match(Set dst (MoveL2D src));
15213
15214 effect(DEF dst, USE src);
15215
15216 ins_cost(INSN_COST);
15217
15218 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15219
15220 ins_encode %{
15221 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15222 %}
15223
15224 ins_pipe(fp_l2d);
15225
15226 %}
15227
15228 // ============================================================================
15229 // Clearing of an array
15230
15231 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
15232 %{
15233 match(Set dummy (ClearArray cnt base));
15234 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15235
15236 ins_cost(4 * INSN_COST);
15237 format %{ "ClearArray $cnt, $base" %}
15238
15239 ins_encode %{
15240 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15241 if (tpc == NULL) {
15242 ciEnv::current()->record_failure("CodeCache is full");
15243 return;
15244 }
15245 %}
15246
15247 ins_pipe(pipe_class_memory);
15248 %}
15249
15250 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15251 %{
15252 predicate((uint64_t)n->in(2)->get_long()
15253 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
15254 match(Set dummy (ClearArray cnt base));
15255 effect(TEMP temp, USE_KILL base, KILL cr);
15256
15257 ins_cost(4 * INSN_COST);
15258 format %{ "ClearArray $cnt, $base" %}
15259
15260 ins_encode %{
15261 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15262 if (tpc == NULL) {
15263 ciEnv::current()->record_failure("CodeCache is full");
15264 return;
15265 }
15266 %}
15267
15268 ins_pipe(pipe_class_memory);
15269 %}
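// n.b. clearArray_imm_reg only applies to short arrays: its predicate
// compares the word count against BlockZeroingLowLimit (a byte limit,
// hence the LogBytesPerWord shift); longer arrays fall back to the
// register-count rule above.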
15270
15271 // ============================================================================
15272 // Overflow Math Instructions
15273
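// cmn (compare negative) computes op1 + op2 and sets the condition flags
// without writing a destination register, so the V flag reports signed
// overflow of exactly the addition being checked.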
15274 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15275 %{
15276 match(Set cr (OverflowAddI op1 op2));
15277
15278 format %{ "cmnw $op1, $op2\t# overflow check int" %}
15279 ins_cost(INSN_COST);
15280 ins_encode %{
15281 __ cmnw($op1$$Register, $op2$$Register);
15282 %}
16533
16534 // Call Runtime Instruction
16535
16536 instruct CallLeafDirect(method meth)
16537 %{
16538 match(CallLeaf);
16539
16540 effect(USE meth);
16541
16542 ins_cost(CALL_COST);
16543
16544 format %{ "CALL, runtime leaf $meth" %}
16545
16546 ins_encode( aarch64_enc_java_to_runtime(meth) );
16547
16548 ins_pipe(pipe_class_call);
16549 %}
16550
16551 // Call Runtime Instruction
16552
16553 instruct CallLeafNoFPDirect(method meth)
16554 %{
16555 match(CallLeafNoFP);
16556
16557 effect(USE meth);
16558
16559 ins_cost(CALL_COST);
16560
16561 format %{ "CALL, runtime leaf nofp $meth" %}
16562
16563 ins_encode( aarch64_enc_java_to_runtime(meth) );
16564
16565 ins_pipe(pipe_class_call);
16566 %}
16567
16568 // Tail Call; Jump from runtime stub to Java code.
16569 // Also known as an 'interprocedural jump'.
16570 // Target of jump will eventually return to caller.
16571 // TailJump below removes the return address.
16572 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16573 %{
16574 match(TailCall jump_target method_ptr);
17107 ins_pipe(pipe_class_memory);
17108 %}
17109
17110 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17111 iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17112 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17113 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17114 iRegP_R10 tmp, rFlagsReg cr)
17115 %{
17116 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
17117 match(Set result (AryEq ary1 ary2));
17118 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17119 TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17120 TEMP vtmp6, TEMP vtmp7, KILL cr);
17121
17122 format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17123 ins_encode %{
17124 address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17125 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17126 $result$$Register, $tmp$$Register, 1);
17127 if (tpc == NULL) {
17128 ciEnv::current()->record_failure("CodeCache is full");
17129 return;
17130 }
17131 %}
17132 ins_pipe(pipe_class_memory);
17133 %}
17134
17135 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17136 iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17137 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17138 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17139 iRegP_R10 tmp, rFlagsReg cr)
17140 %{
17141 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
17142 match(Set result (AryEq ary1 ary2));
17143 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17144 TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17145 TEMP vtmp6, TEMP vtmp7, KILL cr);
17146
17147 format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17148 ins_encode %{
17149 address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17150 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17151 $result$$Register, $tmp$$Register, 2);
17152 if (tpc == NULL) {
17153 ciEnv::current()->record_failure("CodeCache is full");
17154 return;
17155 }
17156 %}
17157 ins_pipe(pipe_class_memory);
17158 %}
17159
17160 instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
17161 %{
17162 match(Set result (CountPositives ary1 len));
17163 effect(USE_KILL ary1, USE_KILL len, KILL cr);
17164 format %{ "count positives byte[] $ary1,$len -> $result" %}
17165 ins_encode %{
17166 address tpc = __ count_positives($ary1$$Register, $len$$Register, $result$$Register);
17167 if (tpc == NULL) {
17168 ciEnv::current()->record_failure("CodeCache is full");
17169 return;
17170 }
17171 %}
17172 ins_pipe( pipe_slow );
17173 %}
17174
17175 // fast char[] to byte[] compression
17176 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17177 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17178 vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17179 iRegI_R0 result, rFlagsReg cr)
17180 %{
17181 match(Set result (StrCompressedCopy src (Binary dst len)));
17182 effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17183 USE_KILL src, USE_KILL dst, USE len, KILL cr);
17184
17185 format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17186 ins_encode %{
17187 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
17190 $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
17191 %}
17192 ins_pipe(pipe_slow);
17193 %}
17194
17195 // fast byte[] to char[] inflation
17196 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
17197 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17198 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
17199 %{
17200 match(Set dummy (StrInflatedCopy src (Binary dst len)));
17201 effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
17202 TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
17203 USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
17204
17205 format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
17206 ins_encode %{
17207 address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
17208 $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
17209 $vtmp2$$FloatRegister, $tmp$$Register);
17210 if (tpc == NULL) {
17211 ciEnv::current()->record_failure("CodeCache is full");
17212 return;
17213 }
17214 %}
17215 ins_pipe(pipe_class_memory);
17216 %}
17217
17218 // encode char[] to byte[] in ISO_8859_1
17219 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17220 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17221 vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17222 iRegI_R0 result, rFlagsReg cr)
17223 %{
17224 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
17225 match(Set result (EncodeISOArray src (Binary dst len)));
17226 effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
17227 KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
17228
17229 format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17230 ins_encode %{
|
1220 // registers conditionally reserved.
1221
1222 _ANY_REG32_mask = _ALL_REG32_mask;
1223 _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
1224
1225 _ANY_REG_mask = _ALL_REG_mask;
1226
1227 _PTR_REG_mask = _ALL_REG_mask;
1228
1229 _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
1230 _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
1231
1232 _NO_SPECIAL_REG_mask = _ALL_REG_mask;
1233 _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1234
1235 _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
1236 _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1237
1238 // r27 is not allocatable when compressed oops is on and heapbase is not
1239 // zero, compressed klass pointers doesn't use r27 after JDK-8234794
1240 if (UseCompressedOops && (CompressedOops::ptrs_base() != nullptr)) {
1241 _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1242 _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1243 _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1244 }
1245
1246 // r29 is not allocatable when PreserveFramePointer is on
1247 if (PreserveFramePointer) {
1248 _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1249 _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1250 _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1251 }
1252 }
1253
1254 // Optimizaton of volatile gets and puts
1255 // -------------------------------------
1256 //
1257 // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1258 // use to implement volatile reads and writes. For a volatile read
1259 // we simply need
1260 //
1564 bool release = mbvol->trailing_store();
1565 assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
1566 #ifdef ASSERT
1567 if (release) {
1568 Node* leading = mbvol->leading_membar();
1569 assert(leading->Opcode() == Op_MemBarRelease, "");
1570 assert(leading->as_MemBar()->leading_store(), "");
1571 assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
1572 }
1573 #endif
1574
1575 return release;
1576 }
1577
1578 // predicates controlling emit of str<x>/stlr<x>
1579
1580 bool needs_releasing_store(const Node *n)
1581 {
1582 // assert n->is_Store();
1583 StoreNode *st = n->as_Store();
1584 return st->trailing_membar() != nullptr;
1585 }
1586
1587 // predicate controlling translation of CAS
1588 //
1589 // returns true if CAS needs to use an acquiring load otherwise false
1590
1591 bool needs_acquiring_load_exclusive(const Node *n)
1592 {
1593 assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
1594 LoadStoreNode* ldst = n->as_LoadStore();
1595 if (is_CAS(n->Opcode(), false)) {
1596 assert(ldst->trailing_membar() != nullptr, "expected trailing membar");
1597 } else {
1598 return ldst->trailing_membar() != nullptr;
1599 }
1600
1601 // so we can just return true here
1602 return true;
1603 }
1604
1605 #define __ _masm.
1606
1607 // advance declarations for helper functions to convert register
1608 // indices to register objects
1609
1610 // the ad file has to provide implementations of certain methods
1611 // expected by the generic code
1612 //
1613 // REQUIRED FUNCTIONALITY
1614
1615 //=============================================================================
1616
1617 // !!!!! Special hack to get all types of calls to specify the byte offset
1618 // from the start of the call to the point where the return address
1627
1628 int MachCallDynamicJavaNode::ret_addr_offset()
1629 {
1630 return 16; // movz, movk, movk, bl
1631 }
1632
1633 int MachCallRuntimeNode::ret_addr_offset() {
1634 // for generated stubs the call will be
1635 // bl(addr)
1636 // or with far branches
1637 // bl(trampoline_stub)
1638 // for real runtime callouts it will be six instructions
1639 // see aarch64_enc_java_to_runtime
1640 // adr(rscratch2, retaddr)
1641 // lea(rscratch1, RuntimeAddress(addr)
1642 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1643 // blr(rscratch1)
1644 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1645 if (cb) {
1646 return 1 * NativeInstruction::instruction_size;
1647 } else if (_entry_point == nullptr) {
1648 // See CallLeafNoFPIndirect
1649 return 1 * NativeInstruction::instruction_size;
1650 } else {
1651 return 6 * NativeInstruction::instruction_size;
1652 }
1653 }
1654
1655 //=============================================================================
1656
1657 #ifndef PRODUCT
1658 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1659 st->print("BREAKPOINT");
1660 }
1661 #endif
1662
1663 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1664 C2_MacroAssembler _masm(&cbuf);
1665 __ brk(0);
1666 }
1667
1668 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1669 return MachNode::size(ra_);
1720
1721 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1722
1723 if (C->output()->need_stack_bang(framesize))
1724 st->print("# stack bang size=%d\n\t", framesize);
1725
1726 if (VM_Version::use_rop_protection()) {
1727 st->print("ldr zr, [lr]\n\t");
1728 st->print("paciaz\n\t");
1729 }
1730 if (framesize < ((1 << 9) + 2 * wordSize)) {
1731 st->print("sub sp, sp, #%d\n\t", framesize);
1732 st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
1733 if (PreserveFramePointer) st->print("\n\tadd rfp, sp, #%d", framesize - 2 * wordSize);
1734 } else {
1735 st->print("stp lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
1736 if (PreserveFramePointer) st->print("mov rfp, sp\n\t");
1737 st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
1738 st->print("sub sp, sp, rscratch1");
1739 }
1740 if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
1741 st->print("\n\t");
1742 st->print("ldr rscratch1, [guard]\n\t");
1743 st->print("dmb ishld\n\t");
1744 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1745 st->print("cmp rscratch1, rscratch2\n\t");
1746 st->print("b.eq skip");
1747 st->print("\n\t");
1748 st->print("blr #nmethod_entry_barrier_stub\n\t");
1749 st->print("b skip\n\t");
1750 st->print("guard: int\n\t");
1751 st->print("\n\t");
1752 st->print("skip:\n\t");
1753 }
1754 }
1755 #endif
1756
1757 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1758 Compile* C = ra_->C;
1759 C2_MacroAssembler _masm(&cbuf);
1760
1761 // insert a nop at the start of the prolog so we can patch in a
1762 // branch if we need to invalidate the method later
1763 __ nop();
1764
1765 __ verified_entry(C, 0);
1766
1767 if (C->stub_function() == nullptr) {
1768 __ entry_barrier();
1769 }
1770
1771 if (!Compile::current()->output()->in_scratch_emit_size()) {
1772 __ bind(*_verified_entry);
1773 }
1774
1775 if (VerifyStackAtCalls) {
1776 Unimplemented();
1777 }
1778
1779 C->output()->set_frame_complete(cbuf.insts_size());
1780
1781 if (C->has_mach_constant_base_node()) {
1782 // NOTE: We set the table base offset here because users might be
1783 // emitted before MachConstantBaseNode.
1784 ConstantTable& constant_table = C->output()->constant_table();
1785 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1786 }
1787 }
1788
1789 int MachPrologNode::reloc() const
1790 {
1791 return 0;
1792 }
1793
1794 //=============================================================================
1795
1796 #ifndef PRODUCT
1797 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1798 Compile* C = ra_->C;
1799 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1800
1801 st->print("# pop frame %d\n\t",framesize);
1802
1803 if (framesize == 0) {
1804 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1805 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1806 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1807 st->print("add sp, sp, #%d\n\t", framesize);
1808 } else {
1812 }
1813 if (VM_Version::use_rop_protection()) {
1814 st->print("autiaz\n\t");
1815 st->print("ldr zr, [lr]\n\t");
1816 }
1817
1818 if (do_polling() && C->is_method_compilation()) {
1819 st->print("# test polling word\n\t");
1820 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1821 st->print("cmp sp, rscratch1\n\t");
1822 st->print("bhi #slow_path");
1823 }
1824 }
1825 #endif
1826
1827 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1828 Compile* C = ra_->C;
1829 C2_MacroAssembler _masm(&cbuf);
1830 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1831
1832 __ remove_frame(framesize, C->needs_stack_repair());
1833
1834 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1835 __ reserved_stack_check();
1836 }
1837
1838 if (do_polling() && C->is_method_compilation()) {
1839 Label dummy_label;
1840 Label* code_stub = &dummy_label;
1841 if (!C->output()->in_scratch_emit_size()) {
1842 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1843 C->output()->add_stub(stub);
1844 code_stub = &stub->entry();
1845 }
1846 __ relocate(relocInfo::poll_return_type);
1847 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1848 }
1849 }
1850
1851 int MachEpilogNode::reloc() const {
1852 // Return number of relocatable values contained in this instruction.
1853 return 1; // 1 for polling page.
1854 }
1855
1856 const Pipeline * MachEpilogNode::pipeline() const {
1857 return MachNode::pipeline_class();
1858 }
1859
1860 //=============================================================================
1861
1862 // Figure out which register class each belongs in: rc_int, rc_float or
1863 // rc_stack.
1864 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
1865
1866 static enum RC rc_class(OptoReg::Name reg) {
1867
1868 if (reg == OptoReg::Bad) {
1869 return rc_bad;
1870 }
2091 }
2092 st->print("\t# vector spill size = %d", vsize);
2093 } else if (ideal_reg() == Op_RegVectMask) {
2094 assert(Matcher::supports_scalable_vector(), "bad register type for spill");
2095 int vsize = Matcher::scalable_predicate_reg_slots() * 32;
2096 st->print("\t# predicate spill size = %d", vsize);
2097 } else {
2098 st->print("\t# spill size = %d", is64 ? 64 : 32);
2099 }
2100 }
2101
2102 return 0;
2103
2104 }
2105
2106 #ifndef PRODUCT
2107 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2108 if (!ra_)
2109 st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
2110 else
2111 implementation(nullptr, ra_, false, st);
2112 }
2113 #endif
2114
2115 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2116 implementation(&cbuf, ra_, false, nullptr);
2117 }
2118
2119 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2120 return MachNode::size(ra_);
2121 }
2122
2123 //=============================================================================
2124
2125 #ifndef PRODUCT
2126 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2127 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2128 int reg = ra_->get_reg_first(this);
2129 st->print("add %s, rsp, #%d]\t# box lock",
2130 Matcher::regName[reg], offset);
2131 }
2132 #endif
2133
2134 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2135 C2_MacroAssembler _masm(&cbuf);
2136
2137 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2138 int reg = ra_->get_encode(this);
2139
2140 // This add will handle any 24-bit signed offset. 24 bits allows an
2141 // 8 megabyte stack frame.
2142 __ add(as_Register(reg), sp, offset);
2143 }
2144
2145 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2146 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2147 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2148
2149 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2150 return NativeInstruction::instruction_size;
2151 } else {
2152 return 2 * NativeInstruction::instruction_size;
2153 }
2154 }
2155
2156 ///=============================================================================
2157 #ifndef PRODUCT
2158 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2159 {
2160 st->print_cr("# MachVEPNode");
2161 if (!_verified) {
2162 st->print_cr("\t load_class");
2163 } else {
2164 st->print_cr("\t unpack_inline_arg");
2165 }
2166 }
2167 #endif
2168
2169 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2170 {
2171 C2_MacroAssembler _masm(&cbuf);
2172
2173 if (!_verified) {
2174 Label skip;
2175 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2176 __ br(Assembler::EQ, skip);
2177 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2178 __ bind(skip);
2179
2180 } else {
2181 // insert a nop at the start of the prolog so we can patch in a
2182 // branch if we need to invalidate the method later
2183 __ nop();
2184
2185 // TODO 8284443 Avoid creation of temporary frame
2186 if (ra_->C->stub_function() == nullptr) {
2187 __ verified_entry(ra_->C, 0);
2188 __ entry_barrier();
2189 int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
2190 __ remove_frame(framesize, false);
2191 }
2192 // Unpack inline type args passed as oop and then jump to
2193 // the verified entry point (skipping the unverified entry).
2194 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2195 // Emit code for verified entry and save increment for stack repair on return
2196 __ verified_entry(ra_->C, sp_inc);
2197 if (Compile::current()->output()->in_scratch_emit_size()) {
2198 Label dummy_verified_entry;
2199 __ b(dummy_verified_entry);
2200 } else {
2201 __ b(*_verified_entry);
2202 }
2203 }
2204 }
2205
2206 //=============================================================================
2207 #ifndef PRODUCT
2208 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2209 {
2210 st->print_cr("# MachUEPNode");
2211 if (UseCompressedClassPointers) {
2212 st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2213 if (CompressedKlassPointers::shift() != 0) {
2214 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2215 }
2216 } else {
2217 st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2218 }
2219 st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2220 st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2221 }
2222 #endif
2223
2224 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2225 {
2226 // This is the unverified entry point.
2227 C2_MacroAssembler _masm(&cbuf);
2228 Label skip;
2229
2230 // The UseCompressedClassPointers logic is handled inside cmp_klass
2231 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2232
2233 // TODO
2234 // can we avoid this skip and still use a reloc?
2235 __ br(Assembler::EQ, skip);
2236 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2237 __ bind(skip);
2238 }
2239
2240 // REQUIRED EMIT CODE
2241
2242 //=============================================================================
2243
2244 // Emit exception handler code.
2245 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2246 {
2247 // mov rscratch1 #exception_blob_entry_point
2248 // br rscratch1
2249 // Note that the code buffer's insts_mark is always relative to insts.
2250 // That's why we must use the macroassembler to generate a handler.
2251 C2_MacroAssembler _masm(&cbuf);
2252 address base = __ start_a_stub(size_exception_handler());
2253 if (base == nullptr) {
2254 ciEnv::current()->record_failure("CodeCache is full");
2255 return 0; // CodeBuffer::expand failed
2256 }
2257 int offset = __ offset();
2258 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2259 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2260 __ end_a_stub();
2261 return offset;
2262 }
2263
2264 // Emit deopt handler code.
2265 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
2266 {
2267 // Note that the code buffer's insts_mark is always relative to insts.
2268 // That's why we must use the macroassembler to generate a handler.
2269 C2_MacroAssembler _masm(&cbuf);
2270 address base = __ start_a_stub(size_deopt_handler());
2271 if (base == nullptr) {
2272 ciEnv::current()->record_failure("CodeCache is full");
2273 return 0; // CodeBuffer::expand failed
2274 }
2275 int offset = __ offset();
2276
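// lr now holds the address of this handler; the unpack blob is
// expected to use it as the return address identifying the
// deoptimizing frame.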
2277 __ adr(lr, __ pc());
2278 __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2279
2280 assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
2281 __ end_a_stub();
2282 return offset;
2283 }
2284
2285 // REQUIRED MATCHER CODE
2286
2287 //=============================================================================
2288
2289 bool Matcher::match_rule_supported(int opcode) {
2290 if (!has_match_rule(opcode))
2291 return false;
2394 }
2395 switch(len) {
2396 // For 16-bit/32-bit mask vector, reuse VecD.
2397 case 2:
2398 case 4:
2399 case 8: return Op_VecD;
2400 case 16: return Op_VecX;
2401 }
2402 ShouldNotReachHere();
2403 return 0;
2404 }
2405
2406 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
2407 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
2408 switch (ideal_reg) {
2409 case Op_VecA: return new vecAOper();
2410 case Op_VecD: return new vecDOper();
2411 case Op_VecX: return new vecXOper();
2412 }
2413 ShouldNotReachHere();
2414 return nullptr;
2415 }
2416
2417 bool Matcher::is_reg2reg_move(MachNode* m) {
2418 return false;
2419 }
2420
2421 bool Matcher::is_generic_vector(MachOper* opnd) {
2422 return opnd->opcode() == VREG;
2423 }
2424
2425 // Return whether or not this register is ever used as an argument.
2426 // This function is used on startup to build the trampoline stubs in
2427 // generateOptoStub. Registers not mentioned will be killed by the VM
2428 // call in the trampoline, and arguments in those registers will not be
2429 // available to the callee.
2430 bool Matcher::can_be_java_arg(int reg)
2431 {
2432 return
2433 reg == R0_num || reg == R0_H_num ||
2434 reg == R1_num || reg == R1_H_num ||
2567 result = Assembler::VS; break;
2568 case BoolTest::no_overflow:
2569 result = Assembler::VC; break;
2570 default:
2571 ShouldNotReachHere();
2572 return Assembler::Condition(-1);
2573 }
2574
2575 // Check conversion
2576 if (cond & BoolTest::unsigned_compare) {
2577 assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == result, "Invalid conversion");
2578 } else {
2579 assert(cmpOpOper(cond).ccode() == result, "Invalid conversion");
2580 }
2581
2582 return result;
2583 }
2584
2585 // Binary src (Replicate con)
2586 bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
2587 if (n == nullptr || m == nullptr) {
2588 return false;
2589 }
2590
2591 if (UseSVE == 0 || m->Opcode() != Op_Replicate) {
2592 return false;
2593 }
2594
2595 Node* imm_node = m->in(1);
2596 if (!imm_node->is_Con()) {
2597 return false;
2598 }
2599
2600 const Type* t = imm_node->bottom_type();
2601 if (!(t->isa_int() || t->isa_long())) {
2602 return false;
2603 }
2604
2605 switch (n->Opcode()) {
2606 case Op_AndV:
2607 case Op_OrV:
2608 case Op_XorV: {
2609 Assembler::SIMD_RegVariant T = Assembler::elemType_to_regVariant(Matcher::vector_element_basic_type(n));
2610 uint64_t value = t->isa_long() ? (uint64_t)imm_node->get_long() : (uint64_t)imm_node->get_int();
2611 return Assembler::operand_valid_for_sve_logical_immediate(Assembler::regVariant_to_elemBits(T), value);
2612 }
2613 case Op_AddVB:
2614 return (imm_node->get_int() <= 255 && imm_node->get_int() >= -255);
2615 case Op_AddVS:
2616 case Op_AddVI:
2617 return Assembler::operand_valid_for_sve_add_sub_immediate((int64_t)imm_node->get_int());
2618 case Op_AddVL:
2619 return Assembler::operand_valid_for_sve_add_sub_immediate(imm_node->get_long());
2620 default:
2621 return false;
2622 }
2623 }
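// For example, with SVE enabled a tree like
//   (AddVI src (Replicate (ConI 7)))
// is accepted here, letting the matcher fold the replicated constant
// into an SVE add-immediate instead of materializing it in a vector
// register.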
2624
2625 // (XorV src (Replicate m1))
2626 // (XorVMask src (MaskAll m1))
2627 bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
2628 if (n != nullptr && m != nullptr) {
2629 return (n->Opcode() == Op_XorV || n->Opcode() == Op_XorVMask) &&
2630 VectorNode::is_all_ones_vector(m);
2631 }
2632 return false;
2633 }
2634
2635 // Should the matcher clone input 'm' of node 'n'?
2636 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2637 if (is_vshift_con_pattern(n, m) ||
2638 is_vector_bitwise_not_pattern(n, m) ||
2639 is_valid_sve_arith_imm_pattern(n, m)) {
2640 mstack.push(m, Visit);
2641 return true;
2642 }
2643 return false;
2644 }
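// Pushing 'm' with Visit makes the matcher treat the constant subtree
// as part of each user rather than as a shared value, so the patterns
// recognized above can fold it directly into the instruction encoding.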
2645
2646 // Should the Matcher clone shifts on addressing modes, expecting them
2647 // to be subsumed into complex addressing expressions or compute them
2648 // into registers?
3414 } else {
3415 __ movw(dst_reg, con);
3416 }
3417 %}
3418
3419 enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
3420 C2_MacroAssembler _masm(&cbuf);
3421 Register dst_reg = as_Register($dst$$reg);
3422 uint64_t con = (uint64_t)$src$$constant;
3423 if (con == 0) {
3424 __ mov(dst_reg, zr);
3425 } else {
3426 __ mov(dst_reg, con);
3427 }
3428 %}
3429
3430 enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
3431 C2_MacroAssembler _masm(&cbuf);
3432 Register dst_reg = as_Register($dst$$reg);
3433 address con = (address)$src$$constant;
3434 if (con == nullptr || con == (address)1) {
3435 ShouldNotReachHere();
3436 } else {
3437 relocInfo::relocType rtype = $src->constant_reloc();
3438 if (rtype == relocInfo::oop_type) {
3439 __ movoop(dst_reg, (jobject)con);
3440 } else if (rtype == relocInfo::metadata_type) {
3441 __ mov_metadata(dst_reg, (Metadata*)con);
3442 } else {
3443 assert(rtype == relocInfo::none, "unexpected reloc type");
3444 if (! __ is_valid_AArch64_address(con) ||
3445 con < (address)(uintptr_t)os::vm_page_size()) {
3446 __ mov(dst_reg, con);
3447 } else {
3448 uint64_t offset;
3449 __ adrp(dst_reg, con, offset);
3450 __ add(dst_reg, dst_reg, offset);
3451 }
3452 }
3453 }
3454 %}
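// (For a plain reachable address the adrp/add pair above materializes
// the pointer in two instructions, whereas a full 64-bit mov can take
// up to four movz/movk instructions.)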
3455
3456 enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
3457 C2_MacroAssembler _masm(&cbuf);
3458 Register dst_reg = as_Register($dst$$reg);
3459 __ mov(dst_reg, zr);
3460 %}
3461
3462 enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
3463 C2_MacroAssembler _masm(&cbuf);
3464 Register dst_reg = as_Register($dst$$reg);
3465 __ mov(dst_reg, (uint64_t)1);
3466 %}
3467
3468 enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
3469 C2_MacroAssembler _masm(&cbuf);
3470 __ load_byte_map_base($dst$$Register);
3471 %}
3472
3473 enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
3474 C2_MacroAssembler _masm(&cbuf);
3475 Register dst_reg = as_Register($dst$$reg);
3476 address con = (address)$src$$constant;
3477 if (con == nullptr) {
3478 ShouldNotReachHere();
3479 } else {
3480 relocInfo::relocType rtype = $src->constant_reloc();
3481 assert(rtype == relocInfo::oop_type, "unexpected reloc type");
3482 __ set_narrow_oop(dst_reg, (jobject)con);
3483 }
3484 %}
3485
3486 enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
3487 C2_MacroAssembler _masm(&cbuf);
3488 Register dst_reg = as_Register($dst$$reg);
3489 __ mov(dst_reg, zr);
3490 %}
3491
3492 enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
3493 C2_MacroAssembler _masm(&cbuf);
3494 Register dst_reg = as_Register($dst$$reg);
3495 address con = (address)$src$$constant;
3496 if (con == nullptr) {
3497 ShouldNotReachHere();
3498 } else {
3499 relocInfo::relocType rtype = $src->constant_reloc();
3500 assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
3501 __ set_narrow_klass(dst_reg, (Klass *)con);
3502 }
3503 %}
3504
3505 // arithmetic encodings
3506
3507 enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
3508 C2_MacroAssembler _masm(&cbuf);
3509 Register dst_reg = as_Register($dst$$reg);
3510 Register src_reg = as_Register($src1$$reg);
3511 int32_t con = (int32_t)$src2$$constant;
3512 // add has primary == 0, subtract has primary == 1
3513 if ($primary) { con = -con; }
3514 if (con < 0) {
3515 __ subw(dst_reg, src_reg, -con);
3516 } else {
3659 Label *L = $lbl$$label;
3660 __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3661 %}
3662
3663 enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3664 C2_MacroAssembler _masm(&cbuf);
3665 Label *L = $lbl$$label;
3666 __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3667 %}
3668
3669 enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
3670 %{
3671 Register sub_reg = as_Register($sub$$reg);
3672 Register super_reg = as_Register($super$$reg);
3673 Register temp_reg = as_Register($temp$$reg);
3674 Register result_reg = as_Register($result$$reg);
3675
3676 Label miss;
3677 C2_MacroAssembler _masm(&cbuf);
3678 __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
3679 nullptr, &miss,
3680 /*set_cond_codes:*/ true);
3681 if ($primary) {
3682 __ mov(result_reg, zr);
3683 }
3684 __ bind(miss);
3685 %}
3686
3687 enc_class aarch64_enc_java_static_call(method meth) %{
3688 C2_MacroAssembler _masm(&cbuf);
3689
3690 address addr = (address)$meth$$method;
3691 address call;
3692 if (!_method) {
3693 // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3694 call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type));
3695 if (call == nullptr) {
3696 ciEnv::current()->record_failure("CodeCache is full");
3697 return;
3698 }
3699 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
3700 // The NOP here is purely to ensure that eliding a call to
3701 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
3702 __ nop();
3703 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
3704 } else {
3705 int method_index = resolved_method_index(cbuf);
3706 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
3707 : static_call_Relocation::spec(method_index);
3708 call = __ trampoline_call(Address(addr, rspec));
3709 if (call == nullptr) {
3710 ciEnv::current()->record_failure("CodeCache is full");
3711 return;
3712 }
3713 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
3714 // Calls of the same statically bound method can share
3715 // a stub to the interpreter.
3716 cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
3717 } else {
3718 // Emit stub for static call
3719 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call);
3720 if (stub == nullptr) {
3721 ciEnv::current()->record_failure("CodeCache is full");
3722 return;
3723 }
3724 }
3725 }
3726
3727 __ post_call_nop();
3728
3729 // Only non-uncommon_trap calls need to reinitialize ptrue; an uncommon trap never returns here.
3730 if (Compile::current()->max_vector_size() > 0 && uncommon_trap_request() == 0) {
3731 __ reinitialize_ptrue();
3732 }
3733 %}
3734
3735 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3736 C2_MacroAssembler _masm(&cbuf);
3737 int method_index = resolved_method_index(cbuf);
3738 address call = __ ic_call((address)$meth$$method, method_index);
3739 if (call == nullptr) {
3740 ciEnv::current()->record_failure("CodeCache is full");
3741 return;
3742 }
3743 __ post_call_nop();
3744 if (Compile::current()->max_vector_size() > 0) {
3745 __ reinitialize_ptrue();
3746 }
3747 %}
3748
3749 enc_class aarch64_enc_call_epilog() %{
3750 C2_MacroAssembler _masm(&cbuf);
3751 if (VerifyStackAtCalls) {
3752 // Check that stack depth is unchanged: find magic cookie on stack
3753 __ call_Unimplemented();
3754 }
3755 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
3756 // The last return value is not set by the callee but used to pass IsInit information to compiled code.
3757 // Search for the corresponding projection, get the register and emit code that initializes it.
3758 uint con = (tf()->range_cc()->cnt() - 1);
3759 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3760 ProjNode* proj = fast_out(i)->as_Proj();
3761 if (proj->_con == con) {
3762 // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
3763 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3764 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
3765 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3766 __ cmp(r0, zr);
3767 __ cset(toReg, Assembler::NE);
3768 if (reg->is_stack()) {
3769 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3770 __ str(toReg, Address(sp, st_off));
3771 }
3772 break;
3773 }
3774 }
3775 if (return_value_is_used()) {
3776 // An inline type is returned as fields in multiple registers.
3777 // R0 contains either an oop (if the inline type is buffered) or a pointer
3778 // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
3779 // if the lowest bit is set, so that C2 can use the oop after null checking.
3780 // r0 &= (r0 & 1) - 1
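// The two cases: a tagged InlineKlass pointer has the low bit set, so
// rscratch1 = 1 and 1 - 1 = 0, clearing r0; a buffered oop has the low
// bit clear, so 0 - 1 = -1 (all ones) and r0 is left unchanged.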
3781 __ andr(rscratch1, r0, 0x1);
3782 __ sub(rscratch1, rscratch1, 0x1);
3783 __ andr(r0, r0, rscratch1);
3784 }
3785 }
3786 %}
3787
3788 enc_class aarch64_enc_java_to_runtime(method meth) %{
3789 C2_MacroAssembler _masm(&cbuf);
3790
3791 // Some calls to generated routines (arraycopy code) are scheduled
3792 // by C2 as runtime calls. If so, we can call them with a direct
3793 // branch (they will be in a reachable segment); otherwise we have
3794 // to use a blr, which loads the absolute address into a register.
3795 address entry = (address)$meth$$method;
3796 CodeBlob *cb = CodeCache::find_blob(entry);
3797 if (cb) {
3798 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3799 if (call == nullptr) {
3800 ciEnv::current()->record_failure("CodeCache is full");
3801 return;
3802 }
3803 __ post_call_nop();
3804 } else {
3805 Label retaddr;
3806 __ adr(rscratch2, retaddr);
3807 __ lea(rscratch1, RuntimeAddress(entry));
3808 // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
3809 __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
3810 __ blr(rscratch1);
3811 __ bind(retaddr);
3812 __ post_call_nop();
3813 __ add(sp, sp, 2 * wordSize);
3814 }
3815 if (Compile::current()->max_vector_size() > 0) {
3816 __ reinitialize_ptrue();
3817 }
3818 %}
3819
4678 operand immL_32bits()
4679 %{
4680 predicate(n->get_long() == 0xFFFFFFFFL);
4681 match(ConL);
4682 op_cost(0);
4683 format %{ %}
4684 interface(CONST_INTER);
4685 %}
4686
4687 // Pointer operands
4688 // Pointer Immediate
4689 operand immP()
4690 %{
4691 match(ConP);
4692
4693 op_cost(0);
4694 format %{ %}
4695 interface(CONST_INTER);
4696 %}
4697
4698 // nullptr Pointer Immediate
4699 operand immP0()
4700 %{
4701 predicate(n->get_ptr() == 0);
4702 match(ConP);
4703
4704 op_cost(0);
4705 format %{ %}
4706 interface(CONST_INTER);
4707 %}
4708
4709 // Pointer Immediate One
4710 // this is used in object initialization (initial object header)
4711 operand immP_1()
4712 %{
4713 predicate(n->get_ptr() == 1);
4714 match(ConP);
4715
4716 op_cost(0);
4717 format %{ %}
4718 interface(CONST_INTER);
4810 operand immFPacked()
4811 %{
4812 predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
4813 match(ConF);
4814 op_cost(0);
4815 format %{ %}
4816 interface(CONST_INTER);
4817 %}
4818
4819 // Narrow pointer operands
4820 // Narrow Pointer Immediate
4821 operand immN()
4822 %{
4823 match(ConN);
4824
4825 op_cost(0);
4826 format %{ %}
4827 interface(CONST_INTER);
4828 %}
4829
4830 // Narrow nullptr Pointer Immediate
4831 operand immN0()
4832 %{
4833 predicate(n->get_narrowcon() == 0);
4834 match(ConN);
4835
4836 op_cost(0);
4837 format %{ %}
4838 interface(CONST_INTER);
4839 %}
4840
4841 operand immNKlass()
4842 %{
4843 match(ConNKlass);
4844
4845 op_cost(0);
4846 format %{ %}
4847 interface(CONST_INTER);
4848 %}
4849
4850 // Integer 32 bit Register Operands
7219 instruct loadConL(iRegLNoSp dst, immL src)
7220 %{
7221 match(Set dst src);
7222
7223 ins_cost(INSN_COST);
7224 format %{ "mov $dst, $src\t# long" %}
7225
7226 ins_encode( aarch64_enc_mov_imm(dst, src) );
7227
7228 ins_pipe(ialu_imm);
7229 %}
7230
7231 // Load Pointer Constant
7232
7233 instruct loadConP(iRegPNoSp dst, immP con)
7234 %{
7235 match(Set dst con);
7236
7237 ins_cost(INSN_COST * 4);
7238 format %{
7239 "mov $dst, $con\t# ptr"
7240 %}
7241
7242 ins_encode(aarch64_enc_mov_p(dst, con));
7243
7244 ins_pipe(ialu_imm);
7245 %}
7246
7247 // Load Null Pointer Constant
7248
7249 instruct loadConP0(iRegPNoSp dst, immP0 con)
7250 %{
7251 match(Set dst con);
7252
7253 ins_cost(INSN_COST);
7254 format %{ "mov $dst, $con\t# nullptr ptr" %}
7255
7256 ins_encode(aarch64_enc_mov_p0(dst, con));
7257
7258 ins_pipe(ialu_imm);
7259 %}
7260
7261 // Load Pointer Constant One
7262
7263 instruct loadConP1(iRegPNoSp dst, immP_1 con)
7264 %{
7265 match(Set dst con);
7266
7267 ins_cost(INSN_COST);
7268 format %{ "mov $dst, $con\t# ptr 1" %}
7269
7270 ins_encode(aarch64_enc_mov_p1(dst, con));
7271
7272 ins_pipe(ialu_imm);
7273 %}
7274
7275 // Load Byte Map Base Constant
7276
7277 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
7278 %{
7279 match(Set dst con);
7280
7281 ins_cost(INSN_COST);
7282 format %{ "adr $dst, $con\t# Byte Map Base" %}
7283
7284 ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
7285
7286 ins_pipe(ialu_imm);
7287 %}
7288
7289 // Load Narrow Pointer Constant
7290
7291 instruct loadConN(iRegNNoSp dst, immN con)
7292 %{
7293 match(Set dst con);
7294
7295 ins_cost(INSN_COST * 4);
7296 format %{ "mov $dst, $con\t# compressed ptr" %}
7297
7298 ins_encode(aarch64_enc_mov_n(dst, con));
7299
7300 ins_pipe(ialu_imm);
7301 %}
7302
7303 // Load Narrow Null Pointer Constant
7304
7305 instruct loadConN0(iRegNNoSp dst, immN0 con)
7306 %{
7307 match(Set dst con);
7308
7309 ins_cost(INSN_COST);
7310 format %{ "mov $dst, $con\t# compressed nullptr ptr" %}
7311
7312 ins_encode(aarch64_enc_mov_n0(dst, con));
7313
7314 ins_pipe(ialu_imm);
7315 %}
7316
7317 // Load Narrow Klass Constant
7318
7319 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
7320 %{
7321 match(Set dst con);
7322
7323 ins_cost(INSN_COST);
7324 format %{ "mov $dst, $con\t# compressed klass ptr" %}
7325
7326 ins_encode(aarch64_enc_mov_nk(dst, con));
7327
7328 ins_pipe(ialu_imm);
7329 %}
7330
8422 %}
8423
8424 // ============================================================================
8425 // Cast/Convert Instructions
8426
8427 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8428 match(Set dst (CastX2P src));
8429
8430 ins_cost(INSN_COST);
8431 format %{ "mov $dst, $src\t# long -> ptr" %}
8432
8433 ins_encode %{
8434 if ($dst$$reg != $src$$reg) {
8435 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8436 }
8437 %}
8438
8439 ins_pipe(ialu_reg);
8440 %}
8441
8442 instruct castN2X(iRegLNoSp dst, iRegN src) %{
8443 match(Set dst (CastP2X src));
8444
8445 ins_cost(INSN_COST);
8446 format %{ "mov $dst, $src\t# narrow ptr -> long" %}
8447
8448 ins_encode %{
8449 if ($dst$$reg != $src$$reg) {
8450 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8451 }
8452 %}
8453
8454 ins_pipe(ialu_reg);
8455 %}
8456
8457 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8458 match(Set dst (CastP2X src));
8459
8460 ins_cost(INSN_COST);
8461 format %{ "mov $dst, $src\t# ptr -> long" %}
8462
8463 ins_encode %{
8464 if ($dst$$reg != $src$$reg) {
8465 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8466 }
8467 %}
8468
8469 ins_pipe(ialu_reg);
8470 %}
8471
8472 // Convert oop into int for vector alignment masking
8473 instruct convP2I(iRegINoSp dst, iRegP src) %{
8474 match(Set dst (ConvL2I (CastP2X src)));
8475
8476 ins_cost(INSN_COST);
15258
15259 match(Set dst (MoveL2D src));
15260
15261 effect(DEF dst, USE src);
15262
15263 ins_cost(INSN_COST);
15264
15265 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15266
15267 ins_encode %{
15268 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15269 %}
15270
15271 ins_pipe(fp_l2d);
15272
15273 %}
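// (fmovd with a general-purpose source is a raw bit move into the FP
// register; no numeric conversion is performed, which is exactly the
// reinterpret semantics MoveL2D requires.)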
15274
15275 // ============================================================================
15276 // clearing of an array
15277
15278 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15279 %{
15280 match(Set dummy (ClearArray (Binary cnt base) zero));
15281 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15282
15283 ins_cost(4 * INSN_COST);
15284 format %{ "ClearArray $cnt, $base" %}
15285
15286 ins_encode %{
15287 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15288 if (tpc == nullptr) {
15289 ciEnv::current()->record_failure("CodeCache is full");
15290 return;
15291 }
15292 %}
15293
15294 ins_pipe(pipe_class_memory);
15295 %}
15296
15297 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15298 %{
15299 predicate(((ClearArrayNode*)n)->word_copy_only());
15300 match(Set dummy (ClearArray (Binary cnt base) val));
15301 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15302
15303 ins_cost(4 * INSN_COST);
15304 format %{ "ClearArray $cnt, $base, $val" %}
15305
15306 ins_encode %{
15307 __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15308 %}
15309
15310 ins_pipe(pipe_class_memory);
15311 %}
15312
15313 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15314 %{
15315 predicate((uint64_t)n->in(2)->get_long()
15316 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15317 && !((ClearArrayNode*)n)->word_copy_only());
15318 match(Set dummy (ClearArray cnt base));
15319 effect(TEMP temp, USE_KILL base, KILL cr);
15320
15321 ins_cost(4 * INSN_COST);
15322 format %{ "ClearArray $cnt, $base" %}
15323
15324 ins_encode %{
15325 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15326 if (tpc == nullptr) {
15327 ciEnv::current()->record_failure("CodeCache is full");
15328 return;
15329 }
15330 %}
15331
15332 ins_pipe(pipe_class_memory);
15333 %}
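// (Assuming the default BlockZeroingLowLimit of 256 bytes, this
// constant variant covers lengths below 32 words; longer or
// non-constant lengths are handled by the register variants above.)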
15334
15335 // ============================================================================
15336 // Overflow Math Instructions
15337
15338 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15339 %{
15340 match(Set cr (OverflowAddI op1 op2));
15341
15342 format %{ "cmnw $op1, $op2\t# overflow check int" %}
15343 ins_cost(INSN_COST);
15344 ins_encode %{
15345 __ cmnw($op1$$Register, $op2$$Register);
15346 %}
16597
16598 // Call Runtime Instruction
16599
16600 instruct CallLeafDirect(method meth)
16601 %{
16602 match(CallLeaf);
16603
16604 effect(USE meth);
16605
16606 ins_cost(CALL_COST);
16607
16608 format %{ "CALL, runtime leaf $meth" %}
16609
16610 ins_encode( aarch64_enc_java_to_runtime(meth) );
16611
16612 ins_pipe(pipe_class_call);
16613 %}
16614
16615 // Call Runtime Instruction
16616
16617 // entry point is null, target holds the address to call
16618 instruct CallLeafNoFPIndirect(iRegP target)
16619 %{
16620 predicate(n->as_Call()->entry_point() == nullptr);
16621
16622 match(CallLeafNoFP target);
16623
16624 ins_cost(CALL_COST);
16625
16626 format %{ "CALL, runtime leaf nofp indirect $target" %}
16627
16628 ins_encode %{
16629 __ blr($target$$Register);
16630 %}
16631
16632 ins_pipe(pipe_class_call);
16633 %}
16634
16635 instruct CallLeafNoFPDirect(method meth)
16636 %{
16637 predicate(n->as_Call()->entry_point() != nullptr);
16638
16639 match(CallLeafNoFP);
16640
16641 effect(USE meth);
16642
16643 ins_cost(CALL_COST);
16644
16645 format %{ "CALL, runtime leaf nofp $meth" %}
16646
16647 ins_encode( aarch64_enc_java_to_runtime(meth) );
16648
16649 ins_pipe(pipe_class_call);
16650 %}
16651
16652 // Tail Call; Jump from runtime stub to Java code.
16653 // Also known as an 'interprocedural jump'.
16654 // Target of jump will eventually return to caller.
16655 // TailJump below removes the return address.
16656 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16657 %{
16658 match(TailCall jump_target method_ptr);
17191 ins_pipe(pipe_class_memory);
17192 %}
17193
17194 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17195 iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17196 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17197 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17198 iRegP_R10 tmp, rFlagsReg cr)
17199 %{
17200 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
17201 match(Set result (AryEq ary1 ary2));
17202 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17203 TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17204 TEMP vtmp6, TEMP vtmp7, KILL cr);
17205
17206 format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17207 ins_encode %{
17208 address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17209 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17210 $result$$Register, $tmp$$Register, 1);
17211 if (tpc == nullptr) {
17212 ciEnv::current()->record_failure("CodeCache is full");
17213 return;
17214 }
17215 %}
17216 ins_pipe(pipe_class_memory);
17217 %}
17218
17219 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17220 iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17221 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17222 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17223 iRegP_R10 tmp, rFlagsReg cr)
17224 %{
17225 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
17226 match(Set result (AryEq ary1 ary2));
17227 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17228 TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17229 TEMP vtmp6, TEMP vtmp7, KILL cr);
17230
17231 format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17232 ins_encode %{
17233 address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17234 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17235 $result$$Register, $tmp$$Register, 2);
17236 if (tpc == nullptr) {
17237 ciEnv::current()->record_failure("CodeCache is full");
17238 return;
17239 }
17240 %}
17241 ins_pipe(pipe_class_memory);
17242 %}
17243
17244 instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
17245 %{
17246 match(Set result (CountPositives ary1 len));
17247 effect(USE_KILL ary1, USE_KILL len, KILL cr);
17248 format %{ "count positives byte[] $ary1,$len -> $result" %}
17249 ins_encode %{
17250 address tpc = __ count_positives($ary1$$Register, $len$$Register, $result$$Register);
17251 if (tpc == nullptr) {
17252 ciEnv::current()->record_failure("CodeCache is full");
17253 return;
17254 }
17255 %}
17256 ins_pipe( pipe_slow );
17257 %}
17258
17259 // fast char[] to byte[] compression
17260 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17261 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17262 vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17263 iRegI_R0 result, rFlagsReg cr)
17264 %{
17265 match(Set result (StrCompressedCopy src (Binary dst len)));
17266 effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17267 USE_KILL src, USE_KILL dst, USE len, KILL cr);
17268
17269 format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17270 ins_encode %{
17271 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
17274 $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
17275 %}
17276 ins_pipe(pipe_slow);
17277 %}
17278
17279 // fast byte[] to char[] inflation
17280 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
17281 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17282 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
17283 %{
17284 match(Set dummy (StrInflatedCopy src (Binary dst len)));
17285 effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
17286 TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
17287 USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
17288
17289 format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
17290 ins_encode %{
17291 address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
17292 $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
17293 $vtmp2$$FloatRegister, $tmp$$Register);
17294 if (tpc == nullptr) {
17295 ciEnv::current()->record_failure("CodeCache is full");
17296 return;
17297 }
17298 %}
17299 ins_pipe(pipe_class_memory);
17300 %}
17301
17302 // encode char[] to byte[] in ISO_8859_1
17303 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17304 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17305 vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17306 iRegI_R0 result, rFlagsReg cr)
17307 %{
17308 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
17309 match(Set result (EncodeISOArray src (Binary dst len)));
17310 effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
17311 KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
17312
17313 format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17314 ins_encode %{