1220 // registers conditionally reserved.
1221
1222 _ANY_REG32_mask = _ALL_REG32_mask;
1223 _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
1224
1225 _ANY_REG_mask = _ALL_REG_mask;
1226
1227 _PTR_REG_mask = _ALL_REG_mask;
1228
1229 _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
1230 _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
1231
1232 _NO_SPECIAL_REG_mask = _ALL_REG_mask;
1233 _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1234
1235 _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
1236 _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1237
1238 // r27 is not allocatable when compressed oops is on and heapbase is not
1239 // zero; compressed klass pointers don't use r27 after JDK-8234794
1240 if (UseCompressedOops && (CompressedOops::ptrs_base() != NULL)) {
1241 _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1242 _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1243 _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1244 }
1245
1246 // r29 is not allocatable when PreserveFramePointer is on
1247 if (PreserveFramePointer) {
1248 _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1249 _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1250 _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1251 }
1252 }
1253
1254 // Optimization of volatile gets and puts
1255 // -------------------------------------
1256 //
1257 // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1258 // use to implement volatile reads and writes. For a volatile read
1259 // we simply need
1260 //
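// A minimal sketch of the sequences being described (assuming the usual
// acquire/release instruction forms):
//
//   ldar<x> Rt, [Rn]    ; volatile read  : load-acquire
//   stlr<x> Rt, [Rn]    ; volatile write : store-release
//
// where <x> selects the access size; the acquire/release semantics allow
// the surrounding memory barriers to be elided.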
1564 bool release = mbvol->trailing_store();
1565 assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
1566 #ifdef ASSERT
1567 if (release) {
1568 Node* leading = mbvol->leading_membar();
1569 assert(leading->Opcode() == Op_MemBarRelease, "");
1570 assert(leading->as_MemBar()->leading_store(), "");
1571 assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
1572 }
1573 #endif
1574
1575 return release;
1576 }
1577
1578 // predicates controlling emit of str<x>/stlr<x>
1579
1580 bool needs_releasing_store(const Node *n)
1581 {
1582 // assert n->is_Store();
1583 StoreNode *st = n->as_Store();
1584 return st->trailing_membar() != NULL;
1585 }
1586
1587 // predicate controlling translation of CAS
1588 //
1589 // returns true if CAS needs to use an acquiring load otherwise false
1590
1591 bool needs_acquiring_load_exclusive(const Node *n)
1592 {
1593 assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
1594 LoadStoreNode* ldst = n->as_LoadStore();
1595 if (is_CAS(n->Opcode(), false)) {
1596 assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1597 } else {
1598 return ldst->trailing_membar() != NULL;
1599 }
1600
1601 // so we can just return true here
1602 return true;
1603 }
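// As a hedged illustration of how these predicates are meant to be used
// (the rule below is a placeholder sketch, not a rule from this file):
// a volatile store rule selects stlr over str roughly like
//
//   instruct storeL_volatile(memory mem, iRegL src)
//   %{
//     predicate(needs_releasing_store(n)); // trailing MemBarVolatile found
//     match(Set mem (StoreL mem src));
//     // ... emit stlr ...
//   %}
//
// while the plain str form carries predicate(!needs_releasing_store(n)),
// and the CAS rules are gated by needs_acquiring_load_exclusive in the
// same way.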
1604
1605 #define __ _masm.
1606
1607 // forward declarations for helper functions to convert register
1608 // indices to register objects
1609
1610 // the ad file has to provide implementations of certain methods
1611 // expected by the generic code
1612 //
1613 // REQUIRED FUNCTIONALITY
1614
1615 //=============================================================================
1616
1617 // !!!!! Special hack to get all types of calls to specify the byte offset
1618 // from the start of the call to the point where the return address
1627
1628 int MachCallDynamicJavaNode::ret_addr_offset()
1629 {
1630 return 16; // movz, movk, movk, bl
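// (16 bytes = 4 instructions of NativeInstruction::instruction_size = 4
//  bytes each: movz + movk + movk materializing the inline cache data,
//  followed by the bl.)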
1631 }
1632
1633 int MachCallRuntimeNode::ret_addr_offset() {
1634 // for generated stubs the call will be
1635 // bl(addr)
1636 // or with far branches
1637 // bl(trampoline_stub)
1638 // for real runtime callouts it will be six instructions
1639 // see aarch64_enc_java_to_runtime
1640 // adr(rscratch2, retaddr)
1641 // lea(rscratch1, RuntimeAddress(addr))
1642 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1643 // blr(rscratch1)
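// A rough accounting of those counts, assuming lea of an address outside
// the code cache expands to a movz/movk/movk immediate sequence:
// adr (1) + lea (3) + stp (1) + blr (1) = 6 instructions, versus a single
// bl (possibly via a trampoline stub) when the target is found in the
// code cache.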
1644 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1645 if (cb) {
1646 return 1 * NativeInstruction::instruction_size;
1647 } else {
1648 return 6 * NativeInstruction::instruction_size;
1649 }
1650 }
1651
1652 //=============================================================================
1653
1654 #ifndef PRODUCT
1655 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1656 st->print("BREAKPOINT");
1657 }
1658 #endif
1659
1660 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1661 C2_MacroAssembler _masm(&cbuf);
1662 __ brk(0);
1663 }
1664
1665 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1666 return MachNode::size(ra_);
1717
1718 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1719
1720 if (C->output()->need_stack_bang(framesize))
1721 st->print("# stack bang size=%d\n\t", framesize);
1722
1723 if (VM_Version::use_rop_protection()) {
1724 st->print("ldr zr, [lr]\n\t");
1725 st->print("pacia lr, rfp\n\t");
1726 }
1727 if (framesize < ((1 << 9) + 2 * wordSize)) {
1728 st->print("sub sp, sp, #%d\n\t", framesize);
1729 st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
1730 if (PreserveFramePointer) st->print("\n\tadd rfp, sp, #%d", framesize - 2 * wordSize);
1731 } else {
1732 st->print("stp lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
1733 if (PreserveFramePointer) st->print("mov rfp, sp\n\t");
1734 st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
1735 st->print("sub sp, sp, rscratch1");
1736 }
1737 if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
1738 st->print("\n\t");
1739 st->print("ldr rscratch1, [guard]\n\t");
1740 st->print("dmb ishld\n\t");
1741 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1742 st->print("cmp rscratch1, rscratch2\n\t");
1743 st->print("b.eq skip");
1744 st->print("\n\t");
1745 st->print("blr #nmethod_entry_barrier_stub\n\t");
1746 st->print("b skip\n\t");
1747 st->print("guard: int\n\t");
1748 st->print("\n\t");
1749 st->print("skip:\n\t");
1750 }
1751 }
1752 #endif
1753
1754 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1755 Compile* C = ra_->C;
1756 C2_MacroAssembler _masm(&cbuf);
1757
1758 // n.b. frame size includes space for return pc and rfp
1759 const int framesize = C->output()->frame_size_in_bytes();
1760
1761 // insert a nop at the start of the prolog so we can patch in a
1762 // branch if we need to invalidate the method later
1763 __ nop();
1764
1765 if (C->clinit_barrier_on_entry()) {
1766 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1767
1768 Label L_skip_barrier;
1769
1770 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1771 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1772 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1773 __ bind(L_skip_barrier);
1774 }
1775
1776 if (C->max_vector_size() > 0) {
1777 __ reinitialize_ptrue();
1778 }
1779
1780 int bangsize = C->output()->bang_size_in_bytes();
1781 if (C->output()->need_stack_bang(bangsize))
1782 __ generate_stack_overflow_check(bangsize);
1783
1784 __ build_frame(framesize);
1785
1786 if (C->stub_function() == NULL) {
1787 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1788 if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
1789 // Dummy labels for just measuring the code size
1790 Label dummy_slow_path;
1791 Label dummy_continuation;
1792 Label dummy_guard;
1793 Label* slow_path = &dummy_slow_path;
1794 Label* continuation = &dummy_continuation;
1795 Label* guard = &dummy_guard;
1796 if (!Compile::current()->output()->in_scratch_emit_size()) {
1797 // Use real labels from actual stub when not emitting code for the purpose of measuring its size
1798 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
1799 Compile::current()->output()->add_stub(stub);
1800 slow_path = &stub->entry();
1801 continuation = &stub->continuation();
1802 guard = &stub->guard();
1803 }
1804 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
1805 bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
1806 }
1807 }
1808
1809 if (VerifyStackAtCalls) {
1810 Unimplemented();
1811 }
1812
1813 C->output()->set_frame_complete(cbuf.insts_size());
1814
1815 if (C->has_mach_constant_base_node()) {
1816 // NOTE: We set the table base offset here because users of the constant
1817 // table might be emitted before MachConstantBaseNode.
1818 ConstantTable& constant_table = C->output()->constant_table();
1819 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1820 }
1821 }
1822
1823 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1824 {
1825 return MachNode::size(ra_); // too many variables; just compute it
1826 // the hard way
1827 }
1828
1829 int MachPrologNode::reloc() const
1830 {
1831 return 0;
1832 }
1833
1834 //=============================================================================
1835
1836 #ifndef PRODUCT
1837 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1838 Compile* C = ra_->C;
1839 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1840
1841 st->print("# pop frame %d\n\t",framesize);
1842
1843 if (framesize == 0) {
1844 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1845 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1846 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1847 st->print("add sp, sp, #%d\n\t", framesize);
1848 } else {
1852 }
1853 if (VM_Version::use_rop_protection()) {
1854 st->print("autia lr, rfp\n\t");
1855 st->print("ldr zr, [lr]\n\t");
1856 }
1857
1858 if (do_polling() && C->is_method_compilation()) {
1859 st->print("# test polling word\n\t");
1860 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1861 st->print("cmp sp, rscratch1\n\t");
1862 st->print("bhi #slow_path");
1863 }
1864 }
1865 #endif
1866
1867 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1868 Compile* C = ra_->C;
1869 C2_MacroAssembler _masm(&cbuf);
1870 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1871
1872 __ remove_frame(framesize);
1873
1874 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1875 __ reserved_stack_check();
1876 }
1877
1878 if (do_polling() && C->is_method_compilation()) {
1879 Label dummy_label;
1880 Label* code_stub = &dummy_label;
1881 if (!C->output()->in_scratch_emit_size()) {
1882 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1883 C->output()->add_stub(stub);
1884 code_stub = &stub->entry();
1885 }
1886 __ relocate(relocInfo::poll_return_type);
1887 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1888 }
1889 }
1890
1891 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1892 // Variable size. Determine dynamically.
1893 return MachNode::size(ra_);
1894 }
1895
1896 int MachEpilogNode::reloc() const {
1897 // Return number of relocatable values contained in this instruction.
1898 return 1; // 1 for polling page.
1899 }
1900
1901 const Pipeline * MachEpilogNode::pipeline() const {
1902 return MachNode::pipeline_class();
1903 }
1904
1905 //=============================================================================
1906
1907 // Figure out which register class each register belongs in: rc_int,
1908 // rc_float, rc_predicate or rc_stack.
1909 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
1910
1911 static enum RC rc_class(OptoReg::Name reg) {
1912
1913 if (reg == OptoReg::Bad) {
1914 return rc_bad;
1915 }
2136 }
2137 st->print("\t# vector spill size = %d", vsize);
2138 } else if (ideal_reg() == Op_RegVectMask) {
2139 assert(Matcher::supports_scalable_vector(), "bad register type for spill");
2140 int vsize = Matcher::scalable_predicate_reg_slots() * 32;
2141 st->print("\t# predicate spill size = %d", vsize);
2142 } else {
2143 st->print("\t# spill size = %d", is64 ? 64 : 32);
2144 }
2145 }
2146
2147 return 0;
2148
2149 }
2150
2151 #ifndef PRODUCT
2152 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2153 if (!ra_)
2154 st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
2155 else
2156 implementation(NULL, ra_, false, st);
2157 }
2158 #endif
2159
2160 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2161 implementation(&cbuf, ra_, false, NULL);
2162 }
2163
2164 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2165 return MachNode::size(ra_);
2166 }
2167
2168 //=============================================================================
2169
2170 #ifndef PRODUCT
2171 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2172 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2173 int reg = ra_->get_reg_first(this);
2174 st->print("add %s, rsp, #%d]\t# box lock",
2175 Matcher::regName[reg], offset);
2176 }
2177 #endif
2178
2179 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2180 C2_MacroAssembler _masm(&cbuf);
2181
2182 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2183 int reg = ra_->get_encode(this);
2184
2185 // This add will handle any 24-bit signed offset. 24 bits allows an
2186 // 8 megabyte stack frame.
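// (A signed 24-bit offset spans +/- 2^23 bytes, hence the 8 megabyte limit.)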
2187 __ add(as_Register(reg), sp, offset);
2188 }
2189
2190 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2191 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2192 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2193
2194 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2195 return NativeInstruction::instruction_size;
2196 } else {
2197 return 2 * NativeInstruction::instruction_size;
2198 }
2199 }
2200
2201 //=============================================================================
2202
2203 #ifndef PRODUCT
2204 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2205 {
2206 st->print_cr("# MachUEPNode");
2207 if (UseCompressedClassPointers) {
2208 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2209 if (CompressedKlassPointers::shift() != 0) {
2210 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2211 }
2212 } else {
2213 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2214 }
2215 st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2216 st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
2217 }
2218 #endif
2219
2220 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2221 {
2222 // This is the unverified entry point.
2223 C2_MacroAssembler _masm(&cbuf);
2224
2225 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2226 Label skip;
2227 // TODO
2228 // can we avoid this skip and still use a reloc?
2229 __ br(Assembler::EQ, skip);
2230 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2231 __ bind(skip);
2232 }
2233
2234 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2235 {
2236 return MachNode::size(ra_);
2237 }
2238
2239 // REQUIRED EMIT CODE
2240
2241 //=============================================================================
2242
2243 // Emit exception handler code.
2244 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2245 {
2246 // mov rscratch1 #exception_blob_entry_point
2247 // br rscratch1
2248 // Note that the code buffer's insts_mark is always relative to insts.
2249 // That's why we must use the macroassembler to generate a handler.
2250 C2_MacroAssembler _masm(&cbuf);
2251 address base = __ start_a_stub(size_exception_handler());
2252 if (base == NULL) {
2253 ciEnv::current()->record_failure("CodeCache is full");
2254 return 0; // CodeBuffer::expand failed
2255 }
2256 int offset = __ offset();
2257 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2258 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2259 __ end_a_stub();
2260 return offset;
2261 }
2262
2263 // Emit deopt handler code.
2264 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
2265 {
2266 // Note that the code buffer's insts_mark is always relative to insts.
2267 // That's why we must use the macroassembler to generate a handler.
2268 C2_MacroAssembler _masm(&cbuf);
2269 address base = __ start_a_stub(size_deopt_handler());
2270 if (base == NULL) {
2271 ciEnv::current()->record_failure("CodeCache is full");
2272 return 0; // CodeBuffer::expand failed
2273 }
2274 int offset = __ offset();
2275
2276 __ adr(lr, __ pc());
2277 __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2278
2279 assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
2280 __ end_a_stub();
2281 return offset;
2282 }
2283
2284 // REQUIRED MATCHER CODE
2285
2286 //=============================================================================
2287
2288 bool Matcher::match_rule_supported(int opcode) {
2289 if (!has_match_rule(opcode))
2290 return false;
2386 }
2387 switch(len) {
2388 // For 16-bit/32-bit mask vector, reuse VecD.
2389 case 2:
2390 case 4:
2391 case 8: return Op_VecD;
2392 case 16: return Op_VecX;
2393 }
2394 ShouldNotReachHere();
2395 return 0;
2396 }
2397
2398 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
2399 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
2400 switch (ideal_reg) {
2401 case Op_VecA: return new vecAOper();
2402 case Op_VecD: return new vecDOper();
2403 case Op_VecX: return new vecXOper();
2404 }
2405 ShouldNotReachHere();
2406 return NULL;
2407 }
2408
2409 bool Matcher::is_reg2reg_move(MachNode* m) {
2410 return false;
2411 }
2412
2413 bool Matcher::is_generic_vector(MachOper* opnd) {
2414 return opnd->opcode() == VREG;
2415 }
2416
2417 // Return whether or not this register is ever used as an argument.
2418 // This function is used on startup to build the trampoline stubs in
2419 // generateOptoStub. Registers not mentioned will be killed by the VM
2420 // call in the trampoline, and arguments in those registers will not be
2421 // available to the callee.
2422 bool Matcher::can_be_java_arg(int reg)
2423 {
2424 return
2425 reg == R0_num || reg == R0_H_num ||
2426 reg == R1_num || reg == R1_H_num ||
2559 result = Assembler::VS; break;
2560 case BoolTest::no_overflow:
2561 result = Assembler::VC; break;
2562 default:
2563 ShouldNotReachHere();
2564 return Assembler::Condition(-1);
2565 }
2566
2567 // Check conversion
2568 if (cond & BoolTest::unsigned_compare) {
2569 assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == result, "Invalid conversion");
2570 } else {
2571 assert(cmpOpOper(cond).ccode() == result, "Invalid conversion");
2572 }
2573
2574 return result;
2575 }
2576
2577 // Binary src (Replicate con)
2578 bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
2579 if (n == NULL || m == NULL) {
2580 return false;
2581 }
2582
2583 if (UseSVE == 0 || !VectorNode::is_invariant_vector(m)) {
2584 return false;
2585 }
2586
2587 Node* imm_node = m->in(1);
2588 if (!imm_node->is_Con()) {
2589 return false;
2590 }
2591
2592 const Type* t = imm_node->bottom_type();
2593 if (!(t->isa_int() || t->isa_long())) {
2594 return false;
2595 }
2596
2597 switch (n->Opcode()) {
2598 case Op_AndV:
2599 case Op_OrV:
2600 case Op_XorV: {
2601 Assembler::SIMD_RegVariant T = Assembler::elemType_to_regVariant(Matcher::vector_element_basic_type(n));
2602 uint64_t value = t->isa_long() ? (uint64_t)imm_node->get_long() : (uint64_t)imm_node->get_int();
2603 return Assembler::operand_valid_for_sve_logical_immediate(Assembler::regVariant_to_elemBits(T), value);
2604 }
2605 case Op_AddVB:
2606 return (imm_node->get_int() <= 255 && imm_node->get_int() >= -255);
2607 case Op_AddVS:
2608 case Op_AddVI:
2609 return Assembler::operand_valid_for_sve_add_sub_immediate((int64_t)imm_node->get_int());
2610 case Op_AddVL:
2611 return Assembler::operand_valid_for_sve_add_sub_immediate(imm_node->get_long());
2612 default:
2613 return false;
2614 }
2615 }
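// For example, (AddVB src (ReplicateB con)) qualifies when con lies in
// [-255, 255], and a replicated constant used with AndV/OrV/XorV qualifies
// when it is a valid SVE logical (bitmask) immediate for the element size.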
2616
2617 // (XorV src (Replicate m1))
2618 // (XorVMask src (MaskAll m1))
2619 bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
2620 if (n != NULL && m != NULL) {
2621 return (n->Opcode() == Op_XorV || n->Opcode() == Op_XorVMask) &&
2622 VectorNode::is_all_ones_vector(m);
2623 }
2624 return false;
2625 }
2626
2627 // Should the matcher clone input 'm' of node 'n'?
2628 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2629 if (is_vshift_con_pattern(n, m) ||
2630 is_vector_bitwise_not_pattern(n, m) ||
2631 is_valid_sve_arith_imm_pattern(n, m)) {
2632 mstack.push(m, Visit);
2633 return true;
2634 }
2635 return false;
2636 }
2637
2638 // Should the Matcher clone shifts on addressing modes, expecting them
2639 // to be subsumed into complex addressing expressions or compute them
2640 // into registers?
3406 } else {
3407 __ movw(dst_reg, con);
3408 }
3409 %}
3410
3411 enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
3412 C2_MacroAssembler _masm(&cbuf);
3413 Register dst_reg = as_Register($dst$$reg);
3414 uint64_t con = (uint64_t)$src$$constant;
3415 if (con == 0) {
3416 __ mov(dst_reg, zr);
3417 } else {
3418 __ mov(dst_reg, con);
3419 }
3420 %}
3421
3422 enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
3423 C2_MacroAssembler _masm(&cbuf);
3424 Register dst_reg = as_Register($dst$$reg);
3425 address con = (address)$src$$constant;
3426 if (con == NULL || con == (address)1) {
3427 ShouldNotReachHere();
3428 } else {
3429 relocInfo::relocType rtype = $src->constant_reloc();
3430 if (rtype == relocInfo::oop_type) {
3431 __ movoop(dst_reg, (jobject)con);
3432 } else if (rtype == relocInfo::metadata_type) {
3433 __ mov_metadata(dst_reg, (Metadata*)con);
3434 } else {
3435 assert(rtype == relocInfo::none, "unexpected reloc type");
3436 if (! __ is_valid_AArch64_address(con) ||
3437 con < (address)(uintptr_t)os::vm_page_size()) {
3438 __ mov(dst_reg, con);
3439 } else {
3440 uint64_t offset;
3441 __ adrp(dst_reg, con, offset);
3442 __ add(dst_reg, dst_reg, offset);
3443 }
3444 }
3445 }
3446 %}
3449 C2_MacroAssembler _masm(&cbuf);
3450 Register dst_reg = as_Register($dst$$reg);
3451 __ mov(dst_reg, zr);
3452 %}
3453
3454 enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
3455 C2_MacroAssembler _masm(&cbuf);
3456 Register dst_reg = as_Register($dst$$reg);
3457 __ mov(dst_reg, (uint64_t)1);
3458 %}
3459
3460 enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
3461 C2_MacroAssembler _masm(&cbuf);
3462 __ load_byte_map_base($dst$$Register);
3463 %}
3464
3465 enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
3466 C2_MacroAssembler _masm(&cbuf);
3467 Register dst_reg = as_Register($dst$$reg);
3468 address con = (address)$src$$constant;
3469 if (con == NULL) {
3470 ShouldNotReachHere();
3471 } else {
3472 relocInfo::relocType rtype = $src->constant_reloc();
3473 assert(rtype == relocInfo::oop_type, "unexpected reloc type");
3474 __ set_narrow_oop(dst_reg, (jobject)con);
3475 }
3476 %}
3477
3478 enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
3479 C2_MacroAssembler _masm(&cbuf);
3480 Register dst_reg = as_Register($dst$$reg);
3481 __ mov(dst_reg, zr);
3482 %}
3483
3484 enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
3485 C2_MacroAssembler _masm(&cbuf);
3486 Register dst_reg = as_Register($dst$$reg);
3487 address con = (address)$src$$constant;
3488 if (con == NULL) {
3489 ShouldNotReachHere();
3490 } else {
3491 relocInfo::relocType rtype = $src->constant_reloc();
3492 assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
3493 __ set_narrow_klass(dst_reg, (Klass *)con);
3494 }
3495 %}
3496
3497 // arithmetic encodings
3498
3499 enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
3500 C2_MacroAssembler _masm(&cbuf);
3501 Register dst_reg = as_Register($dst$$reg);
3502 Register src_reg = as_Register($src1$$reg);
3503 int32_t con = (int32_t)$src2$$constant;
3504 // add has primary == 0, subtract has primary == 1
3505 if ($primary) { con = -con; }
3506 if (con < 0) {
3507 __ subw(dst_reg, src_reg, -con);
3508 } else {
3651 Label *L = $lbl$$label;
3652 __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3653 %}
3654
3655 enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3656 C2_MacroAssembler _masm(&cbuf);
3657 Label *L = $lbl$$label;
3658 __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3659 %}
3660
3661 enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
3662 %{
3663 Register sub_reg = as_Register($sub$$reg);
3664 Register super_reg = as_Register($super$$reg);
3665 Register temp_reg = as_Register($temp$$reg);
3666 Register result_reg = as_Register($result$$reg);
3667
3668 Label miss;
3669 C2_MacroAssembler _masm(&cbuf);
3670 __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
3671 NULL, &miss,
3672 /*set_cond_codes:*/ true);
3673 if ($primary) {
3674 __ mov(result_reg, zr);
3675 }
3676 __ bind(miss);
3677 %}
3678
3679 enc_class aarch64_enc_java_static_call(method meth) %{
3680 C2_MacroAssembler _masm(&cbuf);
3681
3682 address addr = (address)$meth$$method;
3683 address call;
3684 if (!_method) {
3685 // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3686 call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type));
3687 if (call == NULL) {
3688 ciEnv::current()->record_failure("CodeCache is full");
3689 return;
3690 }
3691 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
3692 // The NOP here is purely to ensure that eliding a call to
3693 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
3694 __ nop();
3695 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
3696 } else {
3697 int method_index = resolved_method_index(cbuf);
3698 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
3699 : static_call_Relocation::spec(method_index);
3700 call = __ trampoline_call(Address(addr, rspec));
3701 if (call == NULL) {
3702 ciEnv::current()->record_failure("CodeCache is full");
3703 return;
3704 }
3705 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
3706 // Calls of the same statically bound method can share
3707 // a stub to the interpreter.
3708 cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
3709 } else {
3710 // Emit stub for static call
3711 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call);
3712 if (stub == NULL) {
3713 ciEnv::current()->record_failure("CodeCache is full");
3714 return;
3715 }
3716 }
3717 }
3718
3719 __ post_call_nop();
3720
3721 // Only non uncommon_trap calls need to reinitialize ptrue.
3722 if (Compile::current()->max_vector_size() > 0 && uncommon_trap_request() == 0) {
3723 __ reinitialize_ptrue();
3724 }
3725 %}
3726
3727 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3728 C2_MacroAssembler _masm(&cbuf);
3729 int method_index = resolved_method_index(cbuf);
3730 address call = __ ic_call((address)$meth$$method, method_index);
3731 if (call == NULL) {
3732 ciEnv::current()->record_failure("CodeCache is full");
3733 return;
3734 }
3735 __ post_call_nop();
3736 if (Compile::current()->max_vector_size() > 0) {
3737 __ reinitialize_ptrue();
3738 }
3739 %}
3740
3741 enc_class aarch64_enc_call_epilog() %{
3742 C2_MacroAssembler _masm(&cbuf);
3743 if (VerifyStackAtCalls) {
3744 // Check that stack depth is unchanged: find majik cookie on stack
3745 __ call_Unimplemented();
3746 }
3747 %}
3748
3749 enc_class aarch64_enc_java_to_runtime(method meth) %{
3750 C2_MacroAssembler _masm(&cbuf);
3751
3752 // Some calls to generated routines (arraycopy code) are scheduled
3753 // by C2 as runtime calls. If so, we can call them using a br (they
3754 // will be in a reachable segment); otherwise we have to use a blr
3755 // which loads the absolute address into a register.
3756 address entry = (address)$meth$$method;
3757 CodeBlob *cb = CodeCache::find_blob(entry);
3758 if (cb) {
3759 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3760 if (call == NULL) {
3761 ciEnv::current()->record_failure("CodeCache is full");
3762 return;
3763 }
3764 __ post_call_nop();
3765 } else {
3766 Label retaddr;
3767 __ adr(rscratch2, retaddr);
3768 __ lea(rscratch1, RuntimeAddress(entry));
3769 // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
3770 __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
3771 __ blr(rscratch1);
3772 __ bind(retaddr);
3773 __ post_call_nop();
3774 __ add(sp, sp, 2 * wordSize);
3775 }
3776 if (Compile::current()->max_vector_size() > 0) {
3777 __ reinitialize_ptrue();
3778 }
3779 %}
3780
3824 // Load markWord from object into displaced_header.
3825 __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3826
3827 if (DiagnoseSyncOnValueBasedClasses != 0) {
3828 __ load_klass(tmp, oop);
3829 __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
3830 __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
3831 __ br(Assembler::NE, cont);
3832 }
3833
3834 // Check for existing monitor
3835 __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
3836
3837 if (LockingMode == LM_MONITOR) {
3838 __ tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
3839 __ b(cont);
3840 } else if (LockingMode == LM_LEGACY) {
3841 // Set tmp to be (markWord of object | UNLOCK_VALUE).
3842 __ orr(tmp, disp_hdr, markWord::unlocked_value);
3843
3844 // Initialize the box. (Must happen before we update the object mark!)
3845 __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3846
3847 // Compare object markWord with an unlocked value (tmp) and if
3848 // equal exchange the stack address of our box with object markWord.
3849 // On failure disp_hdr contains the possibly locked markWord.
3850 __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
3851 /*release*/ true, /*weak*/ false, disp_hdr);
3852 __ br(Assembler::EQ, cont);
3853
3854 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3855
3856 // If the compare-and-exchange succeeded, then we found an unlocked
3857 // object, have now locked it, and will continue at label cont.
3858
3859 // Check if the owner is self by comparing the value in the
3860 // markWord of object (disp_hdr) with the stack pointer.
3861 __ mov(rscratch1, sp);
3862 __ sub(disp_hdr, disp_hdr, rscratch1);
3863 __ mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
3864 // If the condition is true we continue at cont and hence we can store 0 as the
3865 // displaced header in the box, which indicates that it is a recursive lock.
3866 __ ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result
3867 __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3868 __ b(cont);
3869 } else {
3870 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
3871 __ fast_lock(oop, disp_hdr, tmp, rscratch1, no_count);
3872 __ b(count);
3873 }
3874
3875 // Handle existing monitor.
3876 __ bind(object_has_monitor);
3877
3878 // The object's monitor m is unlocked iff m->owner == NULL,
3879 // otherwise m->owner may contain a thread or a stack address.
3880 //
3881 // Try to CAS m->owner from NULL to current thread.
3882 __ add(tmp, disp_hdr, (in_bytes(ObjectMonitor::owner_offset())-markWord::monitor_value));
3883 __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
3884 /*release*/ true, /*weak*/ false, rscratch1); // Sets flags for result
3885
3886 if (LockingMode != LM_LIGHTWEIGHT) {
3887 // Store a non-null value into the box to avoid looking like a re-entrant
3888 // lock. The fast-path monitor unlock code checks for
3889 // markWord::monitor_value so use markWord::unused_mark which has the
3890 // relevant bit set, and also matches ObjectSynchronizer::enter.
3891 __ mov(tmp, (address)markWord::unused_mark().value());
3892 __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3893 }
3894 __ br(Assembler::EQ, cont); // CAS success means locking succeeded
3895
3896 __ cmp(rscratch1, rthread);
3897 __ br(Assembler::NE, cont); // Check for recursive locking
3898
3899 // Recursive lock case
3900 __ increment(Address(disp_hdr, in_bytes(ObjectMonitor::recursions_offset()) - markWord::monitor_value), 1);
3901 // flag == EQ still from the cmp above, checking if this is a reentrant lock
4835 operand immL_32bits()
4836 %{
4837 predicate(n->get_long() == 0xFFFFFFFFL);
4838 match(ConL);
4839 op_cost(0);
4840 format %{ %}
4841 interface(CONST_INTER);
4842 %}
4843
4844 // Pointer operands
4845 // Pointer Immediate
4846 operand immP()
4847 %{
4848 match(ConP);
4849
4850 op_cost(0);
4851 format %{ %}
4852 interface(CONST_INTER);
4853 %}
4854
4855 // NULL Pointer Immediate
4856 operand immP0()
4857 %{
4858 predicate(n->get_ptr() == 0);
4859 match(ConP);
4860
4861 op_cost(0);
4862 format %{ %}
4863 interface(CONST_INTER);
4864 %}
4865
4866 // Pointer Immediate One
4867 // this is used in object initialization (initial object header)
4868 operand immP_1()
4869 %{
4870 predicate(n->get_ptr() == 1);
4871 match(ConP);
4872
4873 op_cost(0);
4874 format %{ %}
4875 interface(CONST_INTER);
4967 operand immFPacked()
4968 %{
4969 predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
4970 match(ConF);
4971 op_cost(0);
4972 format %{ %}
4973 interface(CONST_INTER);
4974 %}
4975
4976 // Narrow pointer operands
4977 // Narrow Pointer Immediate
4978 operand immN()
4979 %{
4980 match(ConN);
4981
4982 op_cost(0);
4983 format %{ %}
4984 interface(CONST_INTER);
4985 %}
4986
4987 // Narrow NULL Pointer Immediate
4988 operand immN0()
4989 %{
4990 predicate(n->get_narrowcon() == 0);
4991 match(ConN);
4992
4993 op_cost(0);
4994 format %{ %}
4995 interface(CONST_INTER);
4996 %}
4997
4998 operand immNKlass()
4999 %{
5000 match(ConNKlass);
5001
5002 op_cost(0);
5003 format %{ %}
5004 interface(CONST_INTER);
5005 %}
5006
5007 // Integer 32 bit Register Operands
7376 instruct loadConL(iRegLNoSp dst, immL src)
7377 %{
7378 match(Set dst src);
7379
7380 ins_cost(INSN_COST);
7381 format %{ "mov $dst, $src\t# long" %}
7382
7383 ins_encode( aarch64_enc_mov_imm(dst, src) );
7384
7385 ins_pipe(ialu_imm);
7386 %}
7387
7388 // Load Pointer Constant
7389
7390 instruct loadConP(iRegPNoSp dst, immP con)
7391 %{
7392 match(Set dst con);
7393
7394 ins_cost(INSN_COST * 4);
7395 format %{
7396 "mov $dst, $con\t# ptr\n\t"
7397 %}
7398
7399 ins_encode(aarch64_enc_mov_p(dst, con));
7400
7401 ins_pipe(ialu_imm);
7402 %}
7403
7404 // Load Null Pointer Constant
7405
7406 instruct loadConP0(iRegPNoSp dst, immP0 con)
7407 %{
7408 match(Set dst con);
7409
7410 ins_cost(INSN_COST);
7411 format %{ "mov $dst, $con\t# NULL ptr" %}
7412
7413 ins_encode(aarch64_enc_mov_p0(dst, con));
7414
7415 ins_pipe(ialu_imm);
7416 %}
7417
7418 // Load Pointer Constant One
7419
7420 instruct loadConP1(iRegPNoSp dst, immP_1 con)
7421 %{
7422 match(Set dst con);
7423
7424 ins_cost(INSN_COST);
7425 format %{ "mov $dst, $con\t# ptr 1" %}
7426
7427 ins_encode(aarch64_enc_mov_p1(dst, con));
7428
7429 ins_pipe(ialu_imm);
7430 %}
7431
7432 // Load Byte Map Base Constant
7433
7434 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
7435 %{
7436 match(Set dst con);
7437
7438 ins_cost(INSN_COST);
7439 format %{ "adr $dst, $con\t# Byte Map Base" %}
7440
7441 ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
7442
7443 ins_pipe(ialu_imm);
7444 %}
7445
7447
7448 instruct loadConN(iRegNNoSp dst, immN con)
7449 %{
7450 match(Set dst con);
7451
7452 ins_cost(INSN_COST * 4);
7453 format %{ "mov $dst, $con\t# compressed ptr" %}
7454
7455 ins_encode(aarch64_enc_mov_n(dst, con));
7456
7457 ins_pipe(ialu_imm);
7458 %}
7459
7460 // Load Narrow Null Pointer Constant
7461
7462 instruct loadConN0(iRegNNoSp dst, immN0 con)
7463 %{
7464 match(Set dst con);
7465
7466 ins_cost(INSN_COST);
7467 format %{ "mov $dst, $con\t# compressed NULL ptr" %}
7468
7469 ins_encode(aarch64_enc_mov_n0(dst, con));
7470
7471 ins_pipe(ialu_imm);
7472 %}
7473
7474 // Load Narrow Klass Constant
7475
7476 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
7477 %{
7478 match(Set dst con);
7479
7480 ins_cost(INSN_COST);
7481 format %{ "mov $dst, $con\t# compressed klass ptr" %}
7482
7483 ins_encode(aarch64_enc_mov_nk(dst, con));
7484
7485 ins_pipe(ialu_imm);
7486 %}
7487
8579 %}
8580
8581 // ============================================================================
8582 // Cast/Convert Instructions
8583
8584 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8585 match(Set dst (CastX2P src));
8586
8587 ins_cost(INSN_COST);
8588 format %{ "mov $dst, $src\t# long -> ptr" %}
8589
8590 ins_encode %{
8591 if ($dst$$reg != $src$$reg) {
8592 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8593 }
8594 %}
8595
8596 ins_pipe(ialu_reg);
8597 %}
8598
8599 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8600 match(Set dst (CastP2X src));
8601
8602 ins_cost(INSN_COST);
8603 format %{ "mov $dst, $src\t# ptr -> long" %}
8604
8605 ins_encode %{
8606 if ($dst$$reg != $src$$reg) {
8607 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8608 }
8609 %}
8610
8611 ins_pipe(ialu_reg);
8612 %}
8613
8614 // Convert oop into int for vectors alignment masking
8615 instruct convP2I(iRegINoSp dst, iRegP src) %{
8616 match(Set dst (ConvL2I (CastP2X src)));
8617
8618 ins_cost(INSN_COST);
15400
15401 match(Set dst (MoveL2D src));
15402
15403 effect(DEF dst, USE src);
15404
15405 ins_cost(INSN_COST);
15406
15407 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15408
15409 ins_encode %{
15410 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15411 %}
15412
15413 ins_pipe(fp_l2d);
15414
15415 %}
15416
15417 // ============================================================================
15418 // clearing of an array
15419
15420 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
15421 %{
15422 match(Set dummy (ClearArray cnt base));
15423 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15424
15425 ins_cost(4 * INSN_COST);
15426 format %{ "ClearArray $cnt, $base" %}
15427
15428 ins_encode %{
15429 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15430 if (tpc == NULL) {
15431 ciEnv::current()->record_failure("CodeCache is full");
15432 return;
15433 }
15434 %}
15435
15436 ins_pipe(pipe_class_memory);
15437 %}
15438
15439 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15440 %{
15441 predicate((uint64_t)n->in(2)->get_long()
15442 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
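// i.e. use this form only when the constant word count is below
// BlockZeroingLowLimit (assumed here to be expressed in bytes, hence the
// shift down to words)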
15443 match(Set dummy (ClearArray cnt base));
15444 effect(TEMP temp, USE_KILL base, KILL cr);
15445
15446 ins_cost(4 * INSN_COST);
15447 format %{ "ClearArray $cnt, $base" %}
15448
15449 ins_encode %{
15450 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15451 if (tpc == NULL) {
15452 ciEnv::current()->record_failure("CodeCache is full");
15453 return;
15454 }
15455 %}
15456
15457 ins_pipe(pipe_class_memory);
15458 %}
15459
15460 // ============================================================================
15461 // Overflow Math Instructions
15462
15463 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15464 %{
15465 match(Set cr (OverflowAddI op1 op2));
15466
15467 format %{ "cmnw $op1, $op2\t# overflow check int" %}
15468 ins_cost(INSN_COST);
15469 ins_encode %{
15470 __ cmnw($op1$$Register, $op2$$Register);
15471 %}
16718
16719 // Call Runtime Instruction
16720
16721 instruct CallLeafDirect(method meth)
16722 %{
16723 match(CallLeaf);
16724
16725 effect(USE meth);
16726
16727 ins_cost(CALL_COST);
16728
16729 format %{ "CALL, runtime leaf $meth" %}
16730
16731 ins_encode( aarch64_enc_java_to_runtime(meth) );
16732
16733 ins_pipe(pipe_class_call);
16734 %}
16735
16736 // Call Runtime Instruction
16737
16738 instruct CallLeafNoFPDirect(method meth)
16739 %{
16740 match(CallLeafNoFP);
16741
16742 effect(USE meth);
16743
16744 ins_cost(CALL_COST);
16745
16746 format %{ "CALL, runtime leaf nofp $meth" %}
16747
16748 ins_encode( aarch64_enc_java_to_runtime(meth) );
16749
16750 ins_pipe(pipe_class_call);
16751 %}
16752
16753 // Tail Call; Jump from runtime stub to Java code.
16754 // Also known as an 'interprocedural jump'.
16755 // Target of jump will eventually return to caller.
16756 // TailJump below removes the return address.
16757 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16758 %{
16759 match(TailCall jump_target method_ptr);
17292 ins_pipe(pipe_class_memory);
17293 %}
17294
17295 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17296 iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17297 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17298 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17299 iRegP_R10 tmp, rFlagsReg cr)
17300 %{
17301 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
17302 match(Set result (AryEq ary1 ary2));
17303 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17304 TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17305 TEMP vtmp6, TEMP vtmp7, KILL cr);
17306
17307 format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17308 ins_encode %{
17309 address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17310 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17311 $result$$Register, $tmp$$Register, 1);
17312 if (tpc == NULL) {
17313 ciEnv::current()->record_failure("CodeCache is full");
17314 return;
17315 }
17316 %}
17317 ins_pipe(pipe_class_memory);
17318 %}
17319
17320 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17321 iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17322 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17323 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17324 iRegP_R10 tmp, rFlagsReg cr)
17325 %{
17326 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
17327 match(Set result (AryEq ary1 ary2));
17328 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17329 TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17330 TEMP vtmp6, TEMP vtmp7, KILL cr);
17331
17332 format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17333 ins_encode %{
17334 address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17335 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17336 $result$$Register, $tmp$$Register, 2);
17337 if (tpc == NULL) {
17338 ciEnv::current()->record_failure("CodeCache is full");
17339 return;
17340 }
17341 %}
17342 ins_pipe(pipe_class_memory);
17343 %}
17344
17345 instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
17346 %{
17347 match(Set result (CountPositives ary1 len));
17348 effect(USE_KILL ary1, USE_KILL len, KILL cr);
17349 format %{ "count positives byte[] $ary1,$len -> $result" %}
17350 ins_encode %{
17351 address tpc = __ count_positives($ary1$$Register, $len$$Register, $result$$Register);
17352 if (tpc == NULL) {
17353 ciEnv::current()->record_failure("CodeCache is full");
17354 return;
17355 }
17356 %}
17357 ins_pipe( pipe_slow );
17358 %}
17359
17360 // fast char[] to byte[] compression
17361 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17362 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17363 vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17364 iRegI_R0 result, rFlagsReg cr)
17365 %{
17366 match(Set result (StrCompressedCopy src (Binary dst len)));
17367 effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17368 USE_KILL src, USE_KILL dst, USE len, KILL cr);
17369
17370 format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17371 ins_encode %{
17372 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
17375 $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
17376 %}
17377 ins_pipe(pipe_slow);
17378 %}
17379
17380 // fast byte[] to char[] inflation
17381 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
17382 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17383 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
17384 %{
17385 match(Set dummy (StrInflatedCopy src (Binary dst len)));
17386 effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
17387 TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
17388 USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
17389
17390 format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
17391 ins_encode %{
17392 address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
17393 $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
17394 $vtmp2$$FloatRegister, $tmp$$Register);
17395 if (tpc == NULL) {
17396 ciEnv::current()->record_failure("CodeCache is full");
17397 return;
17398 }
17399 %}
17400 ins_pipe(pipe_class_memory);
17401 %}
17402
17403 // encode char[] to byte[] in ISO_8859_1
17404 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17405 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17406 vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17407 iRegI_R0 result, rFlagsReg cr)
17408 %{
17409 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
17410 match(Set result (EncodeISOArray src (Binary dst len)));
17411 effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
17412 KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
17413
17414 format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17415 ins_encode %{
1220 // registers conditionally reserved.
1221
1222 _ANY_REG32_mask = _ALL_REG32_mask;
1223 _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
1224
1225 _ANY_REG_mask = _ALL_REG_mask;
1226
1227 _PTR_REG_mask = _ALL_REG_mask;
1228
1229 _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
1230 _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
1231
1232 _NO_SPECIAL_REG_mask = _ALL_REG_mask;
1233 _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1234
1235 _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
1236 _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1237
1238 // r27 is not allocatable when compressed oops is on and heapbase is not
1239 // zero; compressed klass pointers don't use r27 after JDK-8234794
1240 if (UseCompressedOops && (CompressedOops::ptrs_base() != nullptr)) {
1241 _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1242 _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1243 _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1244 }
1245
1246 // r29 is not allocatable when PreserveFramePointer is on
1247 if (PreserveFramePointer) {
1248 _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1249 _NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1250 _NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1251 }
1252 }
1253
1254 // Optimization of volatile gets and puts
1255 // -------------------------------------
1256 //
1257 // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1258 // use to implement volatile reads and writes. For a volatile read
1259 // we simply need
1260 //
1564 bool release = mbvol->trailing_store();
1565 assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
1566 #ifdef ASSERT
1567 if (release) {
1568 Node* leading = mbvol->leading_membar();
1569 assert(leading->Opcode() == Op_MemBarRelease, "");
1570 assert(leading->as_MemBar()->leading_store(), "");
1571 assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
1572 }
1573 #endif
1574
1575 return release;
1576 }
1577
1578 // predicates controlling emit of str<x>/stlr<x>
1579
1580 bool needs_releasing_store(const Node *n)
1581 {
1582 // assert n->is_Store();
1583 StoreNode *st = n->as_Store();
1584 return st->trailing_membar() != nullptr;
1585 }
1586
1587 // predicate controlling translation of CAS
1588 //
1589 // returns true if CAS needs to use an acquiring load otherwise false
1590
1591 bool needs_acquiring_load_exclusive(const Node *n)
1592 {
1593 assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
1594 LoadStoreNode* ldst = n->as_LoadStore();
1595 if (is_CAS(n->Opcode(), false)) {
1596 assert(ldst->trailing_membar() != nullptr, "expected trailing membar");
1597 } else {
1598 return ldst->trailing_membar() != nullptr;
1599 }
1600
1601 // so we can just return true here
1602 return true;
1603 }
1604
1605 #define __ _masm.
1606
1607 // forward declarations for helper functions to convert register
1608 // indices to register objects
1609
1610 // the ad file has to provide implementations of certain methods
1611 // expected by the generic code
1612 //
1613 // REQUIRED FUNCTIONALITY
1614
1615 //=============================================================================
1616
1617 // !!!!! Special hack to get all types of calls to specify the byte offset
1618 // from the start of the call to the point where the return address
1627
1628 int MachCallDynamicJavaNode::ret_addr_offset()
1629 {
1630 return 16; // movz, movk, movk, bl
1631 }
1632
1633 int MachCallRuntimeNode::ret_addr_offset() {
1634 // for generated stubs the call will be
1635 // bl(addr)
1636 // or with far branches
1637 // bl(trampoline_stub)
1638 // for real runtime callouts it will be six instructions
1639 // see aarch64_enc_java_to_runtime
1640 // adr(rscratch2, retaddr)
1641 // lea(rscratch1, RuntimeAddress(addr))
1642 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1643 // blr(rscratch1)
1644 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1645 if (cb) {
1646 return 1 * NativeInstruction::instruction_size;
1647 } else if (_entry_point == nullptr) {
1648 // See CallLeafNoFPIndirect
1649 return 1 * NativeInstruction::instruction_size;
1650 } else {
1651 return 6 * NativeInstruction::instruction_size;
1652 }
1653 }
1654
1655 //=============================================================================
1656
1657 #ifndef PRODUCT
1658 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1659 st->print("BREAKPOINT");
1660 }
1661 #endif
1662
1663 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1664 C2_MacroAssembler _masm(&cbuf);
1665 __ brk(0);
1666 }
1667
1668 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1669 return MachNode::size(ra_);
1720
1721 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1722
1723 if (C->output()->need_stack_bang(framesize))
1724 st->print("# stack bang size=%d\n\t", framesize);
1725
1726 if (VM_Version::use_rop_protection()) {
1727 st->print("ldr zr, [lr]\n\t");
1728 st->print("pacia lr, rfp\n\t");
1729 }
1730 if (framesize < ((1 << 9) + 2 * wordSize)) {
1731 st->print("sub sp, sp, #%d\n\t", framesize);
1732 st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
1733 if (PreserveFramePointer) st->print("\n\tadd rfp, sp, #%d", framesize - 2 * wordSize);
1734 } else {
1735 st->print("stp lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
1736 if (PreserveFramePointer) st->print("mov rfp, sp\n\t");
1737 st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
1738 st->print("sub sp, sp, rscratch1");
1739 }
1740 if (C->stub_function() == nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
1741 st->print("\n\t");
1742 st->print("ldr rscratch1, [guard]\n\t");
1743 st->print("dmb ishld\n\t");
1744 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1745 st->print("cmp rscratch1, rscratch2\n\t");
1746 st->print("b.eq skip");
1747 st->print("\n\t");
1748 st->print("blr #nmethod_entry_barrier_stub\n\t");
1749 st->print("b skip\n\t");
1750 st->print("guard: int\n\t");
1751 st->print("\n\t");
1752 st->print("skip:\n\t");
1753 }
1754 }
1755 #endif
1756
1757 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1758 Compile* C = ra_->C;
1759 C2_MacroAssembler _masm(&cbuf);
1760
1761 // insert a nop at the start of the prolog so we can patch in a
1762 // branch if we need to invalidate the method later
1763 __ nop();
1764
1765 __ verified_entry(C, 0);
1766
1767 if (C->stub_function() == nullptr) {
1768 __ entry_barrier();
1769 }
1770
1771 if (!Compile::current()->output()->in_scratch_emit_size()) {
1772 __ bind(*_verified_entry);
1773 }
1774
1775 if (VerifyStackAtCalls) {
1776 Unimplemented();
1777 }
1778
1779 C->output()->set_frame_complete(cbuf.insts_size());
1780
1781 if (C->has_mach_constant_base_node()) {
1782 // NOTE: We set the table base offset here because users of the constant
1783 // table might be emitted before MachConstantBaseNode.
1784 ConstantTable& constant_table = C->output()->constant_table();
1785 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1786 }
1787 }
1788
1789 int MachPrologNode::reloc() const
1790 {
1791 return 0;
1792 }
1793
1794 //=============================================================================
1795
1796 #ifndef PRODUCT
1797 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1798 Compile* C = ra_->C;
1799 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1800
1801 st->print("# pop frame %d\n\t",framesize);
1802
1803 if (framesize == 0) {
1804 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1805 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1806 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1807 st->print("add sp, sp, #%d\n\t", framesize);
1808 } else {
1812 }
1813 if (VM_Version::use_rop_protection()) {
1814 st->print("autia lr, rfp\n\t");
1815 st->print("ldr zr, [lr]\n\t");
1816 }
1817
1818 if (do_polling() && C->is_method_compilation()) {
1819 st->print("# test polling word\n\t");
1820 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1821 st->print("cmp sp, rscratch1\n\t");
1822 st->print("bhi #slow_path");
1823 }
1824 }
1825 #endif
1826
1827 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1828 Compile* C = ra_->C;
1829 C2_MacroAssembler _masm(&cbuf);
1830 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1831
1832 __ remove_frame(framesize, C->needs_stack_repair());
1833
1834 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1835 __ reserved_stack_check();
1836 }
1837
1838 if (do_polling() && C->is_method_compilation()) {
1839 Label dummy_label;
1840 Label* code_stub = &dummy_label;
1841 if (!C->output()->in_scratch_emit_size()) {
1842 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1843 C->output()->add_stub(stub);
1844 code_stub = &stub->entry();
1845 }
1846 __ relocate(relocInfo::poll_return_type);
1847 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1848 }
1849 }
1850
1851 int MachEpilogNode::reloc() const {
1852 // Return number of relocatable values contained in this instruction.
1853 return 1; // 1 for polling page.
1854 }
1855
1856 const Pipeline * MachEpilogNode::pipeline() const {
1857 return MachNode::pipeline_class();
1858 }
1859
1860 //=============================================================================
1861
1862 // Figure out which register class each register belongs in: rc_int,
1863 // rc_float, rc_predicate or rc_stack.
1864 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
1865
1866 static enum RC rc_class(OptoReg::Name reg) {
1867
1868 if (reg == OptoReg::Bad) {
1869 return rc_bad;
1870 }
2091 }
2092 st->print("\t# vector spill size = %d", vsize);
2093 } else if (ideal_reg() == Op_RegVectMask) {
2094 assert(Matcher::supports_scalable_vector(), "bad register type for spill");
2095 int vsize = Matcher::scalable_predicate_reg_slots() * 32;
2096 st->print("\t# predicate spill size = %d", vsize);
2097 } else {
2098 st->print("\t# spill size = %d", is64 ? 64 : 32);
2099 }
2100 }
2101
2102 return 0;
2103
2104 }
2105
2106 #ifndef PRODUCT
2107 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2108 if (!ra_)
2109 st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
2110 else
2111 implementation(nullptr, ra_, false, st);
2112 }
2113 #endif
2114
2115 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2116 implementation(&cbuf, ra_, false, nullptr);
2117 }
2118
2119 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2120 return MachNode::size(ra_);
2121 }
2122
2123 //=============================================================================
2124
2125 #ifndef PRODUCT
2126 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2127 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2128 int reg = ra_->get_reg_first(this);
2129   st->print("add %s, sp, #%d\t# box lock",
2130 Matcher::regName[reg], offset);
2131 }
2132 #endif
2133
2134 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2135 C2_MacroAssembler _masm(&cbuf);
2136
2137 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2138 int reg = ra_->get_encode(this);
2139
2140 // This add will handle any 24-bit signed offset. 24 bits allows an
2141 // 8 megabyte stack frame.
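  // If the offset does not fit the add/sub immediate encoding, the macro
  // assembler materializes it through a scratch register first, which is
  // why size() below may report two instructions.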
2142 __ add(as_Register(reg), sp, offset);
2143 }
2144
2145 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2146 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2147 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2148
2149 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2150 return NativeInstruction::instruction_size;
2151 } else {
2152 return 2 * NativeInstruction::instruction_size;
2153 }
2154 }
2155
2156 //=============================================================================
2157 #ifndef PRODUCT
2158 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2159 {
2160 st->print_cr("# MachVEPNode");
2161 if (!_verified) {
2162 st->print_cr("\t load_class");
2163 } else {
2164 st->print_cr("\t unpack_inline_arg");
2165 }
2166 }
2167 #endif
2168
2169 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2170 {
2171 C2_MacroAssembler _masm(&cbuf);
2172
2173 if (!_verified) {
2174 Label skip;
2175 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2176 __ br(Assembler::EQ, skip);
2177 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2178 __ bind(skip);
2179
2180 } else {
2181 // insert a nop at the start of the prolog so we can patch in a
2182 // branch if we need to invalidate the method later
2183 __ nop();
2184
2185 // TODO 8284443 Avoid creation of temporary frame
2186 if (ra_->C->stub_function() == nullptr) {
2187 __ verified_entry(ra_->C, 0);
2188 __ entry_barrier();
2189 int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
2190 __ remove_frame(framesize, false);
2191 }
2192 // Unpack inline type args passed as oop and then jump to
2193 // the verified entry point (skipping the unverified entry).
2194 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2195 // Emit code for verified entry and save increment for stack repair on return
2196 __ verified_entry(ra_->C, sp_inc);
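    // During the size-measuring scratch pass the shared _verified_entry label
    // is not usable, so branch to a local dummy label instead; the branch
    // encodes to the same size either way.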
2197 if (Compile::current()->output()->in_scratch_emit_size()) {
2198 Label dummy_verified_entry;
2199 __ b(dummy_verified_entry);
2200 } else {
2201 __ b(*_verified_entry);
2202 }
2203 }
2204 }
2205
2206 //=============================================================================
2207 #ifndef PRODUCT
2208 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2209 {
2210 st->print_cr("# MachUEPNode");
2211 if (UseCompressedClassPointers) {
2212     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2213 if (CompressedKlassPointers::shift() != 0) {
2214 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2215 }
2216 } else {
2217     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2218 }
2219   st->print_cr("\tcmp rscratch2, rscratch1\t# Inline cache check");
2220   st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
2221 }
2222 #endif
2223
2224 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2225 {
2226 // This is the unverified entry point.
2227 C2_MacroAssembler _masm(&cbuf);
2228 Label skip;
2229
2230   // The UseCompressedClassPointers logic is handled inside cmp_klass.
2231 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2232
2233 // TODO
2234 // can we avoid this skip and still use a reloc?
2235 __ br(Assembler::EQ, skip);
2236 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2237 __ bind(skip);
2238 }
2239
2240 // REQUIRED EMIT CODE
2241
2242 //=============================================================================
2243
2244 // Emit exception handler code.
2245 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2246 {
2247 // mov rscratch1 #exception_blob_entry_point
2248 // br rscratch1
2249 // Note that the code buffer's insts_mark is always relative to insts.
2250 // That's why we must use the macroassembler to generate a handler.
2251 C2_MacroAssembler _masm(&cbuf);
2252 address base = __ start_a_stub(size_exception_handler());
2253 if (base == nullptr) {
2254 ciEnv::current()->record_failure("CodeCache is full");
2255 return 0; // CodeBuffer::expand failed
2256 }
2257 int offset = __ offset();
2258 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2259 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2260 __ end_a_stub();
2261 return offset;
2262 }
2263
2264 // Emit deopt handler code.
2265 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
2266 {
2267 // Note that the code buffer's insts_mark is always relative to insts.
2268 // That's why we must use the macroassembler to generate a handler.
2269 C2_MacroAssembler _masm(&cbuf);
2270 address base = __ start_a_stub(size_deopt_handler());
2271 if (base == nullptr) {
2272 ciEnv::current()->record_failure("CodeCache is full");
2273 return 0; // CodeBuffer::expand failed
2274 }
2275 int offset = __ offset();
2276
2277 __ adr(lr, __ pc());
2278 __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2279
2280 assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
2281 __ end_a_stub();
2282 return offset;
2283 }
2284
2285 // REQUIRED MATCHER CODE
2286
2287 //=============================================================================
2288
2289 bool Matcher::match_rule_supported(int opcode) {
2290 if (!has_match_rule(opcode))
2291 return false;
2387 }
2388 switch(len) {
2389 // For 16-bit/32-bit mask vector, reuse VecD.
2390 case 2:
2391 case 4:
2392 case 8: return Op_VecD;
2393 case 16: return Op_VecX;
2394 }
2395 ShouldNotReachHere();
2396 return 0;
2397 }
2398
2399 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
2400 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
2401 switch (ideal_reg) {
2402 case Op_VecA: return new vecAOper();
2403 case Op_VecD: return new vecDOper();
2404 case Op_VecX: return new vecXOper();
2405 }
2406 ShouldNotReachHere();
2407 return nullptr;
2408 }
2409
2410 bool Matcher::is_reg2reg_move(MachNode* m) {
2411 return false;
2412 }
2413
2414 bool Matcher::is_generic_vector(MachOper* opnd) {
2415 return opnd->opcode() == VREG;
2416 }
2417
2418 // Return whether or not this register is ever used as an argument.
2419 // This function is used on startup to build the trampoline stubs in
2420 // generateOptoStub. Registers not mentioned will be killed by the VM
2421 // call in the trampoline, and arguments in those registers will not be
2422 // available to the callee.
2423 bool Matcher::can_be_java_arg(int reg)
2424 {
2425 return
2426 reg == R0_num || reg == R0_H_num ||
2427 reg == R1_num || reg == R1_H_num ||
2560 result = Assembler::VS; break;
2561 case BoolTest::no_overflow:
2562 result = Assembler::VC; break;
2563 default:
2564 ShouldNotReachHere();
2565 return Assembler::Condition(-1);
2566 }
2567
2568 // Check conversion
2569 if (cond & BoolTest::unsigned_compare) {
2570 assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == result, "Invalid conversion");
2571 } else {
2572 assert(cmpOpOper(cond).ccode() == result, "Invalid conversion");
2573 }
2574
2575 return result;
2576 }
2577
2578 // Binary src (Replicate con)
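// Returns true if 'm' is a replicated int/long constant whose value can be
// encoded as an SVE arithmetic or logical immediate for the vector op 'n',
// so the matcher may clone the constant into the user instead of
// materializing it in a register.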
2579 bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
2580 if (n == nullptr || m == nullptr) {
2581 return false;
2582 }
2583
2584 if (UseSVE == 0 || !VectorNode::is_invariant_vector(m)) {
2585 return false;
2586 }
2587
2588 Node* imm_node = m->in(1);
2589 if (!imm_node->is_Con()) {
2590 return false;
2591 }
2592
2593 const Type* t = imm_node->bottom_type();
2594 if (!(t->isa_int() || t->isa_long())) {
2595 return false;
2596 }
2597
2598 switch (n->Opcode()) {
2599 case Op_AndV:
2600 case Op_OrV:
2601 case Op_XorV: {
2602 Assembler::SIMD_RegVariant T = Assembler::elemType_to_regVariant(Matcher::vector_element_basic_type(n));
2603 uint64_t value = t->isa_long() ? (uint64_t)imm_node->get_long() : (uint64_t)imm_node->get_int();
2604 return Assembler::operand_valid_for_sve_logical_immediate(Assembler::regVariant_to_elemBits(T), value);
2605 }
2606 case Op_AddVB:
2607 return (imm_node->get_int() <= 255 && imm_node->get_int() >= -255);
2608 case Op_AddVS:
2609 case Op_AddVI:
2610 return Assembler::operand_valid_for_sve_add_sub_immediate((int64_t)imm_node->get_int());
2611 case Op_AddVL:
2612 return Assembler::operand_valid_for_sve_add_sub_immediate(imm_node->get_long());
2613 default:
2614 return false;
2615 }
2616 }
2617
2618 // (XorV src (Replicate m1))
2619 // (XorVMask src (MaskAll m1))
2620 bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
2621 if (n != nullptr && m != nullptr) {
2622 return (n->Opcode() == Op_XorV || n->Opcode() == Op_XorVMask) &&
2623 VectorNode::is_all_ones_vector(m);
2624 }
2625 return false;
2626 }
2627
2628 // Should the matcher clone input 'm' of node 'n'?
2629 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2630 if (is_vshift_con_pattern(n, m) ||
2631 is_vector_bitwise_not_pattern(n, m) ||
2632 is_valid_sve_arith_imm_pattern(n, m)) {
2633 mstack.push(m, Visit);
2634 return true;
2635 }
2636 return false;
2637 }
2638
2639 // Should the Matcher clone shifts on addressing modes, expecting them
2640 // to be subsumed into complex addressing expressions or compute them
2641 // into registers?
3407 } else {
3408 __ movw(dst_reg, con);
3409 }
3410 %}
3411
3412 enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
3413 C2_MacroAssembler _masm(&cbuf);
3414 Register dst_reg = as_Register($dst$$reg);
3415 uint64_t con = (uint64_t)$src$$constant;
3416 if (con == 0) {
3417 __ mov(dst_reg, zr);
3418 } else {
3419 __ mov(dst_reg, con);
3420 }
3421 %}
3422
3423 enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
3424 C2_MacroAssembler _masm(&cbuf);
3425 Register dst_reg = as_Register($dst$$reg);
3426 address con = (address)$src$$constant;
3427 if (con == nullptr || con == (address)1) {
3428 ShouldNotReachHere();
3429 } else {
3430 relocInfo::relocType rtype = $src->constant_reloc();
3431 if (rtype == relocInfo::oop_type) {
3432 __ movoop(dst_reg, (jobject)con);
3433 } else if (rtype == relocInfo::metadata_type) {
3434 __ mov_metadata(dst_reg, (Metadata*)con);
3435 } else {
3436 assert(rtype == relocInfo::none, "unexpected reloc type");
3437 if (! __ is_valid_AArch64_address(con) ||
3438 con < (address)(uintptr_t)os::vm_page_size()) {
3439 __ mov(dst_reg, con);
3440 } else {
3441 uint64_t offset;
3442 __ adrp(dst_reg, con, offset);
3443 __ add(dst_reg, dst_reg, offset);
3444 }
3445 }
3446 }
3447 %}
3450 C2_MacroAssembler _masm(&cbuf);
3451 Register dst_reg = as_Register($dst$$reg);
3452 __ mov(dst_reg, zr);
3453 %}
3454
3455 enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
3456 C2_MacroAssembler _masm(&cbuf);
3457 Register dst_reg = as_Register($dst$$reg);
3458 __ mov(dst_reg, (uint64_t)1);
3459 %}
3460
3461 enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
3462 C2_MacroAssembler _masm(&cbuf);
3463 __ load_byte_map_base($dst$$Register);
3464 %}
3465
3466 enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
3467 C2_MacroAssembler _masm(&cbuf);
3468 Register dst_reg = as_Register($dst$$reg);
3469 address con = (address)$src$$constant;
3470 if (con == nullptr) {
3471 ShouldNotReachHere();
3472 } else {
3473 relocInfo::relocType rtype = $src->constant_reloc();
3474 assert(rtype == relocInfo::oop_type, "unexpected reloc type");
3475 __ set_narrow_oop(dst_reg, (jobject)con);
3476 }
3477 %}
3478
3479 enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
3480 C2_MacroAssembler _masm(&cbuf);
3481 Register dst_reg = as_Register($dst$$reg);
3482 __ mov(dst_reg, zr);
3483 %}
3484
3485 enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
3486 C2_MacroAssembler _masm(&cbuf);
3487 Register dst_reg = as_Register($dst$$reg);
3488 address con = (address)$src$$constant;
3489 if (con == nullptr) {
3490 ShouldNotReachHere();
3491 } else {
3492 relocInfo::relocType rtype = $src->constant_reloc();
3493 assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
3494 __ set_narrow_klass(dst_reg, (Klass *)con);
3495 }
3496 %}
3497
3498 // arithmetic encodings
3499
3500 enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
3501 C2_MacroAssembler _masm(&cbuf);
3502 Register dst_reg = as_Register($dst$$reg);
3503 Register src_reg = as_Register($src1$$reg);
3504 int32_t con = (int32_t)$src2$$constant;
3505 // add has primary == 0, subtract has primary == 1
3506 if ($primary) { con = -con; }
3507 if (con < 0) {
3508 __ subw(dst_reg, src_reg, -con);
3509 } else {
3652 Label *L = $lbl$$label;
3653 __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3654 %}
3655
3656 enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3657 C2_MacroAssembler _masm(&cbuf);
3658 Label *L = $lbl$$label;
3659 __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3660 %}
3661
3662 enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
3663 %{
3664 Register sub_reg = as_Register($sub$$reg);
3665 Register super_reg = as_Register($super$$reg);
3666 Register temp_reg = as_Register($temp$$reg);
3667 Register result_reg = as_Register($result$$reg);
3668
3669 Label miss;
3670 C2_MacroAssembler _masm(&cbuf);
3671 __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
3672 nullptr, &miss,
3673 /*set_cond_codes:*/ true);
3674 if ($primary) {
3675 __ mov(result_reg, zr);
3676 }
3677 __ bind(miss);
3678 %}
3679
3680 enc_class aarch64_enc_java_static_call(method meth) %{
3681 C2_MacroAssembler _masm(&cbuf);
3682
3683 address addr = (address)$meth$$method;
3684 address call;
3685 if (!_method) {
3686 // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3687 call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type));
3688 if (call == nullptr) {
3689 ciEnv::current()->record_failure("CodeCache is full");
3690 return;
3691 }
3692 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
3693 // The NOP here is purely to ensure that eliding a call to
3694 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
3695 __ nop();
3696 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
3697 } else {
3698 int method_index = resolved_method_index(cbuf);
3699 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
3700 : static_call_Relocation::spec(method_index);
3701 call = __ trampoline_call(Address(addr, rspec));
3702 if (call == nullptr) {
3703 ciEnv::current()->record_failure("CodeCache is full");
3704 return;
3705 }
3706 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
3707 // Calls of the same statically bound method can share
3708 // a stub to the interpreter.
3709 cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
3710 } else {
3711 // Emit stub for static call
3712 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call);
3713 if (stub == nullptr) {
3714 ciEnv::current()->record_failure("CodeCache is full");
3715 return;
3716 }
3717 }
3718 }
3719
3720 __ post_call_nop();
3721
3722     // Only non-uncommon_trap calls need to reinitialize ptrue.
3723 if (Compile::current()->max_vector_size() > 0 && uncommon_trap_request() == 0) {
3724 __ reinitialize_ptrue();
3725 }
3726 %}
3727
3728 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3729 C2_MacroAssembler _masm(&cbuf);
3730 int method_index = resolved_method_index(cbuf);
3731 address call = __ ic_call((address)$meth$$method, method_index);
3732 if (call == nullptr) {
3733 ciEnv::current()->record_failure("CodeCache is full");
3734 return;
3735 }
3736 __ post_call_nop();
3737 if (Compile::current()->max_vector_size() > 0) {
3738 __ reinitialize_ptrue();
3739 }
3740 %}
3741
3742 enc_class aarch64_enc_call_epilog() %{
3743 C2_MacroAssembler _masm(&cbuf);
3744 if (VerifyStackAtCalls) {
3745       // Check that stack depth is unchanged: find magic cookie on stack
3746 __ call_Unimplemented();
3747 }
3748 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
3749 if (!_method->signature()->returns_null_free_inline_type()) {
3750 // The last return value is not set by the callee but used to pass IsInit information to compiled code.
3751       // Search for the corresponding projection, get the register and emit code that initializes it.
3752 uint con = (tf()->range_cc()->cnt() - 1);
3753 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3754 ProjNode* proj = fast_out(i)->as_Proj();
3755 if (proj->_con == con) {
3756 // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
3757 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3758 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
3759 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3760 __ cmp(r0, zr);
3761 __ cset(toReg, Assembler::NE);
3762 if (reg->is_stack()) {
3763 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3764 __ str(toReg, Address(sp, st_off));
3765 }
3766 break;
3767 }
3768 }
3769 }
3770 if (return_value_is_used()) {
3771 // An inline type is returned as fields in multiple registers.
3772 // R0 either contains an oop if the inline type is buffered or a pointer
3773 // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
3774 // if the lowest bit is set to allow C2 to use the oop after null checking.
3775 // r0 &= (r0 & 1) - 1
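      // If the lowest bit of r0 is set (InlineKlass pointer), (r0 & 1) - 1 == 0
      // and r0 is cleared; otherwise (r0 & 1) - 1 is all ones and r0 is unchanged.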
3776 __ andr(rscratch1, r0, 0x1);
3777 __ sub(rscratch1, rscratch1, 0x1);
3778 __ andr(r0, r0, rscratch1);
3779 }
3780 }
3781 %}
3782
3783 enc_class aarch64_enc_java_to_runtime(method meth) %{
3784 C2_MacroAssembler _masm(&cbuf);
3785
3786     // Some calls to generated routines (arraycopy code) are scheduled
3787     // by C2 as runtime calls. If so we can call them using a br (they
3788     // will be in a reachable segment), otherwise we have to use a blr
3789     // which loads the absolute address into a register.
3790 address entry = (address)$meth$$method;
3791 CodeBlob *cb = CodeCache::find_blob(entry);
3792 if (cb) {
3793 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3794 if (call == nullptr) {
3795 ciEnv::current()->record_failure("CodeCache is full");
3796 return;
3797 }
3798 __ post_call_nop();
3799 } else {
3800 Label retaddr;
3801 __ adr(rscratch2, retaddr);
3802 __ lea(rscratch1, RuntimeAddress(entry));
3803 // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
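      // The pair store keeps sp 16-byte aligned (zr fills the unused slot) and
      // leaves the return pc on the stack where the frame anchor code can find it.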
3804 __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
3805 __ blr(rscratch1);
3806 __ bind(retaddr);
3807 __ post_call_nop();
3808 __ add(sp, sp, 2 * wordSize);
3809 }
3810 if (Compile::current()->max_vector_size() > 0) {
3811 __ reinitialize_ptrue();
3812 }
3813 %}
3814
3858 // Load markWord from object into displaced_header.
3859 __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3860
3861 if (DiagnoseSyncOnValueBasedClasses != 0) {
3862 __ load_klass(tmp, oop);
3863 __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
3864 __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
3865 __ br(Assembler::NE, cont);
3866 }
3867
3868 // Check for existing monitor
3869 __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
3870
3871 if (LockingMode == LM_MONITOR) {
3872 __ tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
3873 __ b(cont);
3874 } else if (LockingMode == LM_LEGACY) {
3875 // Set tmp to be (markWord of object | UNLOCK_VALUE).
3876 __ orr(tmp, disp_hdr, markWord::unlocked_value);
3877
3878 if (EnableValhalla) {
3879 // Mask inline_type bit such that we go to the slow path if object is an inline type
3880 __ andr(tmp, tmp, ~((int) markWord::inline_type_bit_in_place));
3881 }
3882
3883 // Initialize the box. (Must happen before we update the object mark!)
3884 __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3885
3886 // Compare object markWord with an unlocked value (tmp) and if
3887 // equal exchange the stack address of our box with object markWord.
3888 // On failure disp_hdr contains the possibly locked markWord.
3889 __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
3890 /*release*/ true, /*weak*/ false, disp_hdr);
3891 __ br(Assembler::EQ, cont);
3892
3893 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3894
3895     // If the compare-and-exchange succeeded, then we found an unlocked
3896     // object, have now locked it, and will continue at label cont.
3897
3898 // Check if the owner is self by comparing the value in the
3899 // markWord of object (disp_hdr) with the stack pointer.
3900 __ mov(rscratch1, sp);
3901 __ sub(disp_hdr, disp_hdr, rscratch1);
3902 __ mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
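    // The mask covers everything outside the page-offset bits plus the two
    // lock bits, so the ands below yields zero exactly when (markWord - sp)
    // is a small aligned offset, i.e. the mark is a stack address close to
    // sp and this thread already holds a stack lock on the object.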
3903     // If the result is zero we already own the lock, and the zero stored as
3904     // the displaced header in the box indicates that it is a recursive lock.
3905 __ ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result
3906 __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3907 __ b(cont);
3908 } else {
3909 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
3910 __ fast_lock(oop, disp_hdr, tmp, rscratch1, no_count);
3911 __ b(count);
3912 }
3913
3914 // Handle existing monitor.
3915 __ bind(object_has_monitor);
3916
3917 // The object's monitor m is unlocked iff m->owner == nullptr,
3918 // otherwise m->owner may contain a thread or a stack address.
3919 //
3920 // Try to CAS m->owner from nullptr to current thread.
3921 __ add(tmp, disp_hdr, (in_bytes(ObjectMonitor::owner_offset())-markWord::monitor_value));
3922 __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
3923 /*release*/ true, /*weak*/ false, rscratch1); // Sets flags for result
3924
3925 if (LockingMode != LM_LIGHTWEIGHT) {
3926 // Store a non-null value into the box to avoid looking like a re-entrant
3927 // lock. The fast-path monitor unlock code checks for
3928 // markWord::monitor_value so use markWord::unused_mark which has the
3929 // relevant bit set, and also matches ObjectSynchronizer::enter.
3930 __ mov(tmp, (address)markWord::unused_mark().value());
3931 __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3932 }
3933 __ br(Assembler::EQ, cont); // CAS success means locking succeeded
3934
3935 __ cmp(rscratch1, rthread);
3936 __ br(Assembler::NE, cont); // Check for recursive locking
3937
3938 // Recursive lock case
3939 __ increment(Address(disp_hdr, in_bytes(ObjectMonitor::recursions_offset()) - markWord::monitor_value), 1);
3940 // flag == EQ still from the cmp above, checking if this is a reentrant lock
4874 operand immL_32bits()
4875 %{
4876 predicate(n->get_long() == 0xFFFFFFFFL);
4877 match(ConL);
4878 op_cost(0);
4879 format %{ %}
4880 interface(CONST_INTER);
4881 %}
4882
4883 // Pointer operands
4884 // Pointer Immediate
4885 operand immP()
4886 %{
4887 match(ConP);
4888
4889 op_cost(0);
4890 format %{ %}
4891 interface(CONST_INTER);
4892 %}
4893
4894 // nullptr Pointer Immediate
4895 operand immP0()
4896 %{
4897 predicate(n->get_ptr() == 0);
4898 match(ConP);
4899
4900 op_cost(0);
4901 format %{ %}
4902 interface(CONST_INTER);
4903 %}
4904
4905 // Pointer Immediate One
4906 // this is used in object initialization (initial object header)
4907 operand immP_1()
4908 %{
4909 predicate(n->get_ptr() == 1);
4910 match(ConP);
4911
4912 op_cost(0);
4913 format %{ %}
4914 interface(CONST_INTER);
5006 operand immFPacked()
5007 %{
5008 predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
5009 match(ConF);
5010 op_cost(0);
5011 format %{ %}
5012 interface(CONST_INTER);
5013 %}
5014
5015 // Narrow pointer operands
5016 // Narrow Pointer Immediate
5017 operand immN()
5018 %{
5019 match(ConN);
5020
5021 op_cost(0);
5022 format %{ %}
5023 interface(CONST_INTER);
5024 %}
5025
5026 // Narrow nullptr Pointer Immediate
5027 operand immN0()
5028 %{
5029 predicate(n->get_narrowcon() == 0);
5030 match(ConN);
5031
5032 op_cost(0);
5033 format %{ %}
5034 interface(CONST_INTER);
5035 %}
5036
5037 operand immNKlass()
5038 %{
5039 match(ConNKlass);
5040
5041 op_cost(0);
5042 format %{ %}
5043 interface(CONST_INTER);
5044 %}
5045
5046 // Integer 32 bit Register Operands
7415 instruct loadConL(iRegLNoSp dst, immL src)
7416 %{
7417 match(Set dst src);
7418
7419 ins_cost(INSN_COST);
7420 format %{ "mov $dst, $src\t# long" %}
7421
7422 ins_encode( aarch64_enc_mov_imm(dst, src) );
7423
7424 ins_pipe(ialu_imm);
7425 %}
7426
7427 // Load Pointer Constant
7428
7429 instruct loadConP(iRegPNoSp dst, immP con)
7430 %{
7431 match(Set dst con);
7432
7433 ins_cost(INSN_COST * 4);
7434 format %{
7435 "mov $dst, $con\t# ptr"
7436 %}
7437
7438 ins_encode(aarch64_enc_mov_p(dst, con));
7439
7440 ins_pipe(ialu_imm);
7441 %}
7442
7443 // Load Null Pointer Constant
7444
7445 instruct loadConP0(iRegPNoSp dst, immP0 con)
7446 %{
7447 match(Set dst con);
7448
7449 ins_cost(INSN_COST);
7450 format %{ "mov $dst, $con\t# nullptr ptr" %}
7451
7452 ins_encode(aarch64_enc_mov_p0(dst, con));
7453
7454 ins_pipe(ialu_imm);
7455 %}
7456
7457 // Load Pointer Constant One
7458
7459 instruct loadConP1(iRegPNoSp dst, immP_1 con)
7460 %{
7461 match(Set dst con);
7462
7463 ins_cost(INSN_COST);
7464   format %{ "mov $dst, $con\t# ptr 1" %}
7465
7466 ins_encode(aarch64_enc_mov_p1(dst, con));
7467
7468 ins_pipe(ialu_imm);
7469 %}
7470
7471 // Load Byte Map Base Constant
7472
7473 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
7474 %{
7475 match(Set dst con);
7476
7477 ins_cost(INSN_COST);
7478 format %{ "adr $dst, $con\t# Byte Map Base" %}
7479
7480 ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
7481
7482 ins_pipe(ialu_imm);
7483 %}
7484
7486
7487 instruct loadConN(iRegNNoSp dst, immN con)
7488 %{
7489 match(Set dst con);
7490
7491 ins_cost(INSN_COST * 4);
7492 format %{ "mov $dst, $con\t# compressed ptr" %}
7493
7494 ins_encode(aarch64_enc_mov_n(dst, con));
7495
7496 ins_pipe(ialu_imm);
7497 %}
7498
7499 // Load Narrow Null Pointer Constant
7500
7501 instruct loadConN0(iRegNNoSp dst, immN0 con)
7502 %{
7503 match(Set dst con);
7504
7505 ins_cost(INSN_COST);
7506 format %{ "mov $dst, $con\t# compressed nullptr ptr" %}
7507
7508 ins_encode(aarch64_enc_mov_n0(dst, con));
7509
7510 ins_pipe(ialu_imm);
7511 %}
7512
7513 // Load Narrow Klass Constant
7514
7515 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
7516 %{
7517 match(Set dst con);
7518
7519 ins_cost(INSN_COST);
7520 format %{ "mov $dst, $con\t# compressed klass ptr" %}
7521
7522 ins_encode(aarch64_enc_mov_nk(dst, con));
7523
7524 ins_pipe(ialu_imm);
7525 %}
7526
8618 %}
8619
8620 // ============================================================================
8621 // Cast/Convert Instructions
8622
8623 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8624 match(Set dst (CastX2P src));
8625
8626 ins_cost(INSN_COST);
8627 format %{ "mov $dst, $src\t# long -> ptr" %}
8628
8629 ins_encode %{
8630 if ($dst$$reg != $src$$reg) {
8631 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8632 }
8633 %}
8634
8635 ins_pipe(ialu_reg);
8636 %}
8637
8638 instruct castN2X(iRegLNoSp dst, iRegN src) %{
8639 match(Set dst (CastP2X src));
8640
8641 ins_cost(INSN_COST);
8642 format %{ "mov $dst, $src\t# ptr -> long" %}
8643
8644 ins_encode %{
8645 if ($dst$$reg != $src$$reg) {
8646 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8647 }
8648 %}
8649
8650 ins_pipe(ialu_reg);
8651 %}
8652
8653 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8654 match(Set dst (CastP2X src));
8655
8656 ins_cost(INSN_COST);
8657 format %{ "mov $dst, $src\t# ptr -> long" %}
8658
8659 ins_encode %{
8660 if ($dst$$reg != $src$$reg) {
8661 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8662 }
8663 %}
8664
8665 ins_pipe(ialu_reg);
8666 %}
8667
8668 // Convert oop into int for vector alignment masking
8669 instruct convP2I(iRegINoSp dst, iRegP src) %{
8670 match(Set dst (ConvL2I (CastP2X src)));
8671
8672 ins_cost(INSN_COST);
15454
15455 match(Set dst (MoveL2D src));
15456
15457 effect(DEF dst, USE src);
15458
15459 ins_cost(INSN_COST);
15460
15461 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15462
15463 ins_encode %{
15464 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15465 %}
15466
15467 ins_pipe(fp_l2d);
15468
15469 %}
15470
15471 // ============================================================================
15472 // clearing of an array
15473
15474 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15475 %{
15476 match(Set dummy (ClearArray (Binary cnt base) zero));
15477 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15478
15479 ins_cost(4 * INSN_COST);
15480 format %{ "ClearArray $cnt, $base" %}
15481
15482 ins_encode %{
15483 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15484 if (tpc == nullptr) {
15485 ciEnv::current()->record_failure("CodeCache is full");
15486 return;
15487 }
15488 %}
15489
15490 ins_pipe(pipe_class_memory);
15491 %}
15492
15493 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15494 %{
15495 predicate(((ClearArrayNode*)n)->word_copy_only());
15496 match(Set dummy (ClearArray (Binary cnt base) val));
15497 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15498
15499 ins_cost(4 * INSN_COST);
15500 format %{ "ClearArray $cnt, $base, $val" %}
15501
15502 ins_encode %{
15503 __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15504 %}
15505
15506 ins_pipe(pipe_class_memory);
15507 %}
15508
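// Small constant-length variant: the predicate compares the word count against
// BlockZeroingLowLimit, which is a byte threshold, hence the shift right by
// LogBytesPerWord in the predicate below.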
15509 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15510 %{
15511 predicate((uint64_t)n->in(2)->get_long()
15512 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15513 && !((ClearArrayNode*)n)->word_copy_only());
15514 match(Set dummy (ClearArray cnt base));
15515 effect(TEMP temp, USE_KILL base, KILL cr);
15516
15517 ins_cost(4 * INSN_COST);
15518 format %{ "ClearArray $cnt, $base" %}
15519
15520 ins_encode %{
15521 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15522 if (tpc == nullptr) {
15523 ciEnv::current()->record_failure("CodeCache is full");
15524 return;
15525 }
15526 %}
15527
15528 ins_pipe(pipe_class_memory);
15529 %}
15530
15531 // ============================================================================
15532 // Overflow Math Instructions
15533
15534 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15535 %{
15536 match(Set cr (OverflowAddI op1 op2));
15537
15538 format %{ "cmnw $op1, $op2\t# overflow check int" %}
15539 ins_cost(INSN_COST);
15540 ins_encode %{
15541 __ cmnw($op1$$Register, $op2$$Register);
15542 %}
16789
16790 // Call Runtime Instruction
16791
16792 instruct CallLeafDirect(method meth)
16793 %{
16794 match(CallLeaf);
16795
16796 effect(USE meth);
16797
16798 ins_cost(CALL_COST);
16799
16800 format %{ "CALL, runtime leaf $meth" %}
16801
16802 ins_encode( aarch64_enc_java_to_runtime(meth) );
16803
16804 ins_pipe(pipe_class_call);
16805 %}
16806
16807 // Call Runtime Instruction
16808
16809 // entry point is null, target holds the address to call
16810 instruct CallLeafNoFPIndirect(iRegP target)
16811 %{
16812 predicate(n->as_Call()->entry_point() == nullptr);
16813
16814 match(CallLeafNoFP target);
16815
16816 ins_cost(CALL_COST);
16817
16818 format %{ "CALL, runtime leaf nofp indirect $target" %}
16819
16820 ins_encode %{
16821 __ blr($target$$Register);
16822 %}
16823
16824 ins_pipe(pipe_class_call);
16825 %}
16826
16827 instruct CallLeafNoFPDirect(method meth)
16828 %{
16829 predicate(n->as_Call()->entry_point() != nullptr);
16830
16831 match(CallLeafNoFP);
16832
16833 effect(USE meth);
16834
16835 ins_cost(CALL_COST);
16836
16837 format %{ "CALL, runtime leaf nofp $meth" %}
16838
16839 ins_encode( aarch64_enc_java_to_runtime(meth) );
16840
16841 ins_pipe(pipe_class_call);
16842 %}
16843
16844 // Tail Call; Jump from runtime stub to Java code.
16845 // Also known as an 'interprocedural jump'.
16846 // Target of jump will eventually return to caller.
16847 // TailJump below removes the return address.
16848 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16849 %{
16850 match(TailCall jump_target method_ptr);
17383 ins_pipe(pipe_class_memory);
17384 %}
17385
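// The last argument to arrays_equals in the encodings below is the element
// size in bytes: 1 for the LL/byte[] variant, 2 for the UU/char[] variant.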
17386 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17387 iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17388 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17389 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17390 iRegP_R10 tmp, rFlagsReg cr)
17391 %{
17392 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
17393 match(Set result (AryEq ary1 ary2));
17394 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17395 TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17396 TEMP vtmp6, TEMP vtmp7, KILL cr);
17397
17398 format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17399 ins_encode %{
17400 address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17401 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17402 $result$$Register, $tmp$$Register, 1);
17403 if (tpc == nullptr) {
17404 ciEnv::current()->record_failure("CodeCache is full");
17405 return;
17406 }
17407 %}
17408 ins_pipe(pipe_class_memory);
17409 %}
17410
17411 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
17412 iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
17413 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17414 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
17415 iRegP_R10 tmp, rFlagsReg cr)
17416 %{
17417 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
17418 match(Set result (AryEq ary1 ary2));
17419 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
17420 TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17421 TEMP vtmp6, TEMP vtmp7, KILL cr);
17422
17423 format %{ "Array Equals $ary1,ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
17424 ins_encode %{
17425 address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
17426 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
17427 $result$$Register, $tmp$$Register, 2);
17428 if (tpc == nullptr) {
17429 ciEnv::current()->record_failure("CodeCache is full");
17430 return;
17431 }
17432 %}
17433 ins_pipe(pipe_class_memory);
17434 %}
17435
17436 instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
17437 %{
17438 match(Set result (CountPositives ary1 len));
17439 effect(USE_KILL ary1, USE_KILL len, KILL cr);
17440 format %{ "count positives byte[] $ary1,$len -> $result" %}
17441 ins_encode %{
17442 address tpc = __ count_positives($ary1$$Register, $len$$Register, $result$$Register);
17443 if (tpc == nullptr) {
17444 ciEnv::current()->record_failure("CodeCache is full");
17445 return;
17446 }
17447 %}
17448 ins_pipe( pipe_slow );
17449 %}
17450
17451 // fast char[] to byte[] compression
17452 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17453 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17454 vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17455 iRegI_R0 result, rFlagsReg cr)
17456 %{
17457 match(Set result (StrCompressedCopy src (Binary dst len)));
17458 effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
17459 USE_KILL src, USE_KILL dst, USE len, KILL cr);
17460
17461 format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17462 ins_encode %{
17463 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
17466 $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
17467 %}
17468 ins_pipe(pipe_slow);
17469 %}
17470
17471 // fast byte[] to char[] inflation
17472 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
17473 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
17474 vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
17475 %{
17476 match(Set dummy (StrInflatedCopy src (Binary dst len)));
17477 effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
17478 TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
17479 USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
17480
17481 format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
17482 ins_encode %{
17483 address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
17484 $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
17485 $vtmp2$$FloatRegister, $tmp$$Register);
17486 if (tpc == nullptr) {
17487 ciEnv::current()->record_failure("CodeCache is full");
17488 return;
17489 }
17490 %}
17491 ins_pipe(pipe_class_memory);
17492 %}
17493
17494 // encode char[] to byte[] in ISO_8859_1
17495 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
17496 vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
17497 vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
17498 iRegI_R0 result, rFlagsReg cr)
17499 %{
17500 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
17501 match(Set result (EncodeISOArray src (Binary dst len)));
17502 effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
17503 KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
17504
17505 format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
17506 ins_encode %{
|