1628
1629 int MachCallDynamicJavaNode::ret_addr_offset()
1630 {
1631 return 16; // movz, movk, movk, bl
1632 }
1633
1634 int MachCallRuntimeNode::ret_addr_offset() {
1635 // for generated stubs the call will be
1636 // bl(addr)
1637 // or with far branches
1638 // bl(trampoline_stub)
1639 // for real runtime callouts it will be six instructions
1640 // see aarch64_enc_java_to_runtime
1641 // adr(rscratch2, retaddr)
1642 // lea(rscratch1, RuntimeAddress(addr))
1643 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1644 // blr(rscratch1)
1645 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1646 if (cb) {
1647 return 1 * NativeInstruction::instruction_size;
1648 } else {
1649 return 6 * NativeInstruction::instruction_size;
1650 }
1651 }
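// Illustrative arithmetic, not part of the original source: AArch64
// instructions are fixed-width, so NativeInstruction::instruction_size
// is 4 bytes. A stub call is a single bl, putting the return address
// 4 bytes past the call start; the six-instruction runtime callout
// sequence above puts it 24 bytes past the start of the sequence.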
1652
1653 //=============================================================================
1654
1655 #ifndef PRODUCT
1656 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1657 st->print("BREAKPOINT");
1658 }
1659 #endif
1660
1661 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1662 C2_MacroAssembler _masm(&cbuf);
1663 __ brk(0);
1664 }
1665
1666 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1667 return MachNode::size(ra_);
1739 st->print("\n\t");
1740 st->print("ldr rscratch1, [guard]\n\t");
1741 st->print("dmb ishld\n\t");
1742 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1743 st->print("cmp rscratch1, rscratch2\n\t");
1744 st->print("b.eq skip");
1745 st->print("\n\t");
1746 st->print("blr #nmethod_entry_barrier_stub\n\t");
1747 st->print("b skip\n\t");
1748 st->print("guard: int\n\t");
1749 st->print("\n\t");
1750 st->print("skip:\n\t");
1751 }
1752 }
1753 #endif
1754
1755 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1756 Compile* C = ra_->C;
1757 C2_MacroAssembler _masm(&cbuf);
1758
1759 // n.b. frame size includes space for return pc and rfp
1760 const int framesize = C->output()->frame_size_in_bytes();
1761
1762 // insert a nop at the start of the prolog so we can patch in a
1763 // branch if we need to invalidate the method later
1764 __ nop();
1765
1766 if (C->clinit_barrier_on_entry()) {
1767 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1768
1769 Label L_skip_barrier;
1770
1771 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1772 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1773 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1774 __ bind(L_skip_barrier);
1775 }
1776
1777 if (C->max_vector_size() > 0) {
1778 __ reinitialize_ptrue();
1779 }
1780
1781 int bangsize = C->output()->bang_size_in_bytes();
1782 if (C->output()->need_stack_bang(bangsize))
1783 __ generate_stack_overflow_check(bangsize);
1784
1785 __ build_frame(framesize);
1786
1787 if (C->stub_function() == NULL) {
1788 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1789 if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
1790 // Dummy labels for just measuring the code size
1791 Label dummy_slow_path;
1792 Label dummy_continuation;
1793 Label dummy_guard;
1794 Label* slow_path = &dummy_slow_path;
1795 Label* continuation = &dummy_continuation;
1796 Label* guard = &dummy_guard;
1797 if (!Compile::current()->output()->in_scratch_emit_size()) {
1798 // Use real labels from the actual stub when not emitting code just to measure its size
1799 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
1800 Compile::current()->output()->add_stub(stub);
1801 slow_path = &stub->entry();
1802 continuation = &stub->continuation();
1803 guard = &stub->guard();
1804 }
1805 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
1806 bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
1807 }
1808 }
1809
1810 if (VerifyStackAtCalls) {
1811 Unimplemented();
1812 }
1813
1814 C->output()->set_frame_complete(cbuf.insts_size());
1815
1816 if (C->has_mach_constant_base_node()) {
1817 // NOTE: We set the table base offset here because users of the
1818 // constant table may be emitted before MachConstantBaseNode.
1819 ConstantTable& constant_table = C->output()->constant_table();
1820 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1821 }
1822 }
1823
1824 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1825 {
1826 return MachNode::size(ra_); // too many variables; just compute it
1827 // the hard way
1828 }
1829
1830 int MachPrologNode::reloc() const
1831 {
1832 return 0;
1833 }
1834
1835 //=============================================================================
1836
1837 #ifndef PRODUCT
1838 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1839 Compile* C = ra_->C;
1840 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1841
1842 st->print("# pop frame %d\n\t",framesize);
1843
1844 if (framesize == 0) {
1845 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1846 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1847 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1848 st->print("add sp, sp, #%d\n\t", framesize);
1849 } else {
1853 }
1854 if (VM_Version::use_rop_protection()) {
1855 st->print("autia lr, rfp\n\t");
1856 st->print("ldr zr, [lr]\n\t");
1857 }
1858
1859 if (do_polling() && C->is_method_compilation()) {
1860 st->print("# test polling word\n\t");
1861 st->print("ldr rscratch1, [rthread, #%d]\n\t", in_bytes(JavaThread::polling_word_offset()));
1862 st->print("cmp sp, rscratch1\n\t");
1863 st->print("bhi #slow_path");
1864 }
1865 }
1866 #endif
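// A worked example of the frame-pop shapes formatted above, assuming
// wordSize == 8 (illustration only, not generated output):
//   framesize == 0  : ldp lr, rfp, [sp], #16      // post-indexed pop
//   framesize == 96 : ldp lr, rfp, [sp, #80]      // 96 - 2 * wordSize
//                     add sp, sp, #96
// Frames of 528 bytes or more fall into the elided else branch, since
// 512 + 2 * wordSize is the limit of ldp's scaled immediate offset.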
1867
1868 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1869 Compile* C = ra_->C;
1870 C2_MacroAssembler _masm(&cbuf);
1871 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1872
1873 __ remove_frame(framesize);
1874
1875 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1876 __ reserved_stack_check();
1877 }
1878
1879 if (do_polling() && C->is_method_compilation()) {
1880 Label dummy_label;
1881 Label* code_stub = &dummy_label;
1882 if (!C->output()->in_scratch_emit_size()) {
1883 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1884 C->output()->add_stub(stub);
1885 code_stub = &stub->entry();
1886 }
1887 __ relocate(relocInfo::poll_return_type);
1888 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1889 }
1890 }
1891
1892 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1893 // Variable size. Determine dynamically.
1894 return MachNode::size(ra_);
1895 }
1896
1897 int MachEpilogNode::reloc() const {
1898 // Return number of relocatable values contained in this instruction.
1899 return 1; // 1 for polling page.
1900 }
1901
1902 const Pipeline * MachEpilogNode::pipeline() const {
1903 return MachNode::pipeline_class();
1904 }
1905
1906 //=============================================================================
1907
1908 // Figure out which register class each belongs in: rc_int, rc_float,
1909 // rc_predicate or rc_stack.
1910 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
1911
1912 static enum RC rc_class(OptoReg::Name reg) {
1913
1914 if (reg == OptoReg::Bad) {
1915 return rc_bad;
1916 }
2182
2183 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2184 int reg = ra_->get_encode(this);
2185
2186 // This add will handle any 24-bit signed offset. 24 bits allows an
2187 // 8 megabyte stack frame.
2188 __ add(as_Register(reg), sp, offset);
2189 }
2190
2191 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2192 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2193 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2194
2195 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2196 return NativeInstruction::instruction_size;
2197 } else {
2198 return 2 * NativeInstruction::instruction_size;
2199 }
2200 }
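// Background note (not from the source): an AArch64 add/sub immediate
// encodes a 12-bit unsigned value, optionally shifted left by 12 bits.
// An offset of, say, 4095 fits a single add, while something like
// 0x12345 does not and must first be materialized into a register,
// which is why the emit path above needs two instructions in that case.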
2201
2202 //=============================================================================
2203
2204 #ifndef PRODUCT
2205 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2206 {
2207 st->print_cr("# MachUEPNode");
2208 if (UseCompressedClassPointers) {
2209 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2210 if (CompressedKlassPointers::shift() != 0) {
2211 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2212 }
2213 } else {
2214 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2215 }
2216 st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2217 st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
2218 }
2219 #endif
2220
2221 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2222 {
2223 // This is the unverified entry point.
2224 C2_MacroAssembler _masm(&cbuf);
2225
2226 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2227 Label skip;
2228 // TODO
2229 // can we avoid this skip and still use a reloc?
2230 __ br(Assembler::EQ, skip);
2231 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2232 __ bind(skip);
2233 }
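// Sketch of the dispatch above, as read from the code: the caller's
// inline cache is expected to arrive in rscratch2 (the trial klass
// argument of cmp_klass), it is compared against the receiver klass of
// j_rarg0, and a mismatch far-jumps to the ic_miss stub, which
// presumably re-resolves the call site to the right target.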
2234
2235 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2236 {
2237 return MachNode::size(ra_);
2238 }
2239
2240 // REQUIRED EMIT CODE
2241
2242 //=============================================================================
2243
2244 // Emit exception handler code.
2245 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2246 {
2247 // mov rscratch1 #exception_blob_entry_point
2248 // br rscratch1
2249 // Note that the code buffer's insts_mark is always relative to insts.
2250 // That's why we must use the macroassembler to generate a handler.
2251 C2_MacroAssembler _masm(&cbuf);
2252 address base = __ start_a_stub(size_exception_handler());
2253 if (base == NULL) {
2254 ciEnv::current()->record_failure("CodeCache is full");
2255 return 0; // CodeBuffer::expand failed
2256 }
2257 int offset = __ offset();
2258 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2259 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3674 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3675 C2_MacroAssembler _masm(&cbuf);
3676 int method_index = resolved_method_index(cbuf);
3677 address call = __ ic_call((address)$meth$$method, method_index);
3678 if (call == NULL) {
3679 ciEnv::current()->record_failure("CodeCache is full");
3680 return;
3681 }
3682 __ post_call_nop();
3683 if (Compile::current()->max_vector_size() > 0) {
3684 __ reinitialize_ptrue();
3685 }
3686 %}
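// Why reinitialize_ptrue here (hedged background): with SVE, C2 is
// understood to reserve a predicate register that it assumes always
// holds an all-true pattern for whole-vector operations; a call can
// clobber it, so the predicate is re-established after each call
// whenever max_vector_size() > 0.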
3687
3688 enc_class aarch64_enc_call_epilog() %{
3689 C2_MacroAssembler _masm(&cbuf);
3690 if (VerifyStackAtCalls) {
3691 // Check that stack depth is unchanged: find majik cookie on stack
3692 __ call_Unimplemented();
3693 }
3694 %}
3695
3696 enc_class aarch64_enc_java_to_runtime(method meth) %{
3697 C2_MacroAssembler _masm(&cbuf);
3698
3699 // Some calls to generated routines (arraycopy code) are scheduled
3700 // by C2 as runtime calls. If so, we can call them using a bl (they
3701 // will be in a reachable segment); otherwise we have to use a blr,
3702 // which loads the absolute address into a register.
3703 address entry = (address)$meth$$method;
3704 CodeBlob *cb = CodeCache::find_blob(entry);
3705 if (cb) {
3706 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3707 if (call == NULL) {
3708 ciEnv::current()->record_failure("CodeCache is full");
3709 return;
3710 }
3711 __ post_call_nop();
3712 } else {
3713 Label retaddr;
3768
3769 assert_different_registers(oop, box, tmp, disp_hdr);
3770
3771 // Load markWord from object into displaced_header.
3772 __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3773
3774 if (DiagnoseSyncOnValueBasedClasses != 0) {
3775 __ load_klass(tmp, oop);
3776 __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
3777 __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
3778 __ br(Assembler::NE, cont);
3779 }
3780
3781 // Check for existing monitor
3782 __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
3783
3784 if (!UseHeavyMonitors) {
3785 // Set tmp to be (markWord of object | UNLOCK_VALUE).
3786 __ orr(tmp, disp_hdr, markWord::unlocked_value);
3787
3788 // Initialize the box. (Must happen before we update the object mark!)
3789 __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3790
3791 // Compare object markWord with an unlocked value (tmp) and if
3792 // equal exchange the stack address of our box with object markWord.
3793 // On failure disp_hdr contains the possibly locked markWord.
3794 __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
3795 /*release*/ true, /*weak*/ false, disp_hdr);
3796 __ br(Assembler::EQ, cont);
3797
3798 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3799
3800 // If the compare-and-exchange succeeded, then we found an unlocked
3801 // object, have now locked it, and will continue at label cont.
3802
3803 // Check if the owner is self by comparing the value in the
3804 // markWord of object (disp_hdr) with the stack pointer.
3805 __ mov(rscratch1, sp);
3806 __ sub(disp_hdr, disp_hdr, rscratch1);
3807 __ mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
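// Worked example for the mask being built here (illustration only):
// with a 4K page, ~(os::vm_page_size()-1) is ...fffff000, so the
// test that follows in the elided code checks that disp_hdr - sp is
// smaller than a page and has the low lock bits clear, i.e. the mark
// points into our own stack and this is a recursive stack lock.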
7241 instruct loadConL(iRegLNoSp dst, immL src)
7242 %{
7243 match(Set dst src);
7244
7245 ins_cost(INSN_COST);
7246 format %{ "mov $dst, $src\t# long" %}
7247
7248 ins_encode( aarch64_enc_mov_imm(dst, src) );
7249
7250 ins_pipe(ialu_imm);
7251 %}
7252
7253 // Load Pointer Constant
7254
7255 instruct loadConP(iRegPNoSp dst, immP con)
7256 %{
7257 match(Set dst con);
7258
7259 ins_cost(INSN_COST * 4);
7260 format %{
7261 "mov $dst, $con\t# ptr\n\t"
7262 %}
7263
7264 ins_encode(aarch64_enc_mov_p(dst, con));
7265
7266 ins_pipe(ialu_imm);
7267 %}
7268
7269 // Load Null Pointer Constant
7270
7271 instruct loadConP0(iRegPNoSp dst, immP0 con)
7272 %{
7273 match(Set dst con);
7274
7275 ins_cost(INSN_COST);
7276 format %{ "mov $dst, $con\t# NULL ptr" %}
7277
7278 ins_encode(aarch64_enc_mov_p0(dst, con));
7279
7280 ins_pipe(ialu_imm);
7281 %}
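// Cost note, inferred from the rules above rather than stated by them:
// loadConP0 is priced at one INSN_COST because a NULL pointer can
// presumably be encoded as a single "mov dst, zr", while the general
// loadConP is costed at 4 * INSN_COST to cover a worst-case
// movz/movk/movk/movk materialization of a full 64-bit pointer.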
8444 %}
8445
8446 // ============================================================================
8447 // Cast/Convert Instructions
8448
8449 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8450 match(Set dst (CastX2P src));
8451
8452 ins_cost(INSN_COST);
8453 format %{ "mov $dst, $src\t# long -> ptr" %}
8454
8455 ins_encode %{
8456 if ($dst$$reg != $src$$reg) {
8457 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8458 }
8459 %}
8460
8461 ins_pipe(ialu_reg);
8462 %}
8463
8464 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8465 match(Set dst (CastP2X src));
8466
8467 ins_cost(INSN_COST);
8468 format %{ "mov $dst, $src\t# ptr -> long" %}
8469
8470 ins_encode %{
8471 if ($dst$$reg != $src$$reg) {
8472 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8473 }
8474 %}
8475
8476 ins_pipe(ialu_reg);
8477 %}
8478
8479 // Convert oop into int for vector alignment masking
8480 instruct convP2I(iRegINoSp dst, iRegP src) %{
8481 match(Set dst (ConvL2I (CastP2X src)));
8482
8483 ins_cost(INSN_COST);
14998
14999 match(Set dst (MoveL2D src));
15000
15001 effect(DEF dst, USE src);
15002
15003 ins_cost(INSN_COST);
15004
15005 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15006
15007 ins_encode %{
15008 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15009 %}
15010
15011 ins_pipe(fp_l2d);
15012
15013 %}
15014
15015 // ============================================================================
15016 // clearing of an array
15017
15018 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
15019 %{
15020 match(Set dummy (ClearArray cnt base));
15021 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15022
15023 ins_cost(4 * INSN_COST);
15024 format %{ "ClearArray $cnt, $base" %}
15025
15026 ins_encode %{
15027 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15028 if (tpc == NULL) {
15029 ciEnv::current()->record_failure("CodeCache is full");
15030 return;
15031 }
15032 %}
15033
15034 ins_pipe(pipe_class_memory);
15035 %}
15036
15037 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15038 %{
15039 predicate((uint64_t)n->in(2)->get_long()
15040 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
15041 match(Set dummy (ClearArray cnt base));
15042 effect(TEMP temp, USE_KILL base, KILL cr);
15043
15044 ins_cost(4 * INSN_COST);
15045 format %{ "ClearArray $cnt, $base" %}
15046
15047 ins_encode %{
15048 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15049 if (tpc == NULL) {
15050 ciEnv::current()->record_failure("CodeCache is full");
15051 return;
15052 }
15053 %}
15054
15055 ins_pipe(pipe_class_memory);
15056 %}
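// Reading the two ClearArray rules above: the register form defers to
// zero_words, which returns NULL when it needs to emit a stub call and
// the code cache is full; the immediate form is only selected (see its
// predicate) when the constant word count is below BlockZeroingLowLimit
// scaled to words, i.e. for short arrays where a fully inlined zeroing
// sequence is assumed to beat a block-zeroing call.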
15057
15058 // ============================================================================
15059 // Overflow Math Instructions
15060
16354
16355 // Call Runtime Instruction
16356
16357 instruct CallLeafDirect(method meth)
16358 %{
16359 match(CallLeaf);
16360
16361 effect(USE meth);
16362
16363 ins_cost(CALL_COST);
16364
16365 format %{ "CALL, runtime leaf $meth" %}
16366
16367 ins_encode( aarch64_enc_java_to_runtime(meth) );
16368
16369 ins_pipe(pipe_class_call);
16370 %}
16371
16372 // Call Runtime Instruction
16373
16374 instruct CallLeafNoFPDirect(method meth)
16375 %{
16376 match(CallLeafNoFP);
16377
16378 effect(USE meth);
16379
16380 ins_cost(CALL_COST);
16381
16382 format %{ "CALL, runtime leaf nofp $meth" %}
16383
16384 ins_encode( aarch64_enc_java_to_runtime(meth) );
16385
16386 ins_pipe(pipe_class_call);
16387 %}
16388
16389 // Tail Call; Jump from runtime stub to Java code.
16390 // Also known as an 'interprocedural jump'.
16391 // Target of jump will eventually return to caller.
16392 // TailJump below removes the return address.
16393 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16394 %{
16395 match(TailCall jump_target method_ptr);
1628
1629 int MachCallDynamicJavaNode::ret_addr_offset()
1630 {
1631 return 16; // movz, movk, movk, bl
1632 }
1633
1634 int MachCallRuntimeNode::ret_addr_offset() {
1635 // for generated stubs the call will be
1636 // bl(addr)
1637 // or with far branches
1638 // bl(trampoline_stub)
1639 // for real runtime callouts it will be six instructions
1640 // see aarch64_enc_java_to_runtime
1641 // adr(rscratch2, retaddr)
1642 // lea(rscratch1, RuntimeAddress(addr))
1643 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1644 // blr(rscratch1)
1645 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1646 if (cb) {
1647 return 1 * NativeInstruction::instruction_size;
1648 } else if (_entry_point == NULL) {
1649 // See CallLeafNoFPIndirect
1650 return 1 * NativeInstruction::instruction_size;
1651 } else {
1652 return 6 * NativeInstruction::instruction_size;
1653 }
1654 }
1655
1656 //=============================================================================
1657
1658 #ifndef PRODUCT
1659 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1660 st->print("BREAKPOINT");
1661 }
1662 #endif
1663
1664 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1665 C2_MacroAssembler _masm(&cbuf);
1666 __ brk(0);
1667 }
1668
1669 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1670 return MachNode::size(ra_);
1742 st->print("\n\t");
1743 st->print("ldr rscratch1, [guard]\n\t");
1744 st->print("dmb ishld\n\t");
1745 st->print("ldr rscratch2, [rthread, #thread_disarmed_guard_value_offset]\n\t");
1746 st->print("cmp rscratch1, rscratch2\n\t");
1747 st->print("b.eq skip");
1748 st->print("\n\t");
1749 st->print("blr #nmethod_entry_barrier_stub\n\t");
1750 st->print("b skip\n\t");
1751 st->print("guard: int\n\t");
1752 st->print("\n\t");
1753 st->print("skip:\n\t");
1754 }
1755 }
1756 #endif
1757
1758 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1759 Compile* C = ra_->C;
1760 C2_MacroAssembler _masm(&cbuf);
1761
1762 // insert a nop at the start of the prolog so we can patch in a
1763 // branch if we need to invalidate the method later
1764 __ nop();
1765
1766 __ verified_entry(C, 0);
1767
1768 if (C->stub_function() == NULL) {
1769 __ entry_barrier();
1770 }
1771
1772 if (!Compile::current()->output()->in_scratch_emit_size()) {
1773 __ bind(*_verified_entry);
1774 }
1775
1776 if (VerifyStackAtCalls) {
1777 Unimplemented();
1778 }
1779
1780 C->output()->set_frame_complete(cbuf.insts_size());
1781
1782 if (C->has_mach_constant_base_node()) {
1783 // NOTE: We set the table base offset here because users of the
1784 // constant table may be emitted before MachConstantBaseNode.
1785 ConstantTable& constant_table = C->output()->constant_table();
1786 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1787 }
1788 }
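// Note on this restructured prolog relative to the older version
// earlier in this file: the explicit stack bang, build_frame and
// clinit barrier sequence is presumably folded into verified_entry(C, 0)
// and the nmethod entry barrier into entry_barrier(), with the
// _verified_entry label bound afterwards so that MachVEPNode (below)
// can branch straight to the point where the frame is already built.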
1789
1790 int MachPrologNode::reloc() const
1791 {
1792 return 0;
1793 }
1794
1795 //=============================================================================
1796
1797 #ifndef PRODUCT
1798 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1799 Compile* C = ra_->C;
1800 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1801
1802 st->print("# pop frame %d\n\t",framesize);
1803
1804 if (framesize == 0) {
1805 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1806 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1807 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1808 st->print("add sp, sp, #%d\n\t", framesize);
1809 } else {
1813 }
1814 if (VM_Version::use_rop_protection()) {
1815 st->print("autia lr, rfp\n\t");
1816 st->print("ldr zr, [lr]\n\t");
1817 }
1818
1819 if (do_polling() && C->is_method_compilation()) {
1820 st->print("# test polling word\n\t");
1821 st->print("ldr rscratch1, [rthread, #%d]\n\t", in_bytes(JavaThread::polling_word_offset()));
1822 st->print("cmp sp, rscratch1\n\t");
1823 st->print("bhi #slow_path");
1824 }
1825 }
1826 #endif
1827
1828 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1829 Compile* C = ra_->C;
1830 C2_MacroAssembler _masm(&cbuf);
1831 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1832
1833 __ remove_frame(framesize, C->needs_stack_repair());
1834
1835 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1836 __ reserved_stack_check();
1837 }
1838
1839 if (do_polling() && C->is_method_compilation()) {
1840 Label dummy_label;
1841 Label* code_stub = &dummy_label;
1842 if (!C->output()->in_scratch_emit_size()) {
1843 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1844 C->output()->add_stub(stub);
1845 code_stub = &stub->entry();
1846 }
1847 __ relocate(relocInfo::poll_return_type);
1848 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
1849 }
1850 }
1851
1852 int MachEpilogNode::reloc() const {
1853 // Return number of relocatable values contained in this instruction.
1854 return 1; // 1 for polling page.
1855 }
1856
1857 const Pipeline * MachEpilogNode::pipeline() const {
1858 return MachNode::pipeline_class();
1859 }
1860
1861 //=============================================================================
1862
1863 // Figure out which register class each belongs in: rc_int, rc_float,
1864 // rc_predicate or rc_stack.
1865 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
1866
1867 static enum RC rc_class(OptoReg::Name reg) {
1868
1869 if (reg == OptoReg::Bad) {
1870 return rc_bad;
1871 }
2137
2138 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2139 int reg = ra_->get_encode(this);
2140
2141 // This add will handle any 24-bit signed offset. 24 bits allows an
2142 // 8 megabyte stack frame.
2143 __ add(as_Register(reg), sp, offset);
2144 }
2145
2146 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2147 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2148 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2149
2150 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2151 return NativeInstruction::instruction_size;
2152 } else {
2153 return 2 * NativeInstruction::instruction_size;
2154 }
2155 }
2156
2157 //=============================================================================
2158 #ifndef PRODUCT
2159 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2160 {
2161 st->print_cr("# MachVEPNode");
2162 if (!_verified) {
2163 st->print_cr("\t load_class");
2164 } else {
2165 st->print_cr("\t unpack_inline_arg");
2166 }
2167 }
2168 #endif
2169
2170 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2171 {
2172 C2_MacroAssembler _masm(&cbuf);
2173
2174 if (!_verified) {
2175 Label skip;
2176 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2177 __ br(Assembler::EQ, skip);
2178 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2179 __ bind(skip);
2180
2181 } else {
2182 // insert a nop at the start of the prolog so we can patch in a
2183 // branch if we need to invalidate the method later
2184 __ nop();
2185
2186 // TODO 8284443 Avoid creation of temporary frame
2187 if (ra_->C->stub_function() == NULL) {
2188 __ verified_entry(ra_->C, 0);
2189 __ entry_barrier();
2190 int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
2191 __ remove_frame(framesize, false);
2192 }
2193 // Unpack inline type args passed as oop and then jump to
2194 // the verified entry point (skipping the unverified entry).
2195 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2196 // Emit code for verified entry and save increment for stack repair on return
2197 __ verified_entry(ra_->C, sp_inc);
2198 if (Compile::current()->output()->in_scratch_emit_size()) {
2199 Label dummy_verified_entry;
2200 __ b(dummy_verified_entry);
2201 } else {
2202 __ b(*_verified_entry);
2203 }
2204 }
2205 }
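// Flow summary, as read from the code above: the unverified flavour
// (!_verified) performs only the inline cache check, while the
// verified flavour builds a temporary frame for the entry barrier (see
// the TODO), unpacks inline-type arguments passed as oops, re-enters
// via verified_entry with the stack increment recorded for stack
// repair, and finally branches to the shared _verified_entry label
// bound in MachPrologNode::emit.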
2206
2207 //=============================================================================
2208 #ifndef PRODUCT
2209 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2210 {
2211 st->print_cr("# MachUEPNode");
2212 if (UseCompressedClassPointers) {
2213 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2214 if (CompressedKlassPointers::shift() != 0) {
2215 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2216 }
2217 } else {
2218 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2219 }
2220 st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2221 st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
2222 }
2223 #endif
2224
2225 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2226 {
2227 // This is the unverified entry point.
2228 C2_MacroAssembler _masm(&cbuf);
2229 Label skip;
2230
2231 // The UseCompressedClassPointers logic is inside cmp_klass
2232 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2233
2234 // TODO
2235 // can we avoid this skip and still use a reloc?
2236 __ br(Assembler::EQ, skip);
2237 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2238 __ bind(skip);
2239 }
2240
2241 // REQUIRED EMIT CODE
2242
2243 //=============================================================================
2244
2245 // Emit exception handler code.
2246 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2247 {
2248 // mov rscratch1 #exception_blob_entry_point
2249 // br rscratch1
2250 // Note that the code buffer's insts_mark is always relative to insts.
2251 // That's why we must use the macroassembler to generate a handler.
2252 C2_MacroAssembler _masm(&cbuf);
2253 address base = __ start_a_stub(size_exception_handler());
2254 if (base == NULL) {
2255 ciEnv::current()->record_failure("CodeCache is full");
2256 return 0; // CodeBuffer::expand failed
2257 }
2258 int offset = __ offset();
2259 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2260 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3675 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3676 C2_MacroAssembler _masm(&cbuf);
3677 int method_index = resolved_method_index(cbuf);
3678 address call = __ ic_call((address)$meth$$method, method_index);
3679 if (call == NULL) {
3680 ciEnv::current()->record_failure("CodeCache is full");
3681 return;
3682 }
3683 __ post_call_nop();
3684 if (Compile::current()->max_vector_size() > 0) {
3685 __ reinitialize_ptrue();
3686 }
3687 %}
3688
3689 enc_class aarch64_enc_call_epilog() %{
3690 C2_MacroAssembler _masm(&cbuf);
3691 if (VerifyStackAtCalls) {
3692 // Check that stack depth is unchanged: find majik cookie on stack
3693 __ call_Unimplemented();
3694 }
3695 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
3696 if (!_method->signature()->returns_null_free_inline_type()) {
3697 // The last return value is not set by the callee but used to pass IsInit information to compiled code.
3698 // Search for the corresponding projection, get the register and emit code that initializes it.
3699 uint con = (tf()->range_cc()->cnt() - 1);
3700 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3701 ProjNode* proj = fast_out(i)->as_Proj();
3702 if (proj->_con == con) {
3703 // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
3704 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3705 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
3706 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3707 __ cmp(r0, zr);
3708 __ cset(toReg, Assembler::NE);
3709 if (reg->is_stack()) {
3710 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3711 __ str(toReg, Address(sp, st_off));
3712 }
3713 break;
3714 }
3715 }
3716 }
3717 if (return_value_is_used()) {
3718 // An inline type is returned as fields in multiple registers.
3719 // r0 either contains an oop (if the inline type is buffered) or a pointer
3720 // to the corresponding InlineKlass with the lowest bit set to 1. If the
3721 // lowest bit is set, zero r0 so that C2 can use the oop after null checking.
3722 // r0 &= (r0 & 1) - 1
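// Worked out bit by bit (illustration): if r0 holds a tagged
// InlineKlass pointer its low bit is 1, so (r0 & 1) - 1 == 0 and the
// final AND zeroes r0; if r0 holds a buffered oop the low bit is 0,
// so (r0 & 1) - 1 is all ones and r0 passes through unchanged.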
3723 __ andr(rscratch1, r0, 0x1);
3724 __ sub(rscratch1, rscratch1, 0x1);
3725 __ andr(r0, r0, rscratch1);
3726 }
3727 }
3728 %}
3729
3730 enc_class aarch64_enc_java_to_runtime(method meth) %{
3731 C2_MacroAssembler _masm(&cbuf);
3732
3733 // Some calls to generated routines (arraycopy code) are scheduled
3734 // by C2 as runtime calls. If so, we can call them using a bl (they
3735 // will be in a reachable segment); otherwise we have to use a blr,
3736 // which loads the absolute address into a register.
3737 address entry = (address)$meth$$method;
3738 CodeBlob *cb = CodeCache::find_blob(entry);
3739 if (cb) {
3740 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3741 if (call == NULL) {
3742 ciEnv::current()->record_failure("CodeCache is full");
3743 return;
3744 }
3745 __ post_call_nop();
3746 } else {
3747 Label retaddr;
3802
3803 assert_different_registers(oop, box, tmp, disp_hdr);
3804
3805 // Load markWord from object into displaced_header.
3806 __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3807
3808 if (DiagnoseSyncOnValueBasedClasses != 0) {
3809 __ load_klass(tmp, oop);
3810 __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
3811 __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
3812 __ br(Assembler::NE, cont);
3813 }
3814
3815 // Check for existing monitor
3816 __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
3817
3818 if (!UseHeavyMonitors) {
3819 // Set tmp to be (markWord of object | UNLOCK_VALUE).
3820 __ orr(tmp, disp_hdr, markWord::unlocked_value);
3821
3822 if (EnableValhalla) {
3823 // Mask inline_type bit such that we go to the slow path if object is an inline type
3824 __ andr(tmp, tmp, ~((int) markWord::inline_type_bit_in_place));
3825 }
3826
3827 // Initialize the box. (Must happen before we update the object mark!)
3828 __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3829
3830 // Compare object markWord with an unlocked value (tmp) and if
3831 // equal exchange the stack address of our box with object markWord.
3832 // On failure disp_hdr contains the possibly locked markWord.
3833 __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
3834 /*release*/ true, /*weak*/ false, disp_hdr);
3835 __ br(Assembler::EQ, cont);
3836
3837 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3838
3839 // If the compare-and-exchange succeeded, then we found an unlocked
3840 // object, have now locked it, and will continue at label cont.
3841
3842 // Check if the owner is self by comparing the value in the
3843 // markWord of object (disp_hdr) with the stack pointer.
3844 __ mov(rscratch1, sp);
3845 __ sub(disp_hdr, disp_hdr, rscratch1);
3846 __ mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
7280 instruct loadConL(iRegLNoSp dst, immL src)
7281 %{
7282 match(Set dst src);
7283
7284 ins_cost(INSN_COST);
7285 format %{ "mov $dst, $src\t# long" %}
7286
7287 ins_encode( aarch64_enc_mov_imm(dst, src) );
7288
7289 ins_pipe(ialu_imm);
7290 %}
7291
7292 // Load Pointer Constant
7293
7294 instruct loadConP(iRegPNoSp dst, immP con)
7295 %{
7296 match(Set dst con);
7297
7298 ins_cost(INSN_COST * 4);
7299 format %{
7300 "mov $dst, $con\t# ptr"
7301 %}
7302
7303 ins_encode(aarch64_enc_mov_p(dst, con));
7304
7305 ins_pipe(ialu_imm);
7306 %}
7307
7308 // Load Null Pointer Constant
7309
7310 instruct loadConP0(iRegPNoSp dst, immP0 con)
7311 %{
7312 match(Set dst con);
7313
7314 ins_cost(INSN_COST);
7315 format %{ "mov $dst, $con\t# NULL ptr" %}
7316
7317 ins_encode(aarch64_enc_mov_p0(dst, con));
7318
7319 ins_pipe(ialu_imm);
7320 %}
8483 %}
8484
8485 // ============================================================================
8486 // Cast/Convert Instructions
8487
8488 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8489 match(Set dst (CastX2P src));
8490
8491 ins_cost(INSN_COST);
8492 format %{ "mov $dst, $src\t# long -> ptr" %}
8493
8494 ins_encode %{
8495 if ($dst$$reg != $src$$reg) {
8496 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8497 }
8498 %}
8499
8500 ins_pipe(ialu_reg);
8501 %}
8502
8503 instruct castN2X(iRegLNoSp dst, iRegN src) %{
8504 match(Set dst (CastP2X src));
8505
8506 ins_cost(INSN_COST);
8507 format %{ "mov $dst, $src\t# ptr -> long" %}
8508
8509 ins_encode %{
8510 if ($dst$$reg != $src$$reg) {
8511 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8512 }
8513 %}
8514
8515 ins_pipe(ialu_reg);
8516 %}
8517
8518 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8519 match(Set dst (CastP2X src));
8520
8521 ins_cost(INSN_COST);
8522 format %{ "mov $dst, $src\t# ptr -> long" %}
8523
8524 ins_encode %{
8525 if ($dst$$reg != $src$$reg) {
8526 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8527 }
8528 %}
8529
8530 ins_pipe(ialu_reg);
8531 %}
8532
8533 // Convert oop into int for vector alignment masking
8534 instruct convP2I(iRegINoSp dst, iRegP src) %{
8535 match(Set dst (ConvL2I (CastP2X src)));
8536
8537 ins_cost(INSN_COST);
15052
15053 match(Set dst (MoveL2D src));
15054
15055 effect(DEF dst, USE src);
15056
15057 ins_cost(INSN_COST);
15058
15059 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15060
15061 ins_encode %{
15062 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15063 %}
15064
15065 ins_pipe(fp_l2d);
15066
15067 %}
15068
15069 // ============================================================================
15070 // clearing of an array
15071
15072 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15073 %{
15074 match(Set dummy (ClearArray (Binary cnt base) zero));
15075 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15076
15077 ins_cost(4 * INSN_COST);
15078 format %{ "ClearArray $cnt, $base" %}
15079
15080 ins_encode %{
15081 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15082 if (tpc == NULL) {
15083 ciEnv::current()->record_failure("CodeCache is full");
15084 return;
15085 }
15086 %}
15087
15088 ins_pipe(pipe_class_memory);
15089 %}
15090
15091 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15092 %{
15093 predicate(((ClearArrayNode*)n)->word_copy_only());
15094 match(Set dummy (ClearArray (Binary cnt base) val));
15095 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15096
15097 ins_cost(4 * INSN_COST);
15098 format %{ "ClearArray $cnt, $base, $val" %}
15099
15100 ins_encode %{
15101 __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15102 %}
15103
15104 ins_pipe(pipe_class_memory);
15105 %}
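// Note on the $val variant above (hedged): unlike the zeroing rules it
// fills the range with an arbitrary 64-bit pattern via fill_words; the
// word_copy_only() predicate suggests it serves cases, such as flat
// arrays of inline types, where the default element is not all zero
// bits and block zeroing cannot be used.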
15106
15107 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15108 %{
15109 predicate((uint64_t)n->in(2)->get_long()
15110 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15111 && !((ClearArrayNode*)n)->word_copy_only());
15112 match(Set dummy (ClearArray cnt base));
15113 effect(TEMP temp, USE_KILL base, KILL cr);
15114
15115 ins_cost(4 * INSN_COST);
15116 format %{ "ClearArray $cnt, $base" %}
15117
15118 ins_encode %{
15119 address tpc = __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15120 if (tpc == NULL) {
15121 ciEnv::current()->record_failure("CodeCache is full");
15122 return;
15123 }
15124 %}
15125
15126 ins_pipe(pipe_class_memory);
15127 %}
15128
15129 // ============================================================================
15130 // Overflow Math Instructions
15131
16425
16426 // Call Runtime Instruction
16427
16428 instruct CallLeafDirect(method meth)
16429 %{
16430 match(CallLeaf);
16431
16432 effect(USE meth);
16433
16434 ins_cost(CALL_COST);
16435
16436 format %{ "CALL, runtime leaf $meth" %}
16437
16438 ins_encode( aarch64_enc_java_to_runtime(meth) );
16439
16440 ins_pipe(pipe_class_call);
16441 %}
16442
16443 // Call Runtime Instruction
16444
16445 // Entry point is null; the target register holds the address to call.
16446 instruct CallLeafNoFPIndirect(iRegP target)
16447 %{
16448 predicate(n->as_Call()->entry_point() == NULL);
16449
16450 match(CallLeafNoFP target);
16451
16452 ins_cost(CALL_COST);
16453
16454 format %{ "CALL, runtime leaf nofp indirect $target" %}
16455
16456 ins_encode %{
16457 __ blr($target$$Register);
16458 %}
16459
16460 ins_pipe(pipe_class_call);
16461 %}
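// Cross-reference: this indirect leaf call is the case that
// MachCallRuntimeNode::ret_addr_offset handles with its
// _entry_point == NULL branch near the top of this file ("See
// CallLeafNoFPIndirect"): a single blr, so the return address is one
// instruction past the call.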
16462
16463 instruct CallLeafNoFPDirect(method meth)
16464 %{
16465 predicate(n->as_Call()->entry_point() != NULL);
16466
16467 match(CallLeafNoFP);
16468
16469 effect(USE meth);
16470
16471 ins_cost(CALL_COST);
16472
16473 format %{ "CALL, runtime leaf nofp $meth" %}
16474
16475 ins_encode( aarch64_enc_java_to_runtime(meth) );
16476
16477 ins_pipe(pipe_class_call);
16478 %}
16479
16480 // Tail Call; Jump from runtime stub to Java code.
16481 // Also known as an 'interprocedural jump'.
16482 // Target of jump will eventually return to caller.
16483 // TailJump below removes the return address.
16484 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_ptr)
16485 %{
16486 match(TailCall jump_target method_ptr);