1759
1760 int MachCallDynamicJavaNode::ret_addr_offset()
1761 {
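// A minimal accounting sketch (assuming ic_call's usual expansion): the
// inline-cache placeholder is materialized with movz/movk/movk and the
// call issued with bl -- four instructions of
// NativeInstruction::instruction_size (4) bytes each, hence 16.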
1762 return 16; // movz, movk, movk, bl
1763 }
1764
1765 int MachCallRuntimeNode::ret_addr_offset() {
1766 // for generated stubs the call will be
1767 // bl(addr)
1768 // or with far branches
1769 // bl(trampoline_stub)
1770 // for real runtime callouts it will be six instructions
1771 // see aarch64_enc_java_to_runtime
1772 // adr(rscratch2, retaddr)
1773 // lea(rscratch1, RuntimeAddress(addr))
1774 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1775 // blr(rscratch1)
1776 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1777 if (cb) {
1778 return 1 * NativeInstruction::instruction_size;
1779 } else {
1780 return 6 * NativeInstruction::instruction_size;
1781 }
1782 }
1783
1784 int MachCallNativeNode::ret_addr_offset() {
1785 // This is implemented using aarch64_enc_java_to_runtime as above.
1786 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1787 if (cb) {
1788 return 1 * NativeInstruction::instruction_size;
1789 } else {
1790 return 6 * NativeInstruction::instruction_size;
1791 }
1792 }
1793
1794 //=============================================================================
1795
1796 #ifndef PRODUCT
1797 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1798 st->print("BREAKPOINT");
1880 st->print("\n\t");
1881 st->print("ldr rscratch1, [guard]\n\t");
1882 st->print("dmb ishld\n\t");
1883 st->print("ldr rscratch2, [rthread, #thread_disarmed_offset]\n\t");
1884 st->print("cmp rscratch1, rscratch2\n\t");
1885 st->print("b.eq skip");
1886 st->print("\n\t");
1887 st->print("blr #nmethod_entry_barrier_stub\n\t");
1888 st->print("b skip\n\t");
1889 st->print("guard: int\n\t");
1890 st->print("\n\t");
1891 st->print("skip:\n\t");
1892 }
1893 }
1894 #endif
1895
1896 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1897 Compile* C = ra_->C;
1898 C2_MacroAssembler _masm(&cbuf);
1899
1900 // n.b. frame size includes space for return pc and rfp
1901 const int framesize = C->output()->frame_size_in_bytes();
1902
1903 // insert a nop at the start of the prolog so we can patch in a
1904 // branch if we need to invalidate the method later
1905 __ nop();
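// (the nop is exactly wide enough for that branch; when the nmethod is
// made not-entrant the runtime overwrites it, see
// NativeJump::patch_verified_entry)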
1906
1907 if (C->clinit_barrier_on_entry()) {
1908 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1909
1910 Label L_skip_barrier;
1911
1912 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1913 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1914 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1915 __ bind(L_skip_barrier);
1916 }
1917
1918 if (C->max_vector_size() > 0) {
1919 __ reinitialize_ptrue();
1920 }
1921
1922 int bangsize = C->output()->bang_size_in_bytes();
1923 if (C->output()->need_stack_bang(bangsize))
1924 __ generate_stack_overflow_check(bangsize);
1925
1926 __ build_frame(framesize);
1927
1928 if (C->stub_function() == NULL) {
1929 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1930 bs->nmethod_entry_barrier(&_masm);
1931 }
1932
1933 if (VerifyStackAtCalls) {
1934 Unimplemented();
1935 }
1936
1937 C->output()->set_frame_complete(cbuf.insts_size());
1938
1939 if (C->has_mach_constant_base_node()) {
1940 // NOTE: We set the table base offset here because users might be
1941 // emitted before MachConstantBaseNode.
1942 ConstantTable& constant_table = C->output()->constant_table();
1943 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1944 }
1945 }
1946
1947 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1948 {
1949 return MachNode::size(ra_); // too many variables; just compute it
1950 // the hard way
1951 }
1952
1953 int MachPrologNode::reloc() const
1954 {
1955 return 0;
1956 }
1957
1958 //=============================================================================
1959
1960 #ifndef PRODUCT
1961 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1962 Compile* C = ra_->C;
1963 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1964
1965 st->print("# pop frame %d\n\t",framesize);
1966
1967 if (framesize == 0) {
1968 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1969 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1970 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1971 st->print("add sp, sp, #%d\n\t", framesize);
1972 } else {
1973 st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
1974 st->print("add sp, sp, rscratch1\n\t");
1975 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1976 }
1977 if (VM_Version::use_rop_protection()) {
1978 st->print("autia lr, rfp\n\t");
1979 st->print("ldr zr, [lr]\n\t");
1980 }
1981
1982 if (do_polling() && C->is_method_compilation()) {
1983 st->print("# test polling word\n\t");
1984 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1985 st->print("cmp sp, rscratch1\n\t");
1986 st->print("bhi #slow_path");
1987 }
1988 }
1989 #endif
1990
1991 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1992 Compile* C = ra_->C;
1993 C2_MacroAssembler _masm(&cbuf);
1994 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1995
1996 __ remove_frame(framesize);
1997
1998 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1999 __ reserved_stack_check();
2000 }
2001
2002 if (do_polling() && C->is_method_compilation()) {
2003 Label dummy_label;
2004 Label* code_stub = &dummy_label;
2005 if (!C->output()->in_scratch_emit_size()) {
2006 code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
2007 }
2008 __ relocate(relocInfo::poll_return_type);
2009 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
2010 }
2011 }
2012
2013 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
2014 // Variable size. Determine dynamically.
2015 return MachNode::size(ra_);
2016 }
2017
2018 int MachEpilogNode::reloc() const {
2019 // Return number of relocatable values contained in this instruction.
2020 return 1; // 1 for polling page.
2021 }
2022
2023 const Pipeline * MachEpilogNode::pipeline() const {
2024 return MachNode::pipeline_class();
2025 }
2026
2027 //=============================================================================
2028
2029 // Figure out which register class each belongs in: rc_int, rc_float or
2030 // rc_stack.
2031 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
2032
2033 static enum RC rc_class(OptoReg::Name reg) {
2034
2035 if (reg == OptoReg::Bad) {
2036 return rc_bad;
2037 }
2303
2304 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2305 int reg = ra_->get_encode(this);
2306
2307 // This add will handle any 24-bit signed offset. 24 bits allows an
2308 // 8 megabyte stack frame.
2309 __ add(as_Register(reg), sp, offset);
2310 }
2311
2312 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2313 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2314 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2315
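// An offset that fits the 12-bit (optionally shifted) add/sub immediate
// encodes as a single add; a larger offset is presumably materialized
// into a scratch register first, costing a second instruction.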
2316 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2317 return NativeInstruction::instruction_size;
2318 } else {
2319 return 2 * NativeInstruction::instruction_size;
2320 }
2321 }
2322
2323 //=============================================================================
2324
2325 #ifndef PRODUCT
2326 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2327 {
2328 st->print_cr("# MachUEPNode");
2329 if (UseCompressedClassPointers) {
2330 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2331 if (CompressedKlassPointers::shift() != 0) {
2332 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2333 }
2334 } else {
2335 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2336 }
2337 st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2338 st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
2339 }
2340 #endif
2341
2342 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2343 {
2344 // This is the unverified entry point.
2345 C2_MacroAssembler _masm(&cbuf);
2346
2347 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2348 Label skip;
2349 // TODO
2350 // can we avoid this skip and still use a reloc?
2351 __ br(Assembler::EQ, skip);
2352 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2353 __ bind(skip);
2354 }
2355
2356 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2357 {
2358 return MachNode::size(ra_);
2359 }
2360
2361 // REQUIRED EMIT CODE
2362
2363 //=============================================================================
2364
2365 // Emit exception handler code.
2366 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2367 {
2368 // mov rscratch1 #exception_blob_entry_point
2369 // br rscratch1
2370 // Note that the code buffer's insts_mark is always relative to insts.
2371 // That's why we must use the macroassembler to generate a handler.
2372 C2_MacroAssembler _masm(&cbuf);
2373 address base = __ start_a_stub(size_exception_handler());
2374 if (base == NULL) {
2375 ciEnv::current()->record_failure("CodeCache is full");
2376 return 0; // CodeBuffer::expand failed
2377 }
2378 int offset = __ offset();
2379 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2380 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3831 %}
3832
3833 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3834 C2_MacroAssembler _masm(&cbuf);
3835 int method_index = resolved_method_index(cbuf);
3836 address call = __ ic_call((address)$meth$$method, method_index);
3837 if (call == NULL) {
3838 ciEnv::current()->record_failure("CodeCache is full");
3839 return;
3840 } else if (Compile::current()->max_vector_size() > 0) {
3841 __ reinitialize_ptrue();
3842 }
3843 %}
3844
3845 enc_class aarch64_enc_call_epilog() %{
3846 C2_MacroAssembler _masm(&cbuf);
3847 if (VerifyStackAtCalls) {
3848 // Check that stack depth is unchanged: find majik cookie on stack
3849 __ call_Unimplemented();
3850 }
3851 %}
3852
3853 enc_class aarch64_enc_java_to_runtime(method meth) %{
3854 C2_MacroAssembler _masm(&cbuf);
3855
3856 // some calls to generated routines (arraycopy code) are scheduled
3857 // by C2 as runtime calls. if so we can call them using a bl (they
3858 // will be in a reachable segment) otherwise we have to use a blr
3859 // which loads the absolute address into a register.
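// n.b. in the far case, lea(rscratch1, RuntimeAddress(addr)) effectively
// expands to a movz/movk/movk immediate load, so adr + lea + stp + blr
// is the six-instruction sequence that
// MachCallRuntimeNode::ret_addr_offset above accounts for.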
3860 address entry = (address)$meth$$method;
3861 CodeBlob *cb = CodeCache::find_blob(entry);
3862 if (cb) {
3863 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3864 if (call == NULL) {
3865 ciEnv::current()->record_failure("CodeCache is full");
3866 return;
3867 }
3868 } else {
3869 Label retaddr;
3870 __ adr(rscratch2, retaddr);
3923
3924 assert_different_registers(oop, box, tmp, disp_hdr);
3925
3926 // Load markWord from object into displaced_header.
3927 __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3928
3929 if (DiagnoseSyncOnValueBasedClasses != 0) {
3930 __ load_klass(tmp, oop);
3931 __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
3932 __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
3933 __ br(Assembler::NE, cont);
3934 }
3935
3936 // Check for existing monitor
3937 __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
3938
3939 if (!UseHeavyMonitors) {
3940 // Set tmp to be (markWord of object | UNLOCK_VALUE).
3941 __ orr(tmp, disp_hdr, markWord::unlocked_value);
3942
3943 // Initialize the box. (Must happen before we update the object mark!)
3944 __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3945
3946 // Compare object markWord with an unlocked value (tmp) and if
3947 // equal exchange the stack address of our box with object markWord.
3948 // On failure disp_hdr contains the possibly locked markWord.
3949 __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
3950 /*release*/ true, /*weak*/ false, disp_hdr);
3951 __ br(Assembler::EQ, cont);
3952
3953 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3954
3955 // If the compare-and-exchange succeeded, then we found an unlocked
3956 // object, have now locked it, and will continue at label cont
3957
3958 __ bind(cas_failed);
3959 // We did not see an unlocked object so try the fast recursive case.
3960
3961 // Check if the owner is self by comparing the value in the
3962 // markWord of object (disp_hdr) with the stack pointer.
7671 instruct loadConL(iRegLNoSp dst, immL src)
7672 %{
7673 match(Set dst src);
7674
7675 ins_cost(INSN_COST);
7676 format %{ "mov $dst, $src\t# long" %}
7677
7678 ins_encode( aarch64_enc_mov_imm(dst, src) );
7679
7680 ins_pipe(ialu_imm);
7681 %}
7682
7683 // Load Pointer Constant
7684
7685 instruct loadConP(iRegPNoSp dst, immP con)
7686 %{
7687 match(Set dst con);
7688
7689 ins_cost(INSN_COST * 4);
7690 format %{
7691 "mov $dst, $con\t# ptr\n\t"
7692 %}
7693
7694 ins_encode(aarch64_enc_mov_p(dst, con));
7695
7696 ins_pipe(ialu_imm);
7697 %}
7698
7699 // Load Null Pointer Constant
7700
7701 instruct loadConP0(iRegPNoSp dst, immP0 con)
7702 %{
7703 match(Set dst con);
7704
7705 ins_cost(INSN_COST);
7706 format %{ "mov $dst, $con\t# NULL ptr" %}
7707
7708 ins_encode(aarch64_enc_mov_p0(dst, con));
7709
7710 ins_pipe(ialu_imm);
7711 %}
8876 %}
8877
8878 // ============================================================================
8879 // Cast/Convert Instructions
8880
8881 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8882 match(Set dst (CastX2P src));
8883
8884 ins_cost(INSN_COST);
8885 format %{ "mov $dst, $src\t# long -> ptr" %}
8886
8887 ins_encode %{
8888 if ($dst$$reg != $src$$reg) {
8889 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8890 }
8891 %}
8892
8893 ins_pipe(ialu_reg);
8894 %}
8895
8896 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8897 match(Set dst (CastP2X src));
8898
8899 ins_cost(INSN_COST);
8900 format %{ "mov $dst, $src\t# ptr -> long" %}
8901
8902 ins_encode %{
8903 if ($dst$$reg != $src$$reg) {
8904 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8905 }
8906 %}
8907
8908 ins_pipe(ialu_reg);
8909 %}
8910
8911 // Convert oop into int for vector alignment masking
8912 instruct convP2I(iRegINoSp dst, iRegP src) %{
8913 match(Set dst (ConvL2I (CastP2X src)));
8914
8915 ins_cost(INSN_COST);
15239
15240 match(Set dst (MoveL2D src));
15241
15242 effect(DEF dst, USE src);
15243
15244 ins_cost(INSN_COST);
15245
15246 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15247
15248 ins_encode %{
15249 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15250 %}
15251
15252 ins_pipe(fp_l2d);
15253
15254 %}
15255
15256 // ============================================================================
15257 // clearing of an array
15258
15259 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
15260 %{
15261 match(Set dummy (ClearArray cnt base));
15262 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15263
15264 ins_cost(4 * INSN_COST);
15265 format %{ "ClearArray $cnt, $base" %}
15266
15267 ins_encode %{
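// zero_words may reach the shared zero_blocks stub through a trampoline;
// a NULL return signals that stub or trampoline emission failed for lack
// of code cache space, hence the bail-out below.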
15268 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15269 if (tpc == NULL) {
15270 ciEnv::current()->record_failure("CodeCache is full");
15271 return;
15272 }
15273 %}
15274
15275 ins_pipe(pipe_class_memory);
15276 %}
15277
15278 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15279 %{
15280 predicate((uint64_t)n->in(2)->get_long()
15281 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
15282 match(Set dummy (ClearArray cnt base));
15283 effect(TEMP temp, USE_KILL base, KILL cr);
15284
15285 ins_cost(4 * INSN_COST);
15286 format %{ "ClearArray $cnt, $base" %}
15287
15288 ins_encode %{
15289 __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15290 %}
15291
15292 ins_pipe(pipe_class_memory);
15293 %}
15294
15295 // ============================================================================
15296 // Overflow Math Instructions
15297
15298 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15299 %{
15300 match(Set cr (OverflowAddI op1 op2));
15301
16591
16592 // Call Runtime Instruction
16593
16594 instruct CallLeafDirect(method meth)
16595 %{
16596 match(CallLeaf);
16597
16598 effect(USE meth);
16599
16600 ins_cost(CALL_COST);
16601
16602 format %{ "CALL, runtime leaf $meth" %}
16603
16604 ins_encode( aarch64_enc_java_to_runtime(meth) );
16605
16606 ins_pipe(pipe_class_call);
16607 %}
16608
16609 // Call Runtime Instruction
16610
16611 instruct CallLeafNoFPDirect(method meth)
16612 %{
16613 match(CallLeafNoFP);
16614
16615 effect(USE meth);
16616
16617 ins_cost(CALL_COST);
16618
16619 format %{ "CALL, runtime leaf nofp $meth" %}
16620
16621 ins_encode( aarch64_enc_java_to_runtime(meth) );
16622
16623 ins_pipe(pipe_class_call);
16624 %}
16625
16626 instruct CallNativeDirect(method meth)
16627 %{
16628 match(CallNative);
16629
16630 effect(USE meth);
16631
16632 ins_cost(CALL_COST);
1759
1760 int MachCallDynamicJavaNode::ret_addr_offset()
1761 {
1762 return 16; // movz, movk, movk, bl
1763 }
1764
1765 int MachCallRuntimeNode::ret_addr_offset() {
1766 // for generated stubs the call will be
1767 // bl(addr)
1768 // or with far branches
1769 // bl(trampoline_stub)
1770 // for real runtime callouts it will be six instructions
1771 // see aarch64_enc_java_to_runtime
1772 // adr(rscratch2, retaddr)
1773 // lea(rscratch1, RuntimeAddress(addr))
1774 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1775 // blr(rscratch1)
1776 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1777 if (cb) {
1778 return 1 * NativeInstruction::instruction_size;
1779 } else if (_entry_point == NULL) {
1780 // See CallLeafNoFPIndirect
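// (an indirect leaf call is a bare blr($target), so the return
// address is one instruction past the call site)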
1781 return 1 * NativeInstruction::instruction_size;
1782 } else {
1783 return 6 * NativeInstruction::instruction_size;
1784 }
1785 }
1786
1787 int MachCallNativeNode::ret_addr_offset() {
1788 // This is implemented using aarch64_enc_java_to_runtime as above.
1789 CodeBlob *cb = CodeCache::find_blob(_entry_point);
1790 if (cb) {
1791 return 1 * NativeInstruction::instruction_size;
1792 } else {
1793 return 6 * NativeInstruction::instruction_size;
1794 }
1795 }
1796
1797 //=============================================================================
1798
1799 #ifndef PRODUCT
1800 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1801 st->print("BREAKPOINT");
1883 st->print("\n\t");
1884 st->print("ldr rscratch1, [guard]\n\t");
1885 st->print("dmb ishld\n\t");
1886 st->print("ldr rscratch2, [rthread, #thread_disarmed_offset]\n\t");
1887 st->print("cmp rscratch1, rscratch2\n\t");
1888 st->print("b.eq skip");
1889 st->print("\n\t");
1890 st->print("blr #nmethod_entry_barrier_stub\n\t");
1891 st->print("b skip\n\t");
1892 st->print("guard: int\n\t");
1893 st->print("\n\t");
1894 st->print("skip:\n\t");
1895 }
1896 }
1897 #endif
1898
1899 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1900 Compile* C = ra_->C;
1901 C2_MacroAssembler _masm(&cbuf);
1902
1903 // insert a nop at the start of the prolog so we can patch in a
1904 // branch if we need to invalidate the method later
1905 __ nop();
1906
1907 if (C->clinit_barrier_on_entry()) {
1908 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1909
1910 Label L_skip_barrier;
1911
1912 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1913 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1914 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1915 __ bind(L_skip_barrier);
1916 }
1917
1918 if (C->max_vector_size() > 0) {
1919 __ reinitialize_ptrue();
1920 }
1921
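// verified_entry() here subsumes the stack bang and build_frame of the
// plain prolog and records the stack increment (0 in this case) for
// later repair; the bind publishes the label that MachVEPNode branches
// to after unpacking inline-type args (see MachVEPNode::emit below).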
1922 __ verified_entry(C, 0);
1923 __ bind(*_verified_entry);
1924
1925 if (C->stub_function() == NULL) {
1926 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1927 bs->nmethod_entry_barrier(&_masm);
1928 }
1929
1930 if (VerifyStackAtCalls) {
1931 Unimplemented();
1932 }
1933
1934 C->output()->set_frame_complete(cbuf.insts_size());
1935
1936 if (C->has_mach_constant_base_node()) {
1937 // NOTE: We set the table base offset here because users might be
1938 // emitted before MachConstantBaseNode.
1939 ConstantTable& constant_table = C->output()->constant_table();
1940 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1941 }
1942 }
1943
1944 int MachPrologNode::reloc() const
1945 {
1946 return 0;
1947 }
1948
1949 //=============================================================================
1950
1951 #ifndef PRODUCT
1952 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1953 Compile* C = ra_->C;
1954 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1955
1956 st->print("# pop frame %d\n\t",framesize);
1957
1958 if (framesize == 0) {
1959 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1960 } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1961 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1962 st->print("add sp, sp, #%d\n\t", framesize);
1963 } else {
1964 st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
1965 st->print("add sp, sp, rscratch1\n\t");
1966 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1967 }
1968 if (VM_Version::use_rop_protection()) {
1969 st->print("autia lr, rfp\n\t");
1970 st->print("ldr zr, [lr]\n\t");
1971 }
1972
1973 if (do_polling() && C->is_method_compilation()) {
1974 st->print("# test polling word\n\t");
1975 st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
1976 st->print("cmp sp, rscratch1\n\t");
1977 st->print("bhi #slow_path");
1978 }
1979 }
1980 #endif
1981
1982 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1983 Compile* C = ra_->C;
1984 C2_MacroAssembler _masm(&cbuf);
1985 int framesize = C->output()->frame_slots() << LogBytesPerInt;
1986
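// when needs_stack_repair() is true the prolog saved the actual frame
// size (it may exceed framesize if inline-type args were scalarized),
// and remove_frame is expected to restore sp from that saved value.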
1987 __ remove_frame(framesize, C->needs_stack_repair());
1988
1989 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1990 __ reserved_stack_check();
1991 }
1992
1993 if (do_polling() && C->is_method_compilation()) {
1994 Label dummy_label;
1995 Label* code_stub = &dummy_label;
1996 if (!C->output()->in_scratch_emit_size()) {
1997 code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
1998 }
1999 __ relocate(relocInfo::poll_return_type);
2000 __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
2001 }
2002 }
2003
2004 int MachEpilogNode::reloc() const {
2005 // Return number of relocatable values contained in this instruction.
2006 return 1; // 1 for polling page.
2007 }
2008
2009 const Pipeline * MachEpilogNode::pipeline() const {
2010 return MachNode::pipeline_class();
2011 }
2012
2013 //=============================================================================
2014
2015 // Figure out which register class each belongs in: rc_int, rc_float or
2016 // rc_stack.
2017 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
2018
2019 static enum RC rc_class(OptoReg::Name reg) {
2020
2021 if (reg == OptoReg::Bad) {
2022 return rc_bad;
2023 }
2289
2290 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2291 int reg = ra_->get_encode(this);
2292
2293 // This add will handle any 24-bit signed offset. 24 bits allows an
2294 // 8 megabyte stack frame.
2295 __ add(as_Register(reg), sp, offset);
2296 }
2297
2298 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2299 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2300 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2301
2302 if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2303 return NativeInstruction::instruction_size;
2304 } else {
2305 return 2 * NativeInstruction::instruction_size;
2306 }
2307 }
2308
2309 ///=============================================================================
2310 #ifndef PRODUCT
2311 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2312 {
2313 st->print_cr("# MachVEPNode");
2314 if (!_verified) {
2315 st->print_cr("\t load_class");
2316 } else {
2317 st->print_cr("\t unpack_inline_arg");
2318 }
2319 }
2320 #endif
2321
2322 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2323 {
2324 MacroAssembler _masm(&cbuf);
2325
2326 if (!_verified) {
2327 Label skip;
2328 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2329 __ br(Assembler::EQ, skip);
2330 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2331 __ bind(skip);
2332
2333 } else {
2334 // Unpack inline type args passed as oop and then jump to
2335 // the verified entry point (skipping the unverified entry).
2336 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2337 // Emit code for verified entry and save increment for stack repair on return
2338 __ verified_entry(ra_->C, sp_inc);
2339 __ b(*_verified_entry);
2340 }
2341 }
2342
2343 //=============================================================================
2344 #ifndef PRODUCT
2345 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2346 {
2347 st->print_cr("# MachUEPNode");
2348 if (UseCompressedClassPointers) {
2349 st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2350 if (CompressedKlassPointers::shift() != 0) {
2351 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2352 }
2353 } else {
2354 st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2355 }
2356 st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2357 st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
2358 }
2359 #endif
2360
2361 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2362 {
2363 // This is the unverified entry point.
2364 C2_MacroAssembler _masm(&cbuf);
2365 Label skip;
2366
2367 // UseCompressedClassPointers logic is inside cmp_klass
2368 __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2369
2370 // TODO
2371 // can we avoid this skip and still use a reloc?
2372 __ br(Assembler::EQ, skip);
2373 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2374 __ bind(skip);
2375 }
2376
2377 // REQUIRED EMIT CODE
2378
2379 //=============================================================================
2380
2381 // Emit exception handler code.
2382 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2383 {
2384 // mov rscratch1 #exception_blob_entry_point
2385 // br rscratch1
2386 // Note that the code buffer's insts_mark is always relative to insts.
2387 // That's why we must use the macroassembler to generate a handler.
2388 C2_MacroAssembler _masm(&cbuf);
2389 address base = __ start_a_stub(size_exception_handler());
2390 if (base == NULL) {
2391 ciEnv::current()->record_failure("CodeCache is full");
2392 return 0; // CodeBuffer::expand failed
2393 }
2394 int offset = __ offset();
2395 __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2396 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3847 %}
3848
3849 enc_class aarch64_enc_java_dynamic_call(method meth) %{
3850 C2_MacroAssembler _masm(&cbuf);
3851 int method_index = resolved_method_index(cbuf);
3852 address call = __ ic_call((address)$meth$$method, method_index);
3853 if (call == NULL) {
3854 ciEnv::current()->record_failure("CodeCache is full");
3855 return;
3856 } else if (Compile::current()->max_vector_size() > 0) {
3857 __ reinitialize_ptrue();
3858 }
3859 %}
3860
3861 enc_class aarch64_enc_call_epilog() %{
3862 C2_MacroAssembler _masm(&cbuf);
3863 if (VerifyStackAtCalls) {
3864 // Check that stack depth is unchanged: find majik cookie on stack
3865 __ call_Unimplemented();
3866 }
3867 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
3868 if (!_method->signature()->returns_null_free_inline_type()) {
3869 // The last return value is not set by the callee but is used to pass IsInit information to compiled code.
3870 // Search for the corresponding projection, get the register and emit code that initializes it.
3871 uint con = (tf()->range_cc()->cnt() - 1);
3872 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
3873 ProjNode* proj = fast_out(i)->as_Proj();
3874 if (proj->_con == con) {
3875 // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
3876 OptoReg::Name optoReg = ra_->get_reg_first(proj);
3877 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
3878 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
3879 __ cmp(r0, zr);
3880 __ cset(toReg, Assembler::NE);
3881 if (reg->is_stack()) {
3882 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
3883 __ str(toReg, Address(sp, st_off));
3884 }
3885 break;
3886 }
3887 }
3888 }
3889 if (return_value_is_used()) {
3890 // An inline type is returned as fields in multiple registers.
3891 // R0 either contains an oop if the inline type is buffered or a pointer
3892 // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
3893 // if the lowest bit is set to allow C2 to use the oop after null checking.
3894 // r0 &= (r0 & 1) - 1
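// if the tag bit is set: (1 - 1) == 0, so r0 is cleared;
// if it is clear: (0 - 1) == all-ones, so r0 is left intact.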
3895 __ andr(rscratch1, r0, 0x1);
3896 __ sub(rscratch1, rscratch1, 0x1);
3897 __ andr(r0, r0, rscratch1);
3898 }
3899 }
3900 %}
3901
3902 enc_class aarch64_enc_java_to_runtime(method meth) %{
3903 C2_MacroAssembler _masm(&cbuf);
3904
3905 // some calls to generated routines (arraycopy code) are scheduled
3906 // by C2 as runtime calls. if so we can call them using a bl (they
3907 // will be in a reachable segment) otherwise we have to use a blr
3908 // which loads the absolute address into a register.
3909 address entry = (address)$meth$$method;
3910 CodeBlob *cb = CodeCache::find_blob(entry);
3911 if (cb) {
3912 address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3913 if (call == NULL) {
3914 ciEnv::current()->record_failure("CodeCache is full");
3915 return;
3916 }
3917 } else {
3918 Label retaddr;
3919 __ adr(rscratch2, retaddr);
3972
3973 assert_different_registers(oop, box, tmp, disp_hdr);
3974
3975 // Load markWord from object into displaced_header.
3976 __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3977
3978 if (DiagnoseSyncOnValueBasedClasses != 0) {
3979 __ load_klass(tmp, oop);
3980 __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
3981 __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
3982 __ br(Assembler::NE, cont);
3983 }
3984
3985 // Check for existing monitor
3986 __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
3987
3988 if (!UseHeavyMonitors) {
3989 // Set tmp to be (markWord of object | UNLOCK_VALUE).
3990 __ orr(tmp, disp_hdr, markWord::unlocked_value);
3991
3992 if (EnableValhalla) {
3993 // Mask inline_type bit such that we go to the slow path if object is an inline type
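// (tmp is the expected value for the cmpxchg below; clearing the bit
// guarantees the compare fails whenever the object's markWord has
// inline_type_bit set, so locking an inline type always falls through
// to the slow path)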
3994 __ andr(tmp, tmp, ~((int) markWord::inline_type_bit_in_place));
3995 }
3996
3997 // Initialize the box. (Must happen before we update the object mark!)
3998 __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3999
4000 // Compare object markWord with an unlocked value (tmp) and if
4001 // equal exchange the stack address of our box with object markWord.
4002 // On failure disp_hdr contains the possibly locked markWord.
4003 __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
4004 /*release*/ true, /*weak*/ false, disp_hdr);
4005 __ br(Assembler::EQ, cont);
4006
4007 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4008
4009 // If the compare-and-exchange succeeded, then we found an unlocked
4010 // object, have now locked it, and will continue at label cont
4011
4012 __ bind(cas_failed);
4013 // We did not see an unlocked object so try the fast recursive case.
4014
4015 // Check if the owner is self by comparing the value in the
4016 // markWord of object (disp_hdr) with the stack pointer.
7725 instruct loadConL(iRegLNoSp dst, immL src)
7726 %{
7727 match(Set dst src);
7728
7729 ins_cost(INSN_COST);
7730 format %{ "mov $dst, $src\t# long" %}
7731
7732 ins_encode( aarch64_enc_mov_imm(dst, src) );
7733
7734 ins_pipe(ialu_imm);
7735 %}
7736
7737 // Load Pointer Constant
7738
7739 instruct loadConP(iRegPNoSp dst, immP con)
7740 %{
7741 match(Set dst con);
7742
7743 ins_cost(INSN_COST * 4);
7744 format %{
7745 "mov $dst, $con\t# ptr"
7746 %}
7747
7748 ins_encode(aarch64_enc_mov_p(dst, con));
7749
7750 ins_pipe(ialu_imm);
7751 %}
7752
7753 // Load Null Pointer Constant
7754
7755 instruct loadConP0(iRegPNoSp dst, immP0 con)
7756 %{
7757 match(Set dst con);
7758
7759 ins_cost(INSN_COST);
7760 format %{ "mov $dst, $con\t# NULL ptr" %}
7761
7762 ins_encode(aarch64_enc_mov_p0(dst, con));
7763
7764 ins_pipe(ialu_imm);
7765 %}
8930 %}
8931
8932 // ============================================================================
8933 // Cast/Convert Instructions
8934
8935 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8936 match(Set dst (CastX2P src));
8937
8938 ins_cost(INSN_COST);
8939 format %{ "mov $dst, $src\t# long -> ptr" %}
8940
8941 ins_encode %{
8942 if ($dst$$reg != $src$$reg) {
8943 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8944 }
8945 %}
8946
8947 ins_pipe(ialu_reg);
8948 %}
8949
8950 instruct castN2X(iRegLNoSp dst, iRegN src) %{
8951 match(Set dst (CastP2X src));
8952
8953 ins_cost(INSN_COST);
8954 format %{ "mov $dst, $src\t# narrow ptr -> long" %}
8955
8956 ins_encode %{
8957 if ($dst$$reg != $src$$reg) {
8958 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8959 }
8960 %}
8961
8962 ins_pipe(ialu_reg);
8963 %}
8964
8965 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8966 match(Set dst (CastP2X src));
8967
8968 ins_cost(INSN_COST);
8969 format %{ "mov $dst, $src\t# ptr -> long" %}
8970
8971 ins_encode %{
8972 if ($dst$$reg != $src$$reg) {
8973 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8974 }
8975 %}
8976
8977 ins_pipe(ialu_reg);
8978 %}
8979
8980 // Convert oop into int for vector alignment masking
8981 instruct convP2I(iRegINoSp dst, iRegP src) %{
8982 match(Set dst (ConvL2I (CastP2X src)));
8983
8984 ins_cost(INSN_COST);
15308
15309 match(Set dst (MoveL2D src));
15310
15311 effect(DEF dst, USE src);
15312
15313 ins_cost(INSN_COST);
15314
15315 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15316
15317 ins_encode %{
15318 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15319 %}
15320
15321 ins_pipe(fp_l2d);
15322
15323 %}
15324
15325 // ============================================================================
15326 // clearing of an array
15327
15328 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15329 %{
15330 match(Set dummy (ClearArray (Binary cnt base) zero));
15331 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15332
15333 ins_cost(4 * INSN_COST);
15334 format %{ "ClearArray $cnt, $base" %}
15335
15336 ins_encode %{
15337 address tpc = __ zero_words($base$$Register, $cnt$$Register);
15338 if (tpc == NULL) {
15339 ciEnv::current()->record_failure("CodeCache is full");
15340 return;
15341 }
15342 %}
15343
15344 ins_pipe(pipe_class_memory);
15345 %}
15346
15347 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15348 %{
15349 predicate(((ClearArrayNode*)n)->word_copy_only());
15350 match(Set dummy (ClearArray (Binary cnt base) val));
15351 effect(USE_KILL cnt, USE_KILL base, KILL cr);
15352
15353 ins_cost(4 * INSN_COST);
15354 format %{ "ClearArray $cnt, $base, $val" %}
15355
15356 ins_encode %{
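// fill_words stores the 64-bit pattern in $val into every word of the
// range instead of zeroing it (e.g. to write the non-zero default
// value of a flat inline-type array).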
15357 __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15358 %}
15359
15360 ins_pipe(pipe_class_memory);
15361 %}
15362
15363 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15364 %{
15365 predicate((uint64_t)n->in(2)->get_long()
15366 < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15367 && !((ClearArrayNode*)n)->word_copy_only());
15368 match(Set dummy (ClearArray cnt base));
15369 effect(TEMP temp, USE_KILL base, KILL cr);
15370
15371 ins_cost(4 * INSN_COST);
15372 format %{ "ClearArray $cnt, $base" %}
15373
15374 ins_encode %{
15375 __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15376 %}
15377
15378 ins_pipe(pipe_class_memory);
15379 %}
15380
15381 // ============================================================================
15382 // Overflow Math Instructions
15383
15384 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15385 %{
15386 match(Set cr (OverflowAddI op1 op2));
15387
16677
16678 // Call Runtime Instruction
16679
16680 instruct CallLeafDirect(method meth)
16681 %{
16682 match(CallLeaf);
16683
16684 effect(USE meth);
16685
16686 ins_cost(CALL_COST);
16687
16688 format %{ "CALL, runtime leaf $meth" %}
16689
16690 ins_encode( aarch64_enc_java_to_runtime(meth) );
16691
16692 ins_pipe(pipe_class_call);
16693 %}
16694
16695 // Call Runtime Instruction
16696
16697 // entry point is null, target holds the address to call
16698 instruct CallLeafNoFPIndirect(iRegP target)
16699 %{
16700 predicate(n->as_Call()->entry_point() == NULL);
16701
16702 match(CallLeafNoFP target);
16703
16704 ins_cost(CALL_COST);
16705
16706 format %{ "CALL, runtime leaf nofp indirect $target" %}
16707
16708 ins_encode %{
16709 __ blr($target$$Register);
16710 %}
16711
16712 ins_pipe(pipe_class_call);
16713 %}
16714
16715 instruct CallLeafNoFPDirect(method meth)
16716 %{
16717 predicate(n->as_Call()->entry_point() != NULL);
16718
16719 match(CallLeafNoFP);
16720
16721 effect(USE meth);
16722
16723 ins_cost(CALL_COST);
16724
16725 format %{ "CALL, runtime leaf nofp $meth" %}
16726
16727 ins_encode( aarch64_enc_java_to_runtime(meth) );
16728
16729 ins_pipe(pipe_class_call);
16730 %}
16731
16732 instruct CallNativeDirect(method meth)
16733 %{
16734 match(CallNative);
16735
16736 effect(USE meth);
16737
16738 ins_cost(CALL_COST);
|