src/hotspot/cpu/aarch64/aarch64.ad

Old version:

 1750 
 1751 int MachCallDynamicJavaNode::ret_addr_offset()
 1752 {
 1753   return 16; // movz, movk, movk, bl
 1754 }
 1755 
 1756 int MachCallRuntimeNode::ret_addr_offset() {
 1757   // for generated stubs the call will be
 1758   //   bl(addr)
 1759   // or with far branches
 1760   //   bl(trampoline_stub)
 1761   // for real runtime callouts it will be six instructions
 1762   // see aarch64_enc_java_to_runtime
 1763   //   adr(rscratch2, retaddr)
 1764   //   lea(rscratch1, RuntimeAddress(addr))
 1765   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1766   //   blr(rscratch1)
 1767   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1768   if (cb) {
 1769     return 1 * NativeInstruction::instruction_size;
 1770   } else {
 1771     return 6 * NativeInstruction::instruction_size;
 1772   }
 1773 }
 1774 
 1775 int MachCallNativeNode::ret_addr_offset() {
 1776   // This is implemented using aarch64_enc_java_to_runtime as above.
 1777   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1778   if (cb) {
 1779     return 1 * NativeInstruction::instruction_size;
 1780   } else {
 1781     return 6 * NativeInstruction::instruction_size;
 1782   }
 1783 }
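
The two ret_addr_offset implementations above reduce to the same
arithmetic: AArch64 instructions are a fixed 4 bytes wide
(NativeInstruction::instruction_size), a target inside the code cache is
reached with a single bl/blr, and the out-of-line
aarch64_enc_java_to_runtime sequence is six instructions (adr, a
3-instruction lea, stp, blr). A minimal standalone sketch, with helper
names of my own choosing:

    static const int kInsnSize = 4; // fixed-width AArch64 encoding

    static int sketch_ret_addr_offset(bool target_in_code_cache) {
      // one bl/blr vs. the six-instruction java-to-runtime sequence
      return (target_in_code_cache ? 1 : 6) * kInsnSize;
    }
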
 1784 
 1785 //=============================================================================
 1786 
 1787 #ifndef PRODUCT
 1788 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1789   st->print("BREAKPOINT");

 1867     st->print("\n\t");
 1868     st->print("ldr  rscratch1, [guard]\n\t");
 1869     st->print("dmb ishld\n\t");
 1870     st->print("ldr  rscratch2, [rthread, #thread_disarmed_offset]\n\t");
 1871     st->print("cmp  rscratch1, rscratch2\n\t");
 1872     st->print("b.eq skip");
 1873     st->print("\n\t");
 1874     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1875     st->print("b skip\n\t");
 1876     st->print("guard: int\n\t");
 1877     st->print("\n\t");
 1878     st->print("skip:\n\t");
 1879   }
 1880 }
 1881 #endif
 1882 
 1883 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1884   Compile* C = ra_->C;
 1885   C2_MacroAssembler _masm(&cbuf);
 1886 
 1887   // n.b. frame size includes space for return pc and rfp
 1888   const int framesize = C->output()->frame_size_in_bytes();
 1889 
 1890   // insert a nop at the start of the prolog so we can patch in a
 1891   // branch if we need to invalidate the method later
 1892   __ nop();
 1893 
 1894   if (C->clinit_barrier_on_entry()) {
 1895     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1896 
 1897     Label L_skip_barrier;
 1898 
 1899     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1900     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1901     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1902     __ bind(L_skip_barrier);
 1903   }
 1904 
 1905   if (C->max_vector_size() > 0) {
 1906     __ reinitialize_ptrue();
 1907   }
 1908 
 1909   int bangsize = C->output()->bang_size_in_bytes();
 1910   if (C->output()->need_stack_bang(bangsize))
 1911     __ generate_stack_overflow_check(bangsize);
 1912 
 1913   __ build_frame(framesize);
 1914 
 1915   if (C->stub_function() == NULL) {
 1916     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1917     bs->nmethod_entry_barrier(&_masm);
 1918   }
 1919 
 1920   if (VerifyStackAtCalls) {
 1921     Unimplemented();
 1922   }
 1923 
 1924   C->output()->set_frame_complete(cbuf.insts_size());
 1925 
 1926   if (C->has_mach_constant_base_node()) {
 1927     // NOTE: We set the table base offset here because users might be
 1928     // emitted before MachConstantBaseNode.
 1929     ConstantTable& constant_table = C->output()->constant_table();
 1930     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1931   }
 1932 }
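
The leading nop emitted above reserves one 4-byte slot that can later be
overwritten with an unconditional branch when the method is invalidated.
A rough sketch of the encoding that makes a single atomic 32-bit store
sufficient (my own illustration, not HotSpot's patching code):

    #include <cstdint>

    // AArch64 B: bits 31..26 hold 0b000101, bits 25..0 a signed word
    // offset, so any target within +/-128 MiB fits in one instruction.
    static uint32_t encode_branch(int64_t byte_offset) {
      return (0x5u << 26) | ((uint32_t)(byte_offset >> 2) & 0x03ffffffu);
    }
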
 1933 
 1934 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1935 {
 1936   return MachNode::size(ra_); // too many variables; just compute it
 1937                               // the hard way
 1938 }
 1939 
 1940 int MachPrologNode::reloc() const
 1941 {
 1942   return 0;
 1943 }
 1944 
 1945 //=============================================================================
 1946 
 1947 #ifndef PRODUCT
 1948 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1949   Compile* C = ra_->C;
 1950   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1951 
 1952   st->print("# pop frame %d\n\t",framesize);
 1953 
 1954   if (framesize == 0) {
 1955     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1956   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1957     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1958     st->print("add  sp, sp, #%d\n\t", framesize);
 1959   } else {
 1960     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
 1961     st->print("add  sp, sp, rscratch1\n\t");
 1962     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1963   }
 1964 
 1965   if (do_polling() && C->is_method_compilation()) {
 1966     st->print("# test polling word\n\t");
 1967     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1968     st->print("cmp  sp, rscratch1\n\t");
 1969     st->print("bhi #slow_path");
 1970   }
 1971 }
 1972 #endif
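
The (1 << 9) + 2 * wordSize threshold in the middle branch above comes
from the LDP encoding: for a 64-bit register pair the signed 7-bit
immediate is scaled by 8, giving offsets in [-512, 504], and the epilogue
addresses the saved lr/rfp pair at framesize - 2 * wordSize. A small
sketch of that encodability test (assuming 64-bit register pairs):

    static bool ldp_offset_encodable(int byte_offset) {
      // imm7, scaled by 8 for an X-register pair
      return (byte_offset % 8) == 0 &&
             byte_offset >= -512 && byte_offset <= 504;
    }
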
 1973 
 1974 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1975   Compile* C = ra_->C;
 1976   C2_MacroAssembler _masm(&cbuf);
 1977   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1978 
 1979   __ remove_frame(framesize);
 1980 
 1981   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1982     __ reserved_stack_check();
 1983   }
 1984 
 1985   if (do_polling() && C->is_method_compilation()) {
 1986     Label dummy_label;
 1987     Label* code_stub = &dummy_label;
 1988     if (!C->output()->in_scratch_emit_size()) {
 1989       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1997   // Variable size. Determine dynamically.
 1998   return MachNode::size(ra_);
 1999 }
 2000 
 2001 int MachEpilogNode::reloc() const {
 2002   // Return number of relocatable values contained in this instruction.
 2003   return 1; // 1 for polling page.
 2004 }
 2005 
 2006 const Pipeline * MachEpilogNode::pipeline() const {
 2007   return MachNode::pipeline_class();
 2008 }
 2009 
 2010 //=============================================================================
 2011 
 2012 // Figure out which register class each belongs in: rc_int, rc_float,
 2013 // rc_predicate or rc_stack.
 2014 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 2015 
 2016 static enum RC rc_class(OptoReg::Name reg) {
 2017 
 2018   if (reg == OptoReg::Bad) {
 2019     return rc_bad;
 2020   }

 2263 
 2264   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2265   int reg    = ra_->get_encode(this);
 2266 
 2267   // This add will handle any 24-bit signed offset. 24 bits allows an
 2268   // 8 megabyte stack frame.
 2269   __ add(as_Register(reg), sp, offset);
 2270 }
 2271 
 2272 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2273   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2274   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2275 
 2276   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2277     return NativeInstruction::instruction_size;
 2278   } else {
 2279     return 2 * NativeInstruction::instruction_size;
 2280   }
 2281 }
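
For reference, a simplified sketch of the check that
Assembler::operand_valid_for_add_sub_immediate performs (the real routine
also accepts negative values by switching to the subtracting form; this
covers only the non-negative case):

    #include <cstdint>

    static bool fits_add_sub_immediate(uint64_t v) {
      return (v & ~UINT64_C(0xfff)) == 0 ||        // 12-bit immediate
             (v & ~(UINT64_C(0xfff) << 12)) == 0;  // 12-bit immediate << 12
    }
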
 2282 
 2283 //=============================================================================
 2284 
 2285 #ifndef PRODUCT
 2286 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2287 {
 2288   st->print_cr("# MachUEPNode");
 2289   if (UseCompressedClassPointers) {
 2290     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2291     if (CompressedKlassPointers::shift() != 0) {
 2292       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2293     }
 2294   } else {
 2295    st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2296   }
 2297   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2298   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2299 }
 2300 #endif
 2301 
 2302 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2303 {
 2304   // This is the unverified entry point.
 2305   C2_MacroAssembler _masm(&cbuf);
 2306 
 2307   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2308   Label skip;
 2309   // TODO
 2310   // can we avoid this skip and still use a reloc?
 2311   __ br(Assembler::EQ, skip);
 2312   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2313   __ bind(skip);
 2314 }
 2315 
 2316 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2317 {
 2318   return MachNode::size(ra_);
 2319 }
 2320 
 2321 // REQUIRED EMIT CODE
 2322 
 2323 //=============================================================================
 2324 
 2325 // Emit exception handler code.
 2326 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2327 {
 2328   // mov rscratch1 #exception_blob_entry_point
 2329   // br rscratch1
 2330   // Note that the code buffer's insts_mark is always relative to insts.
 2331   // That's why we must use the macroassembler to generate a handler.
 2332   C2_MacroAssembler _masm(&cbuf);
 2333   address base = __ start_a_stub(size_exception_handler());
 2334   if (base == NULL) {
 2335     ciEnv::current()->record_failure("CodeCache is full");
 2336     return 0;  // CodeBuffer::expand failed
 2337   }
 2338   int offset = __ offset();
 2339   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2340   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

 3725   %}
 3726 
 3727   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3728     C2_MacroAssembler _masm(&cbuf);
 3729     int method_index = resolved_method_index(cbuf);
 3730     address call = __ ic_call((address)$meth$$method, method_index);
 3731     if (call == NULL) {
 3732       ciEnv::current()->record_failure("CodeCache is full");
 3733       return;
 3734     } else if (Compile::current()->max_vector_size() > 0) {
 3735       __ reinitialize_ptrue();
 3736     }
 3737   %}
 3738 
 3739   enc_class aarch64_enc_call_epilog() %{
 3740     C2_MacroAssembler _masm(&cbuf);
 3741     if (VerifyStackAtCalls) {
 3742       // Check that stack depth is unchanged: find majik cookie on stack
 3743       __ call_Unimplemented();
 3744     }
 3745   %}
 3746 
 3747   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3748     C2_MacroAssembler _masm(&cbuf);
 3749 
 3750     // some calls to generated routines (arraycopy code) are scheduled
 3751     // by C2 as runtime calls. if so we can call them using a br (they
 3752     // will be in a reachable segment) otherwise we have to use a blr
 3753     // which loads the absolute address into a register.
 3754     address entry = (address)$meth$$method;
 3755     CodeBlob *cb = CodeCache::find_blob(entry);
 3756     if (cb) {
 3757       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3758       if (call == NULL) {
 3759         ciEnv::current()->record_failure("CodeCache is full");
 3760         return;
 3761       }
 3762     } else {
 3763       Label retaddr;
 3764       __ adr(rscratch2, retaddr);

 3816     Label cas_failed;
 3817 
 3818     assert_different_registers(oop, box, tmp, disp_hdr);
 3819 
 3820     // Load markWord from object into displaced_header.
 3821     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
 3822 
 3823     if (DiagnoseSyncOnValueBasedClasses != 0) {
 3824       __ load_klass(tmp, oop);
 3825       __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
 3826       __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
 3827       __ br(Assembler::NE, cont);
 3828     }
 3829 
 3830     // Check for existing monitor
 3831     __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
 3832 
 3833     // Set tmp to be (markWord of object | UNLOCK_VALUE).
 3834     __ orr(tmp, disp_hdr, markWord::unlocked_value);
 3835 
 3836     // Initialize the box. (Must happen before we update the object mark!)
 3837     __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
 3838 
 3839     // Compare object markWord with an unlocked value (tmp) and if
 3840     // equal exchange the stack address of our box with object markWord.
 3841     // On failure disp_hdr contains the possibly locked markWord.
 3842     __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
 3843                /*release*/ true, /*weak*/ false, disp_hdr);
 3844     __ br(Assembler::EQ, cont);
 3845 
 3846     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 3847 
 3848     // If the compare-and-exchange succeeded, then we found an unlocked
 3849     // object, have now locked it, and will continue at label cont.
 3850 
 3851     __ bind(cas_failed);
 3852     // We did not see an unlocked object so try the fast recursive case.
 3853 
 3854     // Check if the owner is self by comparing the value in the
 3855     // markWord of object (disp_hdr) with the stack pointer.

 7488 instruct loadConL(iRegLNoSp dst, immL src)
 7489 %{
 7490   match(Set dst src);
 7491 
 7492   ins_cost(INSN_COST);
 7493   format %{ "mov $dst, $src\t# long" %}
 7494 
 7495   ins_encode( aarch64_enc_mov_imm(dst, src) );
 7496 
 7497   ins_pipe(ialu_imm);
 7498 %}
 7499 
 7500 // Load Pointer Constant
 7501 
 7502 instruct loadConP(iRegPNoSp dst, immP con)
 7503 %{
 7504   match(Set dst con);
 7505 
 7506   ins_cost(INSN_COST * 4);
 7507   format %{
 7508     "mov  $dst, $con\t# ptr\n\t"
 7509   %}
 7510 
 7511   ins_encode(aarch64_enc_mov_p(dst, con));
 7512 
 7513   ins_pipe(ialu_imm);
 7514 %}
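
The ins_cost(INSN_COST * 4) above reflects the worst case for
materializing an arbitrary 64-bit pointer: one movz plus up to three movk
instructions, 16 bits per chunk (and a relocatable pointer is typically
emitted in the full patchable form, which presumably explains the flat
cost). A rough counting sketch, treating this as an upper bound since the
real encoder also has cheaper movn/orr forms:

    #include <cstdint>

    static int insns_to_materialize(uint64_t imm) {
      int n = 1;                          // movz writes the low 16 bits
      for (int shift = 16; shift < 64; shift += 16) {
        if ((imm >> shift) & 0xffff) {
          n++;                            // one movk per nonzero chunk
        }
      }
      return n;                           // 1..4 instructions
    }
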
 7515 
 7516 // Load Null Pointer Constant
 7517 
 7518 instruct loadConP0(iRegPNoSp dst, immP0 con)
 7519 %{
 7520   match(Set dst con);
 7521 
 7522   ins_cost(INSN_COST);
 7523   format %{ "mov  $dst, $con\t# NULL ptr" %}
 7524 
 7525   ins_encode(aarch64_enc_mov_p0(dst, con));
 7526 
 7527   ins_pipe(ialu_imm);
 7528 %}

 8620 %}
 8621 
 8622 // ============================================================================
 8623 // Cast/Convert Instructions
 8624 
 8625 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8626   match(Set dst (CastX2P src));
 8627 
 8628   ins_cost(INSN_COST);
 8629   format %{ "mov $dst, $src\t# long -> ptr" %}
 8630 
 8631   ins_encode %{
 8632     if ($dst$$reg != $src$$reg) {
 8633       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8634     }
 8635   %}
 8636 
 8637   ins_pipe(ialu_reg);
 8638 %}
 8639 
 8640 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8641   match(Set dst (CastP2X src));
 8642 
 8643   ins_cost(INSN_COST);
 8644   format %{ "mov $dst, $src\t# ptr -> long" %}
 8645 
 8646   ins_encode %{
 8647     if ($dst$$reg != $src$$reg) {
 8648       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8649     }
 8650   %}
 8651 
 8652   ins_pipe(ialu_reg);
 8653 %}
 8654 
 8655 // Convert oop into int for vector alignment masking
 8656 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8657   match(Set dst (ConvL2I (CastP2X src)));
 8658 
 8659   ins_cost(INSN_COST);

14963 
14964   match(Set dst (MoveL2D src));
14965 
14966   effect(DEF dst, USE src);
14967 
14968   ins_cost(INSN_COST);
14969 
14970   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14971 
14972   ins_encode %{
14973     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14974   %}
14975 
14976   ins_pipe(fp_l2d);
14977 
14978 %}
14979 
14980 // ============================================================================
14981 // clearing of an array
14982 
14983 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14984 %{
14985   match(Set dummy (ClearArray cnt base));
14986   effect(USE_KILL cnt, USE_KILL base, KILL cr);
14987 
14988   ins_cost(4 * INSN_COST);
14989   format %{ "ClearArray $cnt, $base" %}
14990 
14991   ins_encode %{
14992     address tpc = __ zero_words($base$$Register, $cnt$$Register);
14993     if (tpc == NULL) {
14994       ciEnv::current()->record_failure("CodeCache is full");
14995       return;
14996     }
14997   %}
14998 
14999   ins_pipe(pipe_class_memory);
15000 %}
15001 
15002 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15003 %{
15004   predicate((uint64_t)n->in(2)->get_long()
15005             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
15006   match(Set dummy (ClearArray cnt base));
15007   effect(TEMP temp, USE_KILL base, KILL cr);
15008 
15009   ins_cost(4 * INSN_COST);
15010   format %{ "ClearArray $cnt, $base" %}
15011 
15012   ins_encode %{
15013     __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15014   %}
15015 
15016   ins_pipe(pipe_class_memory);
15017 %}
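
The predicate above compares a word count against a byte limit, so
BlockZeroingLowLimit is shifted right by LogBytesPerWord before the
comparison. A one-line sketch of that conversion (assuming 64-bit words,
i.e. LogBytesPerWord == 3):

    static bool below_zeroing_limit(uint64_t count_in_words,
                                    uint64_t limit_in_bytes) {
      return count_in_words < (limit_in_bytes >> 3);
    }
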
15018 
15019 // ============================================================================
15020 // Overflow Math Instructions
15021 
15022 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15023 %{
15024   match(Set cr (OverflowAddI op1 op2));
15025 

16315 
16316 // Call Runtime Instruction
16317 
16318 instruct CallLeafDirect(method meth)
16319 %{
16320   match(CallLeaf);
16321 
16322   effect(USE meth);
16323 
16324   ins_cost(CALL_COST);
16325 
16326   format %{ "CALL, runtime leaf $meth" %}
16327 
16328   ins_encode( aarch64_enc_java_to_runtime(meth) );
16329 
16330   ins_pipe(pipe_class_call);
16331 %}
16332 
16333 // Call Runtime Instruction
16334 
16335 instruct CallLeafNoFPDirect(method meth)
16336 %{
16337   match(CallLeafNoFP);
16338 
16339   effect(USE meth);
16340 
16341   ins_cost(CALL_COST);
16342 
16343   format %{ "CALL, runtime leaf nofp $meth" %}
16344 
16345   ins_encode( aarch64_enc_java_to_runtime(meth) );
16346 
16347   ins_pipe(pipe_class_call);
16348 %}
16349 
16350 instruct CallNativeDirect(method meth)
16351 %{
16352   match(CallNative);
16353 
16354   effect(USE meth);
16355 
16356   ins_cost(CALL_COST);

New version:

 1750 
 1751 int MachCallDynamicJavaNode::ret_addr_offset()
 1752 {
 1753   return 16; // movz, movk, movk, bl
 1754 }
 1755 
 1756 int MachCallRuntimeNode::ret_addr_offset() {
 1757   // for generated stubs the call will be
 1758   //   bl(addr)
 1759   // or with far branches
 1760   //   bl(trampoline_stub)
 1761   // for real runtime callouts it will be six instructions
 1762   // see aarch64_enc_java_to_runtime
 1763   //   adr(rscratch2, retaddr)
 1764   //   lea(rscratch1, RuntimeAddress(addr))
 1765   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1766   //   blr(rscratch1)
 1767   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1768   if (cb) {
 1769     return 1 * NativeInstruction::instruction_size;
 1770   } else if (_entry_point == NULL) {
 1771     // See CallLeafNoFPIndirect
 1772     return 1 * NativeInstruction::instruction_size;
 1773   } else {
 1774     return 6 * NativeInstruction::instruction_size;
 1775   }
 1776 }
 1777 
 1778 int MachCallNativeNode::ret_addr_offset() {
 1779   // This is implemented using aarch64_enc_java_to_runtime as above.
 1780   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1781   if (cb) {
 1782     return 1 * NativeInstruction::instruction_size;
 1783   } else {
 1784     return 6 * NativeInstruction::instruction_size;
 1785   }
 1786 }
 1787 
 1788 //=============================================================================
 1789 
 1790 #ifndef PRODUCT
 1791 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1792   st->print("BREAKPOINT");

 1870     st->print("\n\t");
 1871     st->print("ldr  rscratch1, [guard]\n\t");
 1872     st->print("dmb ishld\n\t");
 1873     st->print("ldr  rscratch2, [rthread, #thread_disarmed_offset]\n\t");
 1874     st->print("cmp  rscratch1, rscratch2\n\t");
 1875     st->print("b.eq skip");
 1876     st->print("\n\t");
 1877     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1878     st->print("b skip\n\t");
 1879     st->print("guard: int\n\t");
 1880     st->print("\n\t");
 1881     st->print("skip:\n\t");
 1882   }
 1883 }
 1884 #endif
 1885 
 1886 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1887   Compile* C = ra_->C;
 1888   C2_MacroAssembler _masm(&cbuf);
 1889 
 1890   // insert a nop at the start of the prolog so we can patch in a
 1891   // branch if we need to invalidate the method later
 1892   __ nop();
 1893 
 1894   if (C->clinit_barrier_on_entry()) {
 1895     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1896 
 1897     Label L_skip_barrier;
 1898 
 1899     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1900     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1901     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1902     __ bind(L_skip_barrier);
 1903   }
 1904 
 1905   if (C->max_vector_size() > 0) {
 1906     __ reinitialize_ptrue();
 1907   }
 1908 
 1909   __ verified_entry(C, 0);
 1910   __ bind(*_verified_entry);
 1911 
 1912   if (C->stub_function() == NULL) {
 1913     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1914     bs->nmethod_entry_barrier(&_masm);
 1915   }
 1916 
 1917   if (VerifyStackAtCalls) {
 1918     Unimplemented();
 1919   }
 1920 
 1921   C->output()->set_frame_complete(cbuf.insts_size());
 1922 
 1923   if (C->has_mach_constant_base_node()) {
 1924     // NOTE: We set the table base offset here because users might be
 1925     // emitted before MachConstantBaseNode.
 1926     ConstantTable& constant_table = C->output()->constant_table();
 1927     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1928   }
 1929 }
 1930 
 1931 int MachPrologNode::reloc() const
 1932 {
 1933   return 0;
 1934 }
 1935 
 1936 //=============================================================================
 1937 
 1938 #ifndef PRODUCT
 1939 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1940   Compile* C = ra_->C;
 1941   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1942 
 1943   st->print("# pop frame %d\n\t",framesize);
 1944 
 1945   if (framesize == 0) {
 1946     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1947   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1948     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1949     st->print("add  sp, sp, #%d\n\t", framesize);
 1950   } else {
 1951     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
 1952     st->print("add  sp, sp, rscratch1\n\t");
 1953     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1954   }
 1955 
 1956   if (do_polling() && C->is_method_compilation()) {
 1957     st->print("# test polling word\n\t");
 1958     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1959     st->print("cmp  sp, rscratch1\n\t");
 1960     st->print("bhi #slow_path");
 1961   }
 1962 }
 1963 #endif
 1964 
 1965 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1966   Compile* C = ra_->C;
 1967   C2_MacroAssembler _masm(&cbuf);
 1968   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1969 
 1970   __ remove_frame(framesize, C->needs_stack_repair());
 1971 
 1972   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1973     __ reserved_stack_check();
 1974   }
 1975 
 1976   if (do_polling() && C->is_method_compilation()) {
 1977     Label dummy_label;
 1978     Label* code_stub = &dummy_label;
 1979     if (!C->output()->in_scratch_emit_size()) {
 1980       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1981     }
 1982     __ relocate(relocInfo::poll_return_type);
 1983     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1984   }
 1985 }
 1986 
 1987 int MachEpilogNode::reloc() const {
 1988   // Return number of relocatable values contained in this instruction.
 1989   return 1; // 1 for polling page.
 1990 }
 1991 
 1992 const Pipeline * MachEpilogNode::pipeline() const {
 1993   return MachNode::pipeline_class();
 1994 }
 1995 
 1996 //=============================================================================
 1997 
 1998 // Figure out which register class each belongs in: rc_int, rc_float,
 1999 // rc_predicate or rc_stack.
 2000 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 2001 
 2002 static enum RC rc_class(OptoReg::Name reg) {
 2003 
 2004   if (reg == OptoReg::Bad) {
 2005     return rc_bad;
 2006   }

 2249 
 2250   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2251   int reg    = ra_->get_encode(this);
 2252 
 2253   // This add will handle any 24-bit signed offset. 24 bits allows an
 2254   // 8 megabyte stack frame.
 2255   __ add(as_Register(reg), sp, offset);
 2256 }
 2257 
 2258 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2259   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2260   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2261 
 2262   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2263     return NativeInstruction::instruction_size;
 2264   } else {
 2265     return 2 * NativeInstruction::instruction_size;
 2266   }
 2267 }
 2268 
 2269 //=============================================================================
 2270 #ifndef PRODUCT
 2271 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2272 {
 2273   st->print_cr("# MachVEPNode");
 2274   if (!_verified) {
 2275     st->print_cr("\t load_class");
 2276   } else {
 2277     st->print_cr("\t unpack_inline_arg");
 2278   }
 2279 }
 2280 #endif
 2281 
 2282 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2283 {
 2284   MacroAssembler _masm(&cbuf);
 2285 
 2286   if (!_verified) {
 2287     Label skip;
 2288     __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2289     __ br(Assembler::EQ, skip);
 2290       __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2291     __ bind(skip);
 2292 
 2293   } else {
 2294     // Unpack inline type args passed as oop and then jump to
 2295     // the verified entry point (skipping the unverified entry).
 2296     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2297     // Emit code for verified entry and save increment for stack repair on return
 2298     __ verified_entry(ra_->C, sp_inc);
 2299     __ b(*_verified_entry);
 2300   }
 2301 }
 2302 
 2303 //=============================================================================
 2304 #ifndef PRODUCT
 2305 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2306 {
 2307   st->print_cr("# MachUEPNode");
 2308   if (UseCompressedClassPointers) {
 2309     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2310     if (CompressedKlassPointers::shift() != 0) {
 2311       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2312     }
 2313   } else {
 2314    st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2315   }
 2316   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2317   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2318 }
 2319 #endif
 2320 
 2321 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2322 {
 2323   // This is the unverified entry point.
 2324   C2_MacroAssembler _masm(&cbuf);
 2325   Label skip;
 2326 
 2327   // UseCompressedClassPointers logic is inside cmp_klass
 2328   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2329 
 2330   // TODO
 2331   // can we avoid this skip and still use a reloc?
 2332   __ br(Assembler::EQ, skip);
 2333   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2334   __ bind(skip);
 2335 }
 2336 
 2337 // REQUIRED EMIT CODE
 2338 
 2339 //=============================================================================
 2340 
 2341 // Emit exception handler code.
 2342 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2343 {
 2344   // mov rscratch1 #exception_blob_entry_point
 2345   // br rscratch1
 2346   // Note that the code buffer's insts_mark is always relative to insts.
 2347   // That's why we must use the macroassembler to generate a handler.
 2348   C2_MacroAssembler _masm(&cbuf);
 2349   address base = __ start_a_stub(size_exception_handler());
 2350   if (base == NULL) {
 2351     ciEnv::current()->record_failure("CodeCache is full");
 2352     return 0;  // CodeBuffer::expand failed
 2353   }
 2354   int offset = __ offset();
 2355   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2356   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

 3741   %}
 3742 
 3743   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3744     C2_MacroAssembler _masm(&cbuf);
 3745     int method_index = resolved_method_index(cbuf);
 3746     address call = __ ic_call((address)$meth$$method, method_index);
 3747     if (call == NULL) {
 3748       ciEnv::current()->record_failure("CodeCache is full");
 3749       return;
 3750     } else if (Compile::current()->max_vector_size() > 0) {
 3751       __ reinitialize_ptrue();
 3752     }
 3753   %}
 3754 
 3755   enc_class aarch64_enc_call_epilog() %{
 3756     C2_MacroAssembler _masm(&cbuf);
 3757     if (VerifyStackAtCalls) {
 3758       // Check that stack depth is unchanged: find majik cookie on stack
 3759       __ call_Unimplemented();
 3760     }
 3761     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
 3762       // An inline type is returned as fields in multiple registers.
 3763       // R0 either contains an oop if the inline type is buffered or a pointer
 3764       // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
 3765       // if the lowest bit is set to allow C2 to use the oop after null checking.
 3766       // r0 &= (r0 & 1) - 1
 3767       C2_MacroAssembler _masm(&cbuf);
 3768       __ andr(rscratch1, r0, 0x1);
 3769       __ sub(rscratch1, rscratch1, 0x1);
 3770       __ andr(r0, r0, rscratch1);
 3771     }
 3772   %}
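
The andr/sub/andr sequence above is a branch-free way to null out r0
exactly when its low tag bit is set, computing r0 &= (r0 & 1) - 1. A
standalone illustration (function name mine):

    #include <cstdint>

    static uint64_t zero_if_tagged(uint64_t r0) {
      uint64_t t = r0 & 1;  // 1: tagged InlineKlass pointer, 0: oop
      t -= 1;               // 0 if tagged, all-ones if an oop
      return r0 & t;        // tagged -> 0 (null), oop -> unchanged
    }
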
 3773 
 3774   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3775     C2_MacroAssembler _masm(&cbuf);
 3776 
 3777     // some calls to generated routines (arraycopy code) are scheduled
 3778     // by C2 as runtime calls. if so we can call them using a br (they
 3779     // will be in a reachable segment) otherwise we have to use a blr
 3780     // which loads the absolute address into a register.
 3781     address entry = (address)$meth$$method;
 3782     CodeBlob *cb = CodeCache::find_blob(entry);
 3783     if (cb) {
 3784       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3785       if (call == NULL) {
 3786         ciEnv::current()->record_failure("CodeCache is full");
 3787         return;
 3788       }
 3789     } else {
 3790       Label retaddr;
 3791       __ adr(rscratch2, retaddr);

 3843     Label cas_failed;
 3844 
 3845     assert_different_registers(oop, box, tmp, disp_hdr);
 3846 
 3847     // Load markWord from object into displaced_header.
 3848     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
 3849 
 3850     if (DiagnoseSyncOnValueBasedClasses != 0) {
 3851       __ load_klass(tmp, oop);
 3852       __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
 3853       __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
 3854       __ br(Assembler::NE, cont);
 3855     }
 3856 
 3857     // Check for existing monitor
 3858     __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
 3859 
 3860     // Set tmp to be (markWord of object | UNLOCK_VALUE).
 3861     __ orr(tmp, disp_hdr, markWord::unlocked_value);
 3862 
 3863     if (EnableValhalla) {
 3864       // Mask inline_type bit such that we go to the slow path if object is an inline type
 3865       __ andr(tmp, tmp, ~((int) markWord::inline_type_bit_in_place));
 3866     }
 3867 
 3868     // Initialize the box. (Must happen before we update the object mark!)
 3869     __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
 3870 
 3871     // Compare object markWord with an unlocked value (tmp) and if
 3872     // equal exchange the stack address of our box with object markWord.
 3873     // On failure disp_hdr contains the possibly locked markWord.
 3874     __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
 3875                /*release*/ true, /*weak*/ false, disp_hdr);
 3876     __ br(Assembler::EQ, cont);
 3877 
 3878     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 3879 
 3880     // If the compare-and-exchange succeeded, then we found an unlocked
 3881     // object, have now locked it, and will continue at label cont.
 3882 
 3883     __ bind(cas_failed);
 3884     // We did not see an unlocked object so try the fast recursive case.
 3885 
 3886     // Check if the owner is self by comparing the value in the
 3887     // markWord of object (disp_hdr) with the stack pointer.

 7520 instruct loadConL(iRegLNoSp dst, immL src)
 7521 %{
 7522   match(Set dst src);
 7523 
 7524   ins_cost(INSN_COST);
 7525   format %{ "mov $dst, $src\t# long" %}
 7526 
 7527   ins_encode( aarch64_enc_mov_imm(dst, src) );
 7528 
 7529   ins_pipe(ialu_imm);
 7530 %}
 7531 
 7532 // Load Pointer Constant
 7533 
 7534 instruct loadConP(iRegPNoSp dst, immP con)
 7535 %{
 7536   match(Set dst con);
 7537 
 7538   ins_cost(INSN_COST * 4);
 7539   format %{
 7540     "mov  $dst, $con\t# ptr"
 7541   %}
 7542 
 7543   ins_encode(aarch64_enc_mov_p(dst, con));
 7544 
 7545   ins_pipe(ialu_imm);
 7546 %}
 7547 
 7548 // Load Null Pointer Constant
 7549 
 7550 instruct loadConP0(iRegPNoSp dst, immP0 con)
 7551 %{
 7552   match(Set dst con);
 7553 
 7554   ins_cost(INSN_COST);
 7555   format %{ "mov  $dst, $con\t# NULL ptr" %}
 7556 
 7557   ins_encode(aarch64_enc_mov_p0(dst, con));
 7558 
 7559   ins_pipe(ialu_imm);
 7560 %}

 8652 %}
 8653 
 8654 // ============================================================================
 8655 // Cast/Convert Instructions
 8656 
 8657 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8658   match(Set dst (CastX2P src));
 8659 
 8660   ins_cost(INSN_COST);
 8661   format %{ "mov $dst, $src\t# long -> ptr" %}
 8662 
 8663   ins_encode %{
 8664     if ($dst$$reg != $src$$reg) {
 8665       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8666     }
 8667   %}
 8668 
 8669   ins_pipe(ialu_reg);
 8670 %}
 8671 
 8672 instruct castN2X(iRegLNoSp dst, iRegN src) %{
 8673   match(Set dst (CastP2X src));
 8674 
 8675   ins_cost(INSN_COST);
 8676   format %{ "mov $dst, $src\t# ptr -> long" %}
 8677 
 8678   ins_encode %{
 8679     if ($dst$$reg != $src$$reg) {
 8680       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8681     }
 8682   %}
 8683 
 8684   ins_pipe(ialu_reg);
 8685 %}
 8686 
 8687 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8688   match(Set dst (CastP2X src));
 8689 
 8690   ins_cost(INSN_COST);
 8691   format %{ "mov $dst, $src\t# ptr -> long" %}
 8692 
 8693   ins_encode %{
 8694     if ($dst$$reg != $src$$reg) {
 8695       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8696     }
 8697   %}
 8698 
 8699   ins_pipe(ialu_reg);
 8700 %}
 8701 
 8702 // Convert oop into int for vector alignment masking
 8703 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8704   match(Set dst (ConvL2I (CastP2X src)));
 8705 
 8706   ins_cost(INSN_COST);

15010 
15011   match(Set dst (MoveL2D src));
15012 
15013   effect(DEF dst, USE src);
15014 
15015   ins_cost(INSN_COST);
15016 
15017   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15018 
15019   ins_encode %{
15020     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15021   %}
15022 
15023   ins_pipe(fp_l2d);
15024 
15025 %}
15026 
15027 // ============================================================================
15028 // clearing of an array
15029 
15030 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15031 %{
15032   match(Set dummy (ClearArray (Binary cnt base) zero));
15033   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15034 
15035   ins_cost(4 * INSN_COST);
15036   format %{ "ClearArray $cnt, $base" %}
15037 
15038   ins_encode %{
15039     address tpc = __ zero_words($base$$Register, $cnt$$Register);
15040     if (tpc == NULL) {
15041       ciEnv::current()->record_failure("CodeCache is full");
15042       return;
15043     }
15044   %}
15045 
15046   ins_pipe(pipe_class_memory);
15047 %}
15048 
15049 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15050 %{
15051   predicate(((ClearArrayNode*)n)->word_copy_only());
15052   match(Set dummy (ClearArray (Binary cnt base) val));
15053   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15054 
15055   ins_cost(4 * INSN_COST);
15056   format %{ "ClearArray $cnt, $base, $val" %}
15057 
15058   ins_encode %{
15059     __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15060   %}
15061 
15062   ins_pipe(pipe_class_memory);
15063 %}
15064 
15065 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15066 %{
15067   predicate((uint64_t)n->in(2)->get_long()
15068             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15069             && !((ClearArrayNode*)n)->word_copy_only());
15070   match(Set dummy (ClearArray cnt base));
15071   effect(TEMP temp, USE_KILL base, KILL cr);
15072 
15073   ins_cost(4 * INSN_COST);
15074   format %{ "ClearArray $cnt, $base" %}
15075 
15076   ins_encode %{
15077     __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15078   %}
15079 
15080   ins_pipe(pipe_class_memory);
15081 %}
15082 
15083 // ============================================================================
15084 // Overflow Math Instructions
15085 
15086 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15087 %{
15088   match(Set cr (OverflowAddI op1 op2));
15089 

16379 
16380 // Call Runtime Instruction
16381 
16382 instruct CallLeafDirect(method meth)
16383 %{
16384   match(CallLeaf);
16385 
16386   effect(USE meth);
16387 
16388   ins_cost(CALL_COST);
16389 
16390   format %{ "CALL, runtime leaf $meth" %}
16391 
16392   ins_encode( aarch64_enc_java_to_runtime(meth) );
16393 
16394   ins_pipe(pipe_class_call);
16395 %}
16396 
16397 // Call Runtime Instruction
16398 
16399 // entry point is null, target holds the address to call
16400 instruct CallLeafNoFPIndirect(iRegP target)
16401 %{
16402   predicate(n->as_Call()->entry_point() == NULL);
16403 
16404   match(CallLeafNoFP target);
16405 
16406   ins_cost(CALL_COST);
16407 
16408   format %{ "CALL, runtime leaf nofp indirect $target" %}
16409 
16410   ins_encode %{
16411     __ blr($target$$Register);
16412   %}
16413 
16414   ins_pipe(pipe_class_call);
16415 %}
16416 
16417 instruct CallLeafNoFPDirect(method meth)
16418 %{
16419   predicate(n->as_Call()->entry_point() != NULL);
16420 
16421   match(CallLeafNoFP);
16422 
16423   effect(USE meth);
16424 
16425   ins_cost(CALL_COST);
16426 
16427   format %{ "CALL, runtime leaf nofp $meth" %}
16428 
16429   ins_encode( aarch64_enc_java_to_runtime(meth) );
16430 
16431   ins_pipe(pipe_class_call);
16432 %}
16433 
16434 instruct CallNativeDirect(method meth)
16435 %{
16436   match(CallNative);
16437 
16438   effect(USE meth);
16439 
16440   ins_cost(CALL_COST);