src/hotspot/cpu/aarch64/aarch64.ad (old version)

 1753 
 1754 int MachCallDynamicJavaNode::ret_addr_offset()
 1755 {
 1756   return 16; // movz, movk, movk, bl
 1757 }
 1758 
 1759 int MachCallRuntimeNode::ret_addr_offset() {
 1760   // for generated stubs the call will be
 1761   //   bl(addr)
 1762   // or with far branches
 1763   //   bl(trampoline_stub)
 1764   // for real runtime callouts it will be six instructions
 1765   // see aarch64_enc_java_to_runtime
 1766   //   adr(rscratch2, retaddr)
 1767   //   lea(rscratch1, RuntimeAddress(addr))
 1768   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1769   //   blr(rscratch1)
 1770   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1771   if (cb) {
 1772     return 1 * NativeInstruction::instruction_size;
 1773   } else {
 1774     return 6 * NativeInstruction::instruction_size;
 1775   }
 1776 }
 1777 
 1778 int MachCallNativeNode::ret_addr_offset() {
 1779   // This is implemented using aarch64_enc_java_to_runtime as above.
 1780   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1781   if (cb) {
 1782     return 1 * NativeInstruction::instruction_size;
 1783   } else {
 1784     return 6 * NativeInstruction::instruction_size;
 1785   }
 1786 }
 1787 
 1788 //=============================================================================
 1789 
 1790 #ifndef PRODUCT
 1791 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1792   st->print("BREAKPOINT");

 1870     st->print("\n\t");
 1871     st->print("ldr  rscratch1, [guard]\n\t");
 1872     st->print("dmb ishld\n\t");
 1873     st->print("ldr  rscratch2, [rthread, #thread_disarmed_offset]\n\t");
 1874     st->print("cmp  rscratch1, rscratch2\n\t");
 1875     st->print("b.eq skip");
 1876     st->print("\n\t");
 1877     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1878     st->print("b skip\n\t");
 1879     st->print("guard: int\n\t");
 1880     st->print("\n\t");
 1881     st->print("skip:\n\t");
 1882   }
 1883 }
 1884 #endif
 1885 
 1886 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1887   Compile* C = ra_->C;
 1888   C2_MacroAssembler _masm(&cbuf);
 1889 
 1890   // n.b. frame size includes space for return pc and rfp
 1891   const int framesize = C->output()->frame_size_in_bytes();
 1892 
 1893   // insert a nop at the start of the prolog so we can patch in a
 1894   // branch if we need to invalidate the method later
 1895   __ nop();
 1896 
 1897   if (C->clinit_barrier_on_entry()) {
 1898     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1899 
 1900     Label L_skip_barrier;
 1901 
 1902     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1903     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1904     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1905     __ bind(L_skip_barrier);
 1906   }
 1907 
 1908   if (C->max_vector_size() > 0) {
 1909     __ reinitialize_ptrue();
 1910   }
 1911 
 1912   int bangsize = C->output()->bang_size_in_bytes();
 1913   if (C->output()->need_stack_bang(bangsize))
 1914     __ generate_stack_overflow_check(bangsize);
 1915 
 1916   __ build_frame(framesize);
 1917 
 1918   if (C->stub_function() == NULL) {
 1919     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1920     bs->nmethod_entry_barrier(&_masm);
 1921   }
 1922 
 1923   if (VerifyStackAtCalls) {
 1924     Unimplemented();
 1925   }
 1926 
 1927   C->output()->set_frame_complete(cbuf.insts_size());
 1928 
 1929   if (C->has_mach_constant_base_node()) {
 1930     // NOTE: We set the table base offset here because code that uses
 1931     // the constant table might be emitted before MachConstantBaseNode.
 1932     ConstantTable& constant_table = C->output()->constant_table();
 1933     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1934   }
 1935 }
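
For orientation, a minimal sketch of the decision the prolog makes above; this
is not HotSpot's actual need_stack_bang (which also accounts for deoptimization
into a larger interpreter frame), and guard_page_size is an assumed parameter:

    // Sketch only: a frame larger than one guard page could skip past the
    // protected page without ever touching it, so it must be probed explicitly.
    bool need_stack_bang_sketch(int bang_size_in_bytes, int guard_page_size) {
      return bang_size_in_bytes > guard_page_size;
    }
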
 1936 
 1937 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1938 {
 1939   return MachNode::size(ra_); // too many variables; just compute it
 1940                               // the hard way
 1941 }
 1942 
 1943 int MachPrologNode::reloc() const
 1944 {
 1945   return 0;
 1946 }
 1947 
 1948 //=============================================================================
 1949 
 1950 #ifndef PRODUCT
 1951 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1952   Compile* C = ra_->C;
 1953   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1954 
 1955   st->print("# pop frame %d\n\t",framesize);
 1956 
 1957   if (framesize == 0) {
 1958     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1959   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1960     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1961     st->print("add  sp, sp, #%d\n\t", framesize);
 1962   } else {
 1963     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
 1964     st->print("add  sp, sp, rscratch1\n\t");
 1965     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1966   }
 1967 
 1968   if (do_polling() && C->is_method_compilation()) {
 1969     st->print("# test polling word\n\t");
 1970     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1971     st->print("cmp  sp, rscratch1\n\t");
 1972     st->print("bhi #slow_path");
 1973   }
 1974 }
 1975 #endif
 1976 
 1977 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1978   Compile* C = ra_->C;
 1979   C2_MacroAssembler _masm(&cbuf);
 1980   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1981 
 1982   __ remove_frame(framesize);
 1983 
 1984   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1985     __ reserved_stack_check();
 1986   }
 1987 
 1988   if (do_polling() && C->is_method_compilation()) {
 1989     Label dummy_label;
 1990     Label* code_stub = &dummy_label;
 1991     if (!C->output()->in_scratch_emit_size()) {
 1992       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1993     }
 1994     __ relocate(relocInfo::poll_return_type);
 1995     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1996   }
 1997 }
 1998 
 1999 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 2000   // Variable size. Determine dynamically.
 2001   return MachNode::size(ra_);
 2002 }
 2003 
 2004 int MachEpilogNode::reloc() const {
 2005   // Return number of relocatable values contained in this instruction.
 2006   return 1; // 1 for polling page.
 2007 }
 2008 
 2009 const Pipeline * MachEpilogNode::pipeline() const {
 2010   return MachNode::pipeline_class();
 2011 }
 2012 
 2013 //=============================================================================
 2014 
 2015 // Figure out which register class each belongs in: rc_int, rc_float or
 2016 // rc_stack.
 2017 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 2018 
 2019 static enum RC rc_class(OptoReg::Name reg) {
 2020 
 2021   if (reg == OptoReg::Bad) {
 2022     return rc_bad;
 2023   }

 2289 
 2290   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2291   int reg    = ra_->get_encode(this);
 2292 
 2293   // This add will handle any 24-bit signed offset. 24 bits allows an
 2294   // 8 megabyte stack frame.
 2295   __ add(as_Register(reg), sp, offset);
 2296 }
 2297 
 2298 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2299   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2300   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2301 
 2302   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2303     return NativeInstruction::instruction_size;
 2304   } else {
 2305     return 2 * NativeInstruction::instruction_size;
 2306   }
 2307 }
 2308 
 2309 //=============================================================================
 2310 
 2311 #ifndef PRODUCT
 2312 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2313 {
 2314   st->print_cr("# MachUEPNode");
 2315   if (UseCompressedClassPointers) {
 2316     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2317     if (CompressedKlassPointers::shift() != 0) {
 2318       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2319     }
 2320   } else {
 2321     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2322   }
 2323   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2324   st->print_cr("\tb.ne SharedRuntime::_ic_miss_stub");
 2325 }
 2326 #endif
 2327 
 2328 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2329 {
 2330   // This is the unverified entry point.
 2331   C2_MacroAssembler _masm(&cbuf);
 2332 
 2333   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2334   Label skip;
 2335   // TODO
 2336   // can we avoid this skip and still use a reloc?
 2337   __ br(Assembler::EQ, skip);
 2338   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2339   __ bind(skip);
 2340 }
 2341 
 2342 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2343 {
 2344   return MachNode::size(ra_);
 2345 }
 2346 
 2347 // REQUIRED EMIT CODE
 2348 
 2349 //=============================================================================
 2350 
 2351 // Emit exception handler code.
 2352 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2353 {
 2354   // mov rscratch1 #exception_blob_entry_point
 2355   // br rscratch1
 2356   // Note that the code buffer's insts_mark is always relative to insts.
 2357   // That's why we must use the macroassembler to generate a handler.
 2358   C2_MacroAssembler _masm(&cbuf);
 2359   address base = __ start_a_stub(size_exception_handler());
 2360   if (base == NULL) {
 2361     ciEnv::current()->record_failure("CodeCache is full");
 2362     return 0;  // CodeBuffer::expand failed
 2363   }
 2364   int offset = __ offset();
 2365   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2366   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

 3801   %}
 3802 
 3803   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3804     C2_MacroAssembler _masm(&cbuf);
 3805     int method_index = resolved_method_index(cbuf);
 3806     address call = __ ic_call((address)$meth$$method, method_index);
 3807     if (call == NULL) {
 3808       ciEnv::current()->record_failure("CodeCache is full");
 3809       return;
 3810     } else if (Compile::current()->max_vector_size() > 0) {
 3811       __ reinitialize_ptrue();
 3812     }
 3813   %}
 3814 
 3815   enc_class aarch64_enc_call_epilog() %{
 3816     C2_MacroAssembler _masm(&cbuf);
 3817     if (VerifyStackAtCalls) {
 3818       // Check that stack depth is unchanged: find magic cookie on stack
 3819       __ call_Unimplemented();
 3820     }
 3821   %}
 3822 
 3823   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3824     C2_MacroAssembler _masm(&cbuf);
 3825 
 3826     // Some calls to generated routines (arraycopy code) are scheduled
 3827     // by C2 as runtime calls. If so, we can call them using a bl (they
 3828     // will be in a reachable segment); otherwise we have to use a blr,
 3829     // which loads the absolute address into a register.
 3830     address entry = (address)$meth$$method;
 3831     CodeBlob *cb = CodeCache::find_blob(entry);
 3832     if (cb) {
 3833       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3834       if (call == NULL) {
 3835         ciEnv::current()->record_failure("CodeCache is full");
 3836         return;
 3837       }
 3838     } else {
 3839       Label retaddr;
 3840       __ adr(rscratch2, retaddr);

 3892     Label cas_failed;
 3893 
 3894     assert_different_registers(oop, box, tmp, disp_hdr);
 3895 
 3896     // Load markWord from object into displaced_header.
 3897     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
 3898 
 3899     if (DiagnoseSyncOnValueBasedClasses != 0) {
 3900       __ load_klass(tmp, oop);
 3901       __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
 3902       __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
 3903       __ br(Assembler::NE, cont);
 3904     }
 3905 
 3906     // Check for existing monitor
 3907     __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
 3908 
 3909     // Set tmp to be (markWord of object | UNLOCK_VALUE).
 3910     __ orr(tmp, disp_hdr, markWord::unlocked_value);
 3911 
 3912     // Initialize the box. (Must happen before we update the object mark!)
 3913     __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
 3914 
 3915     // Compare object markWord with an unlocked value (tmp) and if
 3916     // equal exchange the stack address of our box with object markWord.
 3917     // On failure disp_hdr contains the possibly locked markWord.
 3918     __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
 3919                /*release*/ true, /*weak*/ false, disp_hdr);
 3920     __ br(Assembler::EQ, cont);
 3921 
 3922     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 3923 
 3924     // If the compare-and-exchange succeeded, then we found an unlocked
 3925     // object, have now locked it, and will continue at label cont.
 3926 
 3927     __ bind(cas_failed);
 3928     // We did not see an unlocked object so try the fast recursive case.
 3929 
 3930     // Check if the owner is self by comparing the value in the
 3931     // markWord of object (disp_hdr) with the stack pointer.

 7654 instruct loadConL(iRegLNoSp dst, immL src)
 7655 %{
 7656   match(Set dst src);
 7657 
 7658   ins_cost(INSN_COST);
 7659   format %{ "mov $dst, $src\t# long" %}
 7660 
 7661   ins_encode( aarch64_enc_mov_imm(dst, src) );
 7662 
 7663   ins_pipe(ialu_imm);
 7664 %}
 7665 
 7666 // Load Pointer Constant
 7667 
 7668 instruct loadConP(iRegPNoSp dst, immP con)
 7669 %{
 7670   match(Set dst con);
 7671 
 7672   ins_cost(INSN_COST * 4);
 7673   format %{
 7674     "mov  $dst, $con\t# ptr\n\t"
 7675   %}
 7676 
 7677   ins_encode(aarch64_enc_mov_p(dst, con));
 7678 
 7679   ins_pipe(ialu_imm);
 7680 %}
 7681 
 7682 // Load Null Pointer Constant
 7683 
 7684 instruct loadConP0(iRegPNoSp dst, immP0 con)
 7685 %{
 7686   match(Set dst con);
 7687 
 7688   ins_cost(INSN_COST);
 7689   format %{ "mov  $dst, $con\t# NULL ptr" %}
 7690 
 7691   ins_encode(aarch64_enc_mov_p0(dst, con));
 7692 
 7693   ins_pipe(ialu_imm);
 7694 %}

 8787 %}
 8788 
 8789 // ============================================================================
 8790 // Cast/Convert Instructions
 8791 
 8792 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8793   match(Set dst (CastX2P src));
 8794 
 8795   ins_cost(INSN_COST);
 8796   format %{ "mov $dst, $src\t# long -> ptr" %}
 8797 
 8798   ins_encode %{
 8799     if ($dst$$reg != $src$$reg) {
 8800       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8801     }
 8802   %}
 8803 
 8804   ins_pipe(ialu_reg);
 8805 %}
 8806 
 8807 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8808   match(Set dst (CastP2X src));
 8809 
 8810   ins_cost(INSN_COST);
 8811   format %{ "mov $dst, $src\t# ptr -> long" %}
 8812 
 8813   ins_encode %{
 8814     if ($dst$$reg != $src$$reg) {
 8815       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8816     }
 8817   %}
 8818 
 8819   ins_pipe(ialu_reg);
 8820 %}
 8821 
 8822 // Convert oop into int for vectors alignment masking
 8823 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8824   match(Set dst (ConvL2I (CastP2X src)));
 8825 
 8826   ins_cost(INSN_COST);

15150 
15151   match(Set dst (MoveL2D src));
15152 
15153   effect(DEF dst, USE src);
15154 
15155   ins_cost(INSN_COST);
15156 
15157   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15158 
15159   ins_encode %{
15160     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15161   %}
15162 
15163   ins_pipe(fp_l2d);
15164 
15165 %}
15166 
15167 // ============================================================================
15168 // clearing of an array
15169 
15170 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
15171 %{
15172   match(Set dummy (ClearArray cnt base));
15173   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15174 
15175   ins_cost(4 * INSN_COST);
15176   format %{ "ClearArray $cnt, $base" %}
15177 
15178   ins_encode %{
15179     address tpc = __ zero_words($base$$Register, $cnt$$Register);
15180     if (tpc == NULL) {
15181       ciEnv::current()->record_failure("CodeCache is full");
15182       return;
15183     }
15184   %}
15185 
15186   ins_pipe(pipe_class_memory);
15187 %}
15188 
15189 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15190 %{
15191   predicate((uint64_t)n->in(2)->get_long()
15192             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
15193   match(Set dummy (ClearArray cnt base));
15194   effect(TEMP temp, USE_KILL base, KILL cr);
15195 
15196   ins_cost(4 * INSN_COST);
15197   format %{ "ClearArray $cnt, $base" %}
15198 
15199   ins_encode %{
15200     __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15201   %}
15202 
15203   ins_pipe(pipe_class_memory);
15204 %}
15205 
15206 // ============================================================================
15207 // Overflow Math Instructions
15208 
15209 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15210 %{
15211   match(Set cr (OverflowAddI op1 op2));
15212 

16502 
16503 // Call Runtime Instruction
16504 
16505 instruct CallLeafDirect(method meth)
16506 %{
16507   match(CallLeaf);
16508 
16509   effect(USE meth);
16510 
16511   ins_cost(CALL_COST);
16512 
16513   format %{ "CALL, runtime leaf $meth" %}
16514 
16515   ins_encode( aarch64_enc_java_to_runtime(meth) );
16516 
16517   ins_pipe(pipe_class_call);
16518 %}
16519 
16520 // Call Runtime Instruction
16521 
16522 instruct CallLeafNoFPDirect(method meth)
16523 %{
16524   match(CallLeafNoFP);
16525 
16526   effect(USE meth);
16527 
16528   ins_cost(CALL_COST);
16529 
16530   format %{ "CALL, runtime leaf nofp $meth" %}
16531 
16532   ins_encode( aarch64_enc_java_to_runtime(meth) );
16533 
16534   ins_pipe(pipe_class_call);
16535 %}
16536 
16537 instruct CallNativeDirect(method meth)
16538 %{
16539   match(CallNative);
16540 
16541   effect(USE meth);
16542 
16543   ins_cost(CALL_COST);

src/hotspot/cpu/aarch64/aarch64.ad (new version)

 1753 
 1754 int MachCallDynamicJavaNode::ret_addr_offset()
 1755 {
 1756   return 16; // movz, movk, movk, bl
 1757 }
 1758 
 1759 int MachCallRuntimeNode::ret_addr_offset() {
 1760   // for generated stubs the call will be
 1761   //   bl(addr)
 1762   // or with far branches
 1763   //   bl(trampoline_stub)
 1764   // for real runtime callouts it will be six instructions
 1765   // see aarch64_enc_java_to_runtime
 1766   //   adr(rscratch2, retaddr)
 1767   //   lea(rscratch1, RuntimeAddress(addr))
 1768   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1769   //   blr(rscratch1)
 1770   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1771   if (cb) {
 1772     return 1 * NativeInstruction::instruction_size;
 1773   } else if (_entry_point == NULL) {
 1774     // See CallLeafNoFPIndirect
 1775     return 1 * NativeInstruction::instruction_size;
 1776   } else {
 1777     return 6 * NativeInstruction::instruction_size;
 1778   }
 1779 }
 1780 
 1781 int MachCallNativeNode::ret_addr_offset() {
 1782   // This is implemented using aarch64_enc_java_to_runtime as above.
 1783   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1784   if (cb) {
 1785     return 1 * NativeInstruction::instruction_size;
 1786   } else {
 1787     return 6 * NativeInstruction::instruction_size;
 1788   }
 1789 }
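
For reference, the three ret_addr_offset cases above reduce to simple
arithmetic on the fixed 4-byte A64 instruction size. A minimal C++ sketch,
not HotSpot code; the names are assumptions:

    enum { kInsnSize = 4 };  // every A64 instruction is 4 bytes

    int ret_addr_offset_sketch(bool target_is_code_blob, bool entry_point_is_null) {
      if (target_is_code_blob) return 1 * kInsnSize;  // single bl; a trampoline handles far targets
      if (entry_point_is_null) return 1 * kInsnSize;  // single blr, see CallLeafNoFPIndirect
      return 6 * kInsnSize;  // adr + movz/movk/movk (lea) + stp + blr = 24 bytes
    }
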
 1790 
 1791 //=============================================================================
 1792 
 1793 #ifndef PRODUCT
 1794 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1795   st->print("BREAKPOINT");

 1873     st->print("\n\t");
 1874     st->print("ldr  rscratch1, [guard]\n\t");
 1875     st->print("dmb ishld\n\t");
 1876     st->print("ldr  rscratch2, [rthread, #thread_disarmed_offset]\n\t");
 1877     st->print("cmp  rscratch1, rscratch2\n\t");
 1878     st->print("b.eq skip");
 1879     st->print("\n\t");
 1880     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1881     st->print("b skip\n\t");
 1882     st->print("guard: int\n\t");
 1883     st->print("\n\t");
 1884     st->print("skip:\n\t");
 1885   }
 1886 }
 1887 #endif
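
The printed sequence is the nmethod entry barrier: load the guard value
embedded in the nmethod, order the load with dmb ishld, and compare it with
the per-thread disarmed value. A rough C++ analogue with assumed field names
(this is not the BarrierSetAssembler API):

    #include <atomic>

    struct ThreadSketch { int disarmed_value; };  // assumed field

    bool nmethod_barrier_armed(const std::atomic<int>& guard, const ThreadSketch& t) {
      int g = guard.load(std::memory_order_acquire);  // ldr [guard]; dmb ishld
      return g != t.disarmed_value;                   // cmp; armed -> blr stub
    }
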
 1888 
 1889 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1890   Compile* C = ra_->C;
 1891   C2_MacroAssembler _masm(&cbuf);
 1892 
 1893   // insert a nop at the start of the prolog so we can patch in a
 1894   // branch if we need to invalidate the method later
 1895   __ nop();
 1896 
 1897   if (C->clinit_barrier_on_entry()) {
 1898     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1899 
 1900     Label L_skip_barrier;
 1901 
 1902     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1903     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1904     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1905     __ bind(L_skip_barrier);
 1906   }
 1907 
 1908   if (C->max_vector_size() > 0) {
 1909     __ reinitialize_ptrue();
 1910   }
 1911 
 1912   __ verified_entry(C, 0);
 1913   __ bind(*_verified_entry);
 1914 
 1915   if (C->stub_function() == NULL) {
 1916     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1917     bs->nmethod_entry_barrier(&_masm);
 1918   }
 1919 
 1920   if (VerifyStackAtCalls) {
 1921     Unimplemented();
 1922   }
 1923 
 1924   C->output()->set_frame_complete(cbuf.insts_size());
 1925 
 1926   if (C->has_mach_constant_base_node()) {
 1927     // NOTE: We set the table base offset here because code that uses
 1928     // the constant table might be emitted before MachConstantBaseNode.
 1929     ConstantTable& constant_table = C->output()->constant_table();
 1930     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1931   }
 1932 }
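
The leading nop emitted above is a patch point: the method can later be made
not-entrant by overwriting it with a branch. A sketch of the idea using
standard A64 encodings; the patching helper itself is an assumption, not
HotSpot's NativeJump API:

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kNop = 0xd503201f;                 // A64 NOP

    constexpr uint32_t b_encoding(int32_t byte_offset) {  // B <imm26>, +/-128 MiB
      return 0x14000000u | ((uint32_t)(byte_offset >> 2) & 0x03ffffffu);
    }

    void make_not_entrant(std::atomic<uint32_t>* first_insn, int32_t handler_offset) {
      uint32_t expected = kNop;  // only patch a still-unpatched prolog
      first_insn->compare_exchange_strong(expected, b_encoding(handler_offset));
    }
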
 1933 
 1934 int MachPrologNode::reloc() const
 1935 {
 1936   return 0;
 1937 }
 1938 
 1939 //=============================================================================
 1940 
 1941 #ifndef PRODUCT
 1942 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1943   Compile* C = ra_->C;
 1944   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1945 
 1946   st->print("# pop frame %d\n\t",framesize);
 1947 
 1948   if (framesize == 0) {
 1949     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1950   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1951     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1952     st->print("add  sp, sp, #%d\n\t", framesize);
 1953   } else {
 1954     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
 1955     st->print("add  sp, sp, rscratch1\n\t");
 1956     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1957   }
 1958 
 1959   if (do_polling() && C->is_method_compilation()) {
 1960     st->print("# test polling word\n\t");
 1961     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1962     st->print("cmp  sp, rscratch1\n\t");
 1963     st->print("bhi #slow_path");
 1964   }
 1965 }
 1966 #endif
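
The (1 << 9) bound above comes from ldp's scaled immediate: a signed 7-bit
offset scaled by 8 covers roughly +/-512 bytes. A minimal sketch of the three
frame-pop shapes, assuming wordSize == 8 (not HotSpot code):

    #include <cstdio>

    void print_pop_shape(int framesize) {
      const int wordSize = 8;
      if (framesize == 0) {
        printf("ldp lr, rfp, [sp], #%d\n", 2 * wordSize);
      } else if (framesize < ((1 << 9) + 2 * wordSize)) {
        printf("ldp lr, rfp, [sp, #%d]; add sp, sp, #%d\n",
               framesize - 2 * wordSize, framesize);
      } else {
        printf("mov rscratch1, #%d; add sp, sp, rscratch1; ldp lr, rfp, [sp], #%d\n",
               framesize - 2 * wordSize, 2 * wordSize);
      }
    }
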
 1967 
 1968 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1969   Compile* C = ra_->C;
 1970   C2_MacroAssembler _masm(&cbuf);
 1971   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1972 
 1973   __ remove_frame(framesize, C->needs_stack_repair());
 1974 
 1975   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1976     __ reserved_stack_check();
 1977   }
 1978 
 1979   if (do_polling() && C->is_method_compilation()) {
 1980     Label dummy_label;
 1981     Label* code_stub = &dummy_label;
 1982     if (!C->output()->in_scratch_emit_size()) {
 1983       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1984     }
 1985     __ relocate(relocInfo::poll_return_type);
 1986     __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
 1987   }
 1988 }
 1989 
 1990 int MachEpilogNode::reloc() const {
 1991   // Return number of relocatable values contained in this instruction.
 1992   return 1; // 1 for polling page.
 1993 }
 1994 
 1995 const Pipeline * MachEpilogNode::pipeline() const {
 1996   return MachNode::pipeline_class();
 1997 }
 1998 
 1999 //=============================================================================
 2000 
 2001 // Figure out which register class each belongs in: rc_int, rc_float or
 2002 // rc_stack.
 2003 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 2004 
 2005 static enum RC rc_class(OptoReg::Name reg) {
 2006 
 2007   if (reg == OptoReg::Bad) {
 2008     return rc_bad;
 2009   }

 2275 
 2276   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2277   int reg    = ra_->get_encode(this);
 2278 
 2279   // This add will handle any 24-bit signed offset. 24 bits allows an
 2280   // 8 megabyte stack frame.
 2281   __ add(as_Register(reg), sp, offset);
 2282 }
 2283 
 2284 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2285   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2286   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2287 
 2288   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2289     return NativeInstruction::instruction_size;
 2290   } else {
 2291     return 2 * NativeInstruction::instruction_size;
 2292   }
 2293 }
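
The size rule reflects the A64 add/sub immediate encoding: 12 bits, optionally
shifted left by 12. A sketch of the check, as an approximation of
Assembler::operand_valid_for_add_sub_immediate rather than the real thing:

    #include <cstdint>

    bool fits_add_sub_immediate(uint64_t v) {
      return (v & ~0xfffULL) == 0 || (v & ~0xfff000ULL) == 0;
    }

    int box_lock_size_sketch(uint64_t offset) {
      const int insn = 4;  // NativeInstruction::instruction_size
      return fits_add_sub_immediate(offset) ? insn : 2 * insn;
    }
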
 2294 
 2295 //=============================================================================
 2296 #ifndef PRODUCT
 2297 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2298 {
 2299   st->print_cr("# MachVEPNode");
 2300   if (!_verified) {
 2301     st->print_cr("\t load_class");
 2302   } else {
 2303     st->print_cr("\t unpack_inline_arg");
 2304   }
 2305 }
 2306 #endif
 2307 
 2308 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2309 {
 2310   MacroAssembler _masm(&cbuf);
 2311 
 2312   if (!_verified) {
 2313     Label skip;
 2314     __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2315     __ br(Assembler::EQ, skip);
 2316       __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2317     __ bind(skip);
 2318 
 2319   } else {
 2320     // Unpack inline type args passed as oop and then jump to
 2321     // the verified entry point (skipping the unverified entry).
 2322     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2323     // Emit code for verified entry and save increment for stack repair on return
 2324     __ verified_entry(ra_->C, sp_inc);
 2325     __ b(*_verified_entry);
 2326   }
 2327 }
 2328 
 2329 //=============================================================================
 2330 #ifndef PRODUCT
 2331 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2332 {
 2333   st->print_cr("# MachUEPNode");
 2334   if (UseCompressedClassPointers) {
 2335     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2336     if (CompressedKlassPointers::shift() != 0) {
 2337       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2338     }
 2339   } else {
 2340     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2341   }
 2342   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2343   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2344 }
 2345 #endif
 2346 
 2347 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2348 {
 2349   // This is the unverified entry point.
 2350   C2_MacroAssembler _masm(&cbuf);
 2351   Label skip;
 2352 
 2353   // UseCompressedClassPointers logic is inside cmp_klass
 2354   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2355 
 2356   // TODO
 2357   // can we avoid this skip and still use a reloc?
 2358   __ br(Assembler::EQ, skip);
 2359   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2360   __ bind(skip);
 2361 }
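
A minimal C++ analogue of the inline-cache check performed at the unverified
entry point; the types below are assumptions, not HotSpot's:

    struct Klass;
    struct OopSketch { const Klass* klass; };  // assumed layout

    bool ic_check(const OopSketch* receiver /* j_rarg0 */,
                  const Klass* expected /* rscratch2, set up at the call site */) {
      return receiver->klass == expected;  // mismatch -> far_jump(ic_miss_stub)
    }
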
 2362 
 2363 // REQUIRED EMIT CODE
 2364 
 2365 //=============================================================================
 2366 
 2367 // Emit exception handler code.
 2368 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2369 {
 2370   // mov rscratch1 #exception_blob_entry_point
 2371   // br rscratch1
 2372   // Note that the code buffer's insts_mark is always relative to insts.
 2373   // That's why we must use the macroassembler to generate a handler.
 2374   C2_MacroAssembler _masm(&cbuf);
 2375   address base = __ start_a_stub(size_exception_handler());
 2376   if (base == NULL) {
 2377     ciEnv::current()->record_failure("CodeCache is full");
 2378     return 0;  // CodeBuffer::expand failed
 2379   }
 2380   int offset = __ offset();
 2381   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2382   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

 3817   %}
 3818 
 3819   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3820     C2_MacroAssembler _masm(&cbuf);
 3821     int method_index = resolved_method_index(cbuf);
 3822     address call = __ ic_call((address)$meth$$method, method_index);
 3823     if (call == NULL) {
 3824       ciEnv::current()->record_failure("CodeCache is full");
 3825       return;
 3826     } else if (Compile::current()->max_vector_size() > 0) {
 3827       __ reinitialize_ptrue();
 3828     }
 3829   %}
 3830 
 3831   enc_class aarch64_enc_call_epilog() %{
 3832     C2_MacroAssembler _masm(&cbuf);
 3833     if (VerifyStackAtCalls) {
 3834       // Check that stack depth is unchanged: find magic cookie on stack
 3835       __ call_Unimplemented();
 3836     }
 3837     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && return_value_is_used()) {
 3838       // An inline type is returned as fields in multiple registers.
 3839       // R0 either contains an oop (if the inline type is buffered) or a pointer
 3840       // to the corresponding InlineKlass with its lowest bit set to 1. Zero r0
 3841       // if the lowest bit is set, so that C2 can use the oop after null checking.
 3842       // r0 &= (r0 & 1) - 1
 3843       C2_MacroAssembler _masm(&cbuf);
 3844       __ andr(rscratch1, r0, 0x1);
 3845       __ sub(rscratch1, rscratch1, 0x1);
 3846       __ andr(r0, r0, rscratch1);
 3847     }
 3848   %}
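
The three instructions added for inline-type returns compute
r0 &= (r0 & 1) - 1. A small self-contained check of the identity, with sketch
values only:

    #include <cassert>
    #include <cstdint>

    uint64_t null_if_tagged(uint64_t r0) {
      return r0 & ((r0 & 1) - 1);  // r0 &= (r0 & 1) - 1
    }

    int main() {
      assert(null_if_tagged(0x1000) == 0x1000);  // buffered oop: mask is all ones
      assert(null_if_tagged(0x1001) == 0);       // tagged InlineKlass ptr: mask is 0
      return 0;
    }
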
 3849 
 3850   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3851     C2_MacroAssembler _masm(&cbuf);
 3852 
 3853     // Some calls to generated routines (arraycopy code) are scheduled
 3854     // by C2 as runtime calls. If so, we can call them using a bl (they
 3855     // will be in a reachable segment); otherwise we have to use a blr,
 3856     // which loads the absolute address into a register.
 3857     address entry = (address)$meth$$method;
 3858     CodeBlob *cb = CodeCache::find_blob(entry);
 3859     if (cb) {
 3860       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3861       if (call == NULL) {
 3862         ciEnv::current()->record_failure("CodeCache is full");
 3863         return;
 3864       }
 3865     } else {
 3866       Label retaddr;
 3867       __ adr(rscratch2, retaddr);

 3919     Label cas_failed;
 3920 
 3921     assert_different_registers(oop, box, tmp, disp_hdr);
 3922 
 3923     // Load markWord from object into displaced_header.
 3924     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
 3925 
 3926     if (DiagnoseSyncOnValueBasedClasses != 0) {
 3927       __ load_klass(tmp, oop);
 3928       __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
 3929       __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
 3930       __ br(Assembler::NE, cont);
 3931     }
 3932 
 3933     // Check for existing monitor
 3934     __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
 3935 
 3936     // Set tmp to be (markWord of object | UNLOCK_VALUE).
 3937     __ orr(tmp, disp_hdr, markWord::unlocked_value);
 3938 
 3939     if (EnableValhalla) {
 3940       // Mask inline_type bit such that we go to the slow path if object is an inline type
 3941       __ andr(tmp, tmp, ~((int) markWord::inline_type_bit_in_place));
 3942     }
 3943 
 3944     // Initialize the box. (Must happen before we update the object mark!)
 3945     __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
 3946 
 3947     // Compare object markWord with an unlocked value (tmp) and if
 3948     // equal exchange the stack address of our box with object markWord.
 3949     // On failure disp_hdr contains the possibly locked markWord.
 3950     __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
 3951                /*release*/ true, /*weak*/ false, disp_hdr);
 3952     __ br(Assembler::EQ, cont);
 3953 
 3954     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 3955 
 3956     // If the compare-and-exchange succeeded, then we found an unlocked
 3957     // object, have now locked it, and will continue at label cont.
 3958 
 3959     __ bind(cas_failed);
 3960     // We did not see an unlocked object so try the fast recursive case.
 3961 
 3962     // Check if the owner is self by comparing the value in the
 3963     // markWord of object (disp_hdr) with the stack pointer.
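
A high-level analogue of the fast-lock CAS above (a sketch only; the names
mirror the registers, not HotSpot types): propose mark|unlocked as the
expected value and install the box address; on failure the observed, possibly
locked markWord is left behind, just as cmpxchg leaves it in disp_hdr.

    #include <atomic>
    #include <cstdint>

    bool fast_lock_sketch(std::atomic<uint64_t>& mark_word, uint64_t box_addr,
                          uint64_t unlocked_value, uint64_t& disp_hdr) {
      disp_hdr = mark_word.load() | unlocked_value;  // assume the object is unlocked
      // compare_exchange updates disp_hdr with the observed value on failure
      return mark_word.compare_exchange_strong(disp_hdr, box_addr);
    }
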

 7686 instruct loadConL(iRegLNoSp dst, immL src)
 7687 %{
 7688   match(Set dst src);
 7689 
 7690   ins_cost(INSN_COST);
 7691   format %{ "mov $dst, $src\t# long" %}
 7692 
 7693   ins_encode( aarch64_enc_mov_imm(dst, src) );
 7694 
 7695   ins_pipe(ialu_imm);
 7696 %}
 7697 
 7698 // Load Pointer Constant
 7699 
 7700 instruct loadConP(iRegPNoSp dst, immP con)
 7701 %{
 7702   match(Set dst con);
 7703 
 7704   ins_cost(INSN_COST * 4);
 7705   format %{
 7706     "mov  $dst, $con\t# ptr"
 7707   %}
 7708 
 7709   ins_encode(aarch64_enc_mov_p(dst, con));
 7710 
 7711   ins_pipe(ialu_imm);
 7712 %}
 7713 
 7714 // Load Null Pointer Constant
 7715 
 7716 instruct loadConP0(iRegPNoSp dst, immP0 con)
 7717 %{
 7718   match(Set dst con);
 7719 
 7720   ins_cost(INSN_COST);
 7721   format %{ "mov  $dst, $con\t# NULL ptr" %}
 7722 
 7723   ins_encode(aarch64_enc_mov_p0(dst, con));
 7724 
 7725   ins_pipe(ialu_imm);
 7726 %}
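
loadConP is costed at 4 * INSN_COST because a 64-bit pointer constant is
materialized 16 bits at a time (movz plus up to three movk). A sketch of the
expansion; the printer below is an assumption, not MacroAssembler::mov:

    #include <cstdint>
    #include <cstdio>

    void print_mov_imm64(uint64_t imm) {
      printf("movz x0, #0x%x, lsl #0\n", (unsigned)(imm & 0xffff));
      for (int shift = 16; shift < 64; shift += 16) {
        unsigned chunk = (unsigned)((imm >> shift) & 0xffff);
        if (chunk != 0) printf("movk x0, #0x%x, lsl #%d\n", chunk, shift);
      }
    }
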

 8819 %}
 8820 
 8821 // ============================================================================
 8822 // Cast/Convert Instructions
 8823 
 8824 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8825   match(Set dst (CastX2P src));
 8826 
 8827   ins_cost(INSN_COST);
 8828   format %{ "mov $dst, $src\t# long -> ptr" %}
 8829 
 8830   ins_encode %{
 8831     if ($dst$$reg != $src$$reg) {
 8832       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8833     }
 8834   %}
 8835 
 8836   ins_pipe(ialu_reg);
 8837 %}
 8838 
 8839 instruct castN2X(iRegLNoSp dst, iRegN src) %{
 8840   match(Set dst (CastP2X src));
 8841 
 8842   ins_cost(INSN_COST);
 8843   format %{ "mov $dst, $src\t# narrow ptr -> long" %}
 8844 
 8845   ins_encode %{
 8846     if ($dst$$reg != $src$$reg) {
 8847       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8848     }
 8849   %}
 8850 
 8851   ins_pipe(ialu_reg);
 8852 %}
 8853 
 8854 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8855   match(Set dst (CastP2X src));
 8856 
 8857   ins_cost(INSN_COST);
 8858   format %{ "mov $dst, $src\t# ptr -> long" %}
 8859 
 8860   ins_encode %{
 8861     if ($dst$$reg != $src$$reg) {
 8862       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8863     }
 8864   %}
 8865 
 8866   ins_pipe(ialu_reg);
 8867 %}
 8868 
 8869 // Convert oop into int for vectors alignment masking
 8870 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8871   match(Set dst (ConvL2I (CastP2X src)));
 8872 
 8873   ins_cost(INSN_COST);

15197 
15198   match(Set dst (MoveL2D src));
15199 
15200   effect(DEF dst, USE src);
15201 
15202   ins_cost(INSN_COST);
15203 
15204   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15205 
15206   ins_encode %{
15207     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15208   %}
15209 
15210   ins_pipe(fp_l2d);
15211 
15212 %}
15213 
15214 // ============================================================================
15215 // clearing of an array
15216 
15217 instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
15218 %{
15219   match(Set dummy (ClearArray (Binary cnt base) zero));
15220   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15221 
15222   ins_cost(4 * INSN_COST);
15223   format %{ "ClearArray $cnt, $base" %}
15224 
15225   ins_encode %{
15226     address tpc = __ zero_words($base$$Register, $cnt$$Register);
15227     if (tpc == NULL) {
15228       ciEnv::current()->record_failure("CodeCache is full");
15229       return;
15230     }
15231   %}
15232 
15233   ins_pipe(pipe_class_memory);
15234 %}
15235 
15236 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
15237 %{
15238   predicate(((ClearArrayNode*)n)->word_copy_only());
15239   match(Set dummy (ClearArray (Binary cnt base) val));
15240   effect(USE_KILL cnt, USE_KILL base, KILL cr);
15241 
15242   ins_cost(4 * INSN_COST);
15243   format %{ "ClearArray $cnt, $base, $val" %}
15244 
15245   ins_encode %{
15246     __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
15247   %}
15248 
15249   ins_pipe(pipe_class_memory);
15250 %}
15251 
15252 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
15253 %{
15254   predicate((uint64_t)n->in(2)->get_long()
15255             < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
15256             && !((ClearArrayNode*)n)->word_copy_only());
15257   match(Set dummy (ClearArray cnt base));
15258   effect(TEMP temp, USE_KILL base, KILL cr);
15259 
15260   ins_cost(4 * INSN_COST);
15261   format %{ "ClearArray $cnt, $base" %}
15262 
15263   ins_encode %{
15264     __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
15265   %}
15266 
15267   ins_pipe(pipe_class_memory);
15268 %}
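
The predicate converts units before comparing: the ClearArray constant counts
8-byte words while BlockZeroingLowLimit is in bytes, hence the shift by
LogBytesPerWord. A one-function sketch (not HotSpot code):

    #include <cstdint>

    bool small_constant_clear(uint64_t cnt_in_words, uint64_t low_limit_bytes) {
      const int LogBytesPerWord = 3;  // 64-bit words
      return cnt_in_words < (low_limit_bytes >> LogBytesPerWord);
    }
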
15269 
15270 // ============================================================================
15271 // Overflow Math Instructions
15272 
15273 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15274 %{
15275   match(Set cr (OverflowAddI op1 op2));
15276 

16566 
16567 // Call Runtime Instruction
16568 
16569 instruct CallLeafDirect(method meth)
16570 %{
16571   match(CallLeaf);
16572 
16573   effect(USE meth);
16574 
16575   ins_cost(CALL_COST);
16576 
16577   format %{ "CALL, runtime leaf $meth" %}
16578 
16579   ins_encode( aarch64_enc_java_to_runtime(meth) );
16580 
16581   ins_pipe(pipe_class_call);
16582 %}
16583 
16584 // Call Runtime Instruction
16585 
16586 // entry point is null, target holds the address to call
16587 instruct CallLeafNoFPIndirect(iRegP target)
16588 %{
16589   predicate(n->as_Call()->entry_point() == NULL);
16590 
16591   match(CallLeafNoFP target);
16592 
16593   ins_cost(CALL_COST);
16594 
16595   format %{ "CALL, runtime leaf nofp indirect $target" %}
16596 
16597   ins_encode %{
16598     __ blr($target$$Register);
16599   %}
16600 
16601   ins_pipe(pipe_class_call);
16602 %}
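
The predicates partition CallLeafNoFP nodes between the two rules: a null
entry point selects the indirect form (blr through a register), pairing with
the _entry_point == NULL branch added to MachCallRuntimeNode::ret_addr_offset()
above. A sketch of the selection, for illustration only:

    #include <cstddef>

    const char* leaf_nofp_rule(const void* entry_point) {
      return entry_point == NULL ? "CallLeafNoFPIndirect" : "CallLeafNoFPDirect";
    }
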
16603 
16604 instruct CallLeafNoFPDirect(method meth)
16605 %{
16606   predicate(n->as_Call()->entry_point() != NULL);
16607 
16608   match(CallLeafNoFP);
16609 
16610   effect(USE meth);
16611 
16612   ins_cost(CALL_COST);
16613 
16614   format %{ "CALL, runtime leaf nofp $meth" %}
16615 
16616   ins_encode( aarch64_enc_java_to_runtime(meth) );
16617 
16618   ins_pipe(pipe_class_call);
16619 %}
16620 
16621 instruct CallNativeDirect(method meth)
16622 %{
16623   match(CallNative);
16624 
16625   effect(USE meth);
16626 
16627   ins_cost(CALL_COST);