< prev index next >

src/hotspot/cpu/aarch64/aarch64.ad

Print this page

 1759 
 1760 int MachCallDynamicJavaNode::ret_addr_offset()
 1761 {
 1762   return 16; // movz, movk, movk, bl
 1763 }
 1764 
 1765 int MachCallRuntimeNode::ret_addr_offset() {
 1766   // for generated stubs the call will be
 1767   //   bl(addr)
 1768   // or with far branches
 1769   //   bl(trampoline_stub)
 1770   // for real runtime callouts it will be six instructions
 1771   // see aarch64_enc_java_to_runtime
 1772   //   adr(rscratch2, retaddr)
 1773   //   lea(rscratch1, RuntimeAddress(addr)
 1774   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1775   //   blr(rscratch1)
 1776   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1777   if (cb) {
 1778     return 1 * NativeInstruction::instruction_size;



 1779   } else {
 1780     return 6 * NativeInstruction::instruction_size;
 1781   }
 1782 }
 1783 
 1784 int MachCallNativeNode::ret_addr_offset() {
 1785   // This is implemented using aarch64_enc_java_to_runtime as above.
 1786   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1787   if (cb) {
 1788     return 1 * NativeInstruction::instruction_size;
 1789   } else {
 1790     return 6 * NativeInstruction::instruction_size;
 1791   }
 1792 }
 1793 
 1794 //=============================================================================
 1795 
 1796 #ifndef PRODUCT
 1797 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1798   st->print("BREAKPOINT");

 1880     st->print("\n\t");
 1881     st->print("ldr  rscratch1, [guard]\n\t");
 1882     st->print("dmb ishld\n\t");
 1883     st->print("ldr  rscratch2, [rthread, #thread_disarmed_offset]\n\t");
 1884     st->print("cmp  rscratch1, rscratch2\n\t");
 1885     st->print("b.eq skip");
 1886     st->print("\n\t");
 1887     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1888     st->print("b skip\n\t");
 1889     st->print("guard: int\n\t");
 1890     st->print("\n\t");
 1891     st->print("skip:\n\t");
 1892   }
 1893 }
 1894 #endif
 1895 
// Emit the C2 method prolog: patchable nop, optional class-init barrier,
// SVE predicate re-init, stack-overflow bang, frame build, and the
// nmethod entry barrier for non-stub compilations.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const int framesize = C->output()->frame_size_in_bytes();

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->clinit_barrier_on_entry()) {
    // Divert to the wrong-method stub unless the holder class's
    // initialization barrier passes (see MacroAssembler::clinit_barrier).
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;

    __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
    __ bind(L_skip_barrier);
  }

  if (C->max_vector_size() > 0) {
    // Re-establish the SVE predicate register assumed by vector code.
    __ reinitialize_ptrue();
  }

  // Bang the stack pages the frame will occupy so overflow is detected
  // eagerly, before the frame is built.
  int bangsize = C->output()->bang_size_in_bytes();
  if (C->output()->need_stack_bang(bangsize))
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (C->stub_function() == NULL) {
    // Regular nmethods (not runtime stubs) get the platform entry barrier.
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->nmethod_entry_barrier(&_masm);
  }

  if (VerifyStackAtCalls) {
    // Stack-depth verification is not implemented on AArch64.
    Unimplemented();
  }

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1946 
 1947 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1948 {
 1949   return MachNode::size(ra_); // too many variables; just compute it
 1950                               // the hard way
 1951 }
 1952 
 1953 int MachPrologNode::reloc() const
 1954 {
 1955   return 0;
 1956 }
 1957 
 1958 //=============================================================================
 1959 
 1960 #ifndef PRODUCT
 1961 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1962   Compile* C = ra_->C;
 1963   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1964 
 1965   st->print("# pop frame %d\n\t",framesize);
 1966 
 1967   if (framesize == 0) {
 1968     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1969   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1970     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1971     st->print("add  sp, sp, #%d\n\t", framesize);
 1972   } else {

 1976   }
 1977   if (VM_Version::use_rop_protection()) {
 1978     st->print("autia lr, rfp\n\t");
 1979     st->print("ldr zr, [lr]\n\t");
 1980   }
 1981 
 1982   if (do_polling() && C->is_method_compilation()) {
 1983     st->print("# test polling word\n\t");
 1984     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1985     st->print("cmp  sp, rscratch1\n\t");
 1986     st->print("bhi #slow_path");
 1987   }
 1988 }
 1989 #endif
 1990 
// Emit the C2 method epilog: tear down the frame, run the optional
// reserved-stack check, and poll the safepoint word on return for
// method (non-stub) compilations.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);
  // Frame size in bytes (frame_slots counts 32-bit slots).
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // During scratch-emit sizing passes no safepoint stub may be
    // allocated, so poll against a dummy label in that case.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
  }
}
 2012 
 2013 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 2014   // Variable size. Determine dynamically.
 2015   return MachNode::size(ra_);
 2016 }
 2017 
 2018 int MachEpilogNode::reloc() const {
 2019   // Return number of relocatable values contained in this instruction.
 2020   return 1; // 1 for polling page.
 2021 }
 2022 
 2023 const Pipeline * MachEpilogNode::pipeline() const {
 2024   return MachNode::pipeline_class();
 2025 }
 2026 
 2027 //=============================================================================
 2028 
 2029 // Figure out which register class each belongs in: rc_int, rc_float or
 2030 // rc_stack.
 2031 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 2032 
 2033 static enum RC rc_class(OptoReg::Name reg) {
 2034 
 2035   if (reg == OptoReg::Bad) {
 2036     return rc_bad;
 2037   }

 2303 
 2304   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2305   int reg    = ra_->get_encode(this);
 2306 
 2307   // This add will handle any 24-bit signed offset. 24 bits allows an
 2308   // 8 megabyte stack frame.
 2309   __ add(as_Register(reg), sp, offset);
 2310 }
 2311 
 2312 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2313   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2314   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2315 
 2316   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2317     return NativeInstruction::instruction_size;
 2318   } else {
 2319     return 2 * NativeInstruction::instruction_size;
 2320   }
 2321 }
 2322 
 2323 //=============================================================================






















 2324 











 2325 #ifndef PRODUCT
 2326 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2327 {
 2328   st->print_cr("# MachUEPNode");
 2329   if (UseCompressedClassPointers) {
 2330     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2331     if (CompressedKlassPointers::shift() != 0) {
 2332       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2333     }
 2334   } else {
 2335    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2336   }
 2337   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2338   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2339 }
 2340 #endif
 2341 
 2342 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2343 {
 2344   // This is the unverified entry point.
 2345   C2_MacroAssembler _masm(&cbuf);

 2346 

 2347   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2348   Label skip;
 2349   // TODO
 2350   // can we avoid this skip and still use a reloc?
 2351   __ br(Assembler::EQ, skip);
 2352   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2353   __ bind(skip);
 2354 }
 2355 
 2356 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2357 {
 2358   return MachNode::size(ra_);
 2359 }
 2360 
 2361 // REQUIRED EMIT CODE
 2362 
 2363 //=============================================================================
 2364 
 2365 // Emit exception handler code.
 2366 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2367 {
 2368   // mov rscratch1 #exception_blob_entry_point
 2369   // br rscratch1
 2370   // Note that the code buffer's insts_mark is always relative to insts.
 2371   // That's why we must use the macroassembler to generate a handler.
 2372   C2_MacroAssembler _masm(&cbuf);
 2373   address base = __ start_a_stub(size_exception_handler());
 2374   if (base == NULL) {
 2375     ciEnv::current()->record_failure("CodeCache is full");
 2376     return 0;  // CodeBuffer::expand failed
 2377   }
 2378   int offset = __ offset();
 2379   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2380   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

 3831   %}
 3832 
  // Emit an inline-cache (dynamic) Java call to the resolved method.
  // Aborts the compile if the code cache cannot hold the call.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    C2_MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    } else if (Compile::current()->max_vector_size() > 0) {
      // Re-establish the SVE predicate register after the call.
      __ reinitialize_ptrue();
    }
  %}
 3844 
  // Post-call hook: when VerifyStackAtCalls is set we would verify the
  // stack depth here, but that check is not implemented on AArch64.
  enc_class aarch64_enc_call_epilog() %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
 3852 
 3853   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3854     C2_MacroAssembler _masm(&cbuf);
 3855 
 3856     // some calls to generated routines (arraycopy code) are scheduled
 3857     // by C2 as runtime calls. if so we can call them using a br (they
 3858     // will be in a reachable segment) otherwise we have to use a blr
 3859     // which loads the absolute address into a register.
 3860     address entry = (address)$meth$$method;
 3861     CodeBlob *cb = CodeCache::find_blob(entry);
 3862     if (cb) {
 3863       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3864       if (call == NULL) {
 3865         ciEnv::current()->record_failure("CodeCache is full");
 3866         return;
 3867       }
 3868     } else {
 3869       Label retaddr;
 3870       __ adr(rscratch2, retaddr);

 3923 
 3924     assert_different_registers(oop, box, tmp, disp_hdr);
 3925 
 3926     // Load markWord from object into displaced_header.
 3927     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
 3928 
 3929     if (DiagnoseSyncOnValueBasedClasses != 0) {
 3930       __ load_klass(tmp, oop);
 3931       __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
 3932       __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
 3933       __ br(Assembler::NE, cont);
 3934     }
 3935 
 3936     // Check for existing monitor
 3937     __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
 3938 
 3939     if (!UseHeavyMonitors) {
 3940       // Set tmp to be (markWord of object | UNLOCK_VALUE).
 3941       __ orr(tmp, disp_hdr, markWord::unlocked_value);
 3942 





 3943       // Initialize the box. (Must happen before we update the object mark!)
 3944       __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
 3945 
 3946       // Compare object markWord with an unlocked value (tmp) and if
 3947       // equal exchange the stack address of our box with object markWord.
 3948       // On failure disp_hdr contains the possibly locked markWord.
 3949       __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
 3950                  /*release*/ true, /*weak*/ false, disp_hdr);
 3951       __ br(Assembler::EQ, cont);
 3952 
 3953       assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 3954 
 3955       // If the compare-and-exchange succeeded, then we found an unlocked
 3956       // object, will have now locked it will continue at label cont
 3957 
 3958       __ bind(cas_failed);
 3959       // We did not see an unlocked object so try the fast recursive case.
 3960 
 3961       // Check if the owner is self by comparing the value in the
 3962       // markWord of object (disp_hdr) with the stack pointer.

// Load a 64-bit long immediate into a general-purpose register.
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  // Shared immediate-materialization encoding (also used by the int form).
  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
 7682 
 7683 // Load Pointer Constant
 7684 
// Load an arbitrary pointer constant into a register.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  // Materializing a full pointer is costlier than a plain immediate.
  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}
 7698 
 7699 // Load Null Pointer Constant
 7700 
// Load the NULL pointer constant (cheap single-instruction encoding).
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}

 8876 %}
 8877 
 8878 // ============================================================================
 8879 // Cast/Convert Instructions
 8880 
// Reinterpret a long value as a pointer (no conversion needed on AArch64).
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    // A register move suffices; elided entirely when src == dst.
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
 8895 















// Reinterpret a pointer value as a long (no conversion needed on AArch64).
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    // A register move suffices; elided entirely when src == dst.
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
 8910 
 8911 // Convert oop into int for vectors alignment masking
 8912 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8913   match(Set dst (ConvL2I (CastP2X src)));
 8914 
 8915   ins_cost(INSN_COST);

15239 
15240   match(Set dst (MoveL2D src));
15241 
15242   effect(DEF dst, USE src);
15243 
15244   ins_cost(INSN_COST);
15245 
15246   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15247 
15248   ins_encode %{
15249     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15250   %}
15251 
15252   ins_pipe(fp_l2d);
15253 
15254 %}
15255 
15256 // ============================================================================
15257 // clearing of an array
15258 
// Zero an array: count in R11, base address in R10.  Both input
// registers and the flags are clobbered.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    // zero_words returns NULL when it could not emit its (possibly
    // stub-calling) code because the code cache is full.
    address tpc = __ zero_words($base$$Register, $cnt$$Register);
    if (tpc == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
15277 
















// Zero an array with a small compile-time-constant length; temp (R11)
// is clobbered by the expansion.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
  // Applies only when the constant word count is below the block-zeroing
  // threshold (BlockZeroingLowLimit is in bytes, hence the shift).
  predicate((uint64_t)n->in(2)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));

  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
15294 
15295 // ============================================================================
15296 // Overflow Math Instructions
15297 
15298 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15299 %{
15300   match(Set cr (OverflowAddI op1 op2));
15301 

16591 
16592 // Call Runtime Instruction
16593 
// Direct call to a runtime leaf routine.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  // Expands via aarch64_enc_java_to_runtime (see enc_class above).
  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16608 
16609 // Call Runtime Instruction
16610 


















// Direct call to a runtime leaf routine that does not touch
// floating-point state.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  // Expands via aarch64_enc_java_to_runtime (see enc_class above).
  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16625 
16626 instruct CallNativeDirect(method meth)
16627 %{
16628   match(CallNative);
16629 
16630   effect(USE meth);
16631 
16632   ins_cost(CALL_COST);

 1759 
 1760 int MachCallDynamicJavaNode::ret_addr_offset()
 1761 {
 1762   return 16; // movz, movk, movk, bl
 1763 }
 1764 
 1765 int MachCallRuntimeNode::ret_addr_offset() {
 1766   // for generated stubs the call will be
 1767   //   bl(addr)
 1768   // or with far branches
 1769   //   bl(trampoline_stub)
 1770   // for real runtime callouts it will be six instructions
 1771   // see aarch64_enc_java_to_runtime
 1772   //   adr(rscratch2, retaddr)
 1773   //   lea(rscratch1, RuntimeAddress(addr)
 1774   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 1775   //   blr(rscratch1)
 1776   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1777   if (cb) {
 1778     return 1 * NativeInstruction::instruction_size;
 1779   } else if (_entry_point == NULL) {
 1780     // See CallLeafNoFPIndirect
 1781     return 1 * NativeInstruction::instruction_size;
 1782   } else {
 1783     return 6 * NativeInstruction::instruction_size;
 1784   }
 1785 }
 1786 
 1787 int MachCallNativeNode::ret_addr_offset() {
 1788   // This is implemented using aarch64_enc_java_to_runtime as above.
 1789   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 1790   if (cb) {
 1791     return 1 * NativeInstruction::instruction_size;
 1792   } else {
 1793     return 6 * NativeInstruction::instruction_size;
 1794   }
 1795 }
 1796 
 1797 //=============================================================================
 1798 
 1799 #ifndef PRODUCT
 1800 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1801   st->print("BREAKPOINT");

 1883     st->print("\n\t");
 1884     st->print("ldr  rscratch1, [guard]\n\t");
 1885     st->print("dmb ishld\n\t");
 1886     st->print("ldr  rscratch2, [rthread, #thread_disarmed_offset]\n\t");
 1887     st->print("cmp  rscratch1, rscratch2\n\t");
 1888     st->print("b.eq skip");
 1889     st->print("\n\t");
 1890     st->print("blr #nmethod_entry_barrier_stub\n\t");
 1891     st->print("b skip\n\t");
 1892     st->print("guard: int\n\t");
 1893     st->print("\n\t");
 1894     st->print("skip:\n\t");
 1895   }
 1896 }
 1897 #endif
 1898 
// Emit the C2 method prolog: patchable nop, optional class-init barrier,
// SVE predicate re-init, verified-entry frame setup, and the nmethod
// entry barrier for non-stub compilations.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->clinit_barrier_on_entry()) {
    // Divert to the wrong-method stub unless the holder class's
    // initialization barrier passes (see MacroAssembler::clinit_barrier).
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;

    __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
    __ bind(L_skip_barrier);
  }

  if (C->max_vector_size() > 0) {
    // Re-establish the SVE predicate register assumed by vector code.
    __ reinitialize_ptrue();
  }

  // Build the frame (verified_entry subsumes the stack bang and frame
  // construction; sp_inc of 0 means no scalarized-argument extension here)
  // and bind the label that MachVEPNode branches to.
  __ verified_entry(C, 0);
  __ bind(*_verified_entry);

  if (C->stub_function() == NULL) {
    // Regular nmethods (not runtime stubs) get the platform entry barrier.
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->nmethod_entry_barrier(&_masm);
  }

  if (VerifyStackAtCalls) {
    // Stack-depth verification is not implemented on AArch64.
    Unimplemented();
  }

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1943 






 1944 int MachPrologNode::reloc() const
 1945 {
 1946   return 0;
 1947 }
 1948 
 1949 //=============================================================================
 1950 
 1951 #ifndef PRODUCT
 1952 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1953   Compile* C = ra_->C;
 1954   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 1955 
 1956   st->print("# pop frame %d\n\t",framesize);
 1957 
 1958   if (framesize == 0) {
 1959     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
 1960   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
 1961     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
 1962     st->print("add  sp, sp, #%d\n\t", framesize);
 1963   } else {

 1967   }
 1968   if (VM_Version::use_rop_protection()) {
 1969     st->print("autia lr, rfp\n\t");
 1970     st->print("ldr zr, [lr]\n\t");
 1971   }
 1972 
 1973   if (do_polling() && C->is_method_compilation()) {
 1974     st->print("# test polling word\n\t");
 1975     st->print("ldr  rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
 1976     st->print("cmp  sp, rscratch1\n\t");
 1977     st->print("bhi #slow_path");
 1978   }
 1979 }
 1980 #endif
 1981 
// Emit the C2 method epilog: tear down the frame (with stack-repair
// support for scalarized inline-type callers), run the optional
// reserved-stack check, and poll the safepoint word on return.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);
  // Frame size in bytes (frame_slots counts 32-bit slots).
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // During scratch-emit sizing passes no safepoint stub may be
    // allocated, so poll against a dummy label in that case.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
  }
}
 2003 





 2004 int MachEpilogNode::reloc() const {
 2005   // Return number of relocatable values contained in this instruction.
 2006   return 1; // 1 for polling page.
 2007 }
 2008 
 2009 const Pipeline * MachEpilogNode::pipeline() const {
 2010   return MachNode::pipeline_class();
 2011 }
 2012 
 2013 //=============================================================================
 2014 
 2015 // Figure out which register class each belongs in: rc_int, rc_float or
 2016 // rc_stack.
 2017 enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 2018 
 2019 static enum RC rc_class(OptoReg::Name reg) {
 2020 
 2021   if (reg == OptoReg::Bad) {
 2022     return rc_bad;
 2023   }

 2289 
 2290   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2291   int reg    = ra_->get_encode(this);
 2292 
 2293   // This add will handle any 24-bit signed offset. 24 bits allows an
 2294   // 8 megabyte stack frame.
 2295   __ add(as_Register(reg), sp, offset);
 2296 }
 2297 
 2298 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 2299   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 2300   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2301 
 2302   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
 2303     return NativeInstruction::instruction_size;
 2304   } else {
 2305     return 2 * NativeInstruction::instruction_size;
 2306   }
 2307 }
 2308 
 2309 //=============================================================================
#ifndef PRODUCT
// Debug-only pseudo-assembly for the value-type entry point: an
// unverified entry performs the inline-cache klass check; a verified
// entry unpacks scalarized inline-type arguments.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachVEPNode");
  if (!_verified) {
    st->print_cr("\t load_class");
  } else {
    st->print_cr("\t unpack_inline_arg");
  }
}
#endif
 2321 
// Emit the value-type entry point: either the inline-cache check
// (unverified) or the scalarized-argument unpacking that falls through
// to the verified entry.
void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // NOTE(review): this uses MacroAssembler while the sibling emit()
  // methods use C2_MacroAssembler — confirm this is intentional.
  MacroAssembler _masm(&cbuf);

  if (!_verified) {
    // Inline-cache check; a miss diverts to the shared ic-miss stub.
    Label skip;
    __ cmp_klass(j_rarg0, rscratch2, rscratch1);
    __ br(Assembler::EQ, skip);
      __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
    __ bind(skip);

  } else {
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    __ b(*_verified_entry);
  }
}
 2342 
 2343 //=============================================================================
 2344 #ifndef PRODUCT
 2345 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2346 {
 2347   st->print_cr("# MachUEPNode");
 2348   if (UseCompressedClassPointers) {
 2349     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2350     if (CompressedKlassPointers::shift() != 0) {
 2351       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 2352     }
 2353   } else {
 2354    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2355   }
 2356   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
 2357   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
 2358 }
 2359 #endif
 2360 
 2361 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 2362 {
 2363   // This is the unverified entry point.
 2364   C2_MacroAssembler _masm(&cbuf);
 2365   Label skip;
 2366 
 2367   // UseCompressedClassPointers logic are inside cmp_klass
 2368   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
 2369 
 2370   // TODO
 2371   // can we avoid this skip and still use a reloc?
 2372   __ br(Assembler::EQ, skip);
 2373   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 2374   __ bind(skip);
 2375 }
 2376 





 2377 // REQUIRED EMIT CODE
 2378 
 2379 //=============================================================================
 2380 
 2381 // Emit exception handler code.
 2382 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 2383 {
 2384   // mov rscratch1 #exception_blob_entry_point
 2385   // br rscratch1
 2386   // Note that the code buffer's insts_mark is always relative to insts.
 2387   // That's why we must use the macroassembler to generate a handler.
 2388   C2_MacroAssembler _masm(&cbuf);
 2389   address base = __ start_a_stub(size_exception_handler());
 2390   if (base == NULL) {
 2391     ciEnv::current()->record_failure("CodeCache is full");
 2392     return 0;  // CodeBuffer::expand failed
 2393   }
 2394   int offset = __ offset();
 2395   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2396   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

 3847   %}
 3848 
  // Emit an inline-cache (dynamic) Java call to the resolved method.
  // Aborts the compile if the code cache cannot hold the call.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    C2_MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    } else if (Compile::current()->max_vector_size() > 0) {
      // Re-establish the SVE predicate register after the call.
      __ reinitialize_ptrue();
    }
  %}
 3860 
  // Post-call fixups, mainly for calls that return an inline type as
  // scalarized fields (Valhalla).
  enc_class aarch64_enc_call_epilog() %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
      if (!_method->signature()->returns_null_free_inline_type()) {
        // The last return value is not set by the callee but used to pass IsInit information to compiled code.
        // Search for the corresponding projection, get the register and emit code that initialized it.
        uint con = (tf()->range_cc()->cnt() - 1);
        for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
          ProjNode* proj = fast_out(i)->as_Proj();
          if (proj->_con == con) {
            // Set IsInit if r0 is non-null (a non-null value is returned buffered or scalarized)
            OptoReg::Name optoReg = ra_->get_reg_first(proj);
            VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
            // If the projection was allocated to a stack slot, compute the
            // flag into rscratch1 and spill it; otherwise set it directly.
            Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
            __ cmp(r0, zr);
            __ cset(toReg, Assembler::NE);
            if (reg->is_stack()) {
              int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
              __ str(toReg, Address(sp, st_off));
            }
            break;
          }
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // R0 either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // r0 &= (r0 & 1) - 1
        __ andr(rscratch1, r0, 0x1);
        __ sub(rscratch1, rscratch1, 0x1);
        __ andr(r0, r0, rscratch1);
      }
    }
  %}
 3901 
 3902   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3903     C2_MacroAssembler _masm(&cbuf);
 3904 
 3905     // some calls to generated routines (arraycopy code) are scheduled
 3906     // by C2 as runtime calls. if so we can call them using a br (they
 3907     // will be in a reachable segment) otherwise we have to use a blr
 3908     // which loads the absolute address into a register.
 3909     address entry = (address)$meth$$method;
 3910     CodeBlob *cb = CodeCache::find_blob(entry);
 3911     if (cb) {
 3912       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3913       if (call == NULL) {
 3914         ciEnv::current()->record_failure("CodeCache is full");
 3915         return;
 3916       }
 3917     } else {
 3918       Label retaddr;
 3919       __ adr(rscratch2, retaddr);

 3972 
 3973     assert_different_registers(oop, box, tmp, disp_hdr);
 3974 
 3975     // Load markWord from object into displaced_header.
 3976     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
 3977 
 3978     if (DiagnoseSyncOnValueBasedClasses != 0) {
 3979       __ load_klass(tmp, oop);
 3980       __ ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
 3981       __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
 3982       __ br(Assembler::NE, cont);
 3983     }
 3984 
 3985     // Check for existing monitor
 3986     __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
 3987 
 3988     if (!UseHeavyMonitors) {
 3989       // Set tmp to be (markWord of object | UNLOCK_VALUE).
 3990       __ orr(tmp, disp_hdr, markWord::unlocked_value);
 3991 
 3992       if (EnableValhalla) {
 3993         // Mask inline_type bit such that we go to the slow path if object is an inline type
 3994         __ andr(tmp, tmp, ~((int) markWord::inline_type_bit_in_place));
 3995       }
 3996 
 3997       // Initialize the box. (Must happen before we update the object mark!)
 3998       __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
 3999 
 4000       // Compare object markWord with an unlocked value (tmp) and if
 4001       // equal exchange the stack address of our box with object markWord.
 4002       // On failure disp_hdr contains the possibly locked markWord.
 4003       __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
 4004                  /*release*/ true, /*weak*/ false, disp_hdr);
 4005       __ br(Assembler::EQ, cont);
 4006 
 4007       assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 4008 
 4009       // If the compare-and-exchange succeeded, then we found an unlocked
 4010       // object, will have now locked it will continue at label cont
 4011 
 4012       __ bind(cas_failed);
 4013       // We did not see an unlocked object so try the fast recursive case.
 4014 
 4015       // Check if the owner is self by comparing the value in the
 4016       // markWord of object (disp_hdr) with the stack pointer.

// Load Long Constant
// Materializes an arbitrary 64-bit immediate into a general register.
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  // The mov-immediate encoding (defined earlier in this file) emits the
  // materialization sequence for the constant.
  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
 7736 
 7737 // Load Pointer Constant
 7738 
// Load an arbitrary (non-null) pointer constant into a register.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  // Higher cost than loadConL/loadConP0 — presumably reflects the longer
  // pointer materialization sequence; see aarch64_enc_mov_p.
  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}
 7752 
 7753 // Load Null Pointer Constant
 7754 
// Load the null pointer constant. Matched only for the zero pointer (immP0),
// which has a dedicated, cheaper encoding than the general loadConP above.
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}

 8930 %}
 8931 
 8932 // ============================================================================
 8933 // Cast/Convert Instructions
 8934 
 8935 instruct castX2P(iRegPNoSp dst, iRegL src) %{
 8936   match(Set dst (CastX2P src));
 8937 
 8938   ins_cost(INSN_COST);
 8939   format %{ "mov $dst, $src\t# long -> ptr" %}
 8940 
 8941   ins_encode %{
 8942     if ($dst$$reg != $src$$reg) {
 8943       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8944     }
 8945   %}
 8946 
 8947   ins_pipe(ialu_reg);
 8948 %}
 8949 
 8950 instruct castN2X(iRegLNoSp dst, iRegN src) %{
 8951   match(Set dst (CastP2X src));
 8952 
 8953   ins_cost(INSN_COST);
 8954   format %{ "mov $dst, $src\t# ptr -> long" %}
 8955 
 8956   ins_encode %{
 8957     if ($dst$$reg != $src$$reg) {
 8958       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8959     }
 8960   %}
 8961 
 8962   ins_pipe(ialu_reg);
 8963 %}
 8964 
 8965 instruct castP2X(iRegLNoSp dst, iRegP src) %{
 8966   match(Set dst (CastP2X src));
 8967 
 8968   ins_cost(INSN_COST);
 8969   format %{ "mov $dst, $src\t# ptr -> long" %}
 8970 
 8971   ins_encode %{
 8972     if ($dst$$reg != $src$$reg) {
 8973       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
 8974     }
 8975   %}
 8976 
 8977   ins_pipe(ialu_reg);
 8978 %}
 8979 
 8980 // Convert oop into int for vectors alignment masking
 8981 instruct convP2I(iRegINoSp dst, iRegP src) %{
 8982   match(Set dst (ConvL2I (CastP2X src)));
 8983 
 8984   ins_cost(INSN_COST);

15308 
15309   match(Set dst (MoveL2D src));
15310 
15311   effect(DEF dst, USE src);
15312 
15313   ins_cost(INSN_COST);
15314 
15315   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
15316 
15317   ins_encode %{
15318     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
15319   %}
15320 
15321   ins_pipe(fp_l2d);
15322 
15323 %}
15324 
15325 // ============================================================================
15326 // clearing of an array
15327 
// Zero an array: count in r11 (words), base in r10, fill value is the
// constant zero. Both inputs are clobbered.
instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray (Binary cnt base) zero));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    // zero_words signals failure by returning NULL (presumably when a
    // required stub/trampoline cannot be emitted); bail out of the
    // compilation rather than emit broken code.
    address tpc = __ zero_words($base$$Register, $cnt$$Register);
    if (tpc == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}
15346 
// Fill an array with an arbitrary value held in a register. Only selected
// when the ClearArray node is restricted to whole-word copies.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base, $val" %}

  ins_encode %{
    __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
15362 
// Zero an array whose length is a small compile-time constant (below the
// block-zeroing threshold) and which is not restricted to word copies.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
  predicate((uint64_t)n->in(2)->get_long()
            < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)
            && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    // NOTE(review): unlike the reg/reg variant above, the return value of
    // this zero_words overload is not checked here — confirm that the
    // constant-count path cannot fail with a full code cache.
    __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
15380 
15381 // ============================================================================
15382 // Overflow Math Instructions
15383 
15384 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
15385 %{
15386   match(Set cr (OverflowAddI op1 op2));
15387 

16677 
16678 // Call Runtime Instruction
16679 
// Direct call to a runtime leaf routine (no Java frame bookkeeping).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  // Shares the java-to-runtime encoding: a (possibly trampolined) direct
  // call when the target is in the code cache, otherwise a register call.
  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16694 
16695 // Call Runtime Instruction
16696 
16697 // entry point is null, target holds the address to call
instruct CallLeafNoFPIndirect(iRegP target)
%{
  // Only selected when the call node carries no static entry point;
  // the address to call is passed in $target.
  predicate(n->as_Call()->entry_point() == NULL);

  match(CallLeafNoFP target);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp indirect $target" %}

  ins_encode %{
    // Branch-with-link through the register holding the target address.
    __ blr($target$$Register);
  %}

  ins_pipe(pipe_class_call);
%}
16714 
// Direct counterpart of CallLeafNoFPIndirect: selected when the call node
// does carry a static entry point.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != NULL);

  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16731 
16732 instruct CallNativeDirect(method meth)
16733 %{
16734   match(CallNative);
16735 
16736   effect(USE meth);
16737 
16738   ins_cost(CALL_COST);
< prev index next >