src/hotspot/cpu/x86/x86_64.ad

  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {




  493   int offset = 13; // movq r10,#addr; callq (r10)
  494   if (this->ideal_Opcode() != Op_CallLeafVector) {
  495     offset += clear_avx_size();
  496   }
  497   return offset;
  498 }
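// Where these constants come from (illustrative x86-64 encodings, sizes only):
//   call rel32                      E8 <imm32>                 5 bytes (static)
//   movq rax, #IC + call rel32      48 B8 <imm64> + E8..       10 + 5 = 15 bytes (dynamic)
//   movq r10, #addr + callq r10     49 BA <imm64> + 41 FF D2   10 + 3 = 13 bytes (runtime)
// plus 3 bytes for vzeroupper (C5 F8 77) whenever clear_avx_size() is nonzero.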

  499 //
  500 // Compute padding required for nodes which need alignment
  501 //
  502 
  503 // The address of the call instruction needs to be 4-byte aligned to
  504 // ensure that it does not span a cache line so that it can be patched.
  505 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  506 {
  507   current_offset += clear_avx_size(); // skip vzeroupper
  508   current_offset += 1; // skip call opcode byte
  509   return align_up(current_offset, alignment_required()) - current_offset;
  510 }
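// A minimal sketch of the padding arithmetic, with hypothetical sizes
// (assuming alignment_required() == 4 and a 3-byte vzeroupper):
//   int disp_start = current_offset + 3 /* vzeroupper */ + 1 /* call opcode */;
//   int padding    = align_up(disp_start, 4) - disp_start;
// e.g. current_offset == 18 gives disp_start == 22 and 2 bytes of padding,
// so the call's 4-byte displacement lands on a patchable 4-byte boundary.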
  511 
  512 // The address of the call instruction needs to be 4-byte aligned to
  513 // ensure that it does not span a cache line so that it can be patched.
  514 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  515 {
  516   current_offset += clear_avx_size(); // skip vzeroupper
  517   current_offset += 11; // skip movq instruction + call opcode byte
  518   return align_up(current_offset, alignment_required()) - current_offset;

  689     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  690     if (PreserveFramePointer) {
  691       st->print("\n\t");
  692       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  693       if (framesize > 0) {
  694         st->print("\n\t");
  695         st->print("addq    rbp, #%d", framesize);
  696       }
  697     }
  698   }
  699 
  700   if (VerifyStackAtCalls) {
  701     st->print("\n\t");
  702     framesize -= wordSize;
  703     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  704 #ifdef ASSERT
  705     st->print("\n\t");
  706     st->print("# stack alignment check");
  707 #endif
  708   }
  709   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  710     st->print("\n\t");
  711     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  712     st->print("\n\t");
  713     st->print("je      fast_entry\t");
  714     st->print("\n\t");
  715     st->print("call    #nmethod_entry_barrier_stub\t");
  716     st->print("\n\tfast_entry:");
  717   }
  718   st->cr();
  719 }
  720 #endif
  721 
  722 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  723   Compile* C = ra_->C;
  724   C2_MacroAssembler _masm(&cbuf);
  725 
  726   int framesize = C->output()->frame_size_in_bytes();
  727   int bangsize = C->output()->bang_size_in_bytes();
  728 
  729   if (C->clinit_barrier_on_entry()) {
  730     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  731     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  732 
  733     Label L_skip_barrier;
  734     Register klass = rscratch1;
  735 
  736     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  737     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  738 
  739     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  740 
  741     __ bind(L_skip_barrier);

  742   }
  743 
  744   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);


  745 
  746   C->output()->set_frame_complete(cbuf.insts_size());
  747 
  748   if (C->has_mach_constant_base_node()) {
  749     // NOTE: We set the table base offset here because users might be
  750     // emitted before MachConstantBaseNode.
  751     ConstantTable& constant_table = C->output()->constant_table();
  752     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  753   }
  754 }
  755 
  756 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  757 {
  758   return MachNode::size(ra_); // too many variables; just compute it
  759                               // the hard way
  760 }
  761 
  762 int MachPrologNode::reloc() const
  763 {
  764   return 0; // a large enough number
  765 }
  766 
  767 //=============================================================================
  768 #ifndef PRODUCT
  769 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  770 {
  771   Compile* C = ra_->C;
  772   if (generate_vzeroupper(C)) {
  773     st->print("vzeroupper");
  774     st->cr(); st->print("\t");
  775   }
  776 
  777   int framesize = C->output()->frame_size_in_bytes();
  778   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  779   // Remove word for return adr already pushed
  780   // and RBP
  781   framesize -= 2*wordSize;

  789   if (do_polling() && C->is_method_compilation()) {
  790     st->print("\t");
  791     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  792                  "ja      #safepoint_stub\t"
  793                  "# Safepoint: poll for GC");
  794   }
  795 }
  796 #endif
  797 
  798 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  799 {
  800   Compile* C = ra_->C;
  801   MacroAssembler _masm(&cbuf);
  802 
  803   if (generate_vzeroupper(C)) {
  804     // Clear upper bits of YMM registers when current compiled code uses
  805     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  806     __ vzeroupper();
  807   }
  808 
  809   int framesize = C->output()->frame_size_in_bytes();
  810   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  811   // Remove word for return adr already pushed
  812   // and RBP
  813   framesize -= 2*wordSize;
  814 
  815   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  816 
  817   if (framesize) {
  818     __ addq(rsp, framesize);
  819   }
  820 
  821   __ popq(rbp);
  822 
  823   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  824     __ reserved_stack_check();
  825   }
  826 
  827   if (do_polling() && C->is_method_compilation()) {
  828     MacroAssembler _masm(&cbuf);
  829     Label dummy_label;
  830     Label* code_stub = &dummy_label;
  831     if (!C->output()->in_scratch_emit_size()) {
  832       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  833       C->output()->add_stub(stub);
  834       code_stub = &stub->entry();
  835     }
  836     __ relocate(relocInfo::poll_return_type);
  837     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  838   }
  839 }
  840 
  841 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
  842 {
  843   return MachNode::size(ra_); // too many variables; just compute it
  844                               // the hard way
  845 }
  846 
  847 int MachEpilogNode::reloc() const
  848 {
  849   return 2; // a large enough number
  850 }
  851 
  852 const Pipeline* MachEpilogNode::pipeline() const
  853 {
  854   return MachNode::pipeline_class();
  855 }
  856 
  857 //=============================================================================
  858 
  859 enum RC {
  860   rc_bad,
  861   rc_int,
  862   rc_kreg,
  863   rc_float,
  864   rc_stack
  865 };
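// The rc_class() helper that maps an OptoReg into one of these classes is
// elided from this excerpt; a minimal sketch of its assumed shape:
//   static enum RC rc_class(OptoReg::Name reg) {
//     if (reg == OptoReg::Bad)    return rc_bad;
//     if (OptoReg::is_stack(reg)) return rc_stack;
//     VMReg r = OptoReg::as_VMReg(reg);
//     if (r->is_Register())       return rc_int;   // general-purpose register
//     if (r->is_KRegister())      return rc_kreg;  // AVX-512 opmask register
//     return rc_float;                             // otherwise an XMM register
//   }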
  866 

  953                 src_offset, dst_offset);
  954       break;
  955     case Op_VecZ:
  956       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
  957                 "vmovdqu xmm0, [rsp + #%d]\n\t"
  958                 "vmovdqu [rsp + #%d], xmm0\n\t"
  959                 "vmovdqu xmm0, [rsp - #64]",
  960                 src_offset, dst_offset);
  961       break;
  962     default:
  963       ShouldNotReachHere();
  964     }
  965 #endif
  966   }
  967 }
  968 
  969 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
  970                                        PhaseRegAlloc* ra_,
  971                                        bool do_size,
  972                                        outputStream* st) const {
  973   assert(cbuf != NULL || st  != NULL, "sanity");
  974   // Get registers to move
  975   OptoReg::Name src_second = ra_->get_reg_second(in(1));
  976   OptoReg::Name src_first = ra_->get_reg_first(in(1));
  977   OptoReg::Name dst_second = ra_->get_reg_second(this);
  978   OptoReg::Name dst_first = ra_->get_reg_first(this);
  979 
  980   enum RC src_second_rc = rc_class(src_second);
  981   enum RC src_first_rc = rc_class(src_first);
  982   enum RC dst_second_rc = rc_class(dst_second);
  983   enum RC dst_first_rc = rc_class(dst_first);
  984 
  985   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
  986          "must move at least 1 register" );
  987 
  988   if (src_first == dst_first && src_second == dst_second) {
  989     // Self copy, no move
  990     return 0;
  991   }
  992   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
  993     uint ireg = ideal_reg();
  994     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
  995     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
  996     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
  997       // mem -> mem
  998       int src_offset = ra_->reg2offset(src_first);
  999       int dst_offset = ra_->reg2offset(dst_first);
 1000       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1001     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1002       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1003     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1004       int stack_offset = ra_->reg2offset(dst_first);
 1005       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1006     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 1007       int stack_offset = ra_->reg2offset(src_first);
 1008       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1009     } else {
 1010       ShouldNotReachHere();
 1011     }
 1012     return 0;

 1411          st->print("kmovq   %s, %s\t# spill",
 1412                      Matcher::regName[dst_first],
 1413                      Matcher::regName[src_first]);
 1414 #endif
 1415         }
 1416       }
 1417       return 0;
 1418     } else if (dst_first_rc == rc_float) {
 1419       assert(false, "Illegal spill");
 1420       return 0;
 1421     }
 1422   }
 1423 
 1424   assert(0," foo ");
 1425   Unimplemented();
 1426   return 0;
 1427 }
 1428 
 1429 #ifndef PRODUCT
 1430 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1431   implementation(NULL, ra_, false, st);
 1432 }
 1433 #endif
 1434 
 1435 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1436   implementation(&cbuf, ra_, false, NULL);
 1437 }
 1438 
 1439 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1440   return MachNode::size(ra_);
 1441 }
 1442 
 1443 //=============================================================================
 1444 #ifndef PRODUCT
 1445 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1446 {
 1447   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1448   int reg = ra_->get_reg_first(this);
 1449   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1450             Matcher::regName[reg], offset);
 1451 }
 1452 #endif
 1453 
 1454 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1455 {
 1456   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1457   int reg = ra_->get_encode(this);
 1458 
 1459   MacroAssembler masm(&cbuf);
 1460   masm.lea(as_Register(reg), Address(rsp, offset));
 1461 }
 1462 
 1463 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1464 {
 1465   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1466   return (offset < 0x80) ? 5 : 8; // REX
 1467 }
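// The 5-vs-8 split follows from the lea encoding with an rsp base
// (illustrative bytes; a REX prefix is present either way):
//   48 8D 44 24 10            leaq rax, [rsp + 0x10]   -> 5 bytes (disp8)
//   48 8D 84 24 80 00 00 00   leaq rax, [rsp + 0x80]   -> 8 bytes (disp32)
// Offsets >= 0x80 no longer fit in a sign-extended byte, forcing disp32.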
 1468 
 1469 //=============================================================================
 1470 #ifndef PRODUCT
 1471 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1472 {
 1473   if (UseCompressedClassPointers) {
 1474     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1475     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1476     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1477   } else {
 1478     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1479                  "# Inline cache check");
 1480   }
 1481   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1482   st->print_cr("\tnop\t# nops to align entry point");
 1483 }
 1484 #endif
 1485 
 1486 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1487 {
 1488   MacroAssembler masm(&cbuf);

 1491     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1492     masm.cmpptr(rax, rscratch1);
 1493   } else {
 1494     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1495   }
 1496 
 1497   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1498 
 1499   /* WARNING: these NOPs are critical so that the verified entry point is
 1500      properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 1501   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1502   if (OptoBreakpoint) {
 1503     // Leave space for int3
 1504     nops_cnt -= 1;
 1505   }
 1506   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1507   if (nops_cnt > 0)
 1508     masm.nop(nops_cnt);
 1509 }
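// The nop count as a stand-alone sketch (hypothetical sizes):
//   int emitted  = cbuf.insts_size() - insts_size;  // e.g. 6 bytes of UEP code
//   int nops_cnt = (4 - (emitted & 0x3)) & 0x3;     // here (4 - 2) & 3 == 2
// Two nops would pad the verified entry to the next 4-byte boundary;
// with OptoBreakpoint, one of those bytes is left for the int3 instead.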
 1510 
 1511 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1512 {
 1513   return MachNode::size(ra_); // too many variables; just compute it
 1514                               // the hard way
 1515 }
 1516 
 1517 
 1518 //=============================================================================
 1519 
 1520 bool Matcher::supports_vector_calling_convention(void) {
 1521   if (EnableVectorSupport && UseVectorStubs) {
 1522     return true;
 1523   }
 1524   return false;
 1525 }
 1526 
 1527 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1528   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1529   int lo = XMM0_num;
 1530   int hi = XMM0b_num;
 1531   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1532   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1533   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1534   return OptoRegPair(hi, lo);
 1535 }
 1536 
 1537 // Is this branch offset short enough that a short branch can be used?

 1767     __ bind(normal);
 1768     __ cdqq();
 1769 
 1770     // idivq (note: must be emitted by the user of this rule)
 1771     // <done>
 1772     __ idivq($div$$Register);
 1773     __ bind(done);
 1774   %}
 1775 
 1776   enc_class enc_PartialSubtypeCheck()
 1777   %{
 1778     Register Rrdi = as_Register(RDI_enc); // result register
 1779     Register Rrax = as_Register(RAX_enc); // super class
 1780     Register Rrcx = as_Register(RCX_enc); // killed
 1781     Register Rrsi = as_Register(RSI_enc); // sub class
 1782     Label miss;
 1783     const bool set_cond_codes = true;
 1784 
 1785     MacroAssembler _masm(&cbuf);
 1786     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 1787                                      NULL, &miss,
 1788                                      /*set_cond_codes:*/ true);
 1789     if ($primary) {
 1790       __ xorptr(Rrdi, Rrdi);
 1791     }
 1792     __ bind(miss);
 1793   %}
 1794 
 1795   enc_class clear_avx %{
 1796     debug_only(int off0 = cbuf.insts_size());
 1797     if (generate_vzeroupper(Compile::current())) {
 1798       // Clear upper bits of YMM registers when the current compiled code
 1799       // uses wide vectors, to avoid the AVX <-> SSE transition penalty
 1800       // during the call.
 1801       MacroAssembler _masm(&cbuf);
 1802       __ vzeroupper();
 1803     }
 1804     debug_only(int off1 = cbuf.insts_size());
 1805     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 1806   %}
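// The off1 - off0 assert ties this encoding to clear_avx_size(); vzeroupper
// encodes as C5 F8 77, so the size helper is presumably just:
//   static int clear_avx_size() {
//     return generate_vzeroupper(Compile::current()) ? 3 : 0; // 3-byte vzeroupper
//   }
// (a sketch consistent with the assert; the real definition is elided here).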
 1807 

 1824       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 1825     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 1826       // The NOP here is purely to ensure that eliding a call to
 1827       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 1828       __ addr_nop_5();
 1829       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 1830     } else {
 1831       int method_index = resolved_method_index(cbuf);
 1832       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1833                                                   : static_call_Relocation::spec(method_index);
 1834       address mark = __ pc();
 1835       int call_offset = __ offset();
 1836       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 1837       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1838         // Calls of the same statically bound method can share
 1839         // a stub to the interpreter.
 1840         cbuf.shared_stub_to_interp_for(_method, call_offset);
 1841       } else {
 1842         // Emit stubs for static call.
 1843         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1844         if (stub == NULL) {
 1845           ciEnv::current()->record_failure("CodeCache is full");
 1846           return;
 1847         }
 1848       }
 1849     }
 1850     __ post_call_nop();
 1851   %}
 1852 
 1853   enc_class Java_Dynamic_Call(method meth) %{
 1854     MacroAssembler _masm(&cbuf);
 1855     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1856     __ post_call_nop();
 1857   %}
 1858 
 1859 %}
 1860 
 1861 
 1862 
 1863 //----------FRAME--------------------------------------------------------------
 1864 // Definition of frame structure and management information.

 2162 operand immI_64()
 2163 %{
 2164   predicate( n->get_int() == 64 );
 2165   match(ConI);
 2166 
 2167   op_cost(0);
 2168   format %{ %}
 2169   interface(CONST_INTER);
 2170 %}
 2171 
 2172 // Pointer Immediate
 2173 operand immP()
 2174 %{
 2175   match(ConP);
 2176 
 2177   op_cost(10);
 2178   format %{ %}
 2179   interface(CONST_INTER);
 2180 %}
 2181 
 2182 // NULL Pointer Immediate
 2183 operand immP0()
 2184 %{
 2185   predicate(n->get_ptr() == 0);
 2186   match(ConP);
 2187 
 2188   op_cost(5);
 2189   format %{ %}
 2190   interface(CONST_INTER);
 2191 %}
 2192 
 2193 // Pointer Immediate
 2194 operand immN() %{
 2195   match(ConN);
 2196 
 2197   op_cost(10);
 2198   format %{ %}
 2199   interface(CONST_INTER);
 2200 %}
 2201 
 2202 operand immNKlass() %{
 2203   match(ConNKlass);
 2204 
 2205   op_cost(10);
 2206   format %{ %}
 2207   interface(CONST_INTER);
 2208 %}
 2209 
 2210 // NULL Pointer Immediate
 2211 operand immN0() %{
 2212   predicate(n->get_narrowcon() == 0);
 2213   match(ConN);
 2214 
 2215   op_cost(5);
 2216   format %{ %}
 2217   interface(CONST_INTER);
 2218 %}
 2219 
 2220 operand immP31()
 2221 %{
 2222   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 2223             && (n->get_ptr() >> 31) == 0);
 2224   match(ConP);
 2225 
 2226   op_cost(5);
 2227   format %{ %}
 2228   interface(CONST_INTER);
 2229 %}
 2230 

 3102   %}
 3103 %}
 3104 
 3105 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3106 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3107 %{
 3108   constraint(ALLOC_IN_RC(ptr_reg));
 3109   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3110   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3111 
 3112   op_cost(10);
 3113   format %{"[$reg + $off + $idx << $scale]" %}
 3114   interface(MEMORY_INTER) %{
 3115     base($reg);
 3116     index($idx);
 3117     scale($scale);
 3118     disp($off);
 3119   %}
 3120 %}
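// A C-level picture of the shape this operand matches (illustrative only):
//   int64_t* base; int32_t idx;  // idx proven non-negative by the predicate
//   v = *(int64_t*)((char*)base + off + ((int64_t)idx << scale));
// The non-negative requirement is what lets ConvI2L fold into the address:
// for idx >= 0, sign- and zero-extension agree, so the hardware's 64-bit
// effective-address computation matches the ideal-graph semantics.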
 3121 
 3122 // Indirect Narrow Oop Plus Offset Operand
 3123 // Note: the x86 architecture doesn't support "scale * index + offset" without
 3124 // a base, so we can't free r12 even with CompressedOops::base() == NULL.
 3125 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 3126   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3127   constraint(ALLOC_IN_RC(ptr_reg));
 3128   match(AddP (DecodeN reg) off);
 3129 
 3130   op_cost(10);
 3131   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3132   interface(MEMORY_INTER) %{
 3133     base(0xc); // R12
 3134     index($reg);
 3135     scale(0x3);
 3136     disp($off);
 3137   %}
 3138 %}
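// The decode is folded straight into the addressing mode: with R12 holding
// the heap base (0 in the zero-based case noted above), the arithmetic is
//   address = r12 + ((uint64_t)narrow_oop << 3) + off
// i.e. a single [r12 + reg*8 + disp32] operand, so no separate DecodeN
// instruction needs to be emitted for this access.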
 3139 
 3140 // Indirect Memory Operand
 3141 operand indirectNarrow(rRegN reg)
 3142 %{
 3143   predicate(CompressedOops::shift() == 0);
 3144   constraint(ALLOC_IN_RC(ptr_reg));

 3448     equal(0x4, "e");
 3449     not_equal(0x5, "ne");
 3450     less(0x2, "b");
 3451     greater_equal(0x3, "ae");
 3452     less_equal(0x6, "be");
 3453     greater(0x7, "a");
 3454     overflow(0x0, "o");
 3455     no_overflow(0x1, "no");
 3456   %}
 3457 %}
 3458 
 3459 //----------OPERAND CLASSES----------------------------------------------------
 3460 // Operand Classes are groups of operands that are used to simplify
 3461 // instruction definitions by not requiring the AD writer to specify separate
 3462 // instructions for every form of operand when the instruction accepts
 3463 // multiple operand types with the same basic encoding and format.  The classic
 3464 // case of this is memory operands.
 3465 
 3466 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3467                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3468                indCompressedOopOffset,
 3469                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3470                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3471                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3472 
 3473 //----------PIPELINE-----------------------------------------------------------
 3474 // Rules which define the behavior of the target architecture's pipeline.
 3475 pipeline %{
 3476 
 3477 //----------ATTRIBUTES---------------------------------------------------------
 3478 attributes %{
 3479   variable_size_instructions;        // Variable-size instructions
 3480   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3481   instruction_unit_size = 1;         // An instruction is 1 byte long
 3482   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3483   instruction_fetch_units = 1;       // of 16 bytes
 3484 
 3485   // List of nop instructions
 3486   nops( MachNop );
 3487 %}
 3488 

 4885   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 4886   ins_encode %{
 4887     __ movl($dst$$Register, $src$$constant);
 4888   %}
 4889   ins_pipe(ialu_reg);
 4890 %}
 4891 
 4892 instruct loadConF(regF dst, immF con) %{
 4893   match(Set dst con);
 4894   ins_cost(125);
 4895   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 4896   ins_encode %{
 4897     __ movflt($dst$$XMMRegister, $constantaddress($con));
 4898   %}
 4899   ins_pipe(pipe_slow);
 4900 %}
 4901 
 4902 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 4903   match(Set dst src);
 4904   effect(KILL cr);
 4905   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
 4906   ins_encode %{
 4907     __ xorq($dst$$Register, $dst$$Register);
 4908   %}
 4909   ins_pipe(ialu_reg);
 4910 %}
 4911 
 4912 instruct loadConN(rRegN dst, immN src) %{
 4913   match(Set dst src);
 4914 
 4915   ins_cost(125);
 4916   format %{ "movl    $dst, $src\t# compressed ptr" %}
 4917   ins_encode %{
 4918     address con = (address)$src$$constant;
 4919     if (con == NULL) {
 4920       ShouldNotReachHere();
 4921     } else {
 4922       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 4923     }
 4924   %}
 4925   ins_pipe(ialu_reg_fat); // XXX
 4926 %}
 4927 
 4928 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 4929   match(Set dst src);
 4930 
 4931   ins_cost(125);
 4932   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 4933   ins_encode %{
 4934     address con = (address)$src$$constant;
 4935     if (con == NULL) {
 4936       ShouldNotReachHere();
 4937     } else {
 4938       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 4939     }
 4940   %}
 4941   ins_pipe(ialu_reg_fat); // XXX
 4942 %}
 4943 
 4944 instruct loadConF0(regF dst, immF0 src)
 4945 %{
 4946   match(Set dst src);
 4947   ins_cost(100);
 4948 
 4949   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 4950   ins_encode %{
 4951     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 4952   %}
 4953   ins_pipe(pipe_slow);
 4954 %}
 4955 

 5141   %}
 5142   ins_pipe(ialu_mem_reg); // XXX
 5143 %}
 5144 
 5145 // Store Pointer
 5146 instruct storeP(memory mem, any_RegP src)
 5147 %{
 5148   predicate(n->as_Store()->barrier_data() == 0);
 5149   match(Set mem (StoreP mem src));
 5150 
 5151   ins_cost(125); // XXX
 5152   format %{ "movq    $mem, $src\t# ptr" %}
 5153   ins_encode %{
 5154     __ movq($mem$$Address, $src$$Register);
 5155   %}
 5156   ins_pipe(ialu_mem_reg);
 5157 %}
 5158 
 5159 instruct storeImmP0(memory mem, immP0 zero)
 5160 %{
 5161   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
 5162   match(Set mem (StoreP mem zero));
 5163 
 5164   ins_cost(125); // XXX
 5165   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 5166   ins_encode %{
 5167     __ movq($mem$$Address, r12);
 5168   %}
 5169   ins_pipe(ialu_mem_reg);
 5170 %}
 5171 
 5172 // Store NULL Pointer, mark word, or other simple pointer constant.
 5173 instruct storeImmP(memory mem, immP31 src)
 5174 %{
 5175   predicate(n->as_Store()->barrier_data() == 0);
 5176   match(Set mem (StoreP mem src));
 5177 
 5178   ins_cost(150); // XXX
 5179   format %{ "movq    $mem, $src\t# ptr" %}
 5180   ins_encode %{
 5181     __ movq($mem$$Address, $src$$constant);
 5182   %}
 5183   ins_pipe(ialu_mem_imm);
 5184 %}
 5185 
 5186 // Store Compressed Pointer
 5187 instruct storeN(memory mem, rRegN src)
 5188 %{
 5189   match(Set mem (StoreN mem src));
 5190 
 5191   ins_cost(125); // XXX
 5192   format %{ "movl    $mem, $src\t# compressed ptr" %}
 5193   ins_encode %{
 5194     __ movl($mem$$Address, $src$$Register);
 5195   %}
 5196   ins_pipe(ialu_mem_reg);
 5197 %}
 5198 
 5199 instruct storeNKlass(memory mem, rRegN src)
 5200 %{
 5201   match(Set mem (StoreNKlass mem src));
 5202 
 5203   ins_cost(125); // XXX
 5204   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 5205   ins_encode %{
 5206     __ movl($mem$$Address, $src$$Register);
 5207   %}
 5208   ins_pipe(ialu_mem_reg);
 5209 %}
 5210 
 5211 instruct storeImmN0(memory mem, immN0 zero)
 5212 %{
 5213   predicate(CompressedOops::base() == NULL);
 5214   match(Set mem (StoreN mem zero));
 5215 
 5216   ins_cost(125); // XXX
 5217   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 5218   ins_encode %{
 5219     __ movl($mem$$Address, r12);
 5220   %}
 5221   ins_pipe(ialu_mem_reg);
 5222 %}
 5223 
 5224 instruct storeImmN(memory mem, immN src)
 5225 %{
 5226   match(Set mem (StoreN mem src));
 5227 
 5228   ins_cost(150); // XXX
 5229   format %{ "movl    $mem, $src\t# compressed ptr" %}
 5230   ins_encode %{
 5231     address con = (address)$src$$constant;
 5232     if (con == NULL) {
 5233       __ movl($mem$$Address, 0);
 5234     } else {
 5235       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 5236     }
 5237   %}
 5238   ins_pipe(ialu_mem_imm);
 5239 %}
 5240 
 5241 instruct storeImmNKlass(memory mem, immNKlass src)
 5242 %{
 5243   match(Set mem (StoreNKlass mem src));
 5244 
 5245   ins_cost(150); // XXX
 5246   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 5247   ins_encode %{
 5248     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 5249   %}
 5250   ins_pipe(ialu_mem_imm);
 5251 %}
 5252 
 5253 // Store Integer Immediate
 5254 instruct storeImmI0(memory mem, immI_0 zero)
 5255 %{
 5256   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 5257   match(Set mem (StoreI mem zero));
 5258 
 5259   ins_cost(125); // XXX
 5260   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 5261   ins_encode %{
 5262     __ movl($mem$$Address, r12);
 5263   %}
 5264   ins_pipe(ialu_mem_reg);
 5265 %}
 5266 
 5267 instruct storeImmI(memory mem, immI src)
 5268 %{
 5269   match(Set mem (StoreI mem src));
 5270 
 5271   ins_cost(150);
 5272   format %{ "movl    $mem, $src\t# int" %}
 5273   ins_encode %{
 5274     __ movl($mem$$Address, $src$$constant);
 5275   %}
 5276   ins_pipe(ialu_mem_imm);
 5277 %}
 5278 
 5279 // Store Long Immediate
 5280 instruct storeImmL0(memory mem, immL0 zero)
 5281 %{
 5282   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 5283   match(Set mem (StoreL mem zero));
 5284 
 5285   ins_cost(125); // XXX
 5286   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 5287   ins_encode %{
 5288     __ movq($mem$$Address, r12);
 5289   %}
 5290   ins_pipe(ialu_mem_reg);
 5291 %}
 5292 
 5293 instruct storeImmL(memory mem, immL32 src)
 5294 %{
 5295   match(Set mem (StoreL mem src));
 5296 
 5297   ins_cost(150);
 5298   format %{ "movq    $mem, $src\t# long" %}
 5299   ins_encode %{
 5300     __ movq($mem$$Address, $src$$constant);
 5301   %}
 5302   ins_pipe(ialu_mem_imm);
 5303 %}
 5304 
 5305 // Store Short/Char Immediate
 5306 instruct storeImmC0(memory mem, immI_0 zero)
 5307 %{
 5308   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 5309   match(Set mem (StoreC mem zero));
 5310 
 5311   ins_cost(125); // XXX
 5312   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 5313   ins_encode %{
 5314     __ movw($mem$$Address, r12);
 5315   %}
 5316   ins_pipe(ialu_mem_reg);
 5317 %}
 5318 
 5319 instruct storeImmI16(memory mem, immI16 src)
 5320 %{
 5321   predicate(UseStoreImmI16);
 5322   match(Set mem (StoreC mem src));
 5323 
 5324   ins_cost(150);
 5325   format %{ "movw    $mem, $src\t# short/char" %}
 5326   ins_encode %{
 5327     __ movw($mem$$Address, $src$$constant);
 5328   %}
 5329   ins_pipe(ialu_mem_imm);
 5330 %}
 5331 
 5332 // Store Byte Immediate
 5333 instruct storeImmB0(memory mem, immI_0 zero)
 5334 %{
 5335   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 5336   match(Set mem (StoreB mem zero));
 5337 
 5338   ins_cost(125); // XXX
 5339   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 5340   ins_encode %{
 5341     __ movb($mem$$Address, r12);
 5342   %}
 5343   ins_pipe(ialu_mem_reg);
 5344 %}
 5345 
 5346 instruct storeImmB(memory mem, immI8 src)
 5347 %{
 5348   match(Set mem (StoreB mem src));
 5349 
 5350   ins_cost(150); // XXX
 5351   format %{ "movb    $mem, $src\t# byte" %}
 5352   ins_encode %{
 5353     __ movb($mem$$Address, $src$$constant);
 5354   %}
 5355   ins_pipe(ialu_mem_imm);
 5356 %}
 5357 
 5358 // Store CMS card-mark Immediate
 5359 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 5360 %{
 5361   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 5362   match(Set mem (StoreCM mem zero));
 5363 
 5364   ins_cost(125); // XXX
 5365   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 5366   ins_encode %{
 5367     __ movb($mem$$Address, r12);
 5368   %}
 5369   ins_pipe(ialu_mem_reg);
 5370 %}
 5371 
 5372 instruct storeImmCM0(memory mem, immI_0 src)
 5373 %{
 5374   match(Set mem (StoreCM mem src));
 5375 
 5376   ins_cost(150); // XXX
 5377   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 5378   ins_encode %{
 5379     __ movb($mem$$Address, $src$$constant);
 5380   %}
 5381   ins_pipe(ialu_mem_imm);
 5382 %}
 5383 
 5384 // Store Float
 5385 instruct storeF(memory mem, regF src)
 5386 %{
 5387   match(Set mem (StoreF mem src));
 5388 
 5389   ins_cost(95); // XXX
 5390   format %{ "movss   $mem, $src\t# float" %}
 5391   ins_encode %{
 5392     __ movflt($mem$$Address, $src$$XMMRegister);
 5393   %}
 5394   ins_pipe(pipe_slow); // XXX
 5395 %}
 5396 
 5397 // Store immediate Float value (it is faster than store from XMM register)
 5398 instruct storeF0(memory mem, immF0 zero)
 5399 %{
 5400   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 5401   match(Set mem (StoreF mem zero));
 5402 
 5403   ins_cost(25); // XXX
 5404   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 5405   ins_encode %{
 5406     __ movl($mem$$Address, r12);
 5407   %}
 5408   ins_pipe(ialu_mem_reg);
 5409 %}
 5410 
 5411 instruct storeF_imm(memory mem, immF src)
 5412 %{
 5413   match(Set mem (StoreF mem src));
 5414 
 5415   ins_cost(50);
 5416   format %{ "movl    $mem, $src\t# float" %}
 5417   ins_encode %{
 5418     __ movl($mem$$Address, jint_cast($src$$constant));
 5419   %}
 5420   ins_pipe(ialu_mem_imm);
 5421 %}
 5422 
 5423 // Store Double
 5424 instruct storeD(memory mem, regD src)
 5425 %{
 5426   match(Set mem (StoreD mem src));
 5427 
 5428   ins_cost(95); // XXX
 5429   format %{ "movsd   $mem, $src\t# double" %}
 5430   ins_encode %{
 5431     __ movdbl($mem$$Address, $src$$XMMRegister);
 5432   %}
 5433   ins_pipe(pipe_slow); // XXX
 5434 %}
 5435 
 5436 // Store immediate double 0.0 (it is faster than store from XMM register)
 5437 instruct storeD0_imm(memory mem, immD0 src)
 5438 %{
 5439   predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
 5440   match(Set mem (StoreD mem src));
 5441 
 5442   ins_cost(50);
 5443   format %{ "movq    $mem, $src\t# double 0." %}
 5444   ins_encode %{
 5445     __ movq($mem$$Address, $src$$constant);
 5446   %}
 5447   ins_pipe(ialu_mem_imm);
 5448 %}
 5449 
 5450 instruct storeD0(memory mem, immD0 zero)
 5451 %{
 5452   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 5453   match(Set mem (StoreD mem zero));
 5454 
 5455   ins_cost(25); // XXX
 5456   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 5457   ins_encode %{
 5458     __ movq($mem$$Address, r12);
 5459   %}
 5460   ins_pipe(ialu_mem_reg);
 5461 %}
 5462 
 5463 instruct storeSSI(stackSlotI dst, rRegI src)
 5464 %{
 5465   match(Set dst src);
 5466 
 5467   ins_cost(100);
 5468   format %{ "movl    $dst, $src\t# int stk" %}
 5469   ins_encode %{
 5470     __ movl($dst$$Address, $src$$Register);
 5471   %}
 5472   ins_pipe( ialu_mem_reg );

 5979   format %{ "MEMBAR-storestore (empty encoding)" %}
 5980   ins_encode( );
 5981   ins_pipe(empty);
 5982 %}
 5983 
 5984 //----------Move Instructions--------------------------------------------------
 5985 
 5986 instruct castX2P(rRegP dst, rRegL src)
 5987 %{
 5988   match(Set dst (CastX2P src));
 5989 
 5990   format %{ "movq    $dst, $src\t# long->ptr" %}
 5991   ins_encode %{
 5992     if ($dst$$reg != $src$$reg) {
 5993       __ movptr($dst$$Register, $src$$Register);
 5994     }
 5995   %}
 5996   ins_pipe(ialu_reg_reg); // XXX
 5997 %}
 5998 
 5999 instruct castP2X(rRegL dst, rRegP src)
 6000 %{
 6001   match(Set dst (CastP2X src));
 6002 
 6003   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6004   ins_encode %{
 6005     if ($dst$$reg != $src$$reg) {
 6006       __ movptr($dst$$Register, $src$$Register);
 6007     }
 6008   %}
 6009   ins_pipe(ialu_reg_reg); // XXX
 6010 %}
 6011 
 6012 // Convert oop into int for vectors alignment masking
 6013 instruct convP2I(rRegI dst, rRegP src)
 6014 %{
 6015   match(Set dst (ConvL2I (CastP2X src)));
 6016 
 6017   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6018   ins_encode %{

10520   effect(DEF dst, USE src);
10521   ins_cost(100);
10522   format %{ "movd    $dst,$src\t# MoveI2F" %}
10523   ins_encode %{
10524     __ movdl($dst$$XMMRegister, $src$$Register);
10525   %}
10526   ins_pipe( pipe_slow );
10527 %}
10528 
10529 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10530   match(Set dst (MoveL2D src));
10531   effect(DEF dst, USE src);
10532   ins_cost(100);
10533   format %{ "movd    $dst,$src\t# MoveL2D" %}
10534   ins_encode %{
10535      __ movdq($dst$$XMMRegister, $src$$Register);
10536   %}
10537   ins_pipe( pipe_slow );
10538 %}
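// Note: movdl is the 32-bit GPR->XMM move (movd) and movdq the 64-bit one
// (movq, i.e. the REX.W form); both transfer directly between the integer
// and vector register files without a trip through memory.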
10539 

10540 // Fast clearing of an array
10541 // Small ClearArray non-AVX512.
10542 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10543                   Universe dummy, rFlagsReg cr)
10544 %{
10545   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
10546   match(Set dummy (ClearArray cnt base));
10547   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































10548 
10549   format %{ $$template
10550     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10551     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10552     $$emit$$"jg      LARGE\n\t"
10553     $$emit$$"dec     rcx\n\t"
10554     $$emit$$"js      DONE\t# Zero length\n\t"
10555     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10556     $$emit$$"dec     rcx\n\t"
10557     $$emit$$"jge     LOOP\n\t"
10558     $$emit$$"jmp     DONE\n\t"
10559     $$emit$$"# LARGE:\n\t"
10560     if (UseFastStosb) {
10561        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10562        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10563     } else if (UseXMMForObjInit) {
10564        $$emit$$"mov     rdi,rax\n\t"
10565        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10566        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10567        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10575        $$emit$$"jl      L_tail\n\t"
10576        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10577        $$emit$$"add     0x20,rax\n\t"
10578        $$emit$$"sub     0x4,rcx\n\t"
10579        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10580        $$emit$$"add     0x4,rcx\n\t"
10581        $$emit$$"jle     L_end\n\t"
10582        $$emit$$"dec     rcx\n\t"
10583        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10584        $$emit$$"vmovq   xmm0,(rax)\n\t"
10585        $$emit$$"add     0x8,rax\n\t"
10586        $$emit$$"dec     rcx\n\t"
10587        $$emit$$"jge     L_sloop\n\t"
10588        $$emit$$"# L_end:\n\t"
10589     } else {
10590        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10591     }
10592     $$emit$$"# DONE"
10593   %}
10594   ins_encode %{
10595     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10596                  $tmp$$XMMRegister, false, knoreg);
10597   %}
10598   ins_pipe(pipe_slow);
10599 %}
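// How this and the ClearArray variants below divide the work (a summary of
// their predicates; the matcher makes the actual choice):
//   small, UseAVX <= 2                 -> rep_stos             (knoreg)
//   small, UseAVX >  2                 -> rep_stos_evex        ($ktmp)
//   large, UseAVX <= 2                 -> rep_stos_large       (knoreg)
//   large, UseAVX >  2                 -> rep_stos_large_evex  ($ktmp)
//   small constant length + AVX512VLBW -> rep_stos_im (immediate clear_mem)
// All of them bottom out in MacroAssembler::clear_mem, differing only in
// the is_large flag and whether an opmask register is supplied.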
10600 
10601 // Small ClearArray AVX512 non-constant length.
10602 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10603                        Universe dummy, rFlagsReg cr)
10604 %{
10605   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
10606   match(Set dummy (ClearArray cnt base));
10607   ins_cost(125);
10608   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10609 
10610   format %{ $$template
10611     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10612     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10613     $$emit$$"jg      LARGE\n\t"
10614     $$emit$$"dec     rcx\n\t"
10615     $$emit$$"js      DONE\t# Zero length\n\t"
10616     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10617     $$emit$$"dec     rcx\n\t"
10618     $$emit$$"jge     LOOP\n\t"
10619     $$emit$$"jmp     DONE\n\t"
10620     $$emit$$"# LARGE:\n\t"
10621     if (UseFastStosb) {
10622        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10623        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10624     } else if (UseXMMForObjInit) {
10625        $$emit$$"mov     rdi,rax\n\t"
10626        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10627        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10628        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10636        $$emit$$"jl      L_tail\n\t"
10637        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10638        $$emit$$"add     0x20,rax\n\t"
10639        $$emit$$"sub     0x4,rcx\n\t"
10640        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10641        $$emit$$"add     0x4,rcx\n\t"
10642        $$emit$$"jle     L_end\n\t"
10643        $$emit$$"dec     rcx\n\t"
10644        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10645        $$emit$$"vmovq   xmm0,(rax)\n\t"
10646        $$emit$$"add     0x8,rax\n\t"
10647        $$emit$$"dec     rcx\n\t"
10648        $$emit$$"jge     L_sloop\n\t"
10649        $$emit$$"# L_end:\n\t"
10650     } else {
10651        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10652     }
10653     $$emit$$"# DONE"
10654   %}
10655   ins_encode %{
10656     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10657                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
10658   %}
10659   ins_pipe(pipe_slow);
10660 %}
10661 
10662 // Large ClearArray non-AVX512.
10663 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10664                         Universe dummy, rFlagsReg cr)
10665 %{
10666   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
10667   match(Set dummy (ClearArray cnt base));
10668   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10669 
10670   format %{ $$template
10671     if (UseFastStosb) {
10672        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10673        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10674        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10675     } else if (UseXMMForObjInit) {
10676        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10677        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10678        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10679        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10680        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10681        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10682        $$emit$$"add     0x40,rax\n\t"
10683        $$emit$$"# L_zero_64_bytes:\n\t"
10684        $$emit$$"sub     0x8,rcx\n\t"
10685        $$emit$$"jge     L_loop\n\t"
10686        $$emit$$"add     0x4,rcx\n\t"
10687        $$emit$$"jl      L_tail\n\t"
10688        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10689        $$emit$$"add     0x20,rax\n\t"
10690        $$emit$$"sub     0x4,rcx\n\t"
10691        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10692        $$emit$$"add     0x4,rcx\n\t"
10693        $$emit$$"jle     L_end\n\t"
10694        $$emit$$"dec     rcx\n\t"
10695        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10696        $$emit$$"vmovq   xmm0,(rax)\n\t"
10697        $$emit$$"add     0x8,rax\n\t"
10698        $$emit$$"dec     rcx\n\t"
10699        $$emit$$"jge     L_sloop\n\t"
10700        $$emit$$"# L_end:\n\t"
10701     } else {
10702        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10703        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10704     }
10705   %}
10706   ins_encode %{
10707     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10708                  $tmp$$XMMRegister, true, knoreg);
10709   %}
10710   ins_pipe(pipe_slow);
10711 %}
10712 
10713 // Large ClearArray AVX512.
10714 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10715                              Universe dummy, rFlagsReg cr)
10716 %{
10717   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
10718   match(Set dummy (ClearArray cnt base));
10719   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10720 
10721   format %{ $$template
10722     if (UseFastStosb) {
10723        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10724        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10725        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10726     } else if (UseXMMForObjInit) {
10727        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10728        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10729        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10730        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10731        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10732        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10733        $$emit$$"add     0x40,rax\n\t"
10734        $$emit$$"# L_zero_64_bytes:\n\t"
10735        $$emit$$"sub     0x8,rcx\n\t"
10736        $$emit$$"jge     L_loop\n\t"
10737        $$emit$$"add     0x4,rcx\n\t"
10738        $$emit$$"jl      L_tail\n\t"
10739        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10740        $$emit$$"add     0x20,rax\n\t"
10741        $$emit$$"sub     0x4,rcx\n\t"
10742        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10743        $$emit$$"add     0x4,rcx\n\t"
10744        $$emit$$"jle     L_end\n\t"
10745        $$emit$$"dec     rcx\n\t"
10746        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10747        $$emit$$"vmovq   xmm0,(rax)\n\t"
10748        $$emit$$"add     0x8,rax\n\t"
10749        $$emit$$"dec     rcx\n\t"
10750        $$emit$$"jge     L_sloop\n\t"
10751        $$emit$$"# L_end:\n\t"
10752     } else {
10753        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10754        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10755     }
10756   %}
10757   ins_encode %{
10758     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10759                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
10760   %}
10761   ins_pipe(pipe_slow);
10762 %}
10763 
10764 // Small ClearArray AVX512 constant length.
10765 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
10766 %{
10767   predicate(!((ClearArrayNode*)n)->is_large() &&
10768               ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
10769   match(Set dummy (ClearArray cnt base));
10770   ins_cost(100);
10771   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
10772   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
10773   ins_encode %{
10774    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
10775   %}
10776   ins_pipe(pipe_slow);
10777 %}
10778 
10779 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10780                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
10781 %{
10782   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
10783   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10784   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10785 
10786   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10787   ins_encode %{
10788     __ string_compare($str1$$Register, $str2$$Register,
10789                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10790                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
10791   %}
10792   ins_pipe( pipe_slow );
10793 %}
10794 

11663   ins_pipe(ialu_cr_reg_mem);
11664 %}
11665 
11666 // This will generate a signed flags result. This should be OK since
11667 // any compare against zero should be eq/neq.
11668 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11669 %{
11670   match(Set cr (CmpP src zero));
11671 
11672   format %{ "testq   $src, $src\t# ptr" %}
11673   ins_encode %{
11674     __ testq($src$$Register, $src$$Register);
11675   %}
11676   ins_pipe(ialu_cr_reg_imm);
11677 %}
11678 
11679 // This will generate a signed flags result. This should be OK since
11680 // any compare against zero should be eq/neq.
11681 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11682 %{
11683   predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
11684             n->in(1)->as_Load()->barrier_data() == 0);
11685   match(Set cr (CmpP (LoadP op) zero));
11686 
11687   ins_cost(500); // XXX
11688   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11689   ins_encode %{
11690     __ testq($op$$Address, 0xFFFFFFFF);
11691   %}
11692   ins_pipe(ialu_cr_reg_imm);
11693 %}
11694 
11695 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11696 %{
11697   predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
11698             n->in(1)->as_Load()->barrier_data() == 0);
11699   match(Set cr (CmpP (LoadP mem) zero));
11700 
11701   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11702   ins_encode %{
11703     __ cmpq(r12, $mem$$Address);
11704   %}
11705   ins_pipe(ialu_cr_reg_mem);
11706 %}
11707 
11708 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11709 %{
11710   match(Set cr (CmpN op1 op2));
11711 
11712   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11713   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11714   ins_pipe(ialu_cr_reg_reg);
11715 %}
11716 
11717 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)

11760 %{
11761   match(Set cr (CmpN src (LoadNKlass mem)));
11762 
11763   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
11764   ins_encode %{
11765     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
11766   %}
11767   ins_pipe(ialu_cr_reg_mem);
11768 %}
11769 
11770 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11771   match(Set cr (CmpN src zero));
11772 
11773   format %{ "testl   $src, $src\t# compressed ptr" %}
11774   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11775   ins_pipe(ialu_cr_reg_imm);
11776 %}
11777 
11778 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11779 %{
11780   predicate(CompressedOops::base() != NULL);
11781   match(Set cr (CmpN (LoadN mem) zero));
11782 
11783   ins_cost(500); // XXX
11784   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
11785   ins_encode %{
11786     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
11787   %}
11788   ins_pipe(ialu_cr_reg_mem);
11789 %}
11790 
11791 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11792 %{
11793   predicate(CompressedOops::base() == NULL);
11794   match(Set cr (CmpN (LoadN mem) zero));
11795 
11796   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11797   ins_encode %{
11798     __ cmpl(r12, $mem$$Address);
11799   %}
11800   ins_pipe(ialu_cr_reg_mem);
11801 %}
11802 
11803 // Yanked all unsigned pointer compare operations.
11804 // Pointer compares are done with CmpP which is already unsigned.
11805 
11806 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11807 %{
11808   match(Set cr (CmpL op1 op2));
11809 
11810   format %{ "cmpq    $op1, $op2" %}
11811   ins_encode %{
11812     __ cmpq($op1$$Register, $op2$$Register);
11813   %}

12524 
12525   ins_cost(300);
12526   format %{ "call_leaf,runtime " %}
12527   ins_encode(clear_avx, Java_To_Runtime(meth));
12528   ins_pipe(pipe_slow);
12529 %}
12530 
12531 // Call runtime without safepoint and with vector arguments
12532 instruct CallLeafDirectVector(method meth)
12533 %{
12534   match(CallLeafVector);
12535   effect(USE meth);
12536 
12537   ins_cost(300);
12538   format %{ "call_leaf,vector " %}
12539   ins_encode(Java_To_Runtime(meth));
12540   ins_pipe(pipe_slow);
12541 %}
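// Note: unlike the runtime leaf call just above (which uses clear_avx), this
// encoding deliberately omits it -- the outgoing arguments live in YMM/ZMM
// registers, and a vzeroupper here would destroy their upper bits. The same
// special case appears in MachCallRuntimeNode::ret_addr_offset() for
// Op_CallLeafVector.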
12542 
12543 // Call runtime without safepoint
12544 instruct CallLeafNoFPDirect(method meth)
12545 %{

12546   match(CallLeafNoFP);
12547   effect(USE meth);
12548 
12549   ins_cost(300);
12550   format %{ "call_leaf_nofp,runtime " %}
12551   ins_encode(clear_avx, Java_To_Runtime(meth));
12552   ins_pipe(pipe_slow);
12553 %}
12554 
12555 // Return Instruction
12556 // Remove the return address & jump to it.
12557 // Notice: We always emit a nop after a ret to make sure there is room
12558 // for safepoint patching
12559 instruct Ret()
12560 %{
12561   match(Return);
12562 
12563   format %{ "ret" %}
12564   ins_encode %{
12565     __ ret(0);

  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   if (_entry_point == nullptr) {
  494     // CallLeafNoFPInDirect
  495     return 3; // callq (register)
  496   }
  497   int offset = 13; // movq r10,#addr; callq (r10)
  498   if (this->ideal_Opcode() != Op_CallLeafVector) {
  499     offset += clear_avx_size();
  500   }
  501   return offset;
  502 }
  503 
  504 //
  505 // Compute padding required for nodes which need alignment
  506 //
  507 
  508 // The address of the call instruction needs to be 4-byte aligned to
  509 // ensure that it does not span a cache line so that it can be patched.
  510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  511 {
  512   current_offset += clear_avx_size(); // skip vzeroupper
  513   current_offset += 1; // skip call opcode byte
  514   return align_up(current_offset, alignment_required()) - current_offset;
  515 }
  516 
  517 // The address of the call instruction needs to be 4-byte aligned to
  518 // ensure that it does not span a cache line so that it can be patched.
  519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  520 {
  521   current_offset += clear_avx_size(); // skip vzeroupper
  522   current_offset += 11; // skip movq instruction + call opcode byte
  523   return align_up(current_offset, alignment_required()) - current_offset;

  694     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  695     if (PreserveFramePointer) {
  696       st->print("\n\t");
  697       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  698       if (framesize > 0) {
  699         st->print("\n\t");
  700         st->print("addq    rbp, #%d", framesize);
  701       }
  702     }
  703   }
  704 
  705   if (VerifyStackAtCalls) {
  706     st->print("\n\t");
  707     framesize -= wordSize;
  708     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  709 #ifdef ASSERT
  710     st->print("\n\t");
  711     st->print("# stack alignment check");
  712 #endif
  713   }
  714   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
  715     st->print("\n\t");
  716     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  717     st->print("\n\t");
  718     st->print("je      fast_entry\t");
  719     st->print("\n\t");
  720     st->print("call    #nmethod_entry_barrier_stub\t");
  721     st->print("\n\tfast_entry:");
  722   }
  723   st->cr();
  724 }
  725 #endif
  726 
  727 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  728   Compile* C = ra_->C;
  729   C2_MacroAssembler _masm(&cbuf);
  730 
  731   __ verified_entry(C);
  732 
  733   if (ra_->C->stub_function() == nullptr) {
  734     __ entry_barrier();
  735   }
  736 
  737   if (!Compile::current()->output()->in_scratch_emit_size()) {
  738     __ bind(*_verified_entry);
  739   }
  740 
  741   C->output()->set_frame_complete(cbuf.insts_size());
  742 
  743   if (C->has_mach_constant_base_node()) {
  744     // NOTE: We set the table base offset here because users might be
  745     // emitted before MachConstantBaseNode.
  746     ConstantTable& constant_table = C->output()->constant_table();
  747     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  748   }
  749 }
  750 
  751 int MachPrologNode::reloc() const
  752 {
  753   return 0; // a large enough number
  754 }
  755 
  756 //=============================================================================
  757 #ifndef PRODUCT
  758 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  759 {
  760   Compile* C = ra_->C;
  761   if (generate_vzeroupper(C)) {
  762     st->print("vzeroupper");
  763     st->cr(); st->print("\t");
  764   }
  765 
  766   int framesize = C->output()->frame_size_in_bytes();
  767   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  768   // Remove word for return adr already pushed
  769   // and RBP
  770   framesize -= 2*wordSize;

  778   if (do_polling() && C->is_method_compilation()) {
  779     st->print("\t");
  780     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  781                  "ja      #safepoint_stub\t"
  782                  "# Safepoint: poll for GC");
  783   }
  784 }
  785 #endif
  786 
  787 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  788 {
  789   Compile* C = ra_->C;
  790   MacroAssembler _masm(&cbuf);
  791 
  792   if (generate_vzeroupper(C)) {
  793     // Clear upper bits of YMM registers when current compiled code uses
  794     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  795     __ vzeroupper();
  796   }
  797 
  798   // Subtract two words to account for return address and rbp
  799   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  800   __ remove_frame(initial_framesize, C->needs_stack_repair());
  801 
  802   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  803     __ reserved_stack_check();
  804   }
  805 
  806   if (do_polling() && C->is_method_compilation()) {
  807     MacroAssembler _masm(&cbuf);
  808     Label dummy_label;
  809     Label* code_stub = &dummy_label;
  810     if (!C->output()->in_scratch_emit_size()) {
  811       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  812       C->output()->add_stub(stub);
  813       code_stub = &stub->entry();
  814     }
  815     __ relocate(relocInfo::poll_return_type);
  816     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  817   }
  818 }
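
// Shape of the poll emitted above (illustrative; the exact thread field and
// stub label are managed by safepoint_poll and C2SafepointPollStub):
//   cmpq   rsp, [r15 + polling_word_offset]  # poll word doubles as stack watermark
//   ja     safepoint_stub                    # armed -> enter the return-poll stub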
  819 
  820 int MachEpilogNode::reloc() const
  821 {
  822   return 2; // a large enough number
  823 }
  824 
  825 const Pipeline* MachEpilogNode::pipeline() const
  826 {
  827   return MachNode::pipeline_class();
  828 }
  829 
  830 //=============================================================================
  831 
  832 enum RC {
  833   rc_bad,
  834   rc_int,
  835   rc_kreg,
  836   rc_float,
  837   rc_stack
  838 };
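
// How the spill logic below reads these classes (illustrative summary):
//   rc_int   - general-purpose registers (rax .. r15)
//   rc_kreg  - AVX-512 opmask registers
//   rc_float - xmm/ymm/zmm registers
//   rc_stack - stack spill slots
//   rc_bad   - no valid register (e.g. the missing second half of a 32-bit value)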
  839 

  926                 src_offset, dst_offset);
  927       break;
  928     case Op_VecZ:
  929       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
  930                 "vmovdqu xmm0, [rsp + #%d]\n\t"
  931                 "vmovdqu [rsp + #%d], xmm0\n\t"
  932                 "vmovdqu xmm0, [rsp - #64]",
  933                 src_offset, dst_offset);
  934       break;
  935     default:
  936       ShouldNotReachHere();
  937     }
  938 #endif
  939   }
  940 }
  941 
  942 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
  943                                        PhaseRegAlloc* ra_,
  944                                        bool do_size,
  945                                        outputStream* st) const {
  946   assert(cbuf != nullptr || st  != nullptr, "sanity");
  947   // Get registers to move
  948   OptoReg::Name src_second = ra_->get_reg_second(in(1));
  949   OptoReg::Name src_first = ra_->get_reg_first(in(1));
  950   OptoReg::Name dst_second = ra_->get_reg_second(this);
  951   OptoReg::Name dst_first = ra_->get_reg_first(this);
  952 
  953   enum RC src_second_rc = rc_class(src_second);
  954   enum RC src_first_rc = rc_class(src_first);
  955   enum RC dst_second_rc = rc_class(dst_second);
  956   enum RC dst_first_rc = rc_class(dst_first);
  957 
  958   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
  959          "must move at least 1 register" );
  960 
  961   if (src_first == dst_first && src_second == dst_second) {
  962     // Self copy, no move
  963     return 0;
  964   }
  965   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
  966     uint ireg = ideal_reg();
  967     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
  968     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
  969     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
  970       // mem -> mem
  971       int src_offset = ra_->reg2offset(src_first);
  972       int dst_offset = ra_->reg2offset(dst_first);
  973       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
  974     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
  975       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
  976     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
  977       int stack_offset = ra_->reg2offset(dst_first);
  978       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
  979     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
  980       int stack_offset = ra_->reg2offset(src_first);
  981       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
  982     } else {
  983       ShouldNotReachHere();
  984     }
  985     return 0;

 1384          st->print("kmovq   %s, %s\t# spill",
 1385                      Matcher::regName[dst_first],
 1386                      Matcher::regName[src_first]);
 1387 #endif
 1388         }
 1389       }
 1390       return 0;
 1391     } else if (dst_first_rc == rc_float) {
 1392       assert(false, "Illegal spill");
 1393       return 0;
 1394     }
 1395   }
 1396 
 1397   assert(0," foo ");
 1398   Unimplemented();
 1399   return 0;
 1400 }
 1401 
 1402 #ifndef PRODUCT
 1403 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1404   implementation(nullptr, ra_, false, st);
 1405 }
 1406 #endif
 1407 
 1408 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1409   implementation(&cbuf, ra_, false, nullptr);
 1410 }
 1411 
 1412 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1413   return MachNode::size(ra_);
 1414 }
 1415 
 1416 //=============================================================================
 1417 #ifndef PRODUCT
 1418 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1419 {
 1420   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1421   int reg = ra_->get_reg_first(this);
 1422   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1423             Matcher::regName[reg], offset);
 1424 }
 1425 #endif
 1426 
 1427 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1428 {
 1429   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1430   int reg = ra_->get_encode(this);
 1431 
 1432   MacroAssembler masm(&cbuf);
 1433   masm.lea(as_Register(reg), Address(rsp, offset));
 1434 }
 1435 
 1436 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1437 {
 1438   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1439   return (offset < 0x80) ? 5 : 8; // REX
 1440 }
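
// The size() arithmetic is plain x86-64 encoding math: REX.W + 8D /r + ModRM
// + SIB for an rsp-relative lea is 4 bytes, plus a disp8 when offset < 0x80
// (5 total) or a disp32 otherwise (8 total).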
 1441 
 1442 //=============================================================================
 1443 #ifndef PRODUCT
 1444 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1445 {
 1446   st->print_cr("MachVEPNode");
 1447 }
 1448 #endif
 1449 
 1450 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1451 {
 1452   C2_MacroAssembler _masm(&cbuf);
 1453   uint insts_size = cbuf.insts_size();
 1454   if (!_verified) {
 1455     if (UseCompressedClassPointers) {
 1456       __ load_klass(rscratch1, j_rarg0, rscratch2);
 1457       __ cmpptr(rax, rscratch1);
 1458     } else {
 1459       __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1460     }
 1461     __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1462   } else {
 1463     // TODO 8284443 Avoid creation of temporary frame
 1464     if (ra_->C->stub_function() == nullptr) {
 1465       __ verified_entry(ra_->C, 0);
 1466       __ entry_barrier();
 1467       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1468       __ remove_frame(initial_framesize, false);
 1469     }
 1470     // Unpack inline type args passed as oop and then jump to
 1471     // the verified entry point (skipping the unverified entry).
 1472     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1473     // Emit code for verified entry and save increment for stack repair on return
 1474     __ verified_entry(ra_->C, sp_inc);
 1475     if (Compile::current()->output()->in_scratch_emit_size()) {
 1476       Label dummy_verified_entry;
 1477       __ jmp(dummy_verified_entry);
 1478     } else {
 1479       __ jmp(*_verified_entry);
 1480     }
 1481   }
 1482   /* WARNING these NOPs are critical so that the verified entry point is properly
 1483      4-byte aligned for patching by NativeJump::patch_verified_entry() */
 1484   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1485   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1486   if (nops_cnt > 0) {
 1487     __ nop(nops_cnt);
 1488   }
 1489 }
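
// The nop count, spelled out: if the entry emitted `size` bytes,
//   nops_cnt = (4 - (size & 0x3)) & 0x3
// so size % 4 == 0 adds nothing and size % 4 == 1 pads with 3 nops, keeping
// the patch site 4-byte aligned.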
 1490 
 1491 //=============================================================================
 1492 #ifndef PRODUCT
 1493 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1494 {
 1495   if (UseCompressedClassPointers) {
 1496     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1497     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1498     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1499   } else {
 1500     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1501                  "# Inline cache check");
 1502   }
 1503   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1504   st->print_cr("\tnop\t# nops to align entry point");
 1505 }
 1506 #endif
 1507 
 1508 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1509 {
 1510   MacroAssembler masm(&cbuf);

 1513     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1514     masm.cmpptr(rax, rscratch1);
 1515   } else {
 1516     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1517   }
 1518 
 1519   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1520 
 1521   /* WARNING these NOPs are critical so that the verified entry point is properly
 1522      4-byte aligned for patching by NativeJump::patch_verified_entry() */
 1523   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1524   if (OptoBreakpoint) {
 1525     // Leave space for int3
 1526     nops_cnt -= 1;
 1527   }
 1528   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1529   if (nops_cnt > 0)
 1530     masm.nop(nops_cnt);
 1531 }
 1532 
 1533 //=============================================================================
 1534 
 1535 bool Matcher::supports_vector_calling_convention(void) {
 1536   if (EnableVectorSupport && UseVectorStubs) {
 1537     return true;
 1538   }
 1539   return false;
 1540 }
 1541 
 1542 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1543   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1544   int lo = XMM0_num;
 1545   int hi = XMM0b_num;
 1546   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1547   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1548   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1549   return OptoRegPair(hi, lo);
 1550 }
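
// Illustrative reading: each 32-bit lane of xmm0 has its own OptoReg slot
// (XMM0, XMM0b, XMM0c, ...), so the (hi, lo) pair marks how far the return
// value extends inside XMM0: XMM0d for 128-bit VecX, XMM0h for 256-bit VecY,
// XMM0p for 512-bit VecZ.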
 1551 
 1552 // Is this branch offset short enough that a short branch can be used?

 1782     __ bind(normal);
 1783     __ cdqq();
 1784 
 1785     // idivq (note: must be emitted by the user of this rule)
 1786     // <done>
 1787     __ idivq($div$$Register);
 1788     __ bind(done);
 1789   %}
 1790 
 1791   enc_class enc_PartialSubtypeCheck()
 1792   %{
 1793     Register Rrdi = as_Register(RDI_enc); // result register
 1794     Register Rrax = as_Register(RAX_enc); // super class
 1795     Register Rrcx = as_Register(RCX_enc); // killed
 1796     Register Rrsi = as_Register(RSI_enc); // sub class
 1797     Label miss;
 1798     const bool set_cond_codes = true;
 1799 
 1800     MacroAssembler _masm(&cbuf);
 1801     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 1802                                      nullptr, &miss,
 1803                                      /*set_cond_codes:*/ true);
 1804     if ($primary) {
 1805       __ xorptr(Rrdi, Rrdi);
 1806     }
 1807     __ bind(miss);
 1808   %}
 1809 
 1810   enc_class clear_avx %{
 1811     debug_only(int off0 = cbuf.insts_size());
 1812     if (generate_vzeroupper(Compile::current())) {
 1813       // Clear upper bits of YMM registers when the current compiled code
 1814       // uses wide vectors, to avoid the AVX <-> SSE transition penalty
 1815       // during calls.
 1816       MacroAssembler _masm(&cbuf);
 1817       __ vzeroupper();
 1818     }
 1819     debug_only(int off1 = cbuf.insts_size());
 1820     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 1821   %}
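
// The size assert holds because vzeroupper has a fixed 3-byte encoding
// (C5 F8 77): clear_avx_size() is 3 when generate_vzeroupper() is true and
// 0 otherwise, so the emitted size is fully predictable.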
 1822 

 1839       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 1840     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 1841       // The NOP here is purely to ensure that eliding a call to
 1842       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 1843       __ addr_nop_5();
 1844       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 1845     } else {
 1846       int method_index = resolved_method_index(cbuf);
 1847       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1848                                                   : static_call_Relocation::spec(method_index);
 1849       address mark = __ pc();
 1850       int call_offset = __ offset();
 1851       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 1852       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1853         // Calls of the same statically bound method can share
 1854         // a stub to the interpreter.
 1855         cbuf.shared_stub_to_interp_for(_method, call_offset);
 1856       } else {
 1857         // Emit stubs for static call.
 1858         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1859         if (stub == nullptr) {
 1860           ciEnv::current()->record_failure("CodeCache is full");
 1861           return;
 1862         }
 1863       }
 1864     }
 1865     __ post_call_nop();
 1866   %}
 1867 
 1868   enc_class Java_Dynamic_Call(method meth) %{
 1869     MacroAssembler _masm(&cbuf);
 1870     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1871     __ post_call_nop();
 1872   %}
 1873 
 1874 %}
 1875 
 1876 
 1877 
 1878 //----------FRAME--------------------------------------------------------------
 1879 // Definition of frame structure and management information.

 2177 operand immI_64()
 2178 %{
 2179   predicate( n->get_int() == 64 );
 2180   match(ConI);
 2181 
 2182   op_cost(0);
 2183   format %{ %}
 2184   interface(CONST_INTER);
 2185 %}
 2186 
 2187 // Pointer Immediate
 2188 operand immP()
 2189 %{
 2190   match(ConP);
 2191 
 2192   op_cost(10);
 2193   format %{ %}
 2194   interface(CONST_INTER);
 2195 %}
 2196 
 2197 // nullptr Pointer Immediate
 2198 operand immP0()
 2199 %{
 2200   predicate(n->get_ptr() == 0);
 2201   match(ConP);
 2202 
 2203   op_cost(5);
 2204   format %{ %}
 2205   interface(CONST_INTER);
 2206 %}
 2207 
 2208 // Pointer Immediate
 2209 operand immN() %{
 2210   match(ConN);
 2211 
 2212   op_cost(10);
 2213   format %{ %}
 2214   interface(CONST_INTER);
 2215 %}
 2216 
 2217 operand immNKlass() %{
 2218   match(ConNKlass);
 2219 
 2220   op_cost(10);
 2221   format %{ %}
 2222   interface(CONST_INTER);
 2223 %}
 2224 
 2225 // nullptr Pointer Immediate
 2226 operand immN0() %{
 2227   predicate(n->get_narrowcon() == 0);
 2228   match(ConN);
 2229 
 2230   op_cost(5);
 2231   format %{ %}
 2232   interface(CONST_INTER);
 2233 %}
 2234 
 2235 operand immP31()
 2236 %{
 2237   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 2238             && (n->get_ptr() >> 31) == 0);
 2239   match(ConP);
 2240 
 2241   op_cost(5);
 2242   format %{ %}
 2243   interface(CONST_INTER);
 2244 %}
 2245 

 3117   %}
 3118 %}
 3119 
 3120 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3121 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3122 %{
 3123   constraint(ALLOC_IN_RC(ptr_reg));
 3124   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3125   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3126 
 3127   op_cost(10);
 3128   format %{"[$reg + $off + $idx << $scale]" %}
 3129   interface(MEMORY_INTER) %{
 3130     base($reg);
 3131     index($idx);
 3132     scale($scale);
 3133     disp($off);
 3134   %}
 3135 %}
 3136 
 3137 // Indirect Narrow Oop Operand
 3138 operand indCompressedOop(rRegN reg) %{
 3139   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3140   constraint(ALLOC_IN_RC(ptr_reg));
 3141   match(DecodeN reg);
 3142 
 3143   op_cost(10);
 3144   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3145   interface(MEMORY_INTER) %{
 3146     base(0xc); // R12
 3147     index($reg);
 3148     scale(0x3);
 3149     disp(0x0);
 3150   %}
 3151 %}
 3152 
 3153 // Indirect Narrow Oop Plus Offset Operand
 3154 // Note: the x86 architecture doesn't support "scale * index + offset" without
 3155 // a base, so we can't free r12 even when CompressedOops::base() == nullptr.
 3156 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 3157   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3158   constraint(ALLOC_IN_RC(ptr_reg));
 3159   match(AddP (DecodeN reg) off);
 3160 
 3161   op_cost(10);
 3162   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3163   interface(MEMORY_INTER) %{
 3164     base(0xc); // R12
 3165     index($reg);
 3166     scale(0x3);
 3167     disp($off);
 3168   %}
 3169 %}
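
// The decode these operands fold into the address mode, assuming
// CompressedOops::shift() == 3 and the heap base kept live in r12:
//   oop_address = r12 + ((uint64_t)narrow_oop << 3) + disp
// i.e. the DecodeN vanishes into a single [base + index*8 + disp] operand.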
 3170 
 3171 // Indirect Memory Operand
 3172 operand indirectNarrow(rRegN reg)
 3173 %{
 3174   predicate(CompressedOops::shift() == 0);
 3175   constraint(ALLOC_IN_RC(ptr_reg));

 3479     equal(0x4, "e");
 3480     not_equal(0x5, "ne");
 3481     less(0x2, "b");
 3482     greater_equal(0x3, "ae");
 3483     less_equal(0x6, "be");
 3484     greater(0x7, "a");
 3485     overflow(0x0, "o");
 3486     no_overflow(0x1, "no");
 3487   %}
 3488 %}
 3489 
 3490 //----------OPERAND CLASSES----------------------------------------------------
 3491 // Operand Classes are groups of operands that are used to simplify
 3492 // instruction definitions by not requiring the AD writer to specify separate
 3493 // instructions for every form of operand when the instruction accepts
 3494 // multiple operand types with the same basic encoding and format.  The classic
 3495 // case of this is memory operands.
 3496 
 3497 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3498                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3499                indCompressedOop, indCompressedOopOffset,
 3500                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3501                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3502                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3503 
 3504 //----------PIPELINE-----------------------------------------------------------
 3505 // Rules which define the behavior of the target architecture's pipeline.
 3506 pipeline %{
 3507 
 3508 //----------ATTRIBUTES---------------------------------------------------------
 3509 attributes %{
 3510   variable_size_instructions;        // Variable size instructions
 3511   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3512   instruction_unit_size = 1;         // An instruction is 1 byte long
 3513   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3514   instruction_fetch_units = 1;       // of 16 bytes
 3515 
 3516   // List of nop instructions
 3517   nops( MachNop );
 3518 %}
 3519 

 4916   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 4917   ins_encode %{
 4918     __ movl($dst$$Register, $src$$constant);
 4919   %}
 4920   ins_pipe(ialu_reg);
 4921 %}
 4922 
 4923 instruct loadConF(regF dst, immF con) %{
 4924   match(Set dst con);
 4925   ins_cost(125);
 4926   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 4927   ins_encode %{
 4928     __ movflt($dst$$XMMRegister, $constantaddress($con));
 4929   %}
 4930   ins_pipe(pipe_slow);
 4931 %}
 4932 
 4933 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 4934   match(Set dst src);
 4935   effect(KILL cr);
 4936   format %{ "xorq    $dst, $src\t# compressed nullptr ptr" %}
 4937   ins_encode %{
 4938     __ xorq($dst$$Register, $dst$$Register);
 4939   %}
 4940   ins_pipe(ialu_reg);
 4941 %}
 4942 
 4943 instruct loadConN(rRegN dst, immN src) %{
 4944   match(Set dst src);
 4945 
 4946   ins_cost(125);
 4947   format %{ "movl    $dst, $src\t# compressed ptr" %}
 4948   ins_encode %{
 4949     address con = (address)$src$$constant;
 4950     if (con == nullptr) {
 4951       ShouldNotReachHere();
 4952     } else {
 4953       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 4954     }
 4955   %}
 4956   ins_pipe(ialu_reg_fat); // XXX
 4957 %}
 4958 
 4959 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 4960   match(Set dst src);
 4961 
 4962   ins_cost(125);
 4963   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 4964   ins_encode %{
 4965     address con = (address)$src$$constant;
 4966     if (con == nullptr) {
 4967       ShouldNotReachHere();
 4968     } else {
 4969       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 4970     }
 4971   %}
 4972   ins_pipe(ialu_reg_fat); // XXX
 4973 %}
 4974 
 4975 instruct loadConF0(regF dst, immF0 src)
 4976 %{
 4977   match(Set dst src);
 4978   ins_cost(100);
 4979 
 4980   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 4981   ins_encode %{
 4982     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 4983   %}
 4984   ins_pipe(pipe_slow);
 4985 %}
 4986 

 5172   %}
 5173   ins_pipe(ialu_mem_reg); // XXX
 5174 %}
 5175 
 5176 // Store Pointer
 5177 instruct storeP(memory mem, any_RegP src)
 5178 %{
 5179   predicate(n->as_Store()->barrier_data() == 0);
 5180   match(Set mem (StoreP mem src));
 5181 
 5182   ins_cost(125); // XXX
 5183   format %{ "movq    $mem, $src\t# ptr" %}
 5184   ins_encode %{
 5185     __ movq($mem$$Address, $src$$Register);
 5186   %}
 5187   ins_pipe(ialu_mem_reg);
 5188 %}
 5189 
 5190 instruct storeImmP0(memory mem, immP0 zero)
 5191 %{
 5192   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 5193   match(Set mem (StoreP mem zero));
 5194 
 5195   ins_cost(125); // XXX
 5196   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 5197   ins_encode %{
 5198     __ movq($mem$$Address, r12);
 5199   %}
 5200   ins_pipe(ialu_mem_reg);
 5201 %}
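
// Why store r12 rather than an immediate: with CompressedOops::base() ==
// nullptr, r12 permanently holds zero, and a register store avoids the 4-byte
// immediate of `movq mem, $0`. The same trick recurs in the zero-store
// patterns below.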
 5202 
 5203 // Store nullptr Pointer, mark word, or other simple pointer constant.
 5204 instruct storeImmP(memory mem, immP31 src)
 5205 %{
 5206   predicate(n->as_Store()->barrier_data() == 0);
 5207   match(Set mem (StoreP mem src));
 5208 
 5209   ins_cost(150); // XXX
 5210   format %{ "movq    $mem, $src\t# ptr" %}
 5211   ins_encode %{
 5212     __ movq($mem$$Address, $src$$constant);
 5213   %}
 5214   ins_pipe(ialu_mem_imm);
 5215 %}
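
// immP31 is what makes this encoding safe: `movq mem, imm32` sign-extends the
// immediate, so only non-relocatable pointer constants with bit 31 clear
// survive the 32-bit encoding unchanged.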
 5216 
 5217 // Store Compressed Pointer
 5218 instruct storeN(memory mem, rRegN src)
 5219 %{
 5220   match(Set mem (StoreN mem src));
 5221 
 5222   ins_cost(125); // XXX
 5223   format %{ "movl    $mem, $src\t# compressed ptr" %}
 5224   ins_encode %{
 5225     __ movl($mem$$Address, $src$$Register);
 5226   %}
 5227   ins_pipe(ialu_mem_reg);
 5228 %}
 5229 
 5230 instruct storeNKlass(memory mem, rRegN src)
 5231 %{
 5232   match(Set mem (StoreNKlass mem src));
 5233 
 5234   ins_cost(125); // XXX
 5235   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 5236   ins_encode %{
 5237     __ movl($mem$$Address, $src$$Register);
 5238   %}
 5239   ins_pipe(ialu_mem_reg);
 5240 %}
 5241 
 5242 instruct storeImmN0(memory mem, immN0 zero)
 5243 %{
 5244   predicate(CompressedOops::base() == nullptr);
 5245   match(Set mem (StoreN mem zero));
 5246 
 5247   ins_cost(125); // XXX
 5248   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 5249   ins_encode %{
 5250     __ movl($mem$$Address, r12);
 5251   %}
 5252   ins_pipe(ialu_mem_reg);
 5253 %}
 5254 
 5255 instruct storeImmN(memory mem, immN src)
 5256 %{
 5257   match(Set mem (StoreN mem src));
 5258 
 5259   ins_cost(150); // XXX
 5260   format %{ "movl    $mem, $src\t# compressed ptr" %}
 5261   ins_encode %{
 5262     address con = (address)$src$$constant;
 5263     if (con == nullptr) {
 5264       __ movl($mem$$Address, 0);
 5265     } else {
 5266       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 5267     }
 5268   %}
 5269   ins_pipe(ialu_mem_imm);
 5270 %}
 5271 
 5272 instruct storeImmNKlass(memory mem, immNKlass src)
 5273 %{
 5274   match(Set mem (StoreNKlass mem src));
 5275 
 5276   ins_cost(150); // XXX
 5277   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 5278   ins_encode %{
 5279     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 5280   %}
 5281   ins_pipe(ialu_mem_imm);
 5282 %}
 5283 
 5284 // Store Integer Immediate
 5285 instruct storeImmI0(memory mem, immI_0 zero)
 5286 %{
 5287   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5288   match(Set mem (StoreI mem zero));
 5289 
 5290   ins_cost(125); // XXX
 5291   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 5292   ins_encode %{
 5293     __ movl($mem$$Address, r12);
 5294   %}
 5295   ins_pipe(ialu_mem_reg);
 5296 %}
 5297 
 5298 instruct storeImmI(memory mem, immI src)
 5299 %{
 5300   match(Set mem (StoreI mem src));
 5301 
 5302   ins_cost(150);
 5303   format %{ "movl    $mem, $src\t# int" %}
 5304   ins_encode %{
 5305     __ movl($mem$$Address, $src$$constant);
 5306   %}
 5307   ins_pipe(ialu_mem_imm);
 5308 %}
 5309 
 5310 // Store Long Immediate
 5311 instruct storeImmL0(memory mem, immL0 zero)
 5312 %{
 5313   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5314   match(Set mem (StoreL mem zero));
 5315 
 5316   ins_cost(125); // XXX
 5317   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 5318   ins_encode %{
 5319     __ movq($mem$$Address, r12);
 5320   %}
 5321   ins_pipe(ialu_mem_reg);
 5322 %}
 5323 
 5324 instruct storeImmL(memory mem, immL32 src)
 5325 %{
 5326   match(Set mem (StoreL mem src));
 5327 
 5328   ins_cost(150);
 5329   format %{ "movq    $mem, $src\t# long" %}
 5330   ins_encode %{
 5331     __ movq($mem$$Address, $src$$constant);
 5332   %}
 5333   ins_pipe(ialu_mem_imm);
 5334 %}
 5335 
 5336 // Store Short/Char Immediate
 5337 instruct storeImmC0(memory mem, immI_0 zero)
 5338 %{
 5339   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5340   match(Set mem (StoreC mem zero));
 5341 
 5342   ins_cost(125); // XXX
 5343   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 5344   ins_encode %{
 5345     __ movw($mem$$Address, r12);
 5346   %}
 5347   ins_pipe(ialu_mem_reg);
 5348 %}
 5349 
 5350 instruct storeImmI16(memory mem, immI16 src)
 5351 %{
 5352   predicate(UseStoreImmI16);
 5353   match(Set mem (StoreC mem src));
 5354 
 5355   ins_cost(150);
 5356   format %{ "movw    $mem, $src\t# short/char" %}
 5357   ins_encode %{
 5358     __ movw($mem$$Address, $src$$constant);
 5359   %}
 5360   ins_pipe(ialu_mem_imm);
 5361 %}
 5362 
 5363 // Store Byte Immediate
 5364 instruct storeImmB0(memory mem, immI_0 zero)
 5365 %{
 5366   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5367   match(Set mem (StoreB mem zero));
 5368 
 5369   ins_cost(125); // XXX
 5370   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 5371   ins_encode %{
 5372     __ movb($mem$$Address, r12);
 5373   %}
 5374   ins_pipe(ialu_mem_reg);
 5375 %}
 5376 
 5377 instruct storeImmB(memory mem, immI8 src)
 5378 %{
 5379   match(Set mem (StoreB mem src));
 5380 
 5381   ins_cost(150); // XXX
 5382   format %{ "movb    $mem, $src\t# byte" %}
 5383   ins_encode %{
 5384     __ movb($mem$$Address, $src$$constant);
 5385   %}
 5386   ins_pipe(ialu_mem_imm);
 5387 %}
 5388 
 5389 // Store CMS card-mark Immediate
 5390 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 5391 %{
 5392   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5393   match(Set mem (StoreCM mem zero));
 5394 
 5395   ins_cost(125); // XXX
 5396   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 5397   ins_encode %{
 5398     __ movb($mem$$Address, r12);
 5399   %}
 5400   ins_pipe(ialu_mem_reg);
 5401 %}
 5402 
 5403 instruct storeImmCM0(memory mem, immI_0 src)
 5404 %{
 5405   match(Set mem (StoreCM mem src));
 5406 
 5407   ins_cost(150); // XXX
 5408   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 5409   ins_encode %{
 5410     __ movb($mem$$Address, $src$$constant);
 5411   %}
 5412   ins_pipe(ialu_mem_imm);
 5413 %}
 5414 
 5415 // Store Float
 5416 instruct storeF(memory mem, regF src)
 5417 %{
 5418   match(Set mem (StoreF mem src));
 5419 
 5420   ins_cost(95); // XXX
 5421   format %{ "movss   $mem, $src\t# float" %}
 5422   ins_encode %{
 5423     __ movflt($mem$$Address, $src$$XMMRegister);
 5424   %}
 5425   ins_pipe(pipe_slow); // XXX
 5426 %}
 5427 
 5428 // Store immediate Float value (it is faster than store from XMM register)
 5429 instruct storeF0(memory mem, immF0 zero)
 5430 %{
 5431   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5432   match(Set mem (StoreF mem zero));
 5433 
 5434   ins_cost(25); // XXX
 5435   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 5436   ins_encode %{
 5437     __ movl($mem$$Address, r12);
 5438   %}
 5439   ins_pipe(ialu_mem_reg);
 5440 %}
 5441 
 5442 instruct storeF_imm(memory mem, immF src)
 5443 %{
 5444   match(Set mem (StoreF mem src));
 5445 
 5446   ins_cost(50);
 5447   format %{ "movl    $mem, $src\t# float" %}
 5448   ins_encode %{
 5449     __ movl($mem$$Address, jint_cast($src$$constant));
 5450   %}
 5451   ins_pipe(ialu_mem_imm);
 5452 %}
 5453 
 5454 // Store Double
 5455 instruct storeD(memory mem, regD src)
 5456 %{
 5457   match(Set mem (StoreD mem src));
 5458 
 5459   ins_cost(95); // XXX
 5460   format %{ "movsd   $mem, $src\t# double" %}
 5461   ins_encode %{
 5462     __ movdbl($mem$$Address, $src$$XMMRegister);
 5463   %}
 5464   ins_pipe(pipe_slow); // XXX
 5465 %}
 5466 
 5467 // Store immediate double 0.0 (it is faster than store from XMM register)
 5468 instruct storeD0_imm(memory mem, immD0 src)
 5469 %{
 5470   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 5471   match(Set mem (StoreD mem src));
 5472 
 5473   ins_cost(50);
 5474   format %{ "movq    $mem, $src\t# double 0." %}
 5475   ins_encode %{
 5476     __ movq($mem$$Address, $src$$constant);
 5477   %}
 5478   ins_pipe(ialu_mem_imm);
 5479 %}
 5480 
 5481 instruct storeD0(memory mem, immD0 zero)
 5482 %{
 5483   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5484   match(Set mem (StoreD mem zero));
 5485 
 5486   ins_cost(25); // XXX
 5487   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 5488   ins_encode %{
 5489     __ movq($mem$$Address, r12);
 5490   %}
 5491   ins_pipe(ialu_mem_reg);
 5492 %}
 5493 
 5494 instruct storeSSI(stackSlotI dst, rRegI src)
 5495 %{
 5496   match(Set dst src);
 5497 
 5498   ins_cost(100);
 5499   format %{ "movl    $dst, $src\t# int stk" %}
 5500   ins_encode %{
 5501     __ movl($dst$$Address, $src$$Register);
 5502   %}
 5503   ins_pipe( ialu_mem_reg );

 6010   format %{ "MEMBAR-storestore (empty encoding)" %}
 6011   ins_encode( );
 6012   ins_pipe(empty);
 6013 %}
 6014 
 6015 //----------Move Instructions--------------------------------------------------
 6016 
 6017 instruct castX2P(rRegP dst, rRegL src)
 6018 %{
 6019   match(Set dst (CastX2P src));
 6020 
 6021   format %{ "movq    $dst, $src\t# long->ptr" %}
 6022   ins_encode %{
 6023     if ($dst$$reg != $src$$reg) {
 6024       __ movptr($dst$$Register, $src$$Register);
 6025     }
 6026   %}
 6027   ins_pipe(ialu_reg_reg); // XXX
 6028 %}
 6029 
 6030 instruct castN2X(rRegL dst, rRegN src)
 6031 %{
 6032   match(Set dst (CastP2X src));
 6033 
 6034   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6035   ins_encode %{
 6036     if ($dst$$reg != $src$$reg) {
 6037       __ movptr($dst$$Register, $src$$Register);
 6038     }
 6039   %}
 6040   ins_pipe(ialu_reg_reg); // XXX
 6041 %}
 6042 
 6043 instruct castP2X(rRegL dst, rRegP src)
 6044 %{
 6045   match(Set dst (CastP2X src));
 6046 
 6047   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6048   ins_encode %{
 6049     if ($dst$$reg != $src$$reg) {
 6050       __ movptr($dst$$Register, $src$$Register);
 6051     }
 6052   %}
 6053   ins_pipe(ialu_reg_reg); // XXX
 6054 %}
 6055 
 6056 // Convert oop into int for vectors alignment masking
 6057 instruct convP2I(rRegI dst, rRegP src)
 6058 %{
 6059   match(Set dst (ConvL2I (CastP2X src)));
 6060 
 6061   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6062   ins_encode %{

10564   effect(DEF dst, USE src);
10565   ins_cost(100);
10566   format %{ "movd    $dst,$src\t# MoveI2F" %}
10567   ins_encode %{
10568     __ movdl($dst$$XMMRegister, $src$$Register);
10569   %}
10570   ins_pipe( pipe_slow );
10571 %}
10572 
10573 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10574   match(Set dst (MoveL2D src));
10575   effect(DEF dst, USE src);
10576   ins_cost(100);
10577   format %{ "movd    $dst,$src\t# MoveL2D" %}
10578   ins_encode %{
10579      __ movdq($dst$$XMMRegister, $src$$Register);
10580   %}
10581   ins_pipe( pipe_slow );
10582 %}
10583 
10584 
10585 // Fast clearing of an array
10586 // Small ClearArray non-AVX512.
10587 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10588                   Universe dummy, rFlagsReg cr)
10589 %{
10590   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10591   match(Set dummy (ClearArray (Binary cnt base) val));
10592   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10593 
10594   format %{ $$template
10595     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10596     $$emit$$"jg      LARGE\n\t"
10597     $$emit$$"dec     rcx\n\t"
10598     $$emit$$"js      DONE\t# Zero length\n\t"
10599     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10600     $$emit$$"dec     rcx\n\t"
10601     $$emit$$"jge     LOOP\n\t"
10602     $$emit$$"jmp     DONE\n\t"
10603     $$emit$$"# LARGE:\n\t"
10604     if (UseFastStosb) {
10605        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10606        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10607     } else if (UseXMMForObjInit) {
10608        $$emit$$"movdq   $tmp, $val\n\t"
10609        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10610        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10611        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10612        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10613        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10614        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10615        $$emit$$"add     0x40,rax\n\t"
10616        $$emit$$"# L_zero_64_bytes:\n\t"
10617        $$emit$$"sub     0x8,rcx\n\t"
10618        $$emit$$"jge     L_loop\n\t"
10619        $$emit$$"add     0x4,rcx\n\t"
10620        $$emit$$"jl      L_tail\n\t"
10621        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10622        $$emit$$"add     0x20,rax\n\t"
10623        $$emit$$"sub     0x4,rcx\n\t"
10624        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10625        $$emit$$"add     0x4,rcx\n\t"
10626        $$emit$$"jle     L_end\n\t"
10627        $$emit$$"dec     rcx\n\t"
10628        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10629        $$emit$$"vmovq   xmm0,(rax)\n\t"
10630        $$emit$$"add     0x8,rax\n\t"
10631        $$emit$$"dec     rcx\n\t"
10632        $$emit$$"jge     L_sloop\n\t"
10633        $$emit$$"# L_end:\n\t"
10634     } else {
10635        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10636     }
10637     $$emit$$"# DONE"
10638   %}
10639   ins_encode %{
10640     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10641                  $tmp$$XMMRegister, false, false);
10642   %}
10643   ins_pipe(pipe_slow);
10644 %}
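
// Rough shape of the strategy selection inside clear_mem that the format
// string mirrors (a sketch, not the real MacroAssembler code):
//   count below InitArrayShortSize  -> inline 8-byte store loop
//   large + UseFastStosb            -> shlq rcx,3; rep stosb
//   large + UseXMMForObjInit        -> 64-byte vector loop plus scalar tails
//   large otherwise                 -> rep stosq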
10645 
10646 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10647                             Universe dummy, rFlagsReg cr)
10648 %{
10649   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10650   match(Set dummy (ClearArray (Binary cnt base) val));
10651   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10652 
10653   format %{ $$template
10654     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10655     $$emit$$"jg      LARGE\n\t"
10656     $$emit$$"dec     rcx\n\t"
10657     $$emit$$"js      DONE\t# Zero length\n\t"
10658     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10659     $$emit$$"dec     rcx\n\t"
10660     $$emit$$"jge     LOOP\n\t"
10661     $$emit$$"jmp     DONE\n\t"
10662     $$emit$$"# LARGE:\n\t"
10663     if (UseXMMForObjInit) {
10664        $$emit$$"movdq   $tmp, $val\n\t"
10665        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10666        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10667        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10668        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10669        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10670        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10671        $$emit$$"add     0x40,rax\n\t"
10672        $$emit$$"# L_zero_64_bytes:\n\t"
10673        $$emit$$"sub     0x8,rcx\n\t"
10674        $$emit$$"jge     L_loop\n\t"
10675        $$emit$$"add     0x4,rcx\n\t"
10676        $$emit$$"jl      L_tail\n\t"
10677        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10678        $$emit$$"add     0x20,rax\n\t"
10679        $$emit$$"sub     0x4,rcx\n\t"
10680        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10681        $$emit$$"add     0x4,rcx\n\t"
10682        $$emit$$"jle     L_end\n\t"
10683        $$emit$$"dec     rcx\n\t"
10684        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10685        $$emit$$"vmovq   xmm0,(rax)\n\t"
10686        $$emit$$"add     0x8,rax\n\t"
10687        $$emit$$"dec     rcx\n\t"
10688        $$emit$$"jge     L_sloop\n\t"
10689        $$emit$$"# L_end:\n\t"
10690     } else {
10691        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10692     }
10693     $$emit$$"# DONE"
10694   %}
10695   ins_encode %{
10696     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10697                  $tmp$$XMMRegister, false, true);
10698   %}
10699   ins_pipe(pipe_slow);
10700 %}
10701 
10702 // Small ClearArray AVX512 non-constant length.
10703 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10704                        Universe dummy, rFlagsReg cr)
10705 %{
10706   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10707   match(Set dummy (ClearArray (Binary cnt base) val));
10708   ins_cost(125);
10709   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10710 
10711   format %{ $$template
10712     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10713     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10714     $$emit$$"jg      LARGE\n\t"
10715     $$emit$$"dec     rcx\n\t"
10716     $$emit$$"js      DONE\t# Zero length\n\t"
10717     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10718     $$emit$$"dec     rcx\n\t"
10719     $$emit$$"jge     LOOP\n\t"
10720     $$emit$$"jmp     DONE\n\t"
10721     $$emit$$"# LARGE:\n\t"
10722     if (UseFastStosb) {
10723        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10724        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10725     } else if (UseXMMForObjInit) {
10726        $$emit$$"mov     rdi,rax\n\t"
10727        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10728        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10729        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10737        $$emit$$"jl      L_tail\n\t"
10738        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10739        $$emit$$"add     0x20,rax\n\t"
10740        $$emit$$"sub     0x4,rcx\n\t"
10741        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10742        $$emit$$"add     0x4,rcx\n\t"
10743        $$emit$$"jle     L_end\n\t"
10744        $$emit$$"dec     rcx\n\t"
10745        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10746        $$emit$$"vmovq   xmm0,(rax)\n\t"
10747        $$emit$$"add     0x8,rax\n\t"
10748        $$emit$$"dec     rcx\n\t"
10749        $$emit$$"jge     L_sloop\n\t"
10750        $$emit$$"# L_end:\n\t"
10751     } else {
10752        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10753     }
10754     $$emit$$"# DONE"
10755   %}
10756   ins_encode %{
10757     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10758                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
10759   %}
10760   ins_pipe(pipe_slow);
10761 %}
10762 
10763 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10764                                  Universe dummy, rFlagsReg cr)
10765 %{
10766   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10767   match(Set dummy (ClearArray (Binary cnt base) val));
10768   ins_cost(125);
10769   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10770 
10771   format %{ $$template
10772     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10773     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10774     $$emit$$"jg      LARGE\n\t"
10775     $$emit$$"dec     rcx\n\t"
10776     $$emit$$"js      DONE\t# Zero length\n\t"
10777     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10778     $$emit$$"dec     rcx\n\t"
10779     $$emit$$"jge     LOOP\n\t"
10780     $$emit$$"jmp     DONE\n\t"
10781     $$emit$$"# LARGE:\n\t"
10782     if (UseFastStosb) {
10783        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10784        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10785     } else if (UseXMMForObjInit) {
10786        $$emit$$"mov     rdi,rax\n\t"
10787        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10788        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10789        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10797        $$emit$$"jl      L_tail\n\t"
10798        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10799        $$emit$$"add     0x20,rax\n\t"
10800        $$emit$$"sub     0x4,rcx\n\t"
10801        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10802        $$emit$$"add     0x4,rcx\n\t"
10803        $$emit$$"jle     L_end\n\t"
10804        $$emit$$"dec     rcx\n\t"
10805        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10806        $$emit$$"vmovq   xmm0,(rax)\n\t"
10807        $$emit$$"add     0x8,rax\n\t"
10808        $$emit$$"dec     rcx\n\t"
10809        $$emit$$"jge     L_sloop\n\t"
10810        $$emit$$"# L_end:\n\t"
10811     } else {
10812        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10813     }
10814     $$emit$$"# DONE"
10815   %}
10816   ins_encode %{
10817     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10818                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
10819   %}
10820   ins_pipe(pipe_slow);
10821 %}
10822 
10823 // Large ClearArray non-AVX512.
10824 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10825                         Universe dummy, rFlagsReg cr)
10826 %{
10827   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10828   match(Set dummy (ClearArray (Binary cnt base) val));
10829   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10830 
10831   format %{ $$template
10832     if (UseFastStosb) {
10833        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10834        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10835     } else if (UseXMMForObjInit) {
10836        $$emit$$"movdq   $tmp, $val\n\t"
10837        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10838        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10839        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10840        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10841        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10842        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10843        $$emit$$"add     0x40,rax\n\t"
10844        $$emit$$"# L_zero_64_bytes:\n\t"
10845        $$emit$$"sub     0x8,rcx\n\t"
10846        $$emit$$"jge     L_loop\n\t"
10847        $$emit$$"add     0x4,rcx\n\t"
10848        $$emit$$"jl      L_tail\n\t"
10849        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10850        $$emit$$"add     0x20,rax\n\t"
10851        $$emit$$"sub     0x4,rcx\n\t"
10852        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10853        $$emit$$"add     0x4,rcx\n\t"
10854        $$emit$$"jle     L_end\n\t"
10855        $$emit$$"dec     rcx\n\t"
10856        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10857        $$emit$$"vmovq   xmm0,(rax)\n\t"
10858        $$emit$$"add     0x8,rax\n\t"
10859        $$emit$$"dec     rcx\n\t"
10860        $$emit$$"jge     L_sloop\n\t"
10861        $$emit$$"# L_end:\n\t"
10862     } else {
10863        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10864     }
10865   %}
10866   ins_encode %{
10867     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10868                  $tmp$$XMMRegister, true, false);
10869   %}
10870   ins_pipe(pipe_slow);
10871 %}
10872 
10873 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10874                                   Universe dummy, rFlagsReg cr)
10875 %{
10876   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10877   match(Set dummy (ClearArray (Binary cnt base) val));
10878   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10879 
10880   format %{ $$template
10881     if (UseXMMForObjInit) {
10882        $$emit$$"movdq   $tmp, $val\n\t"
10883        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10884        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10885        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10886        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10887        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10888        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10889        $$emit$$"add     0x40,rax\n\t"
10890        $$emit$$"# L_zero_64_bytes:\n\t"
10891        $$emit$$"sub     0x8,rcx\n\t"
10892        $$emit$$"jge     L_loop\n\t"
10893        $$emit$$"add     0x4,rcx\n\t"
10894        $$emit$$"jl      L_tail\n\t"
10895        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10896        $$emit$$"add     0x20,rax\n\t"
10897        $$emit$$"sub     0x4,rcx\n\t"
10898        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10899        $$emit$$"add     0x4,rcx\n\t"
10900        $$emit$$"jle     L_end\n\t"
10901        $$emit$$"dec     rcx\n\t"
10902        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10903        $$emit$$"vmovq   xmm0,(rax)\n\t"
10904        $$emit$$"add     0x8,rax\n\t"
10905        $$emit$$"dec     rcx\n\t"
10906        $$emit$$"jge     L_sloop\n\t"
10907        $$emit$$"# L_end:\n\t"
10908     } else {
10909        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10910     }
10911   %}
10912   ins_encode %{
10913     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10914                  $tmp$$XMMRegister, true, true);
10915   %}
10916   ins_pipe(pipe_slow);
10917 %}
10918 
10919 // Large ClearArray AVX512.
10920 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10921                              Universe dummy, rFlagsReg cr)
10922 %{
10923   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10924   match(Set dummy (ClearArray (Binary cnt base) val));
10925   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10926 
10927   format %{ $$template
10928     if (UseFastStosb) {
10929        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10930        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10931        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10932     } else if (UseXMMForObjInit) {
10933        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10934        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10935        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10936        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10937        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10938        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10939        $$emit$$"add     0x40,rax\n\t"
10940        $$emit$$"# L_zero_64_bytes:\n\t"
10941        $$emit$$"sub     0x8,rcx\n\t"
10942        $$emit$$"jge     L_loop\n\t"
10943        $$emit$$"add     0x4,rcx\n\t"
10944        $$emit$$"jl      L_tail\n\t"
10945        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10946        $$emit$$"add     0x20,rax\n\t"
10947        $$emit$$"sub     0x4,rcx\n\t"
10948        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10949        $$emit$$"add     0x4,rcx\n\t"
10950        $$emit$$"jle     L_end\n\t"
10951        $$emit$$"dec     rcx\n\t"
10952        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10953        $$emit$$"vmovq   xmm0,(rax)\n\t"
10954        $$emit$$"add     0x8,rax\n\t"
10955        $$emit$$"dec     rcx\n\t"
10956        $$emit$$"jge     L_sloop\n\t"
10957        $$emit$$"# L_end:\n\t"
10958     } else {
10959        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10960        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10961     }
10962   %}
10963   ins_encode %{
10964     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10965                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
10966   %}
10967   ins_pipe(pipe_slow);
10968 %}
10969 
10970 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10971                                        Universe dummy, rFlagsReg cr)
10972 %{
10973   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10974   match(Set dummy (ClearArray (Binary cnt base) val));
10975   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10976 
10977   format %{ $$template
10978     if (UseFastStosb) {
10979        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10980        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10981        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10982     } else if (UseXMMForObjInit) {
10983        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10984        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10985        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10986        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10987        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10988        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10989        $$emit$$"add     0x40,rax\n\t"
10990        $$emit$$"# L_zero_64_bytes:\n\t"
10991        $$emit$$"sub     0x8,rcx\n\t"
10992        $$emit$$"jge     L_loop\n\t"
10993        $$emit$$"add     0x4,rcx\n\t"
10994        $$emit$$"jl      L_tail\n\t"
10995        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10996        $$emit$$"add     0x20,rax\n\t"
10997        $$emit$$"sub     0x4,rcx\n\t"
10998        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10999        $$emit$$"add     0x4,rcx\n\t"
11000        $$emit$$"jle     L_end\n\t"
11001        $$emit$$"dec     rcx\n\t"
11002        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11003        $$emit$$"vmovq   xmm0,(rax)\n\t"
11004        $$emit$$"add     0x8,rax\n\t"
11005        $$emit$$"dec     rcx\n\t"
11006        $$emit$$"jge     L_sloop\n\t"
11007        $$emit$$"# L_end:\n\t"
11008     } else {
11009        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11010        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11011     }
11012   %}
11013   ins_encode %{
11014     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11015                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
11016   %}
11017   ins_pipe(pipe_slow);
11018 %}
11019 
11020 // Small ClearArray AVX512 constant length.
11021 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
11022 %{
11023   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
11024             ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11025   match(Set dummy (ClearArray (Binary cnt base) val));
11026   ins_cost(100);
11027   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
11028   format %{ "clear_mem_imm $base, $cnt\n\t" %}
11029   ins_encode %{
11030     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11031   %}
11032   ins_pipe(pipe_slow);
11033 %}
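
// With the length known at compile time and AVX512VL/BW available, clear_mem
// can size the sequence exactly, presumably using masked vector stores for
// the tail (the role of the kReg temp), hence the lowest ins_cost (100) of
// the ClearArray family.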
11034 
11035 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11036                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11037 %{
11038   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11039   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11040   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11041 
11042   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11043   ins_encode %{
11044     __ string_compare($str1$$Register, $str2$$Register,
11045                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11046                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11047   %}
11048   ins_pipe( pipe_slow );
11049 %}
11050 

11919   ins_pipe(ialu_cr_reg_mem);
11920 %}
11921 
11922 // This will generate a signed flags result. This should be OK since
11923 // any compare to a zero should be eq/neq.
11924 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11925 %{
11926   match(Set cr (CmpP src zero));
11927 
11928   format %{ "testq   $src, $src\t# ptr" %}
11929   ins_encode %{
11930     __ testq($src$$Register, $src$$Register);
11931   %}
11932   ins_pipe(ialu_cr_reg_imm);
11933 %}
11934 
11935 // This will generate a signed flags result. This should be OK since
11936 // any compare to a zero should be eq/neq.
11937 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11938 %{
11939   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
11940             n->in(1)->as_Load()->barrier_data() == 0);
11941   match(Set cr (CmpP (LoadP op) zero));
11942 
11943   ins_cost(500); // XXX
11944   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11945   ins_encode %{
11946     __ testq($op$$Address, 0xFFFFFFFF);
11947   %}
11948   ins_pipe(ialu_cr_reg_imm);
11949 %}
11950 
11951 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11952 %{
11953   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
11954             n->in(1)->as_Load()->barrier_data() == 0);
11955   match(Set cr (CmpP (LoadP mem) zero));
11956 
11957   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11958   ins_encode %{
11959     __ cmpq(r12, $mem$$Address);
11960   %}
11961   ins_pipe(ialu_cr_reg_mem);
11962 %}
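
// Note (illustrative): with zero-based compressed oops the heap-base
// register r12 holds 0 throughout compiled code, so the null check can
// reuse it as a ready-made zero operand: cmpq r12, [mem] sets
// ZF = (*mem == 0).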
11963 
11964 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11965 %{
11966   match(Set cr (CmpN op1 op2));
11967 
11968   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11969   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11970   ins_pipe(ialu_cr_reg_reg);
11971 %}
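
// Note (illustrative): compressed oops are compared without decoding --
// the encoding is injective, so 32-bit equality of the narrow values is
// equivalent to 64-bit equality of the decoded pointers; the unsigned flags
// register matches the unsigned nature of pointer comparison.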
11972 
11973 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)

12016 %{
12017   match(Set cr (CmpN src (LoadNKlass mem)));
12018 
12019   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
12020   ins_encode %{
12021     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
12022   %}
12023   ins_pipe(ialu_cr_reg_mem);
12024 %}
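
// Usage note (illustrative): this covers type checks that compare an
// object's narrow klass field against a known class, e.g. the fast path of
// an instanceof/checkcast; cmp_narrow_klass encodes the compressed form of
// the Klass* constant and compares it as a 32-bit value against memory.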
12025 
12026 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12027   match(Set cr (CmpN src zero));
12028 
12029   format %{ "testl   $src, $src\t# compressed ptr" %}
12030   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12031   ins_pipe(ialu_cr_reg_imm);
12032 %}
12033 
12034 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12035 %{
12036   predicate(CompressedOops::base() != nullptr);
12037   match(Set cr (CmpN (LoadN mem) zero));
12038 
12039   ins_cost(500); // XXX
12040   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
12041   ins_encode %{
12042     __ testl($mem$$Address, (int)0xFFFFFFFF);
12043   %}
12044   ins_pipe(ialu_cr_reg_mem);
12045 %}
12046 
12047 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12048 %{
12049   predicate(CompressedOops::base() == nullptr);
12050   match(Set cr (CmpN (LoadN mem) zero));
12051 
12052   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12053   ins_encode %{
12054     __ cmpl(r12, $mem$$Address);
12055   %}
12056   ins_pipe(ialu_cr_reg_mem);
12057 %}
12058 
12059 // Yanked all unsigned pointer compare operations.
12060 // Pointer compares are done with CmpP which is already unsigned.
12061 
12062 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12063 %{
12064   match(Set cr (CmpL op1 op2));
12065 
12066   format %{ "cmpq    $op1, $op2" %}
12067   ins_encode %{
12068     __ cmpq($op1$$Register, $op2$$Register);
12069   %}

12780 
12781   ins_cost(300);
12782   format %{ "call_leaf,runtime " %}
12783   ins_encode(clear_avx, Java_To_Runtime(meth));
12784   ins_pipe(pipe_slow);
12785 %}
12786 
12787 // Call runtime without safepoint and with vector arguments
12788 instruct CallLeafDirectVector(method meth)
12789 %{
12790   match(CallLeafVector);
12791   effect(USE meth);
12792 
12793   ins_cost(300);
12794   format %{ "call_leaf,vector " %}
12795   ins_encode(Java_To_Runtime(meth));
12796   ins_pipe(pipe_slow);
12797 %}
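
// Note (illustrative rationale): unlike the plain leaf call above, this
// encoding omits clear_avx -- vector arguments are passed in full YMM/ZMM
// registers, and a vzeroupper here would clobber their upper halves before
// the callee reads them.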
12798 
12799 // Call runtime without safepoint
12800 // entry point is null, target holds the address to call
12801 instruct CallLeafNoFPInDirect(rRegP target)
12802 %{
12803   predicate(n->as_Call()->entry_point() == nullptr);
12804   match(CallLeafNoFP target);
12805 
12806   ins_cost(300);
12807   format %{ "call_leaf_nofp,runtime indirect " %}
12808   ins_encode %{
12809      __ call($target$$Register);
12810   %}
12811 
12812   ins_pipe(pipe_slow);
12813 %}
12814 
12815 instruct CallLeafNoFPDirect(method meth)
12816 %{
12817   predicate(n->as_Call()->entry_point() != nullptr);
12818   match(CallLeafNoFP);
12819   effect(USE meth);
12820 
12821   ins_cost(300);
12822   format %{ "call_leaf_nofp,runtime " %}
12823   ins_encode(clear_avx, Java_To_Runtime(meth));
12824   ins_pipe(pipe_slow);
12825 %}
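
// Note (illustrative rationale): clear_avx in the direct leaf encodings
// emits vzeroupper when AVX code was generated, so callees compiled with
// legacy SSE encodings do not pay the AVX-SSE transition penalty.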
12826 
12827 // Return Instruction
12828 // Remove the return address & jump to it.
12829 // Notice: We always emit a nop after a ret to make sure there is room
12830 // for safepoint patching
12831 instruct Ret()
12832 %{
12833   match(Return);
12834 
12835   format %{ "ret" %}
12836   ins_encode %{
12837     __ ret(0);