src/hotspot/cpu/x86/x86_64.ad

  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   int offset = 13; // movq r10,#addr; callq (r10)
  494   if (this->ideal_Opcode() != Op_CallLeafVector) {
  495     offset += clear_avx_size();
  496   }
  497   return offset;
  498 }
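// For reference, a sketch of the standard x86-64 encodings behind the
// constants above: a direct call is E8 + rel32 = 5 bytes; the dynamic-call
// inline-cache sequence is a 10-byte movq rax, imm64 followed by that 5-byte
// call = 15 bytes; the runtime sequence is a 10-byte movq r10, imm64 plus a
// 3-byte callq *r10 (41 FF D2) = 13 bytes. clear_avx_size() adds 3 more
// bytes when a vzeroupper (C5 F8 77) is emitted.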

  499 //
  500 // Compute padding required for nodes which need alignment
  501 //
  502 
  503 // The address of the call instruction needs to be 4-byte aligned to
  504 // ensure that it does not span a cache line so that it can be patched.
  505 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  506 {
  507   current_offset += clear_avx_size(); // skip vzeroupper
  508   current_offset += 1; // skip call opcode byte
  509   return align_up(current_offset, alignment_required()) - current_offset;
  510 }
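// Example: if current_offset lands on 7 after the adjustments above and
// alignment_required() is 4, align_up(7, 4) == 8, so one byte of padding is
// emitted and the patched call displacement becomes 4-byte aligned.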
  511 
  512 // The address of the call instruction needs to be 4-byte aligned to
  513 // ensure that it does not span a cache line so that it can be patched.
  514 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  515 {
  516   current_offset += clear_avx_size(); // skip vzeroupper
  517   current_offset += 11; // skip movq instruction + call opcode byte
  518   return align_up(current_offset, alignment_required()) - current_offset;

  870     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  871     if (PreserveFramePointer) {
  872       st->print("\n\t");
  873       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  874       if (framesize > 0) {
  875         st->print("\n\t");
  876         st->print("addq    rbp, #%d", framesize);
  877       }
  878     }
  879   }
  880 
  881   if (VerifyStackAtCalls) {
  882     st->print("\n\t");
  883     framesize -= wordSize;
  884     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  885 #ifdef ASSERT
  886     st->print("\n\t");
  887     st->print("# stack alignment check");
  888 #endif
  889   }
  890   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  891     st->print("\n\t");
  892     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  893     st->print("\n\t");
  894     st->print("je      fast_entry\t");
  895     st->print("\n\t");
  896     st->print("call    #nmethod_entry_barrier_stub\t");
  897     st->print("\n\tfast_entry:");
  898   }
  899   st->cr();
  900 }
  901 #endif
  902 
  903 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  904   Compile* C = ra_->C;
  905   C2_MacroAssembler _masm(&cbuf);
  906 
  907   int framesize = C->output()->frame_size_in_bytes();
  908   int bangsize = C->output()->bang_size_in_bytes();
  909 
  910   if (C->clinit_barrier_on_entry()) {
  911     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  912     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  913 
  914     Label L_skip_barrier;
  915     Register klass = rscratch1;
  916 
  917     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  918     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  919 
  920     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  921 
  922     __ bind(L_skip_barrier);

  923   }
  924 
  925   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);


  926 
  927   C->output()->set_frame_complete(cbuf.insts_size());
  928 
  929   if (C->has_mach_constant_base_node()) {
  930     // NOTE: We set the table base offset here because users might be
  931     // emitted before MachConstantBaseNode.
  932     ConstantTable& constant_table = C->output()->constant_table();
  933     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  934   }
  935 }
  936 
  937 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  938 {
  939   return MachNode::size(ra_); // too many variables; just compute it
  940                               // the hard way
  941 }
  942 
  943 int MachPrologNode::reloc() const
  944 {
  945   return 0; // a large enough number
  946 }
  947 
  948 //=============================================================================
  949 #ifndef PRODUCT
  950 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  951 {
  952   Compile* C = ra_->C;
  953   if (generate_vzeroupper(C)) {
  954     st->print("vzeroupper");
  955     st->cr(); st->print("\t");
  956   }
  957 
  958   int framesize = C->output()->frame_size_in_bytes();
  959   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  960   // Remove word for return adr already pushed
  961   // and RBP
  962   framesize -= 2*wordSize;

  970   if (do_polling() && C->is_method_compilation()) {
  971     st->print("\t");
  972     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  973                  "ja      #safepoint_stub\t"
  974                  "# Safepoint: poll for GC");
  975   }
  976 }
  977 #endif
  978 
  979 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  980 {
  981   Compile* C = ra_->C;
  982   MacroAssembler _masm(&cbuf);
  983 
  984   if (generate_vzeroupper(C)) {
  985     // Clear upper bits of YMM registers when current compiled code uses
  986     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  987     __ vzeroupper();
  988   }
  989 
  990   int framesize = C->output()->frame_size_in_bytes();
  991   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  992   // Remove word for return adr already pushed
  993   // and RBP
  994   framesize -= 2*wordSize;
  995 
  996   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  997 
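  // A sketch of the raw encodings emitted below:
  //   REX.W 83 /0 ib -> addq rsp, imm8   (framesize fits in a signed byte)
  //   REX.W 81 /0 id -> addq rsp, imm32  (larger frames)
  //   0x58|RBP_enc   -> popq rbp (0x5D)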
  998   if (framesize) {
  999     emit_opcode(cbuf, Assembler::REX_W);
 1000     if (framesize < 0x80) {
 1001       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 1002       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1003       emit_d8(cbuf, framesize);
 1004     } else {
 1005       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 1006       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1007       emit_d32(cbuf, framesize);
 1008     }
 1009   }
 1010 
 1011   // popq rbp
 1012   emit_opcode(cbuf, 0x58 | RBP_enc);
 1013 
 1014   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1015     __ reserved_stack_check();
 1016   }
 1017 
 1018   if (do_polling() && C->is_method_compilation()) {
 1019     MacroAssembler _masm(&cbuf);
 1020     Label dummy_label;
 1021     Label* code_stub = &dummy_label;
 1022     if (!C->output()->in_scratch_emit_size()) {
 1023       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1024       C->output()->add_stub(stub);
 1025       code_stub = &stub->entry();
 1026     }
 1027     __ relocate(relocInfo::poll_return_type);
 1028     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1029   }
 1030 }
 1031 
 1032 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1033 {
 1034   return MachNode::size(ra_); // too many variables; just compute it
 1035                               // the hard way
 1036 }
 1037 
 1038 int MachEpilogNode::reloc() const
 1039 {
 1040   return 2; // a large enough number
 1041 }
 1042 
 1043 const Pipeline* MachEpilogNode::pipeline() const
 1044 {
 1045   return MachNode::pipeline_class();
 1046 }
 1047 
 1048 //=============================================================================
 1049 
 1050 enum RC {
 1051   rc_bad,
 1052   rc_int,
 1053   rc_kreg,
 1054   rc_float,
 1055   rc_stack
 1056 };
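// A spill copy dispatches on the (source, destination) register-class pair:
// e.g. (rc_float, rc_stack) becomes an XMM store and (rc_stack, rc_float) a
// load, while the vector cases below additionally key on the ideal register
// kind (Op_VecS .. Op_VecZ).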
 1057 

 1144                 src_offset, dst_offset);
 1145       break;
 1146     case Op_VecZ:
 1147       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1148                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1149                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1150                 "vmovdqu xmm0, [rsp - #64]",
 1151                 src_offset, dst_offset);
 1152       break;
 1153     default:
 1154       ShouldNotReachHere();
 1155     }
 1156 #endif
 1157   }
 1158 }
 1159 
 1160 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 1161                                        PhaseRegAlloc* ra_,
 1162                                        bool do_size,
 1163                                        outputStream* st) const {
 1164   assert(cbuf != NULL || st  != NULL, "sanity");
 1165   // Get registers to move
 1166   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1167   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1168   OptoReg::Name dst_second = ra_->get_reg_second(this);
 1169   OptoReg::Name dst_first = ra_->get_reg_first(this);
 1170 
 1171   enum RC src_second_rc = rc_class(src_second);
 1172   enum RC src_first_rc = rc_class(src_first);
 1173   enum RC dst_second_rc = rc_class(dst_second);
 1174   enum RC dst_first_rc = rc_class(dst_first);
 1175 
 1176   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 1177          "must move at least 1 register" );
 1178 
 1179   if (src_first == dst_first && src_second == dst_second) {
 1180     // Self copy, no move
 1181     return 0;
 1182   }
 1183   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1184     uint ireg = ideal_reg();
 1185     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1186     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1187     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1188       // mem -> mem
 1189       int src_offset = ra_->reg2offset(src_first);
 1190       int dst_offset = ra_->reg2offset(dst_first);
 1191       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1192     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1193       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1194     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1195       int stack_offset = ra_->reg2offset(dst_first);
 1196       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1197     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 1198       int stack_offset = ra_->reg2offset(src_first);
 1199       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1200     } else {
 1201       ShouldNotReachHere();
 1202     }
 1203     return 0;

 1602          st->print("kmovq   %s, %s\t# spill",
 1603                      Matcher::regName[dst_first],
 1604                      Matcher::regName[src_first]);
 1605 #endif
 1606         }
 1607       }
 1608       return 0;
 1609     } else if (dst_first_rc == rc_float) {
 1610       assert(false, "Illegal spill");
 1611       return 0;
 1612     }
 1613   }
 1614 
 1615   assert(0," foo ");
 1616   Unimplemented();
 1617   return 0;
 1618 }
 1619 
 1620 #ifndef PRODUCT
 1621 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1622   implementation(NULL, ra_, false, st);
 1623 }
 1624 #endif
 1625 
 1626 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1627   implementation(&cbuf, ra_, false, NULL);
 1628 }
 1629 
 1630 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1631   return MachNode::size(ra_);
 1632 }
 1633 
 1634 //=============================================================================
 1635 #ifndef PRODUCT
 1636 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1637 {
 1638   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1639   int reg = ra_->get_reg_first(this);
 1640   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1641             Matcher::regName[reg], offset);
 1642 }
 1643 #endif
 1644 
 1645 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1646 {
 1647   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());

 1650     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1651     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1652     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1653     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1654     emit_d32(cbuf, offset);
 1655   } else {
 1656     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1657     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1658     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1659     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1660     emit_d8(cbuf, offset);
 1661   }
 1662 }
 1663 
 1664 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1665 {
 1666   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1667   return (offset < 0x80) ? 5 : 8; // REX
 1668 }
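// Size sketch: REX + 0x8D + ModRM + SIB + disp8 = 5 bytes for offsets below
// 0x80; a 4-byte disp32 replaces the disp8 otherwise, giving 8 bytes.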
 1669 
 1670 //=============================================================================
 1671 #ifndef PRODUCT
 1672 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1673 {
 1674   if (UseCompressedClassPointers) {
 1675     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1676     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1677     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1678   } else {
 1679     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1680                  "# Inline cache check");
 1681   }
 1682   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1683   st->print_cr("\tnop\t# nops to align entry point");
 1684 }
 1685 #endif
 1686 
 1687 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1688 {
 1689   MacroAssembler masm(&cbuf);

 1692     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1693     masm.cmpptr(rax, rscratch1);
 1694   } else {
 1695     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1696   }
 1697 
 1698   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1699 
 1700   /* WARNING: these NOPs are critical so that the verified entry point is
 1701      properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 1702   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1703   if (OptoBreakpoint) {
 1704     // Leave space for int3
 1705     nops_cnt -= 1;
 1706   }
 1707   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1708   if (nops_cnt > 0)
 1709     masm.nop(nops_cnt);
 1710 }
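// Example: if the code emitted so far spans 9 bytes, nops_cnt starts at
// 4 - (9 & 0x3) == 3 (one less under OptoBreakpoint, which reserves a byte
// for int3), padding the verified entry point to the next 4-byte boundary.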
 1711 
 1712 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1713 {
 1714   return MachNode::size(ra_); // too many variables; just compute it
 1715                               // the hard way
 1716 }
 1717 
 1718 
 1719 //=============================================================================
 1720 
 1721 bool Matcher::supports_vector_calling_convention(void) {
 1722   if (EnableVectorSupport && UseVectorStubs) {
 1723     return true;
 1724   }
 1725   return false;
 1726 }
 1727 
 1728 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1729   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1730   int lo = XMM0_num;
 1731   int hi = XMM0b_num;
 1732   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1733   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1734   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1735   return OptoRegPair(hi, lo);
 1736 }
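// The pair is (hi, lo): lo always names XMM0's first slot and hi grows with
// the vector width, so e.g. Op_VecZ reports XMM0p_num as its last slot.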
 1737 
 1738 // Is this branch offset short enough that a short branch can be used?

 2113   %}
 2114 
 2115   enc_class enc_cmov(cmpOp cop)
 2116   %{
 2117     // CMOV
 2118     $$$emit8$primary;
 2119     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 2120   %}
 2121 
 2122   enc_class enc_PartialSubtypeCheck()
 2123   %{
 2124     Register Rrdi = as_Register(RDI_enc); // result register
 2125     Register Rrax = as_Register(RAX_enc); // super class
 2126     Register Rrcx = as_Register(RCX_enc); // killed
 2127     Register Rrsi = as_Register(RSI_enc); // sub class
 2128     Label miss;
 2129     const bool set_cond_codes = true;
 2130 
 2131     MacroAssembler _masm(&cbuf);
 2132     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 2133                                      NULL, &miss,
 2134                                      /*set_cond_codes:*/ true);
 2135     if ($primary) {
 2136       __ xorptr(Rrdi, Rrdi);
 2137     }
 2138     __ bind(miss);
 2139   %}
 2140 
 2141   enc_class clear_avx %{
 2142     debug_only(int off0 = cbuf.insts_size());
 2143     if (generate_vzeroupper(Compile::current())) {
 2144       // Clear upper bits of YMM registers when current compiled code uses
 2145       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 2147       MacroAssembler _masm(&cbuf);
 2148       __ vzeroupper();
 2149     }
 2150     debug_only(int off1 = cbuf.insts_size());
 2151     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 2152   %}
 2153 

 2175     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 2176       // The NOP here is purely to ensure that eliding a call to
 2177       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 2178       __ addr_nop_5();
 2179       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 2180     } else {
 2181       $$$emit8$primary;
 2182       int method_index = resolved_method_index(cbuf);
 2183       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 2184                                                   : static_call_Relocation::spec(method_index);
 2185       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2186                      rspec, RELOC_DISP32);
 2187       address mark = cbuf.insts_mark();
 2188       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 2189         // Calls of the same statically bound method can share
 2190         // a stub to the interpreter.
 2191         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 2192       } else {
 2193         // Emit stubs for static call.
 2194         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 2195         if (stub == NULL) {
 2196           ciEnv::current()->record_failure("CodeCache is full");
 2197           return;
 2198         }
 2199       }
 2200     }
 2201     _masm.clear_inst_mark();
 2202     __ post_call_nop();
 2203   %}
 2204 
 2205   enc_class Java_Dynamic_Call(method meth) %{
 2206     MacroAssembler _masm(&cbuf);
 2207     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 2208     __ post_call_nop();
 2209   %}
 2210 
 2211   enc_class reg_opc_imm(rRegI dst, immI8 shift)
 2212   %{
 2213     // SAL, SAR, SHR
 2214     int dstenc = $dst$$reg;
 2215     if (dstenc >= 8) {

 3037 operand immI_64()
 3038 %{
 3039   predicate( n->get_int() == 64 );
 3040   match(ConI);
 3041 
 3042   op_cost(0);
 3043   format %{ %}
 3044   interface(CONST_INTER);
 3045 %}
 3046 
 3047 // Pointer Immediate
 3048 operand immP()
 3049 %{
 3050   match(ConP);
 3051 
 3052   op_cost(10);
 3053   format %{ %}
 3054   interface(CONST_INTER);
 3055 %}
 3056 
 3057 // NULL Pointer Immediate
 3058 operand immP0()
 3059 %{
 3060   predicate(n->get_ptr() == 0);
 3061   match(ConP);
 3062 
 3063   op_cost(5);
 3064   format %{ %}
 3065   interface(CONST_INTER);
 3066 %}
 3067 
 3068 // Pointer Immediate
 3069 operand immN() %{
 3070   match(ConN);
 3071 
 3072   op_cost(10);
 3073   format %{ %}
 3074   interface(CONST_INTER);
 3075 %}
 3076 
 3077 operand immNKlass() %{
 3078   match(ConNKlass);
 3079 
 3080   op_cost(10);
 3081   format %{ %}
 3082   interface(CONST_INTER);
 3083 %}
 3084 
 3085 // NULL Pointer Immediate
 3086 operand immN0() %{
 3087   predicate(n->get_narrowcon() == 0);
 3088   match(ConN);
 3089 
 3090   op_cost(5);
 3091   format %{ %}
 3092   interface(CONST_INTER);
 3093 %}
 3094 
 3095 operand immP31()
 3096 %{
 3097   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 3098             && (n->get_ptr() >> 31) == 0);
 3099   match(ConP);
 3100 
 3101   op_cost(5);
 3102   format %{ %}
 3103   interface(CONST_INTER);
 3104 %}
 3105 

 3977   %}
 3978 %}
 3979 
 3980 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3981 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3982 %{
 3983   constraint(ALLOC_IN_RC(ptr_reg));
 3984   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3985   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3986 
 3987   op_cost(10);
 3988   format %{"[$reg + $off + $idx << $scale]" %}
 3989   interface(MEMORY_INTER) %{
 3990     base($reg);
 3991     index($idx);
 3992     scale($scale);
 3993     disp($off);
 3994   %}
 3995 %}
 3996 
 3997 // Indirect Narrow Oop Plus Offset Operand
 3998 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 3999 // so we can't free r12 even when CompressedOops::base() == NULL.
 4000 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 4001   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4002   constraint(ALLOC_IN_RC(ptr_reg));
 4003   match(AddP (DecodeN reg) off);
 4004 
 4005   op_cost(10);
 4006   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4007   interface(MEMORY_INTER) %{
 4008     base(0xc); // R12
 4009     index($reg);
 4010     scale(0x3);
 4011     disp($off);
 4012   %}
 4013 %}
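// Example decode: with CompressedOops::shift() == 3, a narrow oop N at
// offset 16 resolves to R12 + (N << 3) + 16, exactly the
// [base + index*8 + disp] form encoded above (base fixed to R12).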
 4014 
 4015 // Indirect Memory Operand
 4016 operand indirectNarrow(rRegN reg)
 4017 %{
 4018   predicate(CompressedOops::shift() == 0);
 4019   constraint(ALLOC_IN_RC(ptr_reg));

 4323     equal(0x4, "e");
 4324     not_equal(0x5, "ne");
 4325     less(0x2, "b");
 4326     greater_equal(0x3, "ae");
 4327     less_equal(0x6, "be");
 4328     greater(0x7, "a");
 4329     overflow(0x0, "o");
 4330     no_overflow(0x1, "no");
 4331   %}
 4332 %}
 4333 
 4334 //----------OPERAND CLASSES----------------------------------------------------
 4335 // Operand Classes are groups of operands that are used to simplify
 4336 // instruction definitions by not requiring the AD writer to specify separate
 4337 // instructions for every form of operand when the instruction accepts
 4338 // multiple operand types with the same basic encoding and format.  The classic
 4339 // case of this is memory operands.
 4340 
 4341 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4342                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4343                indCompressedOopOffset,
 4344                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4345                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4346                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4347 
 4348 //----------PIPELINE-----------------------------------------------------------
 4349 // Rules which define the behavior of the target architectures pipeline.
 4350 pipeline %{
 4351 
 4352 //----------ATTRIBUTES---------------------------------------------------------
 4353 attributes %{
 4354   variable_size_instructions;        // Variable-size instructions
 4355   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4356   instruction_unit_size = 1;         // An instruction is 1 byte long
 4357   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4358   instruction_fetch_units = 1;       // of 16 bytes
 4359 
 4360   // List of nop instructions
 4361   nops( MachNop );
 4362 %}
 4363 

 5823   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 5824   ins_encode %{
 5825     __ movl($dst$$Register, $src$$constant);
 5826   %}
 5827   ins_pipe(ialu_reg);
 5828 %}
 5829 
 5830 instruct loadConF(regF dst, immF con) %{
 5831   match(Set dst con);
 5832   ins_cost(125);
 5833   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 5834   ins_encode %{
 5835     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5836   %}
 5837   ins_pipe(pipe_slow);
 5838 %}
 5839 
 5840 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 5841   match(Set dst src);
 5842   effect(KILL cr);
 5843   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
 5844   ins_encode %{
 5845     __ xorq($dst$$Register, $dst$$Register);
 5846   %}
 5847   ins_pipe(ialu_reg);
 5848 %}
 5849 
 5850 instruct loadConN(rRegN dst, immN src) %{
 5851   match(Set dst src);
 5852 
 5853   ins_cost(125);
 5854   format %{ "movl    $dst, $src\t# compressed ptr" %}
 5855   ins_encode %{
 5856     address con = (address)$src$$constant;
 5857     if (con == NULL) {
 5858       ShouldNotReachHere();
 5859     } else {
 5860       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 5861     }
 5862   %}
 5863   ins_pipe(ialu_reg_fat); // XXX
 5864 %}
 5865 
 5866 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 5867   match(Set dst src);
 5868 
 5869   ins_cost(125);
 5870   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 5871   ins_encode %{
 5872     address con = (address)$src$$constant;
 5873     if (con == NULL) {
 5874       ShouldNotReachHere();
 5875     } else {
 5876       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 5877     }
 5878   %}
 5879   ins_pipe(ialu_reg_fat); // XXX
 5880 %}
 5881 
 5882 instruct loadConF0(regF dst, immF0 src)
 5883 %{
 5884   match(Set dst src);
 5885   ins_cost(100);
 5886 
 5887   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 5888   ins_encode %{
 5889     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5890   %}
 5891   ins_pipe(pipe_slow);
 5892 %}
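// xorps dst,dst is the canonical 0.0f idiom: it needs no constant-table
// load, which is why its cost (100) undercuts loadConF's (125).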
 5893 

 6076   %}
 6077   ins_pipe(ialu_mem_reg); // XXX
 6078 %}
 6079 
 6080 // Store Pointer
 6081 instruct storeP(memory mem, any_RegP src)
 6082 %{
 6083   predicate(n->as_Store()->barrier_data() == 0);
 6084   match(Set mem (StoreP mem src));
 6085 
 6086   ins_cost(125); // XXX
 6087   format %{ "movq    $mem, $src\t# ptr" %}
 6088   ins_encode %{
 6089     __ movq($mem$$Address, $src$$Register);
 6090   %}
 6091   ins_pipe(ialu_mem_reg);
 6092 %}
 6093 
 6094 instruct storeImmP0(memory mem, immP0 zero)
 6095 %{
 6096   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
 6097   match(Set mem (StoreP mem zero));
 6098 
 6099   ins_cost(125); // XXX
 6100   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 6101   ins_encode %{
 6102     __ movq($mem$$Address, r12);
 6103   %}
 6104   ins_pipe(ialu_mem_reg);
 6105 %}
 6106 
 6107 // Store NULL Pointer, mark word, or other simple pointer constant.
 6108 instruct storeImmP(memory mem, immP31 src)
 6109 %{
 6110   predicate(n->as_Store()->barrier_data() == 0);
 6111   match(Set mem (StoreP mem src));
 6112 
 6113   ins_cost(150); // XXX
 6114   format %{ "movq    $mem, $src\t# ptr" %}
 6115   ins_encode %{
 6116     __ movq($mem$$Address, $src$$constant);
 6117   %}
 6118   ins_pipe(ialu_mem_imm);
 6119 %}
 6120 
 6121 // Store Compressed Pointer
 6122 instruct storeN(memory mem, rRegN src)
 6123 %{
 6124   match(Set mem (StoreN mem src));
 6125 
 6126   ins_cost(125); // XXX
 6127   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6128   ins_encode %{
 6129     __ movl($mem$$Address, $src$$Register);
 6130   %}
 6131   ins_pipe(ialu_mem_reg);
 6132 %}
 6133 
 6134 instruct storeNKlass(memory mem, rRegN src)
 6135 %{
 6136   match(Set mem (StoreNKlass mem src));
 6137 
 6138   ins_cost(125); // XXX
 6139   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6140   ins_encode %{
 6141     __ movl($mem$$Address, $src$$Register);
 6142   %}
 6143   ins_pipe(ialu_mem_reg);
 6144 %}
 6145 
 6146 instruct storeImmN0(memory mem, immN0 zero)
 6147 %{
 6148   predicate(CompressedOops::base() == NULL);
 6149   match(Set mem (StoreN mem zero));
 6150 
 6151   ins_cost(125); // XXX
 6152   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 6153   ins_encode %{
 6154     __ movl($mem$$Address, r12);
 6155   %}
 6156   ins_pipe(ialu_mem_reg);
 6157 %}
 6158 
 6159 instruct storeImmN(memory mem, immN src)
 6160 %{
 6161   match(Set mem (StoreN mem src));
 6162 
 6163   ins_cost(150); // XXX
 6164   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6165   ins_encode %{
 6166     address con = (address)$src$$constant;
 6167     if (con == NULL) {
 6168       __ movl($mem$$Address, 0);
 6169     } else {
 6170       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 6171     }
 6172   %}
 6173   ins_pipe(ialu_mem_imm);
 6174 %}
 6175 
 6176 instruct storeImmNKlass(memory mem, immNKlass src)
 6177 %{
 6178   match(Set mem (StoreNKlass mem src));
 6179 
 6180   ins_cost(150); // XXX
 6181   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6182   ins_encode %{
 6183     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 6184   %}
 6185   ins_pipe(ialu_mem_imm);
 6186 %}
 6187 
 6188 // Store Integer Immediate
 6189 instruct storeImmI0(memory mem, immI_0 zero)
 6190 %{
 6191   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6192   match(Set mem (StoreI mem zero));
 6193 
 6194   ins_cost(125); // XXX
 6195   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 6196   ins_encode %{
 6197     __ movl($mem$$Address, r12);
 6198   %}
 6199   ins_pipe(ialu_mem_reg);
 6200 %}
 6201 
 6202 instruct storeImmI(memory mem, immI src)
 6203 %{
 6204   match(Set mem (StoreI mem src));
 6205 
 6206   ins_cost(150);
 6207   format %{ "movl    $mem, $src\t# int" %}
 6208   ins_encode %{
 6209     __ movl($mem$$Address, $src$$constant);
 6210   %}
 6211   ins_pipe(ialu_mem_imm);
 6212 %}
 6213 
 6214 // Store Long Immediate
 6215 instruct storeImmL0(memory mem, immL0 zero)
 6216 %{
 6217   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6218   match(Set mem (StoreL mem zero));
 6219 
 6220   ins_cost(125); // XXX
 6221   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 6222   ins_encode %{
 6223     __ movq($mem$$Address, r12);
 6224   %}
 6225   ins_pipe(ialu_mem_reg);
 6226 %}
 6227 
 6228 instruct storeImmL(memory mem, immL32 src)
 6229 %{
 6230   match(Set mem (StoreL mem src));
 6231 
 6232   ins_cost(150);
 6233   format %{ "movq    $mem, $src\t# long" %}
 6234   ins_encode %{
 6235     __ movq($mem$$Address, $src$$constant);
 6236   %}
 6237   ins_pipe(ialu_mem_imm);
 6238 %}
 6239 
 6240 // Store Short/Char Immediate
 6241 instruct storeImmC0(memory mem, immI_0 zero)
 6242 %{
 6243   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6244   match(Set mem (StoreC mem zero));
 6245 
 6246   ins_cost(125); // XXX
 6247   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6248   ins_encode %{
 6249     __ movw($mem$$Address, r12);
 6250   %}
 6251   ins_pipe(ialu_mem_reg);
 6252 %}
 6253 
 6254 instruct storeImmI16(memory mem, immI16 src)
 6255 %{
 6256   predicate(UseStoreImmI16);
 6257   match(Set mem (StoreC mem src));
 6258 
 6259   ins_cost(150);
 6260   format %{ "movw    $mem, $src\t# short/char" %}
 6261   ins_encode %{
 6262     __ movw($mem$$Address, $src$$constant);
 6263   %}
 6264   ins_pipe(ialu_mem_imm);
 6265 %}
 6266 
 6267 // Store Byte Immediate
 6268 instruct storeImmB0(memory mem, immI_0 zero)
 6269 %{
 6270   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6271   match(Set mem (StoreB mem zero));
 6272 
 6273   ins_cost(125); // XXX
 6274   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 6275   ins_encode %{
 6276     __ movb($mem$$Address, r12);
 6277   %}
 6278   ins_pipe(ialu_mem_reg);
 6279 %}
 6280 
 6281 instruct storeImmB(memory mem, immI8 src)
 6282 %{
 6283   match(Set mem (StoreB mem src));
 6284 
 6285   ins_cost(150); // XXX
 6286   format %{ "movb    $mem, $src\t# byte" %}
 6287   ins_encode %{
 6288     __ movb($mem$$Address, $src$$constant);
 6289   %}
 6290   ins_pipe(ialu_mem_imm);
 6291 %}
 6292 
 6293 // Store CMS card-mark Immediate
 6294 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 6295 %{
 6296   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6297   match(Set mem (StoreCM mem zero));
 6298 
 6299   ins_cost(125); // XXX
 6300   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 6301   ins_encode %{
 6302     __ movb($mem$$Address, r12);
 6303   %}
 6304   ins_pipe(ialu_mem_reg);
 6305 %}
 6306 
 6307 instruct storeImmCM0(memory mem, immI_0 src)
 6308 %{
 6309   match(Set mem (StoreCM mem src));
 6310 
 6311   ins_cost(150); // XXX
 6312   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 6313   ins_encode %{
 6314     __ movb($mem$$Address, $src$$constant);
 6315   %}
 6316   ins_pipe(ialu_mem_imm);
 6317 %}
 6318 
 6319 // Store Float
 6320 instruct storeF(memory mem, regF src)
 6321 %{
 6322   match(Set mem (StoreF mem src));
 6323 
 6324   ins_cost(95); // XXX
 6325   format %{ "movss   $mem, $src\t# float" %}
 6326   ins_encode %{
 6327     __ movflt($mem$$Address, $src$$XMMRegister);
 6328   %}
 6329   ins_pipe(pipe_slow); // XXX
 6330 %}
 6331 
 6332 // Store immediate Float value (it is faster than store from XMM register)
 6333 instruct storeF0(memory mem, immF0 zero)
 6334 %{
 6335   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6336   match(Set mem (StoreF mem zero));
 6337 
 6338   ins_cost(25); // XXX
 6339   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 6340   ins_encode %{
 6341     __ movl($mem$$Address, r12);
 6342   %}
 6343   ins_pipe(ialu_mem_reg);
 6344 %}
 6345 
 6346 instruct storeF_imm(memory mem, immF src)
 6347 %{
 6348   match(Set mem (StoreF mem src));
 6349 
 6350   ins_cost(50);
 6351   format %{ "movl    $mem, $src\t# float" %}
 6352   ins_encode %{
 6353     __ movl($mem$$Address, jint_cast($src$$constant));
 6354   %}
 6355   ins_pipe(ialu_mem_imm);
 6356 %}
 6357 
 6358 // Store Double
 6359 instruct storeD(memory mem, regD src)
 6360 %{
 6361   match(Set mem (StoreD mem src));
 6362 
 6363   ins_cost(95); // XXX
 6364   format %{ "movsd   $mem, $src\t# double" %}
 6365   ins_encode %{
 6366     __ movdbl($mem$$Address, $src$$XMMRegister);
 6367   %}
 6368   ins_pipe(pipe_slow); // XXX
 6369 %}
 6370 
 6371 // Store immediate double 0.0 (it is faster than store from XMM register)
 6372 instruct storeD0_imm(memory mem, immD0 src)
 6373 %{
 6374   predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
 6375   match(Set mem (StoreD mem src));
 6376 
 6377   ins_cost(50);
 6378   format %{ "movq    $mem, $src\t# double 0." %}
 6379   ins_encode %{
 6380     __ movq($mem$$Address, $src$$constant);
 6381   %}
 6382   ins_pipe(ialu_mem_imm);
 6383 %}
 6384 
 6385 instruct storeD0(memory mem, immD0 zero)
 6386 %{
 6387   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6388   match(Set mem (StoreD mem zero));
 6389 
 6390   ins_cost(25); // XXX
 6391   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 6392   ins_encode %{
 6393     __ movq($mem$$Address, r12);
 6394   %}
 6395   ins_pipe(ialu_mem_reg);
 6396 %}
 6397 
 6398 instruct storeSSI(stackSlotI dst, rRegI src)
 6399 %{
 6400   match(Set dst src);
 6401 
 6402   ins_cost(100);
 6403   format %{ "movl    $dst, $src\t# int stk" %}
 6404   opcode(0x89);
 6405   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
 6406   ins_pipe( ialu_mem_reg );
 6407 %}

 6911   format %{ "MEMBAR-storestore (empty encoding)" %}
 6912   ins_encode( );
 6913   ins_pipe(empty);
 6914 %}
 6915 
 6916 //----------Move Instructions--------------------------------------------------
 6917 
 6918 instruct castX2P(rRegP dst, rRegL src)
 6919 %{
 6920   match(Set dst (CastX2P src));
 6921 
 6922   format %{ "movq    $dst, $src\t# long->ptr" %}
 6923   ins_encode %{
 6924     if ($dst$$reg != $src$$reg) {
 6925       __ movptr($dst$$Register, $src$$Register);
 6926     }
 6927   %}
 6928   ins_pipe(ialu_reg_reg); // XXX
 6929 %}
 6930 
 6931 instruct castP2X(rRegL dst, rRegP src)
 6932 %{
 6933   match(Set dst (CastP2X src));
 6934 
 6935   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6936   ins_encode %{
 6937     if ($dst$$reg != $src$$reg) {
 6938       __ movptr($dst$$Register, $src$$Register);
 6939     }
 6940   %}
 6941   ins_pipe(ialu_reg_reg); // XXX
 6942 %}
 6943 
 6944 // Convert oop into int for vectors alignment masking
 6945 instruct convP2I(rRegI dst, rRegP src)
 6946 %{
 6947   match(Set dst (ConvL2I (CastP2X src)));
 6948 
 6949   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6950   ins_encode %{

11509   effect(DEF dst, USE src);
11510   ins_cost(100);
11511   format %{ "movd    $dst,$src\t# MoveI2F" %}
11512   ins_encode %{
11513     __ movdl($dst$$XMMRegister, $src$$Register);
11514   %}
11515   ins_pipe( pipe_slow );
11516 %}
11517 
11518 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11519   match(Set dst (MoveL2D src));
11520   effect(DEF dst, USE src);
11521   ins_cost(100);
11522   format %{ "movd    $dst,$src\t# MoveL2D" %}
11523   ins_encode %{
11524      __ movdq($dst$$XMMRegister, $src$$Register);
11525   %}
11526   ins_pipe( pipe_slow );
11527 %}
11528 

11529 // Fast clearing of an array
11530 // Small ClearArray non-AVX512.
11531 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11532                   Universe dummy, rFlagsReg cr)
11533 %{
11534   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11535   match(Set dummy (ClearArray cnt base));
11536   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11537 
11538   format %{ $$template
11539     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11540     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11541     $$emit$$"jg      LARGE\n\t"
11542     $$emit$$"dec     rcx\n\t"
11543     $$emit$$"js      DONE\t# Zero length\n\t"
11544     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11545     $$emit$$"dec     rcx\n\t"
11546     $$emit$$"jge     LOOP\n\t"
11547     $$emit$$"jmp     DONE\n\t"
11548     $$emit$$"# LARGE:\n\t"
11549     if (UseFastStosb) {
11550        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11551        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11552     } else if (UseXMMForObjInit) {
11553        $$emit$$"mov     rdi,rax\n\t"
11554        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11555        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11556        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11564        $$emit$$"jl      L_tail\n\t"
11565        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11566        $$emit$$"add     0x20,rax\n\t"
11567        $$emit$$"sub     0x4,rcx\n\t"
11568        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11569        $$emit$$"add     0x4,rcx\n\t"
11570        $$emit$$"jle     L_end\n\t"
11571        $$emit$$"dec     rcx\n\t"
11572        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11573        $$emit$$"vmovq   xmm0,(rax)\n\t"
11574        $$emit$$"add     0x8,rax\n\t"
11575        $$emit$$"dec     rcx\n\t"
11576        $$emit$$"jge     L_sloop\n\t"
11577        $$emit$$"# L_end:\n\t"
11578     } else {
11579        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11580     }
11581     $$emit$$"# DONE"
11582   %}
11583   ins_encode %{
11584     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11585                  $tmp$$XMMRegister, false, knoreg);
11586   %}
11587   ins_pipe(pipe_slow);
11588 %}
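// All ClearArray flavors funnel into MacroAssembler::clear_mem; the boolean
// selects large-block mode and the last argument passes an AVX-512 mask
// register (knoreg when EVEX masking is not used).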
11589 
11590 // Small ClearArray AVX512 non-constant length.
11591 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11592                        Universe dummy, rFlagsReg cr)
11593 %{
11594   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11595   match(Set dummy (ClearArray cnt base));
11596   ins_cost(125);
11597   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11598 
11599   format %{ $$template
11600     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11601     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11602     $$emit$$"jg      LARGE\n\t"
11603     $$emit$$"dec     rcx\n\t"
11604     $$emit$$"js      DONE\t# Zero length\n\t"
11605     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11606     $$emit$$"dec     rcx\n\t"
11607     $$emit$$"jge     LOOP\n\t"
11608     $$emit$$"jmp     DONE\n\t"
11609     $$emit$$"# LARGE:\n\t"
11610     if (UseFastStosb) {
11611        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11612        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11613     } else if (UseXMMForObjInit) {
11614        $$emit$$"mov     rdi,rax\n\t"
11615        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11616        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11617        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11625        $$emit$$"jl      L_tail\n\t"
11626        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11627        $$emit$$"add     0x20,rax\n\t"
11628        $$emit$$"sub     0x4,rcx\n\t"
11629        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11630        $$emit$$"add     0x4,rcx\n\t"
11631        $$emit$$"jle     L_end\n\t"
11632        $$emit$$"dec     rcx\n\t"
11633        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11634        $$emit$$"vmovq   xmm0,(rax)\n\t"
11635        $$emit$$"add     0x8,rax\n\t"
11636        $$emit$$"dec     rcx\n\t"
11637        $$emit$$"jge     L_sloop\n\t"
11638        $$emit$$"# L_end:\n\t"
11639     } else {
11640        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11641     }
11642     $$emit$$"# DONE"
11643   %}
11644   ins_encode %{
11645     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11646                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11647   %}
11648   ins_pipe(pipe_slow);
11649 %}
11650 
11651 // Large ClearArray non-AVX512.
11652 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11653                         Universe dummy, rFlagsReg cr)
11654 %{
11655   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11656   match(Set dummy (ClearArray cnt base));
11657   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11658 
11659   format %{ $$template
11660     if (UseFastStosb) {
11661        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11662        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11663        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11664     } else if (UseXMMForObjInit) {
11665        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11666        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11667        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11668        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11669        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11670        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11671        $$emit$$"add     0x40,rax\n\t"
11672        $$emit$$"# L_zero_64_bytes:\n\t"
11673        $$emit$$"sub     0x8,rcx\n\t"
11674        $$emit$$"jge     L_loop\n\t"
11675        $$emit$$"add     0x4,rcx\n\t"
11676        $$emit$$"jl      L_tail\n\t"
11677        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11678        $$emit$$"add     0x20,rax\n\t"
11679        $$emit$$"sub     0x4,rcx\n\t"
11680        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11681        $$emit$$"add     0x4,rcx\n\t"
11682        $$emit$$"jle     L_end\n\t"
11683        $$emit$$"dec     rcx\n\t"
11684        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11685        $$emit$$"vmovq   xmm0,(rax)\n\t"
11686        $$emit$$"add     0x8,rax\n\t"
11687        $$emit$$"dec     rcx\n\t"
11688        $$emit$$"jge     L_sloop\n\t"
11689        $$emit$$"# L_end:\n\t"
11690     } else {
11691        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11692        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11693     }
11694   %}
11695   ins_encode %{
11696     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11697                  $tmp$$XMMRegister, true, knoreg);
11698   %}
11699   ins_pipe(pipe_slow);
11700 %}
11701 
11702 // Large ClearArray AVX512.
11703 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11704                              Universe dummy, rFlagsReg cr)
11705 %{
11706   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11707   match(Set dummy (ClearArray cnt base));
11708   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11709 
11710   format %{ $$template
11711     if (UseFastStosb) {
11712        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11713        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11714        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11715     } else if (UseXMMForObjInit) {
11716        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11717        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11718        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11719        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11720        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11721        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11722        $$emit$$"add     0x40,rax\n\t"
11723        $$emit$$"# L_zero_64_bytes:\n\t"
11724        $$emit$$"sub     0x8,rcx\n\t"
11725        $$emit$$"jge     L_loop\n\t"
11726        $$emit$$"add     0x4,rcx\n\t"
11727        $$emit$$"jl      L_tail\n\t"
11728        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11729        $$emit$$"add     0x20,rax\n\t"
11730        $$emit$$"sub     0x4,rcx\n\t"
11731        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11732        $$emit$$"add     0x4,rcx\n\t"
11733        $$emit$$"jle     L_end\n\t"
11734        $$emit$$"dec     rcx\n\t"
11735        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11736        $$emit$$"vmovq   xmm0,(rax)\n\t"
11737        $$emit$$"add     0x8,rax\n\t"
11738        $$emit$$"dec     rcx\n\t"
11739        $$emit$$"jge     L_sloop\n\t"
11740        $$emit$$"# L_end:\n\t"
11741     } else {
11742        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11743        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11744     }
11745   %}
11746   ins_encode %{
11747     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11748                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11749   %}
11750   ins_pipe(pipe_slow);
11751 %}
11752 
11753 // Small ClearArray AVX512 constant length.
11754 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
11755 %{
11756   predicate(!((ClearArrayNode*)n)->is_large() &&
11757               ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11758   match(Set dummy (ClearArray cnt base));
11759   ins_cost(100);
11760   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11761   format %{ "clear_mem_imm $base, $cnt\n\t" %}
11762   ins_encode %{
11763    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11764   %}
11765   ins_pipe(pipe_slow);
11766 %}
11767 
11768 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11769                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11770 %{
11771   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11772   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11773   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11774 
11775   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11776   ins_encode %{
11777     __ string_compare($str1$$Register, $str2$$Register,
11778                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11779                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11780   %}
11781   ins_pipe( pipe_slow );
11782 %}
11783 

12674   ins_pipe(ialu_cr_reg_mem);
12675 %}
12676 
12677 // This will generate a signed flags result. This should be OK since
12678 // any compare to a zero should be eq/neq.
12679 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12680 %{
12681   match(Set cr (CmpP src zero));
12682 
12683   format %{ "testq   $src, $src\t# ptr" %}
12684   ins_encode %{
12685     __ testq($src$$Register, $src$$Register);
12686   %}
12687   ins_pipe(ialu_cr_reg_imm);
12688 %}
12689 
12690 // This will generate a signed flags result. This should be OK since
12691 // any compare to a zero should be eq/neq.
12692 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12693 %{
12694   predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
12695             n->in(1)->as_Load()->barrier_data() == 0);
12696   match(Set cr (CmpP (LoadP op) zero));
12697 
12698   ins_cost(500); // XXX
12699   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12700   ins_encode %{
12701     __ testq($op$$Address, 0xFFFFFFFF);
12702   %}
12703   ins_pipe(ialu_cr_reg_imm);
12704 %}
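// Note: testq takes a sign-extended 32-bit immediate, so the 0xFFFFFFFF
// emitted here is 0xFFFFFFFFFFFFFFFF at runtime, matching the format above.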
12705 
12706 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12707 %{
12708   predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
12709             n->in(1)->as_Load()->barrier_data() == 0);
12710   match(Set cr (CmpP (LoadP mem) zero));
12711 
12712   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12713   ins_encode %{
12714     __ cmpq(r12, $mem$$Address);
12715   %}
12716   ins_pipe(ialu_cr_reg_mem);
12717 %}
12718 
12719 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12720 %{
12721   match(Set cr (CmpN op1 op2));
12722 
12723   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12724   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12725   ins_pipe(ialu_cr_reg_reg);
12726 %}
12727 
12728 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)

12771 %{
12772   match(Set cr (CmpN src (LoadNKlass mem)));
12773 
12774   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
12775   ins_encode %{
12776     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
12777   %}
12778   ins_pipe(ialu_cr_reg_mem);
12779 %}
12780 
12781 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12782   match(Set cr (CmpN src zero));
12783 
12784   format %{ "testl   $src, $src\t# compressed ptr" %}
12785   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12786   ins_pipe(ialu_cr_reg_imm);
12787 %}
12788 
12789 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12790 %{
12791   predicate(CompressedOops::base() != NULL);
12792   match(Set cr (CmpN (LoadN mem) zero));
12793 
12794   ins_cost(500); // XXX
12795   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
12796   ins_encode %{
12797     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
12798   %}
12799   ins_pipe(ialu_cr_reg_mem);
12800 %}
12801 
12802 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12803 %{
12804   predicate(CompressedOops::base() == NULL);
12805   match(Set cr (CmpN (LoadN mem) zero));
12806 
12807   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12808   ins_encode %{
12809     __ cmpl(r12, $mem$$Address);
12810   %}
12811   ins_pipe(ialu_cr_reg_mem);
12812 %}
12813 
12814 // Yanked all unsigned pointer compare operations.
12815 // Pointer compares are done with CmpP which is already unsigned.
12816 
12817 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12818 %{
12819   match(Set cr (CmpL op1 op2));
12820 
12821   format %{ "cmpq    $op1, $op2" %}
12822   ins_encode %{
12823     __ cmpq($op1$$Register, $op2$$Register);
12824   %}

13535 
13536   ins_cost(300);
13537   format %{ "call_leaf,runtime " %}
13538   ins_encode(clear_avx, Java_To_Runtime(meth));
13539   ins_pipe(pipe_slow);
13540 %}
13541 
13542 // Call runtime without safepoint and with vector arguments
13543 instruct CallLeafDirectVector(method meth)
13544 %{
13545   match(CallLeafVector);
13546   effect(USE meth);
13547 
13548   ins_cost(300);
13549   format %{ "call_leaf,vector " %}
13550   ins_encode(Java_To_Runtime(meth));
13551   ins_pipe(pipe_slow);
13552 %}
13553 
13554 // Call runtime without safepoint
13555 instruct CallLeafNoFPDirect(method meth)
13556 %{

13557   match(CallLeafNoFP);
13558   effect(USE meth);
13559 
13560   ins_cost(300);
13561   format %{ "call_leaf_nofp,runtime " %}
13562   ins_encode(clear_avx, Java_To_Runtime(meth));
13563   ins_pipe(pipe_slow);
13564 %}
13565 
13566 // Return Instruction
13567 // Remove the return address & jump to it.
13568 // Notice: We always emit a nop after a ret to make sure there is room
13569 // for safepoint patching
13570 instruct Ret()
13571 %{
13572   match(Return);
13573 
13574   format %{ "ret" %}
13575   ins_encode %{
13576     __ ret(0);

  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   if (_entry_point == nullptr) {
  494     // CallLeafNoFPInDirect
  495     return 3; // callq (register)
  496   }
  497   int offset = 13; // movq r10,#addr; callq (r10)
  498   if (this->ideal_Opcode() != Op_CallLeafVector) {
  499     offset += clear_avx_size();
  500   }
  501   return offset;
  502 }
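// A sketch of the indirect case: callq through a REX-prefixed register
// (e.g. 41 FF D2 for callq *r10) encodes in exactly 3 bytes.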
  503 
  504 //
  505 // Compute padding required for nodes which need alignment
  506 //
  507 
  508 // The address of the call instruction needs to be 4-byte aligned to
  509 // ensure that it does not span a cache line so that it can be patched.
  510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  511 {
  512   current_offset += clear_avx_size(); // skip vzeroupper
  513   current_offset += 1; // skip call opcode byte
  514   return align_up(current_offset, alignment_required()) - current_offset;
  515 }
  516 
  517 // The address of the call instruction needs to be 4-byte aligned to
  518 // ensure that it does not span a cache line so that it can be patched.
  519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  520 {
  521   current_offset += clear_avx_size(); // skip vzeroupper
  522   current_offset += 11; // skip movq instruction + call opcode byte
  523   return align_up(current_offset, alignment_required()) - current_offset;

  875     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  876     if (PreserveFramePointer) {
  877       st->print("\n\t");
  878       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  879       if (framesize > 0) {
  880         st->print("\n\t");
  881         st->print("addq    rbp, #%d", framesize);
  882       }
  883     }
  884   }
  885 
  886   if (VerifyStackAtCalls) {
  887     st->print("\n\t");
  888     framesize -= wordSize;
  889     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  890 #ifdef ASSERT
  891     st->print("\n\t");
  892     st->print("# stack alignment check");
  893 #endif
  894   }
  895   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
  896     st->print("\n\t");
  897     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  898     st->print("\n\t");
  899     st->print("je      fast_entry\t");
  900     st->print("\n\t");
  901     st->print("call    #nmethod_entry_barrier_stub\t");
  902     st->print("\n\tfast_entry:");
  903   }
  904   st->cr();
  905 }
  906 #endif
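      // Roughly, the guard printed above behaves like this C-level sketch
      // (illustrative only; the names follow the pseudo-ops in the format):
      //
      //   if (nmethod_guard_value != disarmed_guard_value) {
      //     nmethod_entry_barrier_stub();  // e.g. let the GC fix oops, then disarm
      //   }
      //   // fast_entry: fall through into the method body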
  907 
  908 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  909   Compile* C = ra_->C;
  910   C2_MacroAssembler _masm(&cbuf);
  911 
  912   __ verified_entry(C);













  913 
  914   if (ra_->C->stub_function() == nullptr) {
  915     __ entry_barrier();
  916   }
  917 
  918   if (!Compile::current()->output()->in_scratch_emit_size()) {
  919     __ bind(*_verified_entry);
  920   }
  921 
  922   C->output()->set_frame_complete(cbuf.insts_size());
  923 
  924   if (C->has_mach_constant_base_node()) {
  925     // NOTE: We set the table base offset here because users of the
  926     // constant table might be emitted before the MachConstantBaseNode.
  927     ConstantTable& constant_table = C->output()->constant_table();
  928     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  929   }
  930 }
  931 






  932 int MachPrologNode::reloc() const
  933 {
  934   return 0; // a large enough number
  935 }
  936 
  937 //=============================================================================
  938 #ifndef PRODUCT
  939 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  940 {
  941   Compile* C = ra_->C;
  942   if (generate_vzeroupper(C)) {
  943     st->print("vzeroupper");
  944     st->cr(); st->print("\t");
  945   }
  946 
  947   int framesize = C->output()->frame_size_in_bytes();
  948   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  949   // Remove two words for the return address (already pushed)
  950   // and RBP
  951   framesize -= 2*wordSize;

  959   if (do_polling() && C->is_method_compilation()) {
  960     st->print("\t");
  961     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  962                  "ja      #safepoint_stub\t"
  963                  "# Safepoint: poll for GC");
  964   }
  965 }
  966 #endif
  967 
  968 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  969 {
  970   Compile* C = ra_->C;
  971   MacroAssembler _masm(&cbuf);
  972 
  973   if (generate_vzeroupper(C)) {
  974     // Clear upper bits of YMM registers when current compiled code uses
  975     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  976     __ vzeroupper();
  977   }
  978 
  979   // Subtract two words to account for return address and rbp
  980   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  981   __ remove_frame(initial_framesize, C->needs_stack_repair());




















  982 
  983   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  984     __ reserved_stack_check();
  985   }
  986 
  987   if (do_polling() && C->is_method_compilation()) {
  988     MacroAssembler _masm(&cbuf);
  989     Label dummy_label;
  990     Label* code_stub = &dummy_label;
  991     if (!C->output()->in_scratch_emit_size()) {
  992       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  993       C->output()->add_stub(stub);
  994       code_stub = &stub->entry();
  995     }
  996     __ relocate(relocInfo::poll_return_type);
  997     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  998   }
  999 }
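      // The return poll above reduces to comparing rsp against the thread-local
      // polling word (a sketch of the at_return/in_nmethod case):
      //
      //   cmpq rsp, [r15_thread + polling word offset]
      //   ja   C2SafepointPollStub   // armed poll: rsp is above the watermark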
 1000 






 1001 int MachEpilogNode::reloc() const
 1002 {
 1003   return 2; // a large enough number
 1004 }
 1005 
 1006 const Pipeline* MachEpilogNode::pipeline() const
 1007 {
 1008   return MachNode::pipeline_class();
 1009 }
 1010 
 1011 //=============================================================================
 1012 
 1013 enum RC {
 1014   rc_bad,
 1015   rc_int,
 1016   rc_kreg,
 1017   rc_float,
 1018   rc_stack
 1019 };
 1020 

 1107                 src_offset, dst_offset);
 1108       break;
 1109     case Op_VecZ:
 1110       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1113                 "vmovdqu xmm0, [rsp - #64]",
 1114                 src_offset, dst_offset);
 1115       break;
 1116     default:
 1117       ShouldNotReachHere();
 1118     }
 1119 #endif
 1120   }
 1121 }
 1122 
 1123 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 1124                                        PhaseRegAlloc* ra_,
 1125                                        bool do_size,
 1126                                        outputStream* st) const {
 1127   assert(cbuf != nullptr || st  != nullptr, "sanity");
 1128   // Get registers to move
 1129   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1130   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1131   OptoReg::Name dst_second = ra_->get_reg_second(this);
 1132   OptoReg::Name dst_first = ra_->get_reg_first(this);
 1133 
 1134   enum RC src_second_rc = rc_class(src_second);
 1135   enum RC src_first_rc = rc_class(src_first);
 1136   enum RC dst_second_rc = rc_class(dst_second);
 1137   enum RC dst_first_rc = rc_class(dst_first);
 1138 
 1139   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 1140          "must move at least 1 register" );
 1141 
 1142   if (src_first == dst_first && src_second == dst_second) {
 1143     // Self copy, no move
 1144     return 0;
 1145   }
 1146   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1147     uint ireg = ideal_reg();
 1148     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1149     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1150     if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 1151       // mem -> mem
 1152       int src_offset = ra_->reg2offset(src_first);
 1153       int dst_offset = ra_->reg2offset(dst_first);
 1154       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1155     } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 1156       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1157     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 1158       int stack_offset = ra_->reg2offset(dst_first);
 1159       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1160     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 1161       int stack_offset = ra_->reg2offset(src_first);
 1162       vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
 1163     } else {
 1164       ShouldNotReachHere();
 1165     }
 1166     return 0;

 1565          st->print("kmovq   %s, %s\t# spill",
 1566                      Matcher::regName[dst_first],
 1567                      Matcher::regName[src_first]);
 1568 #endif
 1569         }
 1570       }
 1571       return 0;
 1572     } else if (dst_first_rc == rc_float) {
 1573       assert(false, "Illegal spill");
 1574       return 0;
 1575     }
 1576   }
 1577 
 1578   assert(0," foo ");
 1579   Unimplemented();
 1580   return 0;
 1581 }
 1582 
 1583 #ifndef PRODUCT
 1584 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1585   implementation(nullptr, ra_, false, st);
 1586 }
 1587 #endif
 1588 
 1589 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1590   implementation(&cbuf, ra_, false, nullptr);
 1591 }
 1592 
 1593 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1594   return MachNode::size(ra_);
 1595 }
 1596 
 1597 //=============================================================================
 1598 #ifndef PRODUCT
 1599 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1600 {
 1601   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1602   int reg = ra_->get_reg_first(this);
 1603   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1604             Matcher::regName[reg], offset);
 1605 }
 1606 #endif
 1607 
 1608 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1609 {
 1610   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());

 1613     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1614     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1615     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1616     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1617     emit_d32(cbuf, offset);
 1618   } else {
 1619     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1620     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1621     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1622     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1623     emit_d8(cbuf, offset);
 1624   }
 1625 }
 1626 
 1627 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1628 {
 1629   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1630   return (offset < 0x80) ? 5 : 8; // REX
 1631 }
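      // Size accounting for the two LEA encodings emitted above:
      //   REX.W + 8D + ModRM + SIB + disp8  = 5 bytes  (offset <  0x80)
      //   REX.W + 8D + ModRM + SIB + disp32 = 8 bytes  (offset >= 0x80)
      // which matches the (offset < 0x80) ? 5 : 8 returned here.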
 1632 
 1633 //=============================================================================
 1634 #ifndef PRODUCT
 1635 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1636 {
 1637   st->print_cr("MachVEPNode");
 1638 }
 1639 #endif
 1640 
 1641 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1642 {
 1643   C2_MacroAssembler _masm(&cbuf);
 1644   uint insts_size = cbuf.insts_size();
 1645   if (!_verified) {
 1646     if (UseCompressedClassPointers) {
 1647       __ load_klass(rscratch1, j_rarg0, rscratch2);
 1648       __ cmpptr(rax, rscratch1);
 1649     } else {
 1650       __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1651     }
 1652     __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1653   } else {
 1654     // TODO 8284443 Avoid creation of temporary frame
 1655     if (ra_->C->stub_function() == nullptr) {
 1656       __ verified_entry(ra_->C, 0);
 1657       __ entry_barrier();
 1658       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1659       __ remove_frame(initial_framesize, false);
 1660     }
 1661     // Unpack inline type args passed as oop and then jump to
 1662     // the verified entry point (skipping the unverified entry).
 1663     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1664     // Emit code for verified entry and save increment for stack repair on return
 1665     __ verified_entry(ra_->C, sp_inc);
 1666     if (Compile::current()->output()->in_scratch_emit_size()) {
 1667       Label dummy_verified_entry;
 1668       __ jmp(dummy_verified_entry);
 1669     } else {
 1670       __ jmp(*_verified_entry);
 1671     }
 1672   }
 1673   /* WARNING: these NOPs are critical so that the verified entry point is
 1674      properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 1675   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1676   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1677   if (nops_cnt > 0) {
 1678     __ nop(nops_cnt);
 1679   }
 1680 }
 1681 
 1682 //=============================================================================
 1683 #ifndef PRODUCT
 1684 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1685 {
 1686   if (UseCompressedClassPointers) {
 1687     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1688     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1689     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1690   } else {
 1691     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1692                  "# Inline cache check");
 1693   }
 1694   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1695   st->print_cr("\tnop\t# nops to align entry point");
 1696 }
 1697 #endif
 1698 
 1699 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1700 {
 1701   MacroAssembler masm(&cbuf);

 1704     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1705     masm.cmpptr(rax, rscratch1);
 1706   } else {
 1707     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1708   }
 1709 
 1710   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1711 
 1712   /* WARNING: these NOPs are critical so that the verified entry point is
 1713      properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 1714   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1715   if (OptoBreakpoint) {
 1716     // Leave space for int3
 1717     nops_cnt -= 1;
 1718   }
 1719   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1720   if (nops_cnt > 0)
 1721     masm.nop(nops_cnt);
 1722 }
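      // The nop padding arithmetic used above, as a standalone sketch:
      //
      //   static int nops_to_align4(int emitted_bytes) {
      //     int n = 4 - (emitted_bytes & 0x3);
      //     return n & 0x3;   // 0 when the code is already 4-byte aligned
      //   }
      //
      // e.g. nops_to_align4(13) == 3 and nops_to_align4(16) == 0.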
 1723 







 1724 //=============================================================================
 1725 
 1726 bool Matcher::supports_vector_calling_convention(void) {
 1727   if (EnableVectorSupport && UseVectorStubs) {
 1728     return true;
 1729   }
 1730   return false;
 1731 }
 1732 
 1733 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1734   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1735   int lo = XMM0_num;
 1736   int hi = XMM0b_num;
 1737   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1738   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1739   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1740   return OptoRegPair(hi, lo);
 1741 }
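      // The second element of the pair presumably names the highest 32-bit slice
      // of XMM0 that the value occupies: XMM0b for the default 64-bit case,
      // XMM0d for a 128-bit VecX, XMM0h for a 256-bit VecY and XMM0p for a
      // 512-bit VecZ; the vector is always returned in XMM0, only the reported
      // width grows.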
 1742 
 1743 // Is this branch offset short enough that a short branch can be used?

 2118   %}
 2119 
 2120   enc_class enc_cmov(cmpOp cop)
 2121   %{
 2122     // CMOV
 2123     $$$emit8$primary;
 2124     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 2125   %}
 2126 
 2127   enc_class enc_PartialSubtypeCheck()
 2128   %{
 2129     Register Rrdi = as_Register(RDI_enc); // result register
 2130     Register Rrax = as_Register(RAX_enc); // super class
 2131     Register Rrcx = as_Register(RCX_enc); // killed
 2132     Register Rrsi = as_Register(RSI_enc); // sub class
 2133     Label miss;
 2134     const bool set_cond_codes = true;
 2135 
 2136     MacroAssembler _masm(&cbuf);
 2137     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 2138                                      nullptr, &miss,
 2139                                      /*set_cond_codes:*/ true);
 2140     if ($primary) {
 2141       __ xorptr(Rrdi, Rrdi);
 2142     }
 2143     __ bind(miss);
 2144   %}
 2145 
 2146   enc_class clear_avx %{
 2147     debug_only(int off0 = cbuf.insts_size());
 2148     if (generate_vzeroupper(Compile::current())) {
 2149       // Clear upper bits of YMM registers when the current compiled code
 2150       // uses wide vectors, to avoid the AVX <-> SSE transition penalty
 2151       // during the call.
 2152       MacroAssembler _masm(&cbuf);
 2153       __ vzeroupper();
 2154     }
 2155     debug_only(int off1 = cbuf.insts_size());
 2156     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 2157   %}
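      // vzeroupper encodes as C5 F8 77 (3 bytes), so clear_avx_size() is
      // presumably 3 when the vzeroupper is emitted and 0 otherwise; the assert
      // keeps that size prediction honest against what was actually emitted.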
 2158 

 2180     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 2181       // The NOP here is purely to ensure that eliding a call to
 2182       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 2183       __ addr_nop_5();
 2184       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 2185     } else {
 2186       $$$emit8$primary;
 2187       int method_index = resolved_method_index(cbuf);
 2188       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 2189                                                   : static_call_Relocation::spec(method_index);
 2190       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2191                      rspec, RELOC_DISP32);
 2192       address mark = cbuf.insts_mark();
 2193       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 2194         // Calls of the same statically bound method can share
 2195         // a stub to the interpreter.
 2196         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 2197       } else {
 2198         // Emit stubs for static call.
 2199         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 2200         if (stub == nullptr) {
 2201           ciEnv::current()->record_failure("CodeCache is full");
 2202           return;
 2203         }
 2204       }
 2205     }
 2206     _masm.clear_inst_mark();
 2207     __ post_call_nop();
 2208   %}
 2209 
 2210   enc_class Java_Dynamic_Call(method meth) %{
 2211     MacroAssembler _masm(&cbuf);
 2212     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 2213     __ post_call_nop();
 2214   %}
 2215 
 2216   enc_class reg_opc_imm(rRegI dst, immI8 shift)
 2217   %{
 2218     // SAL, SAR, SHR
 2219     int dstenc = $dst$$reg;
 2220     if (dstenc >= 8) {

 3042 operand immI_64()
 3043 %{
 3044   predicate( n->get_int() == 64 );
 3045   match(ConI);
 3046 
 3047   op_cost(0);
 3048   format %{ %}
 3049   interface(CONST_INTER);
 3050 %}
 3051 
 3052 // Pointer Immediate
 3053 operand immP()
 3054 %{
 3055   match(ConP);
 3056 
 3057   op_cost(10);
 3058   format %{ %}
 3059   interface(CONST_INTER);
 3060 %}
 3061 
 3062 // Null Pointer Immediate
 3063 operand immP0()
 3064 %{
 3065   predicate(n->get_ptr() == 0);
 3066   match(ConP);
 3067 
 3068   op_cost(5);
 3069   format %{ %}
 3070   interface(CONST_INTER);
 3071 %}
 3072 
 3073 // Pointer Immediate
 3074 operand immN() %{
 3075   match(ConN);
 3076 
 3077   op_cost(10);
 3078   format %{ %}
 3079   interface(CONST_INTER);
 3080 %}
 3081 
 3082 operand immNKlass() %{
 3083   match(ConNKlass);
 3084 
 3085   op_cost(10);
 3086   format %{ %}
 3087   interface(CONST_INTER);
 3088 %}
 3089 
 3090 // Null Pointer Immediate
 3091 operand immN0() %{
 3092   predicate(n->get_narrowcon() == 0);
 3093   match(ConN);
 3094 
 3095   op_cost(5);
 3096   format %{ %}
 3097   interface(CONST_INTER);
 3098 %}
 3099 
 3100 operand immP31()
 3101 %{
 3102   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 3103             && (n->get_ptr() >> 31) == 0);
 3104   match(ConP);
 3105 
 3106   op_cost(5);
 3107   format %{ %}
 3108   interface(CONST_INTER);
 3109 %}
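      // (n->get_ptr() >> 31) == 0 keeps the constant in [0, 2^31), i.e. values
      // whose sign-extended 32-bit immediate reproduces the full 64-bit pointer,
      // which is what lets instructions such as storeImmP use a plain imm32.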
 3110 

 3982   %}
 3983 %}
 3984 
 3985 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3986 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3987 %{
 3988   constraint(ALLOC_IN_RC(ptr_reg));
 3989   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3990   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3991 
 3992   op_cost(10);
 3993   format %{"[$reg + $off + $idx << $scale]" %}
 3994   interface(MEMORY_INTER) %{
 3995     base($reg);
 3996     index($idx);
 3997     scale($scale);
 3998     disp($off);
 3999   %}
 4000 %}
 4001 
 4002 // Indirect Narrow Oop Operand
 4003 operand indCompressedOop(rRegN reg) %{
 4004   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4005   constraint(ALLOC_IN_RC(ptr_reg));
 4006   match(DecodeN reg);
 4007 
 4008   op_cost(10);
 4009   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 4010   interface(MEMORY_INTER) %{
 4011     base(0xc); // R12
 4012     index($reg);
 4013     scale(0x3);
 4014     disp(0x0);
 4015   %}
 4016 %}
 4017 
 4018 // Indirect Narrow Oop Plus Offset Operand
 4019 // Note: the x86 architecture doesn't support "scale * index + offset" without
 4020 // a base, so we can't free r12 even with CompressedOops::base() == nullptr.
 4021 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 4022   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4023   constraint(ALLOC_IN_RC(ptr_reg));
 4024   match(AddP (DecodeN reg) off);
 4025 
 4026   op_cost(10);
 4027   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4028   interface(MEMORY_INTER) %{
 4029     base(0xc); // R12
 4030     index($reg);
 4031     scale(0x3);
 4032     disp($off);
 4033   %}
 4034 %}
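      // Both operands fold the DecodeN into the address mode. The decode is
      //   oop = CompressedOops::base() + ((uint64_t)narrow << 3)  (+ off)
      // and with the heap base pinned in R12 that is exactly the
      // [R12 + $reg << 3 (+ disp)] form printed above.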
 4035 
 4036 // Indirect Memory Operand
 4037 operand indirectNarrow(rRegN reg)
 4038 %{
 4039   predicate(CompressedOops::shift() == 0);
 4040   constraint(ALLOC_IN_RC(ptr_reg));

 4344     equal(0x4, "e");
 4345     not_equal(0x5, "ne");
 4346     less(0x2, "b");
 4347     greater_equal(0x3, "ae");
 4348     less_equal(0x6, "be");
 4349     greater(0x7, "a");
 4350     overflow(0x0, "o");
 4351     no_overflow(0x1, "no");
 4352   %}
 4353 %}
 4354 
 4355 //----------OPERAND CLASSES----------------------------------------------------
 4356 // Operand Classes are groups of operands that are used to simplify
 4357 // instruction definitions by not requiring the AD writer to specify separate
 4358 // instructions for every form of operand when the instruction accepts
 4359 // multiple operand types with the same basic encoding and format.  The classic
 4360 // case of this is memory operands.
 4361 
 4362 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4363                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4364                indCompressedOop, indCompressedOopOffset,
 4365                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4366                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4367                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4368 
 4369 //----------PIPELINE-----------------------------------------------------------
 4370 // Rules which define the behavior of the target architecture's pipeline.
 4371 pipeline %{
 4372 
 4373 //----------ATTRIBUTES---------------------------------------------------------
 4374 attributes %{
 4375   variable_size_instructions;        // Variable size instructions
 4376   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4377   instruction_unit_size = 1;         // An instruction is 1 byte long
 4378   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4379   instruction_fetch_units = 1;       // of 16 bytes
 4380 
 4381   // List of nop instructions
 4382   nops( MachNop );
 4383 %}
 4384 

 5844   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 5845   ins_encode %{
 5846     __ movl($dst$$Register, $src$$constant);
 5847   %}
 5848   ins_pipe(ialu_reg);
 5849 %}
 5850 
 5851 instruct loadConF(regF dst, immF con) %{
 5852   match(Set dst con);
 5853   ins_cost(125);
 5854   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 5855   ins_encode %{
 5856     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5857   %}
 5858   ins_pipe(pipe_slow);
 5859 %}
 5860 
 5861 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 5862   match(Set dst src);
 5863   effect(KILL cr);
 5864   format %{ "xorq    $dst, $src\t# compressed null ptr" %}
 5865   ins_encode %{
 5866     __ xorq($dst$$Register, $dst$$Register);
 5867   %}
 5868   ins_pipe(ialu_reg);
 5869 %}
 5870 
 5871 instruct loadConN(rRegN dst, immN src) %{
 5872   match(Set dst src);
 5873 
 5874   ins_cost(125);
 5875   format %{ "movl    $dst, $src\t# compressed ptr" %}
 5876   ins_encode %{
 5877     address con = (address)$src$$constant;
 5878     if (con == nullptr) {
 5879       ShouldNotReachHere();
 5880     } else {
 5881       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 5882     }
 5883   %}
 5884   ins_pipe(ialu_reg_fat); // XXX
 5885 %}
 5886 
 5887 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 5888   match(Set dst src);
 5889 
 5890   ins_cost(125);
 5891   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 5892   ins_encode %{
 5893     address con = (address)$src$$constant;
 5894     if (con == nullptr) {
 5895       ShouldNotReachHere();
 5896     } else {
 5897       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 5898     }
 5899   %}
 5900   ins_pipe(ialu_reg_fat); // XXX
 5901 %}
 5902 
 5903 instruct loadConF0(regF dst, immF0 src)
 5904 %{
 5905   match(Set dst src);
 5906   ins_cost(100);
 5907 
 5908   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 5909   ins_encode %{
 5910     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5911   %}
 5912   ins_pipe(pipe_slow);
 5913 %}
 5914 

 6097   %}
 6098   ins_pipe(ialu_mem_reg); // XXX
 6099 %}
 6100 
 6101 // Store Pointer
 6102 instruct storeP(memory mem, any_RegP src)
 6103 %{
 6104   predicate(n->as_Store()->barrier_data() == 0);
 6105   match(Set mem (StoreP mem src));
 6106 
 6107   ins_cost(125); // XXX
 6108   format %{ "movq    $mem, $src\t# ptr" %}
 6109   ins_encode %{
 6110     __ movq($mem$$Address, $src$$Register);
 6111   %}
 6112   ins_pipe(ialu_mem_reg);
 6113 %}
 6114 
 6115 instruct storeImmP0(memory mem, immP0 zero)
 6116 %{
 6117   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 6118   match(Set mem (StoreP mem zero));
 6119 
 6120   ins_cost(125); // XXX
 6121   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 6122   ins_encode %{
 6123     __ movq($mem$$Address, r12);
 6124   %}
 6125   ins_pipe(ialu_mem_reg);
 6126 %}
 6127 
 6128 // Store null pointer, mark word, or other simple pointer constant.
 6129 instruct storeImmP(memory mem, immP31 src)
 6130 %{
 6131   predicate(n->as_Store()->barrier_data() == 0);
 6132   match(Set mem (StoreP mem src));
 6133 
 6134   ins_cost(150); // XXX
 6135   format %{ "movq    $mem, $src\t# ptr" %}
 6136   ins_encode %{
 6137     __ movq($mem$$Address, $src$$constant);
 6138   %}
 6139   ins_pipe(ialu_mem_imm);
 6140 %}
 6141 
 6142 // Store Compressed Pointer
 6143 instruct storeN(memory mem, rRegN src)
 6144 %{
 6145   match(Set mem (StoreN mem src));
 6146 
 6147   ins_cost(125); // XXX
 6148   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6149   ins_encode %{
 6150     __ movl($mem$$Address, $src$$Register);
 6151   %}
 6152   ins_pipe(ialu_mem_reg);
 6153 %}
 6154 
 6155 instruct storeNKlass(memory mem, rRegN src)
 6156 %{
 6157   match(Set mem (StoreNKlass mem src));
 6158 
 6159   ins_cost(125); // XXX
 6160   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6161   ins_encode %{
 6162     __ movl($mem$$Address, $src$$Register);
 6163   %}
 6164   ins_pipe(ialu_mem_reg);
 6165 %}
 6166 
 6167 instruct storeImmN0(memory mem, immN0 zero)
 6168 %{
 6169   predicate(CompressedOops::base() == nullptr);
 6170   match(Set mem (StoreN mem zero));
 6171 
 6172   ins_cost(125); // XXX
 6173   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 6174   ins_encode %{
 6175     __ movl($mem$$Address, r12);
 6176   %}
 6177   ins_pipe(ialu_mem_reg);
 6178 %}
 6179 
 6180 instruct storeImmN(memory mem, immN src)
 6181 %{
 6182   match(Set mem (StoreN mem src));
 6183 
 6184   ins_cost(150); // XXX
 6185   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6186   ins_encode %{
 6187     address con = (address)$src$$constant;
 6188     if (con == nullptr) {
 6189       __ movl($mem$$Address, 0);
 6190     } else {
 6191       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 6192     }
 6193   %}
 6194   ins_pipe(ialu_mem_imm);
 6195 %}
 6196 
 6197 instruct storeImmNKlass(memory mem, immNKlass src)
 6198 %{
 6199   match(Set mem (StoreNKlass mem src));
 6200 
 6201   ins_cost(150); // XXX
 6202   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6203   ins_encode %{
 6204     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 6205   %}
 6206   ins_pipe(ialu_mem_imm);
 6207 %}
 6208 
 6209 // Store Integer Immediate
 6210 instruct storeImmI0(memory mem, immI_0 zero)
 6211 %{
 6212   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6213   match(Set mem (StoreI mem zero));
 6214 
 6215   ins_cost(125); // XXX
 6216   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 6217   ins_encode %{
 6218     __ movl($mem$$Address, r12);
 6219   %}
 6220   ins_pipe(ialu_mem_reg);
 6221 %}
 6222 
 6223 instruct storeImmI(memory mem, immI src)
 6224 %{
 6225   match(Set mem (StoreI mem src));
 6226 
 6227   ins_cost(150);
 6228   format %{ "movl    $mem, $src\t# int" %}
 6229   ins_encode %{
 6230     __ movl($mem$$Address, $src$$constant);
 6231   %}
 6232   ins_pipe(ialu_mem_imm);
 6233 %}
 6234 
 6235 // Store Long Immediate
 6236 instruct storeImmL0(memory mem, immL0 zero)
 6237 %{
 6238   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6239   match(Set mem (StoreL mem zero));
 6240 
 6241   ins_cost(125); // XXX
 6242   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 6243   ins_encode %{
 6244     __ movq($mem$$Address, r12);
 6245   %}
 6246   ins_pipe(ialu_mem_reg);
 6247 %}
 6248 
 6249 instruct storeImmL(memory mem, immL32 src)
 6250 %{
 6251   match(Set mem (StoreL mem src));
 6252 
 6253   ins_cost(150);
 6254   format %{ "movq    $mem, $src\t# long" %}
 6255   ins_encode %{
 6256     __ movq($mem$$Address, $src$$constant);
 6257   %}
 6258   ins_pipe(ialu_mem_imm);
 6259 %}
 6260 
 6261 // Store Short/Char Immediate
 6262 instruct storeImmC0(memory mem, immI_0 zero)
 6263 %{
 6264   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6265   match(Set mem (StoreC mem zero));
 6266 
 6267   ins_cost(125); // XXX
 6268   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6269   ins_encode %{
 6270     __ movw($mem$$Address, r12);
 6271   %}
 6272   ins_pipe(ialu_mem_reg);
 6273 %}
 6274 
 6275 instruct storeImmI16(memory mem, immI16 src)
 6276 %{
 6277   predicate(UseStoreImmI16);
 6278   match(Set mem (StoreC mem src));
 6279 
 6280   ins_cost(150);
 6281   format %{ "movw    $mem, $src\t# short/char" %}
 6282   ins_encode %{
 6283     __ movw($mem$$Address, $src$$constant);
 6284   %}
 6285   ins_pipe(ialu_mem_imm);
 6286 %}
 6287 
 6288 // Store Byte Immediate
 6289 instruct storeImmB0(memory mem, immI_0 zero)
 6290 %{
 6291   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6292   match(Set mem (StoreB mem zero));
 6293 
 6294   ins_cost(125); // XXX
 6295   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 6296   ins_encode %{
 6297     __ movb($mem$$Address, r12);
 6298   %}
 6299   ins_pipe(ialu_mem_reg);
 6300 %}
 6301 
 6302 instruct storeImmB(memory mem, immI8 src)
 6303 %{
 6304   match(Set mem (StoreB mem src));
 6305 
 6306   ins_cost(150); // XXX
 6307   format %{ "movb    $mem, $src\t# byte" %}
 6308   ins_encode %{
 6309     __ movb($mem$$Address, $src$$constant);
 6310   %}
 6311   ins_pipe(ialu_mem_imm);
 6312 %}
 6313 
 6314 // Store CMS card-mark Immediate
 6315 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 6316 %{
 6317   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6318   match(Set mem (StoreCM mem zero));
 6319 
 6320   ins_cost(125); // XXX
 6321   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 6322   ins_encode %{
 6323     __ movb($mem$$Address, r12);
 6324   %}
 6325   ins_pipe(ialu_mem_reg);
 6326 %}
 6327 
 6328 instruct storeImmCM0(memory mem, immI_0 src)
 6329 %{
 6330   match(Set mem (StoreCM mem src));
 6331 
 6332   ins_cost(150); // XXX
 6333   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 6334   ins_encode %{
 6335     __ movb($mem$$Address, $src$$constant);
 6336   %}
 6337   ins_pipe(ialu_mem_imm);
 6338 %}
 6339 
 6340 // Store Float
 6341 instruct storeF(memory mem, regF src)
 6342 %{
 6343   match(Set mem (StoreF mem src));
 6344 
 6345   ins_cost(95); // XXX
 6346   format %{ "movss   $mem, $src\t# float" %}
 6347   ins_encode %{
 6348     __ movflt($mem$$Address, $src$$XMMRegister);
 6349   %}
 6350   ins_pipe(pipe_slow); // XXX
 6351 %}
 6352 
 6353 // Store immediate Float value (it is faster than store from XMM register)
 6354 instruct storeF0(memory mem, immF0 zero)
 6355 %{
 6356   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6357   match(Set mem (StoreF mem zero));
 6358 
 6359   ins_cost(25); // XXX
 6360   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 6361   ins_encode %{
 6362     __ movl($mem$$Address, r12);
 6363   %}
 6364   ins_pipe(ialu_mem_reg);
 6365 %}
 6366 
 6367 instruct storeF_imm(memory mem, immF src)
 6368 %{
 6369   match(Set mem (StoreF mem src));
 6370 
 6371   ins_cost(50);
 6372   format %{ "movl    $mem, $src\t# float" %}
 6373   ins_encode %{
 6374     __ movl($mem$$Address, jint_cast($src$$constant));
 6375   %}
 6376   ins_pipe(ialu_mem_imm);
 6377 %}
 6378 
 6379 // Store Double
 6380 instruct storeD(memory mem, regD src)
 6381 %{
 6382   match(Set mem (StoreD mem src));
 6383 
 6384   ins_cost(95); // XXX
 6385   format %{ "movsd   $mem, $src\t# double" %}
 6386   ins_encode %{
 6387     __ movdbl($mem$$Address, $src$$XMMRegister);
 6388   %}
 6389   ins_pipe(pipe_slow); // XXX
 6390 %}
 6391 
 6392 // Store immediate double 0.0 (it is faster than store from XMM register)
 6393 instruct storeD0_imm(memory mem, immD0 src)
 6394 %{
 6395   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 6396   match(Set mem (StoreD mem src));
 6397 
 6398   ins_cost(50);
 6399   format %{ "movq    $mem, $src\t# double 0." %}
 6400   ins_encode %{
 6401     __ movq($mem$$Address, $src$$constant);
 6402   %}
 6403   ins_pipe(ialu_mem_imm);
 6404 %}
 6405 
 6406 instruct storeD0(memory mem, immD0 zero)
 6407 %{
 6408   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6409   match(Set mem (StoreD mem zero));
 6410 
 6411   ins_cost(25); // XXX
 6412   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 6413   ins_encode %{
 6414     __ movq($mem$$Address, r12);
 6415   %}
 6416   ins_pipe(ialu_mem_reg);
 6417 %}
 6418 
 6419 instruct storeSSI(stackSlotI dst, rRegI src)
 6420 %{
 6421   match(Set dst src);
 6422 
 6423   ins_cost(100);
 6424   format %{ "movl    $dst, $src\t# int stk" %}
 6425   opcode(0x89);
 6426   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
 6427   ins_pipe( ialu_mem_reg );
 6428 %}

 6932   format %{ "MEMBAR-storestore (empty encoding)" %}
 6933   ins_encode( );
 6934   ins_pipe(empty);
 6935 %}
 6936 
 6937 //----------Move Instructions--------------------------------------------------
 6938 
 6939 instruct castX2P(rRegP dst, rRegL src)
 6940 %{
 6941   match(Set dst (CastX2P src));
 6942 
 6943   format %{ "movq    $dst, $src\t# long->ptr" %}
 6944   ins_encode %{
 6945     if ($dst$$reg != $src$$reg) {
 6946       __ movptr($dst$$Register, $src$$Register);
 6947     }
 6948   %}
 6949   ins_pipe(ialu_reg_reg); // XXX
 6950 %}
 6951 
 6952 instruct castN2X(rRegL dst, rRegN src)
 6953 %{
 6954   match(Set dst (CastP2X src));
 6955 
 6956   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6957   ins_encode %{
 6958     if ($dst$$reg != $src$$reg) {
 6959       __ movptr($dst$$Register, $src$$Register);
 6960     }
 6961   %}
 6962   ins_pipe(ialu_reg_reg); // XXX
 6963 %}
 6964 
 6965 instruct castP2X(rRegL dst, rRegP src)
 6966 %{
 6967   match(Set dst (CastP2X src));
 6968 
 6969   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6970   ins_encode %{
 6971     if ($dst$$reg != $src$$reg) {
 6972       __ movptr($dst$$Register, $src$$Register);
 6973     }
 6974   %}
 6975   ins_pipe(ialu_reg_reg); // XXX
 6976 %}
 6977 
 6978 // Convert oop into int for vector alignment masking
 6979 instruct convP2I(rRegI dst, rRegP src)
 6980 %{
 6981   match(Set dst (ConvL2I (CastP2X src)));
 6982 
 6983   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6984   ins_encode %{

11543   effect(DEF dst, USE src);
11544   ins_cost(100);
11545   format %{ "movd    $dst,$src\t# MoveI2F" %}
11546   ins_encode %{
11547     __ movdl($dst$$XMMRegister, $src$$Register);
11548   %}
11549   ins_pipe( pipe_slow );
11550 %}
11551 
11552 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11553   match(Set dst (MoveL2D src));
11554   effect(DEF dst, USE src);
11555   ins_cost(100);
11556   format %{ "movd    $dst,$src\t# MoveL2D" %}
11557   ins_encode %{
11558      __ movdq($dst$$XMMRegister, $src$$Register);
11559   %}
11560   ins_pipe( pipe_slow );
11561 %}
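      // MoveI2F/MoveL2D are raw bit moves, not numeric conversions; a C++
      // equivalent is a memcpy of the bit pattern (sketch):
      //
      //   #include <cstdint>
      //   #include <cstring>
      //   static double move_l2d(int64_t bits) {
      //     double d;
      //     std::memcpy(&d, &bits, sizeof(d));  // movdq: GPR bits -> XMM, unchanged
      //     return d;
      //   }
      //
      // e.g. move_l2d(0x3FF0000000000000LL) == 1.0.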
11562 
11563 
11564 // Fast clearing of an array
11565 // Small ClearArray non-AVX512.
11566 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11567                   Universe dummy, rFlagsReg cr)
11568 %{
11569   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11570   match(Set dummy (ClearArray (Binary cnt base) val));
11571   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11572 
11573   format %{ $$template
11574     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11575     $$emit$$"jg      LARGE\n\t"
11576     $$emit$$"dec     rcx\n\t"
11577     $$emit$$"js      DONE\t# Zero length\n\t"
11578     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11579     $$emit$$"dec     rcx\n\t"
11580     $$emit$$"jge     LOOP\n\t"
11581     $$emit$$"jmp     DONE\n\t"
11582     $$emit$$"# LARGE:\n\t"
11583     if (UseFastStosb) {
11584        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
11585        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11586     } else if (UseXMMForObjInit) {
11587        $$emit$$"movdq   $tmp, $val\n\t"
11588        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11589        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11590        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11591        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11592        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11593        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11594        $$emit$$"add     0x40,rax\n\t"
11595        $$emit$$"# L_zero_64_bytes:\n\t"
11596        $$emit$$"sub     0x8,rcx\n\t"
11597        $$emit$$"jge     L_loop\n\t"
11598        $$emit$$"add     0x4,rcx\n\t"
11599        $$emit$$"jl      L_tail\n\t"
11600        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11601        $$emit$$"add     0x20,rax\n\t"
11602        $$emit$$"sub     0x4,rcx\n\t"
11603        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11604        $$emit$$"add     0x4,rcx\n\t"
11605        $$emit$$"jle     L_end\n\t"
11606        $$emit$$"dec     rcx\n\t"
11607        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11608        $$emit$$"vmovq   xmm0,(rax)\n\t"
11609        $$emit$$"add     0x8,rax\n\t"
11610        $$emit$$"dec     rcx\n\t"
11611        $$emit$$"jge     L_sloop\n\t"
11612        $$emit$$"# L_end:\n\t"
11613     } else {
11614        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11615     }
11616     $$emit$$"# DONE"
11617   %}
11618   ins_encode %{
11619     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11620                  $tmp$$XMMRegister, false, false);
11621   %}
11622   ins_pipe(pipe_slow);
11623 %}
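      // C-level sketch of the strategy this template prints (illustrative only;
      // short_cnt stands in for InitArrayShortSize expressed in 8-byte words):
      //
      //   static void clear_words(uint64_t* base, long cnt, uint64_t val, long short_cnt) {
      //     if (cnt <= short_cnt) {
      //       for (long i = cnt - 1; i >= 0; i--) base[i] = val;  // the dec/jge LOOP
      //     } else {
      //       for (long i = 0; i < cnt; i++) base[i] = val;       // rep stos / XMM path
      //     }
      //   }
      //
      // The word_copy_only variants below differ in never taking the byte-granular
      // rep stosb shortcut.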
11624 
11625 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11626                             Universe dummy, rFlagsReg cr)
11627 %{
11628   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11629   match(Set dummy (ClearArray (Binary cnt base) val));
11630   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11631 
11632   format %{ $$template
11633     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11634     $$emit$$"jg      LARGE\n\t"
11635     $$emit$$"dec     rcx\n\t"
11636     $$emit$$"js      DONE\t# Zero length\n\t"
11637     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11638     $$emit$$"dec     rcx\n\t"
11639     $$emit$$"jge     LOOP\n\t"
11640     $$emit$$"jmp     DONE\n\t"
11641     $$emit$$"# LARGE:\n\t"
11642     if (UseXMMForObjInit) {
11643        $$emit$$"movdq   $tmp, $val\n\t"
11644        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11645        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11646        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11647        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11648        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11649        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11650        $$emit$$"add     0x40,rax\n\t"
11651        $$emit$$"# L_zero_64_bytes:\n\t"
11652        $$emit$$"sub     0x8,rcx\n\t"
11653        $$emit$$"jge     L_loop\n\t"
11654        $$emit$$"add     0x4,rcx\n\t"
11655        $$emit$$"jl      L_tail\n\t"
11656        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11657        $$emit$$"add     0x20,rax\n\t"
11658        $$emit$$"sub     0x4,rcx\n\t"
11659        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11660        $$emit$$"add     0x4,rcx\n\t"
11661        $$emit$$"jle     L_end\n\t"
11662        $$emit$$"dec     rcx\n\t"
11663        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11664        $$emit$$"vmovq   xmm0,(rax)\n\t"
11665        $$emit$$"add     0x8,rax\n\t"
11666        $$emit$$"dec     rcx\n\t"
11667        $$emit$$"jge     L_sloop\n\t"
11668        $$emit$$"# L_end:\n\t"
11669     } else {
11670        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11671     }
11672     $$emit$$"# DONE"
11673   %}
11674   ins_encode %{
11675     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11676                  $tmp$$XMMRegister, false, true);
11677   %}
11678   ins_pipe(pipe_slow);
11679 %}
11680 
11681 // Small ClearArray AVX512 non-constant length.
11682 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11683                        Universe dummy, rFlagsReg cr)
11684 %{
11685   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11686   match(Set dummy (ClearArray (Binary cnt base) val));
11687   ins_cost(125);
11688   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11689 
11690   format %{ $$template
11691     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11692     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11693     $$emit$$"jg      LARGE\n\t"
11694     $$emit$$"dec     rcx\n\t"
11695     $$emit$$"js      DONE\t# Zero length\n\t"
11696     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11697     $$emit$$"dec     rcx\n\t"
11698     $$emit$$"jge     LOOP\n\t"
11699     $$emit$$"jmp     DONE\n\t"
11700     $$emit$$"# LARGE:\n\t"
11701     if (UseFastStosb) {
11702        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
11703        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11704     } else if (UseXMMForObjInit) {
11705        $$emit$$"mov     rdi,rax\n\t"
11706        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11707        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11708        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11716        $$emit$$"jl      L_tail\n\t"
11717        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11718        $$emit$$"add     0x20,rax\n\t"
11719        $$emit$$"sub     0x4,rcx\n\t"
11720        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11721        $$emit$$"add     0x4,rcx\n\t"
11722        $$emit$$"jle     L_end\n\t"
11723        $$emit$$"dec     rcx\n\t"
11724        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11725        $$emit$$"vmovq   xmm0,(rax)\n\t"
11726        $$emit$$"add     0x8,rax\n\t"
11727        $$emit$$"dec     rcx\n\t"
11728        $$emit$$"jge     L_sloop\n\t"
11729        $$emit$$"# L_end:\n\t"
11730     } else {
11731        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11732     }
11733     $$emit$$"# DONE"
11734   %}
11735   ins_encode %{
11736     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11737                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
11738   %}
11739   ins_pipe(pipe_slow);
11740 %}
11741 
11742 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11743                                  Universe dummy, rFlagsReg cr)

11744 %{
11745   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11746   match(Set dummy (ClearArray (Binary cnt base) val));
11747   ins_cost(125);
11748   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11749 
11750   format %{ $$template
11751     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11752     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11753     $$emit$$"jg      LARGE\n\t"
11754     $$emit$$"dec     rcx\n\t"
11755     $$emit$$"js      DONE\t# Zero length\n\t"
11756     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11757     $$emit$$"dec     rcx\n\t"
11758     $$emit$$"jge     LOOP\n\t"
11759     $$emit$$"jmp     DONE\n\t"
11760     $$emit$$"# LARGE:\n\t"
11761     if (UseFastStosb) {
11762        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
11763        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11764     } else if (UseXMMForObjInit) {
11765        $$emit$$"mov     rdi,rax\n\t"
11766        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11767        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11768        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11776        $$emit$$"jl      L_tail\n\t"
11777        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11778        $$emit$$"add     0x20,rax\n\t"
11779        $$emit$$"sub     0x4,rcx\n\t"
11780        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11781        $$emit$$"add     0x4,rcx\n\t"
11782        $$emit$$"jle     L_end\n\t"
11783        $$emit$$"dec     rcx\n\t"
11784        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11785        $$emit$$"vmovq   xmm0,(rax)\n\t"
11786        $$emit$$"add     0x8,rax\n\t"
11787        $$emit$$"dec     rcx\n\t"
11788        $$emit$$"jge     L_sloop\n\t"
11789        $$emit$$"# L_end:\n\t"
11790     } else {
11791        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11792     }
11793     $$emit$$"# DONE"
11794   %}
11795   ins_encode %{
11796     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11797                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
11798   %}
11799   ins_pipe(pipe_slow);
11800 %}
11801 
11802 // Large ClearArray non-AVX512.
11803 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11804                         Universe dummy, rFlagsReg cr)
11805 %{
11806   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11807   match(Set dummy (ClearArray (Binary cnt base) val));
11808   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11809 
11810   format %{ $$template
11811     if (UseFastStosb) {
11812        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
11813        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11814     } else if (UseXMMForObjInit) {
11815        $$emit$$"movdq   $tmp, $val\n\t"
11816        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11817        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11818        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11819        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11820        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11821        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11822        $$emit$$"add     0x40,rax\n\t"
11823        $$emit$$"# L_zero_64_bytes:\n\t"
11824        $$emit$$"sub     0x8,rcx\n\t"
11825        $$emit$$"jge     L_loop\n\t"
11826        $$emit$$"add     0x4,rcx\n\t"
11827        $$emit$$"jl      L_tail\n\t"
11828        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11829        $$emit$$"add     0x20,rax\n\t"
11830        $$emit$$"sub     0x4,rcx\n\t"
11831        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11832        $$emit$$"add     0x4,rcx\n\t"
11833        $$emit$$"jle     L_end\n\t"
11834        $$emit$$"dec     rcx\n\t"
11835        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11836        $$emit$$"vmovq   xmm0,(rax)\n\t"
11837        $$emit$$"add     0x8,rax\n\t"
11838        $$emit$$"dec     rcx\n\t"
11839        $$emit$$"jge     L_sloop\n\t"
11840        $$emit$$"# L_end:\n\t"
11841     } else {
11842        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11843     }
11844   %}
11845   ins_encode %{
11846     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11847                  $tmp$$XMMRegister, true, false);
11848   %}
11849   ins_pipe(pipe_slow);
11850 %}
11851 
11852 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11853                                   Universe dummy, rFlagsReg cr)
11854 %{
11855   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11856   match(Set dummy (ClearArray (Binary cnt base) val));
11857   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11858 
11859   format %{ $$template
11860     if (UseXMMForObjInit) {
11861        $$emit$$"movdq   $tmp, $val\n\t"
11862        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11863        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11864        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11865        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11866        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11867        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11868        $$emit$$"add     0x40,rax\n\t"
11869        $$emit$$"# L_zero_64_bytes:\n\t"
11870        $$emit$$"sub     0x8,rcx\n\t"
11871        $$emit$$"jge     L_loop\n\t"
11872        $$emit$$"add     0x4,rcx\n\t"
11873        $$emit$$"jl      L_tail\n\t"
11874        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11875        $$emit$$"add     0x20,rax\n\t"
11876        $$emit$$"sub     0x4,rcx\n\t"
11877        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11878        $$emit$$"add     0x4,rcx\n\t"
11879        $$emit$$"jle     L_end\n\t"
11880        $$emit$$"dec     rcx\n\t"
11881        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11882        $$emit$$"vmovq   xmm0,(rax)\n\t"
11883        $$emit$$"add     0x8,rax\n\t"
11884        $$emit$$"dec     rcx\n\t"
11885        $$emit$$"jge     L_sloop\n\t"
11886        $$emit$$"# L_end:\n\t"
11887     } else {
11888        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11889     }
11890   %}
11891   ins_encode %{
11892     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11893                  $tmp$$XMMRegister, true, true);
11894   %}
11895   ins_pipe(pipe_slow);
11896 %}
11897 
11898 // Large ClearArray AVX512.
11899 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11900                              Universe dummy, rFlagsReg cr)
11901 %{
11902   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11903   match(Set dummy (ClearArray (Binary cnt base) val));
11904   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11905 
11906   format %{ $$template
11907     if (UseFastStosb) {
11908        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11909        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
11910        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11911     } else if (UseXMMForObjInit) {
11912        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11913        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11914        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11915        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11916        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11917        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11918        $$emit$$"add     0x40,rax\n\t"
11919        $$emit$$"# L_zero_64_bytes:\n\t"
11920        $$emit$$"sub     0x8,rcx\n\t"
11921        $$emit$$"jge     L_loop\n\t"
11922        $$emit$$"add     0x4,rcx\n\t"
11923        $$emit$$"jl      L_tail\n\t"
11924        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11925        $$emit$$"add     0x20,rax\n\t"
11926        $$emit$$"sub     0x4,rcx\n\t"
11927        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11928        $$emit$$"add     0x4,rcx\n\t"
11929        $$emit$$"jle     L_end\n\t"
11930        $$emit$$"dec     rcx\n\t"
11931        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11932        $$emit$$"vmovq   xmm0,(rax)\n\t"
11933        $$emit$$"add     0x8,rax\n\t"
11934        $$emit$$"dec     rcx\n\t"
11935        $$emit$$"jge     L_sloop\n\t"
11936        $$emit$$"# L_end:\n\t"
11937     } else {
11938        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11939        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11940     }
11941   %}
11942   ins_encode %{
11943     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11944                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
11945   %}
11946   ins_pipe(pipe_slow);
11947 %}
11948 
11949 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11950                                        Universe dummy, rFlagsReg cr)

11951 %{
11952   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11953   match(Set dummy (ClearArray (Binary cnt base) val));
11954   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11955 
11956   format %{ $$template
11957     if (UseFastStosb) {
11958        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11959        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
11960        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11961     } else if (UseXMMForObjInit) {
11962        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11963        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11964        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11965        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11966        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11967        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11968        $$emit$$"add     0x40,rax\n\t"
11969        $$emit$$"# L_zero_64_bytes:\n\t"
11970        $$emit$$"sub     0x8,rcx\n\t"
11971        $$emit$$"jge     L_loop\n\t"
11972        $$emit$$"add     0x4,rcx\n\t"
11973        $$emit$$"jl      L_tail\n\t"
11974        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11975        $$emit$$"add     0x20,rax\n\t"
11976        $$emit$$"sub     0x4,rcx\n\t"
11977        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11978        $$emit$$"add     0x4,rcx\n\t"
11979        $$emit$$"jle     L_end\n\t"
11980        $$emit$$"dec     rcx\n\t"
11981        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11982        $$emit$$"vmovq   xmm0,(rax)\n\t"
11983        $$emit$$"add     0x8,rax\n\t"
11984        $$emit$$"dec     rcx\n\t"
11985        $$emit$$"jge     L_sloop\n\t"
11986        $$emit$$"# L_end:\n\t"
11987     } else {
11988        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11989        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11990     }
11991   %}
11992   ins_encode %{
11993     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11994                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
11995   %}
11996   ins_pipe(pipe_slow);
11997 %}
11998 
11999 // Small ClearArray AVX512 constant length.
12000 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
12001 %{
12002   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
12003             ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
12004   match(Set dummy (ClearArray (Binary cnt base) val));
12005   ins_cost(100);
12006   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
12007   format %{ "clear_mem_imm $base, $cnt\n\t" %}
12008   ins_encode %{
12009     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12010   %}
12011   ins_pipe(pipe_slow);
12012 %}
12013 
12014 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12015                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12016 %{
12017   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12018   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12019   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12020 
12021   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12022   ins_encode %{
12023     __ string_compare($str1$$Register, $str2$$Register,
12024                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12025                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12026   %}
12027   ins_pipe( pipe_slow );
12028 %}
12029 

12920   ins_pipe(ialu_cr_reg_mem);
12921 %}
12922 
12923 // This will generate a signed flags result. This should be OK since
12924 // any compare to a zero should be eq/neq.
12925 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12926 %{
12927   match(Set cr (CmpP src zero));
12928 
12929   format %{ "testq   $src, $src\t# ptr" %}
12930   ins_encode %{
12931     __ testq($src$$Register, $src$$Register);
12932   %}
12933   ins_pipe(ialu_cr_reg_imm);
12934 %}
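      // Why test suffices: testq computes src & src and sets ZF from the
      // result, so ZF is set exactly when the pointer is null, with no
      // immediate or scratch register needed. Schematically:
      //
      //   bool is_null = (((uintptr_t)p & (uintptr_t)p) == 0); // same ZF as testq p,p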
12935 
12936 // This generates a signed flags result, which is fine here: a compare
12937 // against zero is only ever consumed as eq/neq.
12938 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12939 %{
12940   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
12941             n->in(1)->as_Load()->barrier_data() == 0);
12942   match(Set cr (CmpP (LoadP op) zero));
12943 
12944   ins_cost(500); // XXX
12945   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12946   ins_encode %{
12947     __ testq($op$$Address, 0xFFFFFFFF); // imm32 sign-extends to 0xffffffffffffffff
12948   %}
12949   ins_pipe(ialu_cr_reg_imm);
12950 %}
12951 
12952 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12953 %{
12954   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
12955             n->in(1)->as_Load()->barrier_data() == 0);
12956   match(Set cr (CmpP (LoadP mem) zero));
12957 
12958   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12959   ins_encode %{
12960     __ cmpq(r12, $mem$$Address);
12961   %}
12962   ins_pipe(ialu_cr_reg_mem);
12963 %}
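      // The trick in testP_mem_reg0 (and in testN_mem_reg0 below): with
      // zero-based compressed oops, r12 doubles as the heap base register
      // and always holds 0, so comparing r12 against memory is a null check
      // that needs no immediate bytes. Schematically:
      //
      //   // invariant: CompressedOops::base() == nullptr  ==>  r12 == 0
      //   // cmpq r12, [mem]   sets ZF  iff  *(intptr_t*)mem == 0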
12964 
12965 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12966 %{
12967   match(Set cr (CmpN op1 op2));
12968 
12969   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12970   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12971   ins_pipe(ialu_cr_reg_reg);
12972 %}
12973 
12974 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)

13017 %{
13018   match(Set cr (CmpN src (LoadNKlass mem)));
13019 
13020   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
13021   ins_encode %{
13022     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
13023   %}
13024   ins_pipe(ialu_cr_reg_mem);
13025 %}
13026 
13027 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
13028   match(Set cr (CmpN src zero));
13029 
13030   format %{ "testl   $src, $src\t# compressed ptr" %}
13031   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
13032   ins_pipe(ialu_cr_reg_imm);
13033 %}
13034 
13035 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
13036 %{
13037   predicate(CompressedOops::base() != nullptr);
13038   match(Set cr (CmpN (LoadN mem) zero));
13039 
13040   ins_cost(500); // XXX
13041   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
13042   ins_encode %{
13043     __ cmpl($mem$$Address, 0); // ZF set iff the loaded narrow oop is null
13044   %}
13045   ins_pipe(ialu_cr_reg_mem);
13046 %}
13047 
13048 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
13049 %{
13050   predicate(CompressedOops::base() == nullptr);
13051   match(Set cr (CmpN (LoadN mem) zero));
13052 
13053   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
13054   ins_encode %{
13055     __ cmpl(r12, $mem$$Address);
13056   %}
13057   ins_pipe(ialu_cr_reg_mem);
13058 %}
13059 
13060 // Yanked all unsigned pointer compare operations;
13061 // pointer compares are done with CmpP, which is already unsigned.
13062 
13063 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
13064 %{
13065   match(Set cr (CmpL op1 op2));
13066 
13067   format %{ "cmpq    $op1, $op2" %}
13068   ins_encode %{
13069     __ cmpq($op1$$Register, $op2$$Register);
13070   %}

13781 
13782   ins_cost(300);
13783   format %{ "call_leaf,runtime " %}
13784   ins_encode(clear_avx, Java_To_Runtime(meth));
13785   ins_pipe(pipe_slow);
13786 %}
13787 
13788 // Call runtime without safepoint and with vector arguments
13789 instruct CallLeafDirectVector(method meth)
13790 %{
13791   match(CallLeafVector);
13792   effect(USE meth);
13793 
13794   ins_cost(300);
13795   format %{ "call_leaf,vector " %}
13796   ins_encode(Java_To_Runtime(meth));
13797   ins_pipe(pipe_slow);
13798 %}
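      // Note the absent clear_avx prefix here, unlike the other leaf-call
      // rules: a vector call passes its arguments in ymm/zmm registers, so
      // emitting vzeroupper before the call would clobber them.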
13799 
13800 // Call runtime without safepoint
13801 // entry point is null, target holds the address to call
13802 instruct CallLeafNoFPInDirect(rRegP target)
13803 %{
13804   predicate(n->as_Call()->entry_point() == nullptr);
13805   match(CallLeafNoFP target);
13806 
13807   ins_cost(300);
13808   format %{ "call_leaf_nofp,runtime indirect " %}
13809   ins_encode %{
13810      __ call($target$$Register);
13811   %}
13812 
13813   ins_pipe(pipe_slow);
13814 %}
13815 
13816 instruct CallLeafNoFPDirect(method meth)
13817 %{
13818   predicate(n->as_Call()->entry_point() != nullptr);
13819   match(CallLeafNoFP);
13820   effect(USE meth);
13821 
13822   ins_cost(300);
13823   format %{ "call_leaf_nofp,runtime " %}
13824   ins_encode(clear_avx, Java_To_Runtime(meth));
13825   ins_pipe(pipe_slow);
13826 %}
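      // The two CallLeafNoFP rules above split on whether C2 knows the
      // callee address at compile time. Roughly (illustrative pseudocode):
      //
      //   if (call->entry_point() != nullptr) {
      //     // CallLeafNoFPDirect: call a fixed runtime entry point
      //   } else {
      //     // CallLeafNoFPInDirect: the address arrives in a register,
      //     // so emit an indirect call through it
      //   }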
13827 
13828 // Return Instruction
13830 // Remove the return address and jump to it.
13831 // Note: we always emit a nop after a ret to make sure there is room
13832 // for safepoint patching.
13832 instruct Ret()
13833 %{
13834   match(Return);
13835 
13836   format %{ "ret" %}
13837   ins_encode %{
13838     __ ret(0);