< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  475 }
  476 
  477 // !!!!! Special hack to get all types of calls to specify the byte offset
  478 //       from the start of the call to the point where the return address
  479 //       will point.
// Distance in bytes from the start of the emitted static-call sequence to
// the point the pushed return address refers to.  The call itself is the
// 5-byte opcode+rel32 form; an optional vzeroupper emitted before the call
// (accounted for by clear_avx_size()) shifts the return address further.
  480 int MachCallStaticJavaNode::ret_addr_offset()
  481 {
  482   int offset = 5; // 5 bytes from start of call to where return address points
  483   offset += clear_avx_size();
  484   return offset;
  485 }
  486 
// Same contract as the static variant above, but a dynamic (inline-cache)
// call site is 15 bytes: presumably the 10-byte movq of the cached-oop
// immediate plus the 5-byte call — TODO confirm against the emitter.
  487 int MachCallDynamicJavaNode::ret_addr_offset()
  488 {
  489   int offset = 15; // 15 bytes from start of call to where return address points
  490   offset += clear_avx_size();
  491   return offset;
  492 }
  493 
// Runtime calls go through r10: 10-byte movq of the 64-bit target address
// plus a 3-byte indirect call = 13 bytes.  CallLeafVector sites skip the
// vzeroupper (they deliberately keep AVX state live), so only non-vector
// leaves add clear_avx_size().
  494 int MachCallRuntimeNode::ret_addr_offset() {




  495   int offset = 13; // movq r10,#addr; callq (r10)
  496   if (this->ideal_Opcode() != Op_CallLeafVector) {
  497     offset += clear_avx_size();
  498   }
  499   return offset;
  500 }
  501 
// Native (panama downcall) sites use the same movq r10 / callq (r10)
// sequence as runtime calls, and always clear AVX state first.
  502 int MachCallNativeNode::ret_addr_offset() {
  503   int offset = 13; // movq r10,#addr; callq (r10)
  504   offset += clear_avx_size();
  505   return offset;
  506 }

  507 //
  508 // Compute padding required for nodes which need alignment
  509 //
  510 
  511 // The address of the call instruction needs to be 4-byte aligned to
  512 // ensure that it does not span a cache line so that it can be patched.
// Padding (in bytes) to insert before this node so the call's 4-byte
// displacement does not straddle a cache line and can be patched
// atomically.  Alignment is computed at the displacement, i.e. one byte
// past the call opcode (and past any vzeroupper prefix).
  513 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  514 {
  515   current_offset += clear_avx_size(); // skip vzeroupper
  516   current_offset += 1; // skip call opcode byte
  517   return align_up(current_offset, alignment_required()) - current_offset;
  518 }
  519 
  520 // The address of the call instruction needs to be 4-byte aligned to
  521 // ensure that it does not span a cache line so that it can be patched.
  522 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  523 {
  524   current_offset += clear_avx_size(); // skip vzeroupper
  525   current_offset += 11; // skip movq instruction + call opcode byte
  526   return align_up(current_offset, alignment_required()) - current_offset;

  895     st->print("# stack alignment check");
  896 #endif
  897   }
  898   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  899     st->print("\n\t");
  900     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
  901     st->print("\n\t");
  902     st->print("je      fast_entry\t");
  903     st->print("\n\t");
  904     st->print("call    #nmethod_entry_barrier_stub\t");
  905     st->print("\n\tfast_entry:");
  906   }
  907   st->cr();
  908 }
  909 #endif
  910 
// Emit the method prologue: optional class-initialization barrier, then the
// verified entry (stack bang + frame push), then record frame completion and
// fix up the constant-table base offset if one is used.
  911 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  912   Compile* C = ra_->C;
  913   MacroAssembler _masm(&cbuf);
  914 
  915   int framesize = C->output()->frame_size_in_bytes();
  916   int bangsize = C->output()->bang_size_in_bytes();
  917 
// Fast-path check that the holder class is initialized; if not (wrong
// thread / still initializing) bounce to the wrong-method stub, which
// re-resolves and blocks as needed.
  918   if (C->clinit_barrier_on_entry()) {
  919     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  920     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  921 
  922     Label L_skip_barrier;
  923     Register klass = rscratch1;
  924 
  925     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  926     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  927 
  928     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  929 
  930     __ bind(L_skip_barrier);
  931   }
  932 
// Stack bang only when needed; stubs (stub_function() != NULL) get the
// last argument true — presumably to suppress some verified-entry work
// for stub frames; confirm against MacroAssembler::verified_entry.
  933   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);






  934 
  935   C->output()->set_frame_complete(cbuf.insts_size());
  936 
  937   if (C->has_mach_constant_base_node()) {
  938     // NOTE: We set the table base offset here because users might be
  939     // emitted before MachConstantBaseNode.
  940     ConstantTable& constant_table = C->output()->constant_table();
  941     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  942   }
  943 }
  944 
// Prologue size varies with barriers/banging, so defer to the generic
// (emit-and-measure) MachNode::size instead of a hand-computed constant.
  945 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  946 {
  947   return MachNode::size(ra_); // too many variables; just compute it
  948                               // the hard way
  949 }
  950 
// Upper-bound estimate of relocation entries the prologue may emit.
  951 int MachPrologNode::reloc() const
  952 {
  953   return 0; // a large enough number
  954 }
  955 
  956 //=============================================================================
  957 #ifndef PRODUCT
  958 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  959 {
  960   Compile* C = ra_->C;
  961   if (generate_vzeroupper(C)) {
  962     st->print("vzeroupper");
  963     st->cr(); st->print("\t");
  964   }
  965 
  966   int framesize = C->output()->frame_size_in_bytes();
  967   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  968   // Remove word for return adr already pushed
  969   // and RBP
  970   framesize -= 2*wordSize;

  978   if (do_polling() && C->is_method_compilation()) {
  979     st->print("\t");
  980     st->print_cr("cmpq     rsp, poll_offset[r15_thread] \n\t"
  981                  "ja       #safepoint_stub\t"
  982                  "# Safepoint: poll for GC");
  983   }
  984 }
  985 #endif
  986 
// Emit the method epilogue: optional vzeroupper, pop the frame
// (addq rsp, framesize; popq rbp), reserved-stack check, and the
// return-poll safepoint test.  The matching `ret` is a separate node.
  987 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  988 {
  989   Compile* C = ra_->C;
  990   MacroAssembler _masm(&cbuf);
  991 
  992   if (generate_vzeroupper(C)) {
  993     // Clear upper bits of YMM registers when current compiled code uses
  994     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  995     __ vzeroupper();
  996   }
  997 
  998   int framesize = C->output()->frame_size_in_bytes();
  999   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1000   // Remove word for return adr already pushed
 1001   // and RBP
 1002   framesize -= 2*wordSize;
 1003 
 1004   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1005 
// Hand-encode addq rsp,#framesize: disp8 form (opcode 0x83) when the
// immediate fits a signed byte, disp32 form (0x81) otherwise.
 1006   if (framesize) {
 1007     emit_opcode(cbuf, Assembler::REX_W);
 1008     if (framesize < 0x80) {
 1009       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 1010       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1011       emit_d8(cbuf, framesize);
 1012     } else {
 1013       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 1014       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1015       emit_d32(cbuf, framesize);
 1016     }
 1017   }
 1018 
 1019   // popq rbp
 1020   emit_opcode(cbuf, 0x58 | RBP_enc);
 1021 
 1022   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1023     __ reserved_stack_check();
 1024   }
 1025 
// Return poll: compare rsp against the per-thread poll word and branch to
// a stub registered in the safepoint-poll table.  A dummy label is used
// during scratch (size-estimation) emission so no table entry is created.
 1026   if (do_polling() && C->is_method_compilation()) {
 1027     MacroAssembler _masm(&cbuf);
 1028     Label dummy_label;
 1029     Label* code_stub = &dummy_label;
 1030     if (!C->output()->in_scratch_emit_size()) {
 1031       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1032     }
 1033     __ relocate(relocInfo::poll_return_type);
 1034     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1035   }
 1036 }
 1037 
// Epilogue size depends on framesize encoding and optional polls, so use
// the generic emit-and-measure path.
 1038 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1039 {
 1040   return MachNode::size(ra_); // too many variables; just compute it
 1041                               // the hard way
 1042 }
 1043 
// Upper-bound estimate of relocations: covers the return poll entry.
 1044 int MachEpilogNode::reloc() const
 1045 {
 1046   return 2; // a large enough number
 1047 }
 1048 
// Use the generic pipeline description; the epilogue has no special
// scheduling requirements.
 1049 const Pipeline* MachEpilogNode::pipeline() const
 1050 {
 1051   return MachNode::pipeline_class();
 1052 }
 1053 
 1054 //=============================================================================
 1055 
// Coarse register-class buckets (bad / gpr / opmask / xmm / stack slot).
// Presumably used by the spill-copy emission code (not visible in this
// chunk) to pick the right move instruction — confirm against the
// MachSpillCopyNode implementation that follows in the full file.
 1056 enum RC {
 1057   rc_bad,
 1058   rc_int,
 1059   rc_kreg,
 1060   rc_float,
 1061   rc_stack
 1062 };
 1063 

 1656     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1657     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1658     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1659     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1660     emit_d32(cbuf, offset);
 1661   } else {
 1662     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1663     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1664     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1665     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1666     emit_d8(cbuf, offset);
 1667   }
 1668 }
 1669 
// Size of the REX.W lea reg,[rsp+offset] emitted for this box: 5 bytes for
// the disp8 form, 8 for disp32 — matches the two encodings in the emit
// routine above.
 1670 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1671 {
 1672   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1673   return (offset < 0x80) ? 5 : 8; // REX
 1674 }
 1675 






























 1676 //=============================================================================
 1677 #ifndef PRODUCT
// Debug printout of the unverified entry point: the receiver-klass inline
// cache check (compressed or full), miss branch, and alignment nops.
// Mirrors what MachUEPNode::emit generates.
 1678 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1679 {
 1680   if (UseCompressedClassPointers) {
 1681     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1682     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1683     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1684   } else {
 1685     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1686                  "# Inline cache check");
 1687   }
 1688   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1689   st->print_cr("\tnop\t# nops to align entry point");
 1690 }
 1691 #endif
 1692 
 1693 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1694 {
 1695   MacroAssembler masm(&cbuf);

 1698     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1699     masm.cmpptr(rax, rscratch1);
 1700   } else {
 1701     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1702   }
 1703 
 1704   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1705 
 1706   /* WARNING these NOPs are critical so that verified entry point is properly
 1707      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1708   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1709   if (OptoBreakpoint) {
 1710     // Leave space for int3
 1711     nops_cnt -= 1;
 1712   }
 1713   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1714   if (nops_cnt > 0)
 1715     masm.nop(nops_cnt);
 1716 }
 1717 
// Size varies with compressed-klass decoding and alignment nops; use the
// generic emit-and-measure path.
 1718 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1719 {
 1720   return MachNode::size(ra_); // too many variables; just compute it
 1721                               // the hard way
 1722 }
 1723 
 1724 
 1725 //=============================================================================
 1726 
// Vector calling convention (vector values in XMM/YMM/ZMM across calls) is
// available only when both the vector API support and its stubs are enabled.
 1727 const bool Matcher::supports_vector_calling_convention(void) {
 1728   if (EnableVectorSupport && UseVectorStubs) {
 1729     return true;
 1730   }
 1731   return false;
 1732 }
 1733 
// Vector return values always come back in XMM0; the "hi" half of the
// OptoRegPair is widened to cover the slots a VecX/VecY/VecZ occupies
// (default covers the 8-byte VecD case).
 1734 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1735   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1736   int lo = XMM0_num;
 1737   int hi = XMM0b_num;
 1738   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1739   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1740   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1741   return OptoRegPair(hi, lo);
 1742 }
 1743 
 1744 // Is this branch offset short enough that a short branch can be used?

 4010   %}
 4011 %}
 4012 
 4013 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Addressing-mode operand: [base + disp32 + (int_index << scale)].
// The predicate requires the index to be provably non-negative (its type's
// low bound >= 0) so the i2l sign-extension can be elided safely.
 4014 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 4015 %{
 4016   constraint(ALLOC_IN_RC(ptr_reg));
 4017   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 4018   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 4019 
 4020   op_cost(10);
 4021   format %{"[$reg + $off + $idx << $scale]" %}
 4022   interface(MEMORY_INTER) %{
 4023     base($reg);
 4024     index($idx);
 4025     scale($scale);
 4026     disp($off);
 4027   %}
 4028 %}
 4029 
















 4030 // Indirect Narrow Oop Plus Offset Operand
 4031 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 4032 // we can't free r12 even with CompressedOops::base() == NULL.
// Addressing-mode operand folding a DecodeN into the address:
// [R12 + narrow_oop<<3 + disp32], where R12 holds the heap base.
// Only valid when compressed oops use shift 3 (times_8).
 4033 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 4034   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4035   constraint(ALLOC_IN_RC(ptr_reg));
 4036   match(AddP (DecodeN reg) off);
 4037 
 4038   op_cost(10);
 4039   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4040   interface(MEMORY_INTER) %{
 4041     base(0xc); // R12
 4042     index($reg);
 4043     scale(0x3);
 4044     disp($off);
 4045   %}
 4046 %}
 4047 
 4048 // Indirect Memory Operand
 4049 operand indirectNarrow(rRegN reg)

 4352     equal(0x4, "e");
 4353     not_equal(0x5, "ne");
 4354     less(0x2, "b");
 4355     greater_equal(0x3, "nb");
 4356     less_equal(0x6, "be");
 4357     greater(0x7, "nbe");
 4358     overflow(0x0, "o");
 4359     no_overflow(0x1, "no");
 4360   %}
 4361 %}
 4362 
 4363 //----------OPERAND CLASSES----------------------------------------------------
 4364 // Operand Classes are groups of operands that are used as to simplify
 4365 // instruction definitions by not requiring the AD writer to specify separate
 4366 // instructions for every form of operand when the instruction accepts
 4367 // multiple operand types with the same basic encoding and format.  The classic
 4368 // case of this is memory operands.
 4369 
// The umbrella operand class accepted by generic memory-form instructions:
// every plain and narrow-oop addressing mode defined above.
 4370 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4371                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4372                indCompressedOopOffset,
 4373                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4374                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4375                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4376 
 4377 //----------PIPELINE-----------------------------------------------------------
 4378 // Rules which define the behavior of the target architectures pipeline.
 4379 pipeline %{
 4380 
 4381 //----------ATTRIBUTES---------------------------------------------------------
 4382 attributes %{
 4383   variable_size_instructions;        // Fixed size instructions
 4384   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4385   instruction_unit_size = 1;         // An instruction is 1 bytes long
 4386   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4387   instruction_fetch_units = 1;       // of 16 bytes
 4388 
 4389   // List of nop instructions
 4390   nops( MachNop );
 4391 %}
 4392 

 6846   format %{ "MEMBAR-storestore (empty encoding)" %}
 6847   ins_encode( );
 6848   ins_pipe(empty);
 6849 %}
 6850 
 6851 //----------Move Instructions--------------------------------------------------
 6852 
// Reinterpret a long as a pointer: a plain movq, elided entirely when the
// register allocator already placed src and dst in the same register.
 6853 instruct castX2P(rRegP dst, rRegL src)
 6854 %{
 6855   match(Set dst (CastX2P src));
 6856 
 6857   format %{ "movq    $dst, $src\t# long->ptr" %}
 6858   ins_encode %{
 6859     if ($dst$$reg != $src$$reg) {
 6860       __ movptr($dst$$Register, $src$$Register);
 6861     }
 6862   %}
 6863   ins_pipe(ialu_reg_reg); // XXX
 6864 %}
 6865 













// Reinterpret a pointer as a long (inverse of castX2P above): a plain movq,
// elided when src and dst were allocated to the same register.
 6866 instruct castP2X(rRegL dst, rRegP src)
 6867 %{
 6868   match(Set dst (CastP2X src));
 6869 
 6870   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6871   ins_encode %{
 6872     if ($dst$$reg != $src$$reg) {
 6873       __ movptr($dst$$Register, $src$$Register);
 6874     }
 6875   %}
 6876   ins_pipe(ialu_reg_reg); // XXX
 6877 %}
 6878 
 6879 // Convert oop into int for vectors alignment masking
 6880 instruct convP2I(rRegI dst, rRegP src)
 6881 %{
 6882   match(Set dst (ConvL2I (CastP2X src)));
 6883 
 6884   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6885   ins_encode %{

11155   effect(DEF dst, USE src);
11156   ins_cost(100);
11157   format %{ "movd    $dst,$src\t# MoveI2F" %}
11158   ins_encode %{
11159     __ movdl($dst$$XMMRegister, $src$$Register);
11160   %}
11161   ins_pipe( pipe_slow );
11162 %}
11163 
// Bitwise move of a 64-bit GPR into an XMM register (Double.longBitsToDouble
// style reinterpretation) via movq — no conversion, bits are preserved.
11164 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11165   match(Set dst (MoveL2D src));
11166   effect(DEF dst, USE src);
11167   ins_cost(100);
11168   format %{ "movd    $dst,$src\t# MoveL2D" %}
11169   ins_encode %{
11170      __ movdq($dst$$XMMRegister, $src$$Register);
11171   %}
11172   ins_pipe( pipe_slow );
11173 %}
11174 

11175 // Fast clearing of an array
11176 // Small ClearArray non-AVX512.
11177 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11178                   Universe dummy, rFlagsReg cr)
11179 %{
11180   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11181   match(Set dummy (ClearArray cnt base));
11182   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































11183 
11184   format %{ $$template
11185     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11186     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11187     $$emit$$"jg      LARGE\n\t"
11188     $$emit$$"dec     rcx\n\t"
11189     $$emit$$"js      DONE\t# Zero length\n\t"
11190     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11191     $$emit$$"dec     rcx\n\t"
11192     $$emit$$"jge     LOOP\n\t"
11193     $$emit$$"jmp     DONE\n\t"
11194     $$emit$$"# LARGE:\n\t"
11195     if (UseFastStosb) {
11196        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11197        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11198     } else if (UseXMMForObjInit) {
11199        $$emit$$"mov     rdi,rax\n\t"
11200        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11201        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11202        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11210        $$emit$$"jl      L_tail\n\t"
11211        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11212        $$emit$$"add     0x20,rax\n\t"
11213        $$emit$$"sub     0x4,rcx\n\t"
11214        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11215        $$emit$$"add     0x4,rcx\n\t"
11216        $$emit$$"jle     L_end\n\t"
11217        $$emit$$"dec     rcx\n\t"
11218        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11219        $$emit$$"vmovq   xmm0,(rax)\n\t"
11220        $$emit$$"add     0x8,rax\n\t"
11221        $$emit$$"dec     rcx\n\t"
11222        $$emit$$"jge     L_sloop\n\t"
11223        $$emit$$"# L_end:\n\t"
11224     } else {
11225        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11226     }
11227     $$emit$$"# DONE"
11228   %}
11229   ins_encode %{
11230     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11231                  $tmp$$XMMRegister, false, knoreg);
11232   %}
11233   ins_pipe(pipe_slow);
11234 %}
11235 
11236 // Small ClearArray AVX512 non-constant length.
11237 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11238                        Universe dummy, rFlagsReg cr)
11239 %{
11240   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11241   match(Set dummy (ClearArray cnt base));
11242   ins_cost(125);
11243   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11244 
11245   format %{ $$template
11246     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11247     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11248     $$emit$$"jg      LARGE\n\t"
11249     $$emit$$"dec     rcx\n\t"
11250     $$emit$$"js      DONE\t# Zero length\n\t"
11251     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11252     $$emit$$"dec     rcx\n\t"
11253     $$emit$$"jge     LOOP\n\t"
11254     $$emit$$"jmp     DONE\n\t"
11255     $$emit$$"# LARGE:\n\t"
11256     if (UseFastStosb) {
11257        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11258        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11259     } else if (UseXMMForObjInit) {
11260        $$emit$$"mov     rdi,rax\n\t"
11261        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11262        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11263        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11271        $$emit$$"jl      L_tail\n\t"
11272        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11273        $$emit$$"add     0x20,rax\n\t"
11274        $$emit$$"sub     0x4,rcx\n\t"
11275        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11276        $$emit$$"add     0x4,rcx\n\t"
11277        $$emit$$"jle     L_end\n\t"
11278        $$emit$$"dec     rcx\n\t"
11279        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11280        $$emit$$"vmovq   xmm0,(rax)\n\t"
11281        $$emit$$"add     0x8,rax\n\t"
11282        $$emit$$"dec     rcx\n\t"
11283        $$emit$$"jge     L_sloop\n\t"
11284        $$emit$$"# L_end:\n\t"
11285     } else {
11286        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11287     }
11288     $$emit$$"# DONE"
11289   %}
11290   ins_encode %{
11291     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11292                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11293   %}
11294   ins_pipe(pipe_slow);
11295 %}
11296 
11297 // Large ClearArray non-AVX512.
// Large ClearArray, non-AVX512 path: delegates to MacroAssembler::clear_mem
// with is_large=true and no opmask register (knoreg).  The format block is
// pseudo-assembly for -XX:+PrintOptoAssembly mirroring clear_mem's three
// strategies (rep stosb / XMM loop / rep stosq).
11298 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11299                         Universe dummy, rFlagsReg cr)
11300 %{
11301   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
11302   match(Set dummy (ClearArray cnt base));
11303   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

11304 
11305   format %{ $$template
11306     if (UseFastStosb) {
11307        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11308        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11309        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11310     } else if (UseXMMForObjInit) {
11311        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11312        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11313        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11314        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11315        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11316        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11317        $$emit$$"add     0x40,rax\n\t"
11318        $$emit$$"# L_zero_64_bytes:\n\t"
11319        $$emit$$"sub     0x8,rcx\n\t"
11320        $$emit$$"jge     L_loop\n\t"
11321        $$emit$$"add     0x4,rcx\n\t"
11322        $$emit$$"jl      L_tail\n\t"
11323        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11324        $$emit$$"add     0x20,rax\n\t"
11325        $$emit$$"sub     0x4,rcx\n\t"
11326        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11327        $$emit$$"add     0x4,rcx\n\t"
11328        $$emit$$"jle     L_end\n\t"
11329        $$emit$$"dec     rcx\n\t"
11330        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11331        $$emit$$"vmovq   xmm0,(rax)\n\t"
11332        $$emit$$"add     0x8,rax\n\t"
11333        $$emit$$"dec     rcx\n\t"
11334        $$emit$$"jge     L_sloop\n\t"
11335        $$emit$$"# L_end:\n\t"
11336     } else {
11337        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11338        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11339     }
11340   %}
11341   ins_encode %{
11342     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11343                  $tmp$$XMMRegister, true, knoreg);
11344   %}
11345   ins_pipe(pipe_slow);
11346 %}
11347 
11348 // Large ClearArray AVX512.
// Large ClearArray, AVX512 path: same clear_mem delegation as
// rep_stos_large but with UseAVX > 2, a legacy-encodable XMM temp and an
// opmask temp register passed down for masked tail stores.
11349 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11350                              Universe dummy, rFlagsReg cr)
11351 %{
11352   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11353   match(Set dummy (ClearArray cnt base));
11354   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11355 
11356   format %{ $$template
11357     if (UseFastStosb) {
11358        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11359        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11360        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11361     } else if (UseXMMForObjInit) {
11362        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11363        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11364        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11365        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11366        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11367        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11368        $$emit$$"add     0x40,rax\n\t"
11369        $$emit$$"# L_zero_64_bytes:\n\t"
11370        $$emit$$"sub     0x8,rcx\n\t"
11371        $$emit$$"jge     L_loop\n\t"
11372        $$emit$$"add     0x4,rcx\n\t"
11373        $$emit$$"jl      L_tail\n\t"
11374        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11375        $$emit$$"add     0x20,rax\n\t"
11376        $$emit$$"sub     0x4,rcx\n\t"
11377        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11378        $$emit$$"add     0x4,rcx\n\t"
11379        $$emit$$"jle     L_end\n\t"
11380        $$emit$$"dec     rcx\n\t"
11381        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11382        $$emit$$"vmovq   xmm0,(rax)\n\t"
11383        $$emit$$"add     0x8,rax\n\t"
11384        $$emit$$"dec     rcx\n\t"
11385        $$emit$$"jge     L_sloop\n\t"
11386        $$emit$$"# L_end:\n\t"
11387     } else {
11388        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11389        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11390     }
11391   %}
11392   ins_encode %{
11393     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11394                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11395   %}
11396   ins_pipe(pipe_slow);
11397 %}
11398 
11399 // Small ClearArray AVX512 constant length.
// Small ClearArray with a compile-time-constant length, AVX512VLBW only:
// uses the constant-count clear_mem overload (note $cnt$$constant, not a
// register), so cnt/base are not killed — only temps and flags.
11400 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
11401 %{
11402   predicate(!((ClearArrayNode*)n)->is_large() &&
11403               ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11404   match(Set dummy (ClearArray cnt base));
11405   ins_cost(100);
11406   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11407   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11408   ins_encode %{
11409    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11410   %}
11411   ins_pipe(pipe_slow);
11412 %}
11413 
// Latin1-vs-Latin1 String.compareTo intrinsic, non-AVX512VLBW path:
// fixed-register calling pattern (rdi/rsi strings, rcx/rdx counts,
// result in rax) delegating to MacroAssembler::string_compare with no
// opmask register.
11414 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11415                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11416 %{
11417   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11418   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11419   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11420 
11421   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11422   ins_encode %{
11423     __ string_compare($str1$$Register, $str2$$Register,
11424                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11425                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11426   %}
11427   ins_pipe( pipe_slow );
11428 %}
11429 

13217 
13218   ins_cost(300);
13219   format %{ "call_leaf,vector " %}
13220   ins_encode(Java_To_Runtime(meth));
13221   ins_pipe(pipe_slow);
13222 %}
13223 
13224 //
// Native (panama) downcall: clear AVX state, then the standard
// movq r10 / callq (r10) runtime-call encoding.
13225 instruct CallNativeDirect(method meth)
13226 %{
13227   match(CallNative);
13228   effect(USE meth);
13229 
13230   ins_cost(300);
13231   format %{ "call_native " %}
13232   ins_encode(clear_avx, Java_To_Runtime(meth));
13233   ins_pipe(pipe_slow);
13234 %}
13235 
13236 // Call runtime without safepoint















// Leaf runtime call with no FP arguments and no safepoint: same
// clear_avx + Java_To_Runtime encoding as the other leaf calls.
13237 instruct CallLeafNoFPDirect(method meth)
13238 %{

13239   match(CallLeafNoFP);
13240   effect(USE meth);
13241 
13242   ins_cost(300);
13243   format %{ "call_leaf_nofp,runtime " %}
13244   ins_encode(clear_avx, Java_To_Runtime(meth));
13245   ins_pipe(pipe_slow);
13246 %}
13247 
13248 // Return Instruction
13249 // Remove the return address & jump to it.
13250 // Notice: We always emit a nop after a ret to make sure there is room
13251 // for safepoint patching
13252 instruct Ret()
13253 %{
13254   match(Return);
13255 
13256   format %{ "ret" %}
13257   ins_encode %{
13258     __ ret(0);

  475 }
  476 
  477 // !!!!! Special hack to get all types of calls to specify the byte offset
  478 //       from the start of the call to the point where the return address
  479 //       will point.
// (Updated side of the diff view; identical to the old version.)
// Distance from the start of the static-call sequence to the pushed return
// address: 5-byte call plus any vzeroupper prefix.
  480 int MachCallStaticJavaNode::ret_addr_offset()
  481 {
  482   int offset = 5; // 5 bytes from start of call to where return address points
  483   offset += clear_avx_size();
  484   return offset;
  485 }
  486 
// (Updated side of the diff view; identical to the old version.)
// Dynamic (inline-cache) call site: 15 bytes before the return address,
// plus any vzeroupper prefix.
  487 int MachCallDynamicJavaNode::ret_addr_offset()
  488 {
  489   int offset = 15; // 15 bytes from start of call to where return address points
  490   offset += clear_avx_size();
  491   return offset;
  492 }
  493 
// Updated version: a NULL _entry_point now denotes an *indirect* leaf call
// (CallLeafNoFPInDirect) whose target is already in a register — just the
// 3-byte callq (reg).  Otherwise the movq r10 / callq (r10) pair as before,
// with vzeroupper size added for all but CallLeafVector sites.
  494 int MachCallRuntimeNode::ret_addr_offset() {
  495   if (_entry_point == NULL) {
  496     // CallLeafNoFPInDirect
  497     return 3; // callq (register)
  498   }
  499   int offset = 13; // movq r10,#addr; callq (r10)
  500   if (this->ideal_Opcode() != Op_CallLeafVector) {
  501     offset += clear_avx_size();
  502   }
  503   return offset;
  504 }
  505 
// (Updated side of the diff view; identical to the old version.)
// Native downcall: movq r10 / callq (r10) plus vzeroupper.
  506 int MachCallNativeNode::ret_addr_offset() {
  507   int offset = 13; // movq r10,#addr; callq (r10)
  508   offset += clear_avx_size();
  509   return offset;
  510 }
  511 
  512 //
  513 // Compute padding required for nodes which need alignment
  514 //
  515 
  516 // The address of the call instruction needs to be 4-byte aligned to
  517 // ensure that it does not span a cache line so that it can be patched.
// (Updated side of the diff view; identical to the old version.)
// Pad so the call's 4-byte displacement does not cross a cache line and
// stays atomically patchable.
  518 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  519 {
  520   current_offset += clear_avx_size(); // skip vzeroupper
  521   current_offset += 1; // skip call opcode byte
  522   return align_up(current_offset, alignment_required()) - current_offset;
  523 }
  524 
  525 // The address of the call instruction needs to be 4-byte aligned to
  526 // ensure that it does not span a cache line so that it can be patched.
  527 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  528 {
  529   current_offset += clear_avx_size(); // skip vzeroupper
  530   current_offset += 11; // skip movq instruction + call opcode byte
  531   return align_up(current_offset, alignment_required()) - current_offset;

  900     st->print("# stack alignment check");
  901 #endif
  902   }
  903   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  904     st->print("\n\t");
  905     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
  906     st->print("\n\t");
  907     st->print("je      fast_entry\t");
  908     st->print("\n\t");
  909     st->print("call    #nmethod_entry_barrier_stub\t");
  910     st->print("\n\tfast_entry:");
  911   }
  912   st->cr();
  913 }
  914 #endif
  915 
// Updated prologue: frame setup moved into verified_entry(C) (which now
// reads sizes from Compile itself), a _verified_entry label is bound for
// later patching/lookup, and the nmethod entry barrier is emitted inline
// for normal (non-stub) compilations instead of being handled elsewhere.
  916 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  917   Compile* C = ra_->C;
  918   MacroAssembler _masm(&cbuf);
  919 



// Class-initialization barrier, unchanged from the previous version.
  920   if (C->clinit_barrier_on_entry()) {
  921     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  922     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  923 
  924     Label L_skip_barrier;
  925     Register klass = rscratch1;
  926 
  927     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  928     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  929 
  930     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  931 
  932     __ bind(L_skip_barrier);
  933   }
  934 
  935   __ verified_entry(C);
  936   __ bind(*_verified_entry);
  937 
// Stubs skip the nmethod entry barrier; only real method compilations
// need the GC disarm check.
  938   if (C->stub_function() == NULL) {
  939     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  940     bs->nmethod_entry_barrier(&_masm);
  941   }
  942 
  943   C->output()->set_frame_complete(cbuf.insts_size());
  944 
  945   if (C->has_mach_constant_base_node()) {
  946     // NOTE: We set the table base offset here because users might be
  947     // emitted before MachConstantBaseNode.
  948     ConstantTable& constant_table = C->output()->constant_table();
  949     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  950   }
  951 }
  952 






// Relocation entry estimate for the prolog.
// NOTE(review): returns 0 despite the "large enough" comment -- presumably
// the prolog's relocations are accounted for elsewhere; confirm.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
  957 
  958 //=============================================================================
  959 #ifndef PRODUCT
  960 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  961 {
  962   Compile* C = ra_->C;
  963   if (generate_vzeroupper(C)) {
  964     st->print("vzeroupper");
  965     st->cr(); st->print("\t");
  966   }
  967 
  968   int framesize = C->output()->frame_size_in_bytes();
  969   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  970   // Remove word for return adr already pushed
  971   // and RBP
  972   framesize -= 2*wordSize;

  980   if (do_polling() && C->is_method_compilation()) {
  981     st->print("\t");
  982     st->print_cr("cmpq     rsp, poll_offset[r15_thread] \n\t"
  983                  "ja       #safepoint_stub\t"
  984                  "# Safepoint: poll for GC");
  985   }
  986 }
  987 #endif
  988 
  989 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  990 {
  991   Compile* C = ra_->C;
  992   MacroAssembler _masm(&cbuf);
  993 
  994   if (generate_vzeroupper(C)) {
  995     // Clear upper bits of YMM registers when current compiled code uses
  996     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  997     __ vzeroupper();
  998   }
  999 
 1000   // Subtract two words to account for return address and rbp
 1001   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1002   __ remove_frame(initial_framesize, C->needs_stack_repair());




















 1003 
 1004   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1005     __ reserved_stack_check();
 1006   }
 1007 
 1008   if (do_polling() && C->is_method_compilation()) {
 1009     MacroAssembler _masm(&cbuf);
 1010     Label dummy_label;
 1011     Label* code_stub = &dummy_label;
 1012     if (!C->output()->in_scratch_emit_size()) {
 1013       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1014     }
 1015     __ relocate(relocInfo::poll_return_type);
 1016     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1017   }
 1018 }
 1019 






// Conservative estimate of relocation entries the epilog may add
// (e.g. the poll_return relocation emitted above).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
 1024 
// The epilog has no special scheduling needs; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
 1029 
 1030 //=============================================================================
 1031 
// Register class of a spill/copy operand.
// NOTE(review): presumably used by the spill-copy emission code below to
// select the move encoding -- confirm against its users, which are outside
// this view.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general-purpose register
  rc_kreg,   // opmask register (AVX-512 kReg, per usage elsewhere in this file)
  rc_float,  // XMM register
  rc_stack   // stack slot
};
 1039 

 1632     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1633     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1634     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1635     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1636     emit_d32(cbuf, offset);
 1637   } else {
 1638     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1639     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1640     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1641     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1642     emit_d8(cbuf, offset);
 1643   }
 1644 }
 1645 
// Size in bytes of the LEA reg,[rsp+offset] emitted for this node:
// 5 bytes with an 8-bit displacement (REX.W + opcode + ModRM + SIB + disp8),
// 8 bytes with a 32-bit displacement.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  return (offset < 0x80) ? 5 : 8; // REX
}
 1651 
 1652 //=============================================================================
#ifndef PRODUCT
// Debug-only pretty printing; the actual code is produced by
// MachVEPNode::emit below.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
#endif
 1659 
 1660 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1661 {
 1662   MacroAssembler _masm(&cbuf);
 1663   if (!_verified) {
 1664     uint insts_size = cbuf.insts_size();
 1665     if (UseCompressedClassPointers) {
 1666       __ load_klass(rscratch1, j_rarg0, rscratch2);
 1667       __ cmpptr(rax, rscratch1);
 1668     } else {
 1669       __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1670     }
 1671     __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1672   } else {
 1673     // Unpack inline type args passed as oop and then jump to
 1674     // the verified entry point (skipping the unverified entry).
 1675     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1676     // Emit code for verified entry and save increment for stack repair on return
 1677     __ verified_entry(ra_->C, sp_inc);
 1678     __ jmp(*_verified_entry);
 1679   }
 1680 }
 1681 
 1682 //=============================================================================
#ifndef PRODUCT
// Debug-only pretty printing of the unverified entry point (inline cache
// check) emitted by MachUEPNode::emit.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  if (UseCompressedClassPointers) {
    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
  } else {
    st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
                 "# Inline cache check");
  }
  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
  st->print_cr("\tnop\t# nops to align entry point");
}
#endif
 1698 
 1699 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1700 {
 1701   MacroAssembler masm(&cbuf);

 1704     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1705     masm.cmpptr(rax, rscratch1);
 1706   } else {
 1707     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1708   }
 1709 
 1710   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1711 
 1712   /* WARNING these NOPs are critical so that verified entry point is properly
 1713      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1714   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1715   if (OptoBreakpoint) {
 1716     // Leave space for int3
 1717     nops_cnt -= 1;
 1718   }
 1719   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1720   if (nops_cnt > 0)
 1721     masm.nop(nops_cnt);
 1722 }
 1723 







 1724 //=============================================================================
 1725 
 1726 const bool Matcher::supports_vector_calling_convention(void) {
 1727   if (EnableVectorSupport && UseVectorStubs) {
 1728     return true;
 1729   }
 1730   return false;
 1731 }
 1732 
 1733 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1734   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1735   int lo = XMM0_num;
 1736   int hi = XMM0b_num;
 1737   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1738   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1739   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1740   return OptoRegPair(hi, lo);
 1741 }
 1742 
 1743 // Is this branch offset short enough that a short branch can be used?

 4009   %}
 4010 %}
 4011 
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Match only when the int index is known non-negative (type lattice low
  // bound >= 0) -- presumably so the ConvI2L can be folded safely into the
  // address; confirm against the matcher's handling of ConvI2L.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 4028 
// Indirect Narrow Oop Operand
// Addresses the object directly through its compressed oop:
// [r12 + narrow_oop << 3], valid only when the compressed-oop shift is 3.
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
 4044 
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == NULL.
// Same as indCompressedOop above, but with an additional 32-bit displacement.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 4062 
 4063 // Indirect Memory Operand
 4064 operand indirectNarrow(rRegN reg)

 4367     equal(0x4, "e");
 4368     not_equal(0x5, "ne");
 4369     less(0x2, "b");
 4370     greater_equal(0x3, "nb");
 4371     less_equal(0x6, "be");
 4372     greater(0x7, "nbe");
 4373     overflow(0x0, "o");
 4374     no_overflow(0x1, "no");
 4375   %}
 4376 %}
 4377 
 4378 //----------OPERAND CLASSES----------------------------------------------------
  4380 // Operand Classes are groups of operands that are used to simplify
 4380 // instruction definitions by not requiring the AD writer to specify separate
 4381 // instructions for every form of operand when the instruction accepts
 4382 // multiple operand types with the same basic encoding and format.  The classic
 4383 // case of this is memory operands.
 4384 
// All addressing-mode operands defined above, accepted interchangeably
// wherever an instruction takes a "memory" operand.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4391 
 4392 //----------PIPELINE-----------------------------------------------------------
 4393 // Rules which define the behavior of the target architectures pipeline.
 4394 pipeline %{
 4395 
 4396 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable sized
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // The instruction unit is 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
 4407 

 6861   format %{ "MEMBAR-storestore (empty encoding)" %}
 6862   ins_encode( );
 6863   ins_pipe(empty);
 6864 %}
 6865 
 6866 //----------Move Instructions--------------------------------------------------
 6867 
// Reinterpret a long as a pointer (no value change).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    // No code needed if the allocator already placed src and dst in the
    // same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6880 
// Copy the raw bits of a narrow (compressed) value into a long register.
// NOTE(review): matches CastP2X with an rRegN source while the name says
// N2X -- presumably intentional for compressed pointers; confirm.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6893 
// Reinterpret a pointer as a long (no value change).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6906 
 6907 // Convert oop into int for vectors alignment masking
 6908 instruct convP2I(rRegI dst, rRegP src)
 6909 %{
 6910   match(Set dst (ConvL2I (CastP2X src)));
 6911 
 6912   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6913   ins_encode %{

11183   effect(DEF dst, USE src);
11184   ins_cost(100);
11185   format %{ "movd    $dst,$src\t# MoveI2F" %}
11186   ins_encode %{
11187     __ movdl($dst$$XMMRegister, $src$$Register);
11188   %}
11189   ins_pipe( pipe_slow );
11190 %}
11191 
// Move the raw bits of a long into a double register (bitwise, no
// numeric conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11202 
11203 
// Fast clearing of an array
// Small ClearArray non-AVX512.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small (not is_large) clear, any store width allowed, pre-AVX512.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
11264 
// Same as rep_stos but restricted to whole-word stores
// (word_copy_only), hence no rep-stosb branch in the format.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
11320 
11321 // Small ClearArray AVX512 non-constant length.
11322 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11323                        Universe dummy, rFlagsReg cr)
11324 %{
11325   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11326   match(Set dummy (ClearArray (Binary cnt base) val));
11327   ins_cost(125);
11328   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11329 
11330   format %{ $$template
11331     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11332     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11333     $$emit$$"jg      LARGE\n\t"
11334     $$emit$$"dec     rcx\n\t"
11335     $$emit$$"js      DONE\t# Zero length\n\t"
11336     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11337     $$emit$$"dec     rcx\n\t"
11338     $$emit$$"jge     LOOP\n\t"
11339     $$emit$$"jmp     DONE\n\t"
11340     $$emit$$"# LARGE:\n\t"
11341     if (UseFastStosb) {
11342        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11343        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11344     } else if (UseXMMForObjInit) {
11345        $$emit$$"mov     rdi,rax\n\t"
11346        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11347        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11348        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11356        $$emit$$"jl      L_tail\n\t"
11357        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11358        $$emit$$"add     0x20,rax\n\t"
11359        $$emit$$"sub     0x4,rcx\n\t"
11360        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11361        $$emit$$"add     0x4,rcx\n\t"
11362        $$emit$$"jle     L_end\n\t"
11363        $$emit$$"dec     rcx\n\t"
11364        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11365        $$emit$$"vmovq   xmm0,(rax)\n\t"
11366        $$emit$$"add     0x8,rax\n\t"
11367        $$emit$$"dec     rcx\n\t"
11368        $$emit$$"jge     L_sloop\n\t"
11369        $$emit$$"# L_end:\n\t"
11370     } else {
11371        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11372     }
11373     $$emit$$"# DONE"
11374   %}
11375   ins_encode %{
11376     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11377                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
11378   %}
11379   ins_pipe(pipe_slow);
11380 %}
11381 
11382 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11383                                  Universe dummy, rFlagsReg cr)

11384 %{
11385   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11386   match(Set dummy (ClearArray (Binary cnt base) val));
11387   ins_cost(125);
11388   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11389 
11390   format %{ $$template
11391     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11392     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11393     $$emit$$"jg      LARGE\n\t"
11394     $$emit$$"dec     rcx\n\t"
11395     $$emit$$"js      DONE\t# Zero length\n\t"
11396     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11397     $$emit$$"dec     rcx\n\t"
11398     $$emit$$"jge     LOOP\n\t"
11399     $$emit$$"jmp     DONE\n\t"
11400     $$emit$$"# LARGE:\n\t"
11401     if (UseFastStosb) {
11402        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11403        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11404     } else if (UseXMMForObjInit) {
11405        $$emit$$"mov     rdi,rax\n\t"
11406        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11407        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11408        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11416        $$emit$$"jl      L_tail\n\t"
11417        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11418        $$emit$$"add     0x20,rax\n\t"
11419        $$emit$$"sub     0x4,rcx\n\t"
11420        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11421        $$emit$$"add     0x4,rcx\n\t"
11422        $$emit$$"jle     L_end\n\t"
11423        $$emit$$"dec     rcx\n\t"
11424        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11425        $$emit$$"vmovq   xmm0,(rax)\n\t"
11426        $$emit$$"add     0x8,rax\n\t"
11427        $$emit$$"dec     rcx\n\t"
11428        $$emit$$"jge     L_sloop\n\t"
11429        $$emit$$"# L_end:\n\t"
11430     } else {
11431        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11432     }
11433     $$emit$$"# DONE"
11434   %}
11435   ins_encode %{
11436     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11437                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
11438   %}
11439   ins_pipe(pipe_slow);
11440 %}
11441 
// Large ClearArray non-AVX512.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  // Large (is_large) clear, any store width allowed, pre-AVX512.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
11491 
// Same as rep_stos_large but restricted to whole-word stores
// (word_copy_only), hence no rep-stosb branch in the format.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
11537 
// Large ClearArray AVX512.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  // AVX512 variant: additionally needs an opmask temp (ktmp).
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11588 
// Word-copy-only counterpart of rep_stos_large_evex.
// NOTE(review): the debug format still shows a rep-stosb branch even though
// word_copy_only is set (encode passes word_copy_only=true to clear_mem) --
// confirm whether the format text is stale.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11638 
11639 // Small ClearArray AVX512 constant length.
// Matches only when the element count is a compile-time constant (immL cnt)
// and the clear is neither large nor word-copy-only.  Requires AVX512VL+BW
// (see predicate).  The low ins_cost(100) makes the matcher prefer this form
// over the variable-count rep-stos variants when it applies.  Unlike those
// variants, $base may be any pointer register and $cnt is passed to
// clear_mem() as an immediate ($cnt$$constant).
11640 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
11641 %{
11642   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
11643             ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11644   match(Set dummy (ClearArray (Binary cnt base) val));
11645   ins_cost(100);
      // val (rax) is consumed; tmp and the opmask ktmp are scratch; flags die.
11646   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
11647   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11648   ins_encode %{
11649     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11650   %}
11651   ins_pipe(pipe_slow);
11652 %}
11653 
// String compare, both operands Latin-1 encoded (StrIntrinsicNode::LL).
// Pre-AVX-512 path: selected only when AVX512VL+BW is NOT supported, so no
// opmask register is available and knoreg is passed to string_compare().
// NOTE(review): the fixed register choices (rdi/rsi/rcx/rdx/rax) are
// presumably required by the string_compare() macro-assembler routine;
// confirm there.  All four inputs are destroyed (USE_KILL).
11654 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11655                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11656 %{
11657   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11658   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11659   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11660 
11661   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11662   ins_encode %{
11663     __ string_compare($str1$$Register, $str2$$Register,
11664                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11665                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11666   %}
11667   ins_pipe( pipe_slow );
11668 %}
11669 

13457 
13458   ins_cost(300);
13459   format %{ "call_leaf,vector " %}
13460   ins_encode(Java_To_Runtime(meth));
13461   ins_pipe(pipe_slow);
13462 %}
13463 
13464 //
// Direct call for the CallNative ideal node.  clear_avx emits vzeroupper
// before the movq-r10/call sequence produced by Java_To_Runtime; the emitted
// size must agree with MachCallNativeNode::ret_addr_offset() (13 bytes for
// movq r10,#addr; callq (r10), plus clear_avx_size()).
13465 instruct CallNativeDirect(method meth)
13466 %{
13467   match(CallNative);
13468   effect(USE meth);
13469 
13470   ins_cost(300);
13471   format %{ "call_native " %}
13472   ins_encode(clear_avx, Java_To_Runtime(meth));
13473   ins_pipe(pipe_slow);
13474 %}
13475 
13476 // Call runtime without safepoint
13477 // entry point is null, target holds the address to call
// Indirect leaf runtime call: used when the call node carries no static
// entry point (entry_point() == NULL) and the destination address is a
// runtime value held in $target.
// NOTE(review): unlike CallLeafNoFPDirect below, no clear_avx/vzeroupper is
// emitted here -- presumably intentional for the callers that generate
// indirect leaf calls; confirm.
13478 instruct CallLeafNoFPInDirect(rRegP target)
13479 %{
13480   predicate(n->as_Call()->entry_point() == NULL);
13481   match(CallLeafNoFP target);
13482 
13483   ins_cost(300);
13484   format %{ "call_leaf_nofp,runtime indirect " %}
13485   ins_encode %{
13486      __ call($target$$Register);
13487   %}
13488 
13489   ins_pipe(pipe_slow);
13490 %}
13491 
// Direct leaf runtime call (no FP, no safepoint) to a statically known entry
// point (entry_point() != NULL) -- the counterpart of the indirect form
// above.  clear_avx emits vzeroupper before the movq-r10/call pair from
// Java_To_Runtime; see MachCallRuntimeNode::ret_addr_offset() for the
// matching size computation.
13492 instruct CallLeafNoFPDirect(method meth)
13493 %{
13494   predicate(n->as_Call()->entry_point() != NULL);
13495   match(CallLeafNoFP);
13496   effect(USE meth);
13497 
13498   ins_cost(300);
13499   format %{ "call_leaf_nofp,runtime " %}
13500   ins_encode(clear_avx, Java_To_Runtime(meth));
13501   ins_pipe(pipe_slow);
13502 %}
13503 
13504 // Return Instruction
13505 // Remove the return address & jump to it.
13506 // Notice: We always emit a nop after a ret to make sure there is room
13507 // for safepoint patching
13508 instruct Ret()
13509 %{
13510   match(Return);
13511 
13512   format %{ "ret" %}
13513   ins_encode %{
13514     __ ret(0);
< prev index next >