< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  598 }
  599 
  600 // !!!!! Special hack to get all types of calls to specify the byte offset
  601 //       from the start of the call to the point where the return address
  602 //       will point.
  603 int MachCallStaticJavaNode::ret_addr_offset()
  604 {
        // Byte distance from the start of the static-call sequence to the return
        // address: the 5-byte call plus clear_avx_size() (presumably the size of
        // the vzeroupper that compute_padding below also skips).
  605   int offset = 5; // 5 bytes from start of call to where return address points
  606   offset += clear_avx_size();
  607   return offset;
  608 }
  609 
  610 int MachCallDynamicJavaNode::ret_addr_offset()
  611 {
        // Byte distance from the start of the dynamic-call sequence to the return
        // address: a fixed 15 bytes (compute_padding below describes the sequence
        // as a movq followed by the call) plus clear_avx_size().
  612   int offset = 15; // 15 bytes from start of call to where return address points
  613   offset += clear_avx_size();
  614   return offset;
  615 }
  616 
  617 int MachCallRuntimeNode::ret_addr_offset() {
        // Byte distance from the start of a runtime-call sequence to its return
        // address.




  618   int offset = 13; // movq r10,#addr; callq (r10)
  619   if (this->ideal_Opcode() != Op_CallLeafVector) {
          // CallLeafVector is encoded without the clear_avx step (see
          // CallLeafDirectVector), so only the other runtime calls add its size.
  620     offset += clear_avx_size();
  621   }
  622   return offset;
  623 }

  624 //
  625 // Compute padding required for nodes which need alignment
  626 //
  627 
  628 // The address of the call instruction needs to be 4-byte aligned to
  629 // ensure that it does not span a cache line so that it can be patched.
      // Concretely: starting from current_offset, skip clear_avx_size() bytes and
      // the one-byte call opcode, round that position up to alignment_required(),
      // and return how many padding bytes are needed to get there.
  630 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  631 {
  632   current_offset += clear_avx_size(); // skip vzeroupper
  633   current_offset += 1; // skip call opcode byte
  634   return align_up(current_offset, alignment_required()) - current_offset;
  635 }
  636 
  637 // The address of the call instruction needs to be 4-byte aligned to
  638 // ensure that it does not span a cache line so that it can be patched.
  639 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  640 {
  641   current_offset += clear_avx_size(); // skip vzeroupper
  642   current_offset += 11; // skip movq instruction + call opcode byte
  643   return align_up(current_offset, alignment_required()) - current_offset;

  829     st->print("\n\t");
  830     st->print("# stack alignment check");
  831 #endif
  832   }
  833   if (C->stub_function() != nullptr) {
  834     st->print("\n\t");
  835     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  836     st->print("\n\t");
  837     st->print("je      fast_entry\t");
  838     st->print("\n\t");
  839     st->print("call    #nmethod_entry_barrier_stub\t");
  840     st->print("\n\tfast_entry:");
  841   }
  842   st->cr();
  843 }
  844 #endif
  845 
  846 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: an optional class-initialization barrier, the
        // verified_entry sequence, then records the frame-complete offset and, if
        // a constant base node exists, fixes the constant table's base offset.
  847   Compile* C = ra_->C;
  848 
  849   int framesize = C->output()->frame_size_in_bytes();
  850   int bangsize = C->output()->bang_size_in_bytes();
  851 
  852   if (C->clinit_barrier_on_entry()) {
  853     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  854     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  855 
  856     Label L_skip_barrier;
  857     Register klass = rscratch1;
  858 
          // Load the holder klass and branch to L_skip_barrier on the fast path;
          // otherwise fall through to the handle_wrong_method stub.
  859     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  860     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
  861 
  862     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  863 
  864     __ bind(L_skip_barrier);

  865   }
  866 
        // The bang size is passed as 0 when no stack bang is needed; the last
        // argument tells verified_entry whether this is a stub compilation.
  867   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


  868 
  869   C->output()->set_frame_complete(__ offset());
  870 
  871   if (C->has_mach_constant_base_node()) {
  872     // NOTE: We set the table base offset here because users might be
  873     // emitted before MachConstantBaseNode.
  874     ConstantTable& constant_table = C->output()->constant_table();
  875     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  876   }
  877 }
  878 
  879 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  880 {
        // No closed-form size for the prolog; defer to the generic MachNode::size().
  881   return MachNode::size(ra_); // too many variables; just compute it
  882                               // the hard way
  883 }
  884 
  885 int MachPrologNode::reloc() const
  886 {
        // Presumably the number of relocation entries to reserve for this node.
        // NOTE(review): the "large enough" remark sits next to a value of 0 —
        // confirm the intended meaning.
  887   return 0; // a large enough number
  888 }
  889 
  890 //=============================================================================
  891 #ifndef PRODUCT
  892 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  893 {
  894   Compile* C = ra_->C;
  895   if (generate_vzeroupper(C)) {
  896     st->print("vzeroupper");
  897     st->cr(); st->print("\t");
  898   }
  899 
  900   int framesize = C->output()->frame_size_in_bytes();
  901   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  902   // Remove word for return adr already pushed
  903   // and RBP
  904   framesize -= 2*wordSize;

  911   st->print_cr("popq    rbp");
  912   if (do_polling() && C->is_method_compilation()) {
  913     st->print("\t");
  914     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  915                  "ja      #safepoint_stub\t"
  916                  "# Safepoint: poll for GC");
  917   }
  918 }
  919 #endif
  920 
  921 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  922 {
        // Emits the method epilog: optional vzeroupper, frame pop (addq rsp /
        // popq rbp), optional reserved-stack check, and the return safepoint poll.
  923   Compile* C = ra_->C;
  924 
  925   if (generate_vzeroupper(C)) {
  926     // Clear upper bits of YMM registers when current compiled code uses
  927     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  928     __ vzeroupper();
  929   }
  930 
  931   int framesize = C->output()->frame_size_in_bytes();
  932   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  933   // Remove word for return adr already pushed
  934   // and RBP
  935   framesize -= 2*wordSize;
  936 
  937   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  938 
  939   if (framesize) {
  940     __ addq(rsp, framesize);
  941   }
  942 
  943   __ popq(rbp);
  944 
  945   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  946     __ reserved_stack_check();
  947   }
  948 
  949   if (do_polling() && C->is_method_compilation()) {
          // During scratch size measurement no stub is registered; the poll then
          // targets a local dummy label instead of a real stub entry.
  950     Label dummy_label;
  951     Label* code_stub = &dummy_label;
  952     if (!C->output()->in_scratch_emit_size()) {
  953       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  954       C->output()->add_stub(stub);
  955       code_stub = &stub->entry();
  956     }
  957     __ relocate(relocInfo::poll_return_type);
  958     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  959   }
  960 }
  961 
  962 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
  963 {
        // No closed-form size for the epilog; defer to the generic MachNode::size().
  964   return MachNode::size(ra_); // too many variables; just compute it
  965                               // the hard way
  966 }
  967 
  968 int MachEpilogNode::reloc() const
  969 {
        // Presumably an upper bound on the relocation entries the epilog needs
        // (emit() issues a poll_return relocation) — confirm against callers.
  970   return 2; // a large enough number
  971 }
  972 
  973 const Pipeline* MachEpilogNode::pipeline() const
  974 {
        // The epilog has no pipeline description of its own; it reports the
        // generic MachNode pipeline class.
  975   return MachNode::pipeline_class();
  976 }
  977 
  978 //=============================================================================
  979 
  980 enum RC {
        // Coarse location categories. NOTE(review): judging by the names these
        // distinguish general-purpose, opmask (k), XMM/float registers and stack
        // slots — confirm against the code that consumes them.
  981   rc_bad,
  982   rc_int,
  983   rc_kreg,
  984   rc_float,
  985   rc_stack
  986 };
  987 

 1549 #endif
 1550 
 1551 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1552 {
        // Loads into this node's assigned register the address rsp + offset, where
        // offset is the stack offset of the first element of input register mask 0.
 1553   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1554   int reg = ra_->get_encode(this);
 1555 
 1556   __ lea(as_Register(reg), Address(rsp, offset));
 1557 }
 1558 
 1559 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1560 {
        // Size in bytes of the lea produced by emit(). Register encodings above 15
        // take the longer REX2 form (one byte more than REX); offsets below 0x80
        // take the shorter form (presumably an 8-bit displacement), saving 3 bytes.
 1561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1562   if (ra_->get_encode(this) > 15) {
 1563     return (offset < 0x80) ? 6 : 9; // REX2
 1564   } else {
 1565     return (offset < 0x80) ? 5 : 8; // REX
 1566   }
 1567 }
 1568 











































 1569 //=============================================================================
 1570 #ifndef PRODUCT
 1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1572 {
        // Prints the inline-cache check for debug listings: load the receiver's
        // klass, compare it with the speculated klass in the IC data, and branch
        // to the IC miss stub on mismatch. 32-bit forms are shown with compressed
        // class pointers, 64-bit forms otherwise.
 1573   if (UseCompressedClassPointers) {
 1574     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1575     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1576   } else {
          // Full-width klass pointer here, so it is not annotated as compressed.
 1577     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1578     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1579   }
 1580   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1581 }
 1582 #endif
 1583 
 1584 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1585 {
        // Delegates the whole unverified-entry inline-cache check to the macro
        // assembler's ic_check, passing InteriorEntryAlignment.
 1586   __ ic_check(InteriorEntryAlignment);
 1587 }
 1588 
 1589 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1590 {
        // No closed-form size for the entry check; defer to MachNode::size().
 1591   return MachNode::size(ra_); // too many variables; just compute it
 1592                               // the hard way
 1593 }
 1594 
 1595 
 1596 //=============================================================================
 1597 
 1598 bool Matcher::supports_vector_calling_convention(void) {
        // The vector calling convention is available exactly when
        // EnableVectorSupport is set.
 1599   return EnableVectorSupport;
 1600 }
 1601 
 1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Returns the register pair for a vector return value. The low half is
        // always XMM0_num; the high slot is chosen by ideal_reg: XMM0d for VecX,
        // XMM0h for VecY, XMM0p for VecZ, and XMM0b for anything else.
 1603   assert(EnableVectorSupport, "sanity");
 1604   int lo = XMM0_num;
 1605   int hi = XMM0b_num;
 1606   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1607   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1608   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1609   return OptoRegPair(hi, lo);
 1610 }
 1611 
 1612 // Is this branch offset short enough that a short branch can be used?
 1613 //
 1614 // NOTE: If the platform does not provide any short branch variants, then
 1615 //       this method should return false for offset 0.

 3046   %}
 3047 %}
 3048 
 3049 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3050 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3051 %{
 3052   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the type of the node reached via in(2)->in(3)->in(1)
        // (presumably the ConvI2L of idx in the match rule below) has a
        // non-negative lower bound.
 3053   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3054   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3055 
 3056   op_cost(10);
 3057   format %{"[$reg + $off + $idx << $scale]" %}
 3058   interface(MEMORY_INTER) %{
 3059     base($reg);
 3060     index($idx);
 3061     scale($scale);
 3062     disp($off);
 3063   %}
 3064 %}
 3065 
















 3066 // Indirect Narrow Oop Plus Offset Operand
 3067 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3068 // we can't free r12 even with CompressedOops::base() == nullptr.
 3069 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Folds a DecodeN into the address: R12 is the base and the narrow oop is
        // the index scaled by 8. Used only with compressed oops and a shift equal
        // to Address::times_8.
 3070   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3071   constraint(ALLOC_IN_RC(ptr_reg));
 3072   match(AddP (DecodeN reg) off);
 3073 
 3074   op_cost(10);
 3075   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3076   interface(MEMORY_INTER) %{
 3077     base(0xc); // R12
 3078     index($reg);
 3079     scale(0x3);
 3080     disp($off);
 3081   %}
 3082 %}
 3083 
 3084 // Indirect Memory Operand
 3085 operand indirectNarrow(rRegN reg)

 3392     equal(0x4, "e");
 3393     not_equal(0x5, "ne");
 3394     less(0x2, "b");
 3395     greater_equal(0x3, "ae");
 3396     less_equal(0x6, "be");
 3397     greater(0x7, "a");
 3398     overflow(0x0, "o");
 3399     no_overflow(0x1, "no");
 3400   %}
 3401 %}
 3402 
 3403 //----------OPERAND CLASSES----------------------------------------------------
 3404 // Operand Classes are groups of operands that are used to simplify
 3405 // instruction definitions by not requiring the AD writer to specify separate
 3406 // instructions for every form of operand when the instruction accepts
 3407 // multiple operand types with the same basic encoding and format.  The classic
 3408 // case of this is memory operands.
 3409 
      // All memory-addressing operand forms, regular and narrow-oop based, that
      // an instruction taking a "memory" operand may match.
 3410 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3411                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3412                indCompressedOopOffset,
 3413                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3414                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3415                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3416 
 3417 //----------PIPELINE-----------------------------------------------------------
 3418 // Rules which define the behavior of the target architectures pipeline.
 3419 pipeline %{
 3420 
 3421 //----------ATTRIBUTES---------------------------------------------------------
 3422 attributes %{
 3423   variable_size_instructions;        // Instructions vary in size
 3424   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3425   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 3426   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3427   instruction_fetch_units = 1;       // of 16 bytes
 3428 
 3429   // List of nop instructions
 3430   nops( MachNop );
 3431 %}
 3432 

 5933   format %{ "MEMBAR-storestore (empty encoding)" %}
 5934   ins_encode( );
 5935   ins_pipe(empty);
 5936 %}
 5937 
 5938 //----------Move Instructions--------------------------------------------------
 5939 
 5940 instruct castX2P(rRegP dst, rRegL src)
 5941 %{
        // CastX2P (long -> pointer) as a plain register move; nothing is emitted
        // when dst and src were allocated to the same register.
 5942   match(Set dst (CastX2P src));
 5943 
 5944   format %{ "movq    $dst, $src\t# long->ptr" %}
 5945   ins_encode %{
 5946     if ($dst$$reg != $src$$reg) {
 5947       __ movptr($dst$$Register, $src$$Register);
 5948     }
 5949   %}
 5950   ins_pipe(ialu_reg_reg); // XXX
 5951 %}
 5952 


























 5953 instruct castP2X(rRegL dst, rRegP src)
 5954 %{
        // CastP2X (pointer -> long) as a plain register move; nothing is emitted
        // when dst and src were allocated to the same register.
 5955   match(Set dst (CastP2X src));
 5956 
 5957   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5958   ins_encode %{
 5959     if ($dst$$reg != $src$$reg) {
 5960       __ movptr($dst$$Register, $src$$Register);
 5961     }
 5962   %}
 5963   ins_pipe(ialu_reg_reg); // XXX
 5964 %}
 5965 
 5966 // Convert oop into int for vectors alignment masking
 5967 instruct convP2I(rRegI dst, rRegP src)
 5968 %{
 5969   match(Set dst (ConvL2I (CastP2X src)));
 5970 
 5971   format %{ "movl    $dst, $src\t# ptr -> int" %}
 5972   ins_encode %{

12193   effect(DEF dst, USE src);
12194   ins_cost(100);
12195   format %{ "movd    $dst,$src\t# MoveI2F" %}
12196   ins_encode %{
12197     __ movdl($dst$$XMMRegister, $src$$Register);
12198   %}
12199   ins_pipe( pipe_slow );
12200 %}
12201 
12202 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D: moves a long general-purpose register into an XMM register.
        // Note the format string prints "movd" while the encoding calls movdq.
12203   match(Set dst (MoveL2D src));
12204   effect(DEF dst, USE src);
12205   ins_cost(100);
12206   format %{ "movd    $dst,$src\t# MoveL2D" %}
12207   ins_encode %{
12208      __ movdq($dst$$XMMRegister, $src$$Register);
12209   %}
12210   ins_pipe( pipe_slow );
12211 %}
12212 

12213 // Fast clearing of an array
12214 // Small non-constant length ClearArray for non-AVX512 targets.
12215 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12216                   Universe dummy, rFlagsReg cr)
12217 %{
12218   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12219   match(Set dummy (ClearArray cnt base));
12220   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































12221 
12222   format %{ $$template
12223     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12224     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12225     $$emit$$"jg      LARGE\n\t"
12226     $$emit$$"dec     rcx\n\t"
12227     $$emit$$"js      DONE\t# Zero length\n\t"
12228     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12229     $$emit$$"dec     rcx\n\t"
12230     $$emit$$"jge     LOOP\n\t"
12231     $$emit$$"jmp     DONE\n\t"
12232     $$emit$$"# LARGE:\n\t"
12233     if (UseFastStosb) {
12234        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12235        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12236     } else if (UseXMMForObjInit) {
12237        $$emit$$"mov     rdi,rax\n\t"
12238        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12239        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12240        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12248        $$emit$$"jl      L_tail\n\t"
12249        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12250        $$emit$$"add     0x20,rax\n\t"
12251        $$emit$$"sub     0x4,rcx\n\t"
12252        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12253        $$emit$$"add     0x4,rcx\n\t"
12254        $$emit$$"jle     L_end\n\t"
12255        $$emit$$"dec     rcx\n\t"
12256        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12257        $$emit$$"vmovq   xmm0,(rax)\n\t"
12258        $$emit$$"add     0x8,rax\n\t"
12259        $$emit$$"dec     rcx\n\t"
12260        $$emit$$"jge     L_sloop\n\t"
12261        $$emit$$"# L_end:\n\t"
12262     } else {
12263        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12264     }
12265     $$emit$$"# DONE"
12266   %}
12267   ins_encode %{
12268     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12269                  $tmp$$XMMRegister, false, knoreg);
12270   %}
12271   ins_pipe(pipe_slow);
12272 %}
12273 
12274 // Small non-constant length ClearArray for AVX512 targets.
12275 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12276                        Universe dummy, rFlagsReg cr)
12277 %{
12278   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12279   match(Set dummy (ClearArray cnt base));
12280   ins_cost(125);
12281   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12282 
12283   format %{ $$template
12284     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12285     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12286     $$emit$$"jg      LARGE\n\t"
12287     $$emit$$"dec     rcx\n\t"
12288     $$emit$$"js      DONE\t# Zero length\n\t"
12289     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12290     $$emit$$"dec     rcx\n\t"
12291     $$emit$$"jge     LOOP\n\t"
12292     $$emit$$"jmp     DONE\n\t"
12293     $$emit$$"# LARGE:\n\t"
12294     if (UseFastStosb) {
12295        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12296        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12297     } else if (UseXMMForObjInit) {
12298        $$emit$$"mov     rdi,rax\n\t"
12299        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12300        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12301        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12309        $$emit$$"jl      L_tail\n\t"
12310        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12311        $$emit$$"add     0x20,rax\n\t"
12312        $$emit$$"sub     0x4,rcx\n\t"
12313        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12314        $$emit$$"add     0x4,rcx\n\t"
12315        $$emit$$"jle     L_end\n\t"
12316        $$emit$$"dec     rcx\n\t"
12317        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12318        $$emit$$"vmovq   xmm0,(rax)\n\t"
12319        $$emit$$"add     0x8,rax\n\t"
12320        $$emit$$"dec     rcx\n\t"
12321        $$emit$$"jge     L_sloop\n\t"
12322        $$emit$$"# L_end:\n\t"
12323     } else {
12324        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12325     }
12326     $$emit$$"# DONE"
12327   %}
12328   ins_encode %{
12329     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12330                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
12331   %}
12332   ins_pipe(pipe_slow);
12333 %}
12334 
12335 // Large non-constant length ClearArray for non-AVX512 targets.
12336 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12337                         Universe dummy, rFlagsReg cr)
12338 %{
        // Selected for ClearArray nodes flagged is_large() when UseAVX <= 2.
        // cnt (rcx) and base (rdi) are consumed and killed, rax and the flags are
        // killed, and tmp is an XMM temporary. The format template below is only
        // the debug listing; the code itself comes from clear_mem(), called with
        // is_large = true and no opmask register (knoreg).
12339   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
12340   match(Set dummy (ClearArray cnt base));
12341   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































12342 
12343   format %{ $$template
12344     if (UseFastStosb) {
12345        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12346        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12347        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12348     } else if (UseXMMForObjInit) {
12349        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12350        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12351        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12352        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12353        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12354        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12355        $$emit$$"add     0x40,rax\n\t"
12356        $$emit$$"# L_zero_64_bytes:\n\t"
12357        $$emit$$"sub     0x8,rcx\n\t"
12358        $$emit$$"jge     L_loop\n\t"
12359        $$emit$$"add     0x4,rcx\n\t"
12360        $$emit$$"jl      L_tail\n\t"
12361        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12362        $$emit$$"add     0x20,rax\n\t"
12363        $$emit$$"sub     0x4,rcx\n\t"
12364        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12365        $$emit$$"add     0x4,rcx\n\t"
12366        $$emit$$"jle     L_end\n\t"
12367        $$emit$$"dec     rcx\n\t"
12368        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12369        $$emit$$"vmovq   xmm0,(rax)\n\t"
12370        $$emit$$"add     0x8,rax\n\t"
12371        $$emit$$"dec     rcx\n\t"
12372        $$emit$$"jge     L_sloop\n\t"
12373        $$emit$$"# L_end:\n\t"
12374     } else {
12375        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12376        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12377     }
12378   %}
12379   ins_encode %{
12380     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12381                  $tmp$$XMMRegister, true, knoreg);
12382   %}
12383   ins_pipe(pipe_slow);
12384 %}
12385 
12386 // Large non-constant length ClearArray for AVX512 targets.
12387 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12388                              Universe dummy, rFlagsReg cr)
12389 %{
        // Selected for ClearArray nodes flagged is_large() when UseAVX > 2. Same
        // register effects as the non-EVEX variant, plus an opmask temporary
        // (ktmp) that is handed to clear_mem(). The format template below is only
        // the debug listing; clear_mem() is called with is_large = true.
12390   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
12391   match(Set dummy (ClearArray cnt base));
12392   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12393 
12394   format %{ $$template
12395     if (UseFastStosb) {
12396        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12397        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12398        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12399     } else if (UseXMMForObjInit) {
12400        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12401        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12402        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12403        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12404        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12405        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12406        $$emit$$"add     0x40,rax\n\t"
12407        $$emit$$"# L_zero_64_bytes:\n\t"
12408        $$emit$$"sub     0x8,rcx\n\t"
12409        $$emit$$"jge     L_loop\n\t"
12410        $$emit$$"add     0x4,rcx\n\t"
12411        $$emit$$"jl      L_tail\n\t"
12412        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12413        $$emit$$"add     0x20,rax\n\t"
12414        $$emit$$"sub     0x4,rcx\n\t"
12415        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12416        $$emit$$"add     0x4,rcx\n\t"
12417        $$emit$$"jle     L_end\n\t"
12418        $$emit$$"dec     rcx\n\t"
12419        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12420        $$emit$$"vmovq   xmm0,(rax)\n\t"
12421        $$emit$$"add     0x8,rax\n\t"
12422        $$emit$$"dec     rcx\n\t"
12423        $$emit$$"jge     L_sloop\n\t"
12424        $$emit$$"# L_end:\n\t"
12425     } else {
12426        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12427        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12428     }
12429   %}
12430   ins_encode %{
12431     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12432                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
12433   %}
12434   ins_pipe(pipe_slow);
12435 %}
12436 
12437 // Small constant length ClearArray for AVX512 targets.
12438 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
12439 %{
        // ClearArray with a compile-time constant count. Requires a node not
        // flagged is_large(), MaxVectorSize >= 32 and AVX512VL support. Unlike the
        // rep_stos* rules, base and zero may be any registers; the constant count
        // goes to the clear_mem() overload that takes an immediate.
12440   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
12441   match(Set dummy (ClearArray cnt base));

12442   ins_cost(100);
12443   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
12444   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12445   ins_encode %{
12446    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12447   %}
12448   ins_pipe(pipe_slow);
12449 %}
12450 
12451 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12452                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12453 %{
        // StrComp for the LL encoding on targets without AVX512VL+BW. Operands are
        // pinned to rdi/rcx/rsi/rdx with the result in rax; all four inputs and
        // the flags are killed. No opmask register is passed (knoreg).
12454   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12455   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12456   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12457 
12458   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12459   ins_encode %{
12460     __ string_compare($str1$$Register, $str2$$Register,
12461                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12462                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12463   %}
12464   ins_pipe( pipe_slow );
12465 %}
12466 

14299 
14300   ins_cost(300);
14301   format %{ "call_leaf,runtime " %}
14302   ins_encode(clear_avx, Java_To_Runtime(meth));
14303   ins_pipe(pipe_slow);
14304 %}
14305 
14306 // Call runtime without safepoint and with vector arguments
14307 instruct CallLeafDirectVector(method meth)
14308 %{
        // Unlike the other runtime-call rules, this encoding has no clear_avx
        // step; MachCallRuntimeNode::ret_addr_offset() mirrors that by not adding
        // clear_avx_size() for Op_CallLeafVector.
14309   match(CallLeafVector);
14310   effect(USE meth);
14311 
14312   ins_cost(300);
14313   format %{ "call_leaf,vector " %}
14314   ins_encode(Java_To_Runtime(meth));
14315   ins_pipe(pipe_slow);
14316 %}
14317 
14318 // Call runtime without safepoint















14319 instruct CallLeafNoFPDirect(method meth)
14320 %{
        // CallLeafNoFP runtime call: the clear_avx encoding followed by the
        // Java_To_Runtime call sequence.

14321   match(CallLeafNoFP);
14322   effect(USE meth);
14323 
14324   ins_cost(300);
14325   format %{ "call_leaf_nofp,runtime " %}
14326   ins_encode(clear_avx, Java_To_Runtime(meth));
14327   ins_pipe(pipe_slow);
14328 %}
14329 
14330 // Return Instruction
14331 // Remove the return address & jump to it.
14332 // Notice: We always emit a nop after a ret to make sure there is room
14333 // for safepoint patching
14334 instruct Ret()
14335 %{
14336   match(Return);
14337 
14338   format %{ "ret" %}
14339   ins_encode %{
14340     __ ret(0);

  598 }
  599 
  600 // !!!!! Special hack to get all types of calls to specify the byte offset
  601 //       from the start of the call to the point where the return address
  602 //       will point.
  603 int MachCallStaticJavaNode::ret_addr_offset()
  604 {
        // Byte distance from the start of the static-call sequence to the return
        // address: the 5-byte call plus clear_avx_size() (presumably the size of
        // the vzeroupper that compute_padding below also skips).
  605   int offset = 5; // 5 bytes from start of call to where return address points
  606   offset += clear_avx_size();
  607   return offset;
  608 }
  609 
  610 int MachCallDynamicJavaNode::ret_addr_offset()
  611 {
        // Byte distance from the start of the dynamic-call sequence to the return
        // address: a fixed 15 bytes (compute_padding below describes the sequence
        // as a movq followed by the call) plus clear_avx_size().
  612   int offset = 15; // 15 bytes from start of call to where return address points
  613   offset += clear_avx_size();
  614   return offset;
  615 }
  616 
  617 int MachCallRuntimeNode::ret_addr_offset() {
        // Byte distance from the start of a runtime-call sequence to its return
        // address. A null _entry_point marks the register-indirect form, which is
        // just a 3-byte call and gets no clear_avx_size() adjustment.
  618   if (_entry_point == nullptr) {
  619     // CallLeafNoFPInDirect
  620     return 3; // callq (register)
  621   }
  622   int offset = 13; // movq r10,#addr; callq (r10)
  623   if (this->ideal_Opcode() != Op_CallLeafVector) {
          // Every opcode except CallLeafVector also accounts for clear_avx_size().
  624     offset += clear_avx_size();
  625   }
  626   return offset;
  627 }
  628 
  629 //
  630 // Compute padding required for nodes which need alignment
  631 //
  632 
  633 // The address of the call instruction needs to be 4-byte aligned to
  634 // ensure that it does not span a cache line so that it can be patched.
      // Concretely: starting from current_offset, skip clear_avx_size() bytes and
      // the one-byte call opcode, round that position up to alignment_required(),
      // and return how many padding bytes are needed to get there.
  635 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  636 {
  637   current_offset += clear_avx_size(); // skip vzeroupper
  638   current_offset += 1; // skip call opcode byte
  639   return align_up(current_offset, alignment_required()) - current_offset;
  640 }
  641 
  642 // The address of the call instruction needs to be 4-byte aligned to
  643 // ensure that it does not span a cache line so that it can be patched.
  644 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  645 {
  646   current_offset += clear_avx_size(); // skip vzeroupper
  647   current_offset += 11; // skip movq instruction + call opcode byte
  648   return align_up(current_offset, alignment_required()) - current_offset;

  834     st->print("\n\t");
  835     st->print("# stack alignment check");
  836 #endif
  837   }
  838   if (C->stub_function() != nullptr) {
  839     st->print("\n\t");
  840     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  841     st->print("\n\t");
  842     st->print("je      fast_entry\t");
  843     st->print("\n\t");
  844     st->print("call    #nmethod_entry_barrier_stub\t");
  845     st->print("\n\tfast_entry:");
  846   }
  847   st->cr();
  848 }
  849 #endif
  850 
  851 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: verified_entry(C), an entry barrier for
        // non-stub compilations, binding of the _verified_entry label, then the
        // frame-complete offset and constant-table base bookkeeping.
  852   Compile* C = ra_->C;
  853 
  854   __ verified_entry(C);













  855 
        // Stub compilations (stub_function() != nullptr) get no entry barrier.
  856   if (ra_->C->stub_function() == nullptr) {
  857     __ entry_barrier();
  858   }
  859 
        // The label is bound only for the real emission, not while the node is
        // being emitted into scratch space for size measurement.
  860   if (!Compile::current()->output()->in_scratch_emit_size()) {
  861     __ bind(*_verified_entry);
  862   }
  863 
  864   C->output()->set_frame_complete(__ offset());
  865 
  866   if (C->has_mach_constant_base_node()) {
  867     // NOTE: We set the table base offset here because users might be
  868     // emitted before MachConstantBaseNode.
  869     ConstantTable& constant_table = C->output()->constant_table();
  870     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  871   }
  872 }
  873 






  874 int MachPrologNode::reloc() const
  875 {
        // Presumably the number of relocation entries to reserve for this node.
        // NOTE(review): the "large enough" remark sits next to a value of 0 —
        // confirm the intended meaning.
  876   return 0; // a large enough number
  877 }
  878 
  879 //=============================================================================
  880 #ifndef PRODUCT
  881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  882 {
  883   Compile* C = ra_->C;
  884   if (generate_vzeroupper(C)) {
  885     st->print("vzeroupper");
  886     st->cr(); st->print("\t");
  887   }
  888 
  889   int framesize = C->output()->frame_size_in_bytes();
  890   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  891   // Remove word for return adr already pushed
  892   // and RBP
  893   framesize -= 2*wordSize;

  900   st->print_cr("popq    rbp");
  901   if (do_polling() && C->is_method_compilation()) {
  902     st->print("\t");
  903     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  904                  "ja      #safepoint_stub\t"
  905                  "# Safepoint: poll for GC");
  906   }
  907 }
  908 #endif
  909 
  910 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  911 {
        // Emits the method epilog: optional vzeroupper, frame removal via
        // remove_frame(), optional reserved-stack check, and the return
        // safepoint poll.
  912   Compile* C = ra_->C;
  913 
  914   if (generate_vzeroupper(C)) {
  915     // Clear upper bits of YMM registers when current compiled code uses
  916     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  917     __ vzeroupper();
  918   }
  919 
  920   // Subtract two words to account for return address and rbp
  921   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  922   __ remove_frame(initial_framesize, C->needs_stack_repair());










  923 
  924   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  925     __ reserved_stack_check();
  926   }
  927 
  928   if (do_polling() && C->is_method_compilation()) {
          // During scratch size measurement no stub is registered; the poll then
          // targets a local dummy label instead of a real stub entry.
  929     Label dummy_label;
  930     Label* code_stub = &dummy_label;
  931     if (!C->output()->in_scratch_emit_size()) {
  932       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  933       C->output()->add_stub(stub);
  934       code_stub = &stub->entry();
  935     }
  936     __ relocate(relocInfo::poll_return_type);
  937     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  938   }
  939 }
  940 






  941 int MachEpilogNode::reloc() const
  942 {
        // Reports 2 for the epilog node (presumably an upper bound on the
        // relocation entries it needs — confirm against the callers of reloc()).
  943   return 2; // a large enough number
  944 }
  945 
  946 const Pipeline* MachEpilogNode::pipeline() const
  947 {
        // The epilog has no pipeline description of its own; it returns the
        // generic MachNode pipeline class.
  948   return MachNode::pipeline_class();
  949 }
  950 
  951 //=============================================================================
  952 
  953 enum RC {
        // Presumably the register classes distinguished by the code that follows
        // (bad/unknown, integer, k-register, float, stack slot); the users of
        // this enum are not visible in this chunk — confirm there.
  954   rc_bad,
  955   rc_int,
  956   rc_kreg,
  957   rc_float,
  958   rc_stack
  959 };
  960 

 1522 #endif
 1523 
 1524 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1525 {
        // Loads the address rsp + offset into the register assigned to this
        // node, where offset is the stack offset of the first element of the
        // input register mask.
 1526   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1527   int reg = ra_->get_encode(this);
 1528 
 1529   __ lea(as_Register(reg), Address(rsp, offset));
 1530 }
 1531 
 1532 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1533 {
        // Byte size reported for this node. The value depends on two things:
        // an encoding above 15 costs one extra byte (REX2 instead of REX, per
        // the trailing comments), and an offset of 0x80 or more costs three
        // extra bytes (presumably a 32-bit instead of an 8-bit displacement).
 1534   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1535   if (ra_->get_encode(this) > 15) {
 1536     return (offset < 0x80) ? 6 : 9; // REX2
 1537   } else {
 1538     return (offset < 0x80) ? 5 : 8; // REX
 1539   }
 1540 }
 1541 
 1542 //=============================================================================
 1543 #ifndef PRODUCT
 1544 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1545 {
        // Prints only the node name; the emitted code is not listed.
 1546   st->print_cr("MachVEPNode");
 1547 }
 1548 #endif
 1549 
 1550 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1551 {
        // Emits the entry point code for this node. When _verified is false only
        // an inline cache check is emitted; otherwise the inline type arguments
        // are unpacked and control jumps to the verified entry. In both cases the
        // emitted code is padded with nops to a multiple of 4 bytes.
 1552   CodeBuffer* cbuf = masm->code();
        // Remember the start so the size of the code emitted here can be measured.
 1553   uint insts_size = cbuf->insts_size();
 1554   if (!_verified) {
 1555     __ ic_check(1);
 1556   } else {
 1557     // TODO 8284443 Avoid creation of temporary frame
 1558     if (ra_->C->stub_function() == nullptr) {
            // Not a stub: set up a frame, run the entry barrier, then remove
            // the frame again before unpacking.
 1559       __ verified_entry(ra_->C, 0);
 1560       __ entry_barrier();
 1561       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1562       __ remove_frame(initial_framesize, false);
 1563     }
 1564     // Unpack inline type args passed as oop and then jump to
 1565     // the verified entry point (skipping the unverified entry).
 1566     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1567     // Emit code for verified entry and save increment for stack repair on return
 1568     __ verified_entry(ra_->C, sp_inc);
 1569     if (Compile::current()->output()->in_scratch_emit_size()) {
            // Size-only pass: jump to a local dummy label instead of _verified_entry.
 1570       Label dummy_verified_entry;
 1571       __ jmp(dummy_verified_entry);
 1572     } else {
 1573       __ jmp(*_verified_entry);
 1574     }
 1575   }
 1576   /* WARNING these NOPs are critical so that verified entry point is properly
 1577      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1578   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 1579   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1580   if (nops_cnt > 0) {
 1581     __ nop(nops_cnt);
 1582   }
 1583 }
 1584 
 1585 //=============================================================================
 1586 #ifndef PRODUCT
 1587 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1588 {
        // Prints the listing for the inline cache check at the unverified entry
        // point: load the receiver's klass, compare it with the speculated klass
        // of the inline cache data, and branch to the IC miss stub on mismatch.
 1589   if (UseCompressedClassPointers) {
 1590     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1591     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1592   } else {
          // Uncompressed class pointers: 64-bit load and compare, so the klass is
          // not labelled as compressed in this branch.
 1593     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1594     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1595   }
 1596   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1597 }
 1598 #endif
 1599 
 1600 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1601 {
        // Emits the inline cache check, passing InteriorEntryAlignment to ic_check
        // (presumably the alignment requested for the code that follows — confirm
        // against MacroAssembler::ic_check).
 1602   __ ic_check(InteriorEntryAlignment);
 1603 }
 1604 







 1605 //=============================================================================
 1606 
 1607 bool Matcher::supports_vector_calling_convention(void) {
        // The vector calling convention is available exactly when
        // EnableVectorSupport is set.
 1608   return EnableVectorSupport;
 1609 }
 1610 
 1611 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Vector results are described as a pair of XMM0 slots: lo is always
        // XMM0_num, hi is XMM0b_num by default and XMM0d/XMM0h/XMM0p for
        // Op_VecX/Op_VecY/Op_VecZ respectively.
 1612   assert(EnableVectorSupport, "sanity");
 1613   int lo = XMM0_num;
 1614   int hi = XMM0b_num;
 1615   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1616   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1617   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1618   return OptoRegPair(hi, lo);
 1619 }
 1620 
 1621 // Is this branch offset short enough that a short branch can be used?
 1622 //
 1623 // NOTE: If the platform does not provide any short branch variants, then
 1624 //       this method should return false for offset 0.

 3055   %}
 3056 %}
 3057 
 3058 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3059 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3060 %{
        // Memory operand of the form reg + (idx << scale) + off, where idx is an
        // int converted to long. The predicate only accepts the match when the
        // long type of the node reached through n->in(2)->in(3)->in(1)
        // (presumably the ConvI2L of the match rule) has a lower bound >= 0.
 3061   constraint(ALLOC_IN_RC(ptr_reg));
 3062   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3063   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3064 
 3065   op_cost(10);
 3066   format %{"[$reg + $off + $idx << $scale]" %}
 3067   interface(MEMORY_INTER) %{
 3068     base($reg);
 3069     index($idx);
 3070     scale($scale);
 3071     disp($off);
 3072   %}
 3073 %}
 3074 
 3075 // Indirect Narrow Oop Operand
 3076 operand indCompressedOop(rRegN reg) %{
        // Folds a DecodeN into the address: base R12 (encoding 0xc), index the
        // narrow oop register, scale 3, no displacement. Only used with
        // compressed oops whose shift equals Address::times_8.
 3077   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3078   constraint(ALLOC_IN_RC(ptr_reg));
 3079   match(DecodeN reg);
 3080 
 3081   op_cost(10);
 3082   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3083   interface(MEMORY_INTER) %{
 3084     base(0xc); // R12
 3085     index($reg);
 3086     scale(0x3);
 3087     disp(0x0);
 3088   %}
 3089 %}
 3090 
 3091 // Indirect Narrow Oop Plus Offset Operand
 3092 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3093 // we can't free r12 even with CompressedOops::base() == nullptr.
 3094 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Folds a DecodeN plus a 32-bit immediate offset into the address:
        // base R12 (encoding 0xc), index the narrow oop register, scale 3,
        // displacement off. Only used with compressed oops whose shift equals
        // Address::times_8.
 3095   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3096   constraint(ALLOC_IN_RC(ptr_reg));
 3097   match(AddP (DecodeN reg) off);
 3098 
 3099   op_cost(10);
 3100   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3101   interface(MEMORY_INTER) %{
 3102     base(0xc); // R12
 3103     index($reg);
 3104     scale(0x3);
 3105     disp($off);
 3106   %}
 3107 %}
 3108 
 3109 // Indirect Memory Operand
 3110 operand indirectNarrow(rRegN reg)

 3417     equal(0x4, "e");
 3418     not_equal(0x5, "ne");
 3419     less(0x2, "b");
 3420     greater_equal(0x3, "ae");
 3421     less_equal(0x6, "be");
 3422     greater(0x7, "a");
 3423     overflow(0x0, "o");
 3424     no_overflow(0x1, "no");
 3425   %}
 3426 %}
 3427 
 3428 //----------OPERAND CLASSES----------------------------------------------------
 3429 // Operand Classes are groups of operands that are used to simplify
 3430 // instruction definitions by not requiring the AD writer to specify separate
 3431 // instructions for every form of operand when the instruction accepts
 3432 // multiple operand types with the same basic encoding and format.  The classic
 3433 // case of this is memory operands.
      //
      // The memory class below groups the addressing-mode operands (plain,
      // compressed-oop and *Narrow variants) so that an instruction taking a
      // "memory" operand accepts any of them.
 3434 
 3435 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3436                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3437                indCompressedOop, indCompressedOopOffset,
 3438                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3439                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3440                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3441 
 3442 //----------PIPELINE-----------------------------------------------------------
 3443 // Rules which define the behavior of the target architectures pipeline.
 3444 pipeline %{
 3445 
 3446 //----------ATTRIBUTES---------------------------------------------------------
 3447 attributes %{
 3448   variable_size_instructions;        // Instructions are variable size
 3449   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3450   instruction_unit_size = 1;         // Instruction sizes are counted in units of 1 byte
 3451   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3452   instruction_fetch_units = 1;       // of 16 bytes
 3453 
 3454   // List of nop instructions
 3455   nops( MachNop );
 3456 %}
 3457 

 5958   format %{ "MEMBAR-storestore (empty encoding)" %}
 5959   ins_encode( );
 5960   ins_pipe(empty);
 5961 %}
 5962 
 5963 //----------Move Instructions--------------------------------------------------
 5964 
 5965 instruct castX2P(rRegP dst, rRegL src)
 5966 %{
        // Reinterprets a long register as a pointer (CastX2P). A register move
        // is emitted only when dst and src were assigned different registers.
 5967   match(Set dst (CastX2P src));
 5968 
 5969   format %{ "movq    $dst, $src\t# long->ptr" %}
 5970   ins_encode %{
 5971     if ($dst$$reg != $src$$reg) {
 5972       __ movptr($dst$$Register, $src$$Register);
 5973     }
 5974   %}
 5975   ins_pipe(ialu_reg_reg); // XXX
 5976 %}
 5977 
 5978 instruct castI2N(rRegN dst, rRegI src)
 5979 %{
        // Reinterprets an int register as a narrow pointer (CastI2N). A 32-bit
        // register move (movl) is emitted only when dst and src were assigned
        // different registers; the format text names the same mnemonic.
 5980   match(Set dst (CastI2N src));
 5981 
 5982   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 5983   ins_encode %{
 5984     if ($dst$$reg != $src$$reg) {
 5985       __ movl($dst$$Register, $src$$Register);
 5986     }
 5987   %}
 5988   ins_pipe(ialu_reg_reg); // XXX
 5989 %}
 5990 
 5991 instruct castN2X(rRegL dst, rRegN src)
 5992 %{
        // Matches CastP2X when the source operand is a narrow-oop register
        // (rRegN) and the result is a long. A register move is emitted only
        // when dst and src were assigned different registers.
        // NOTE(review): the format text says "ptr -> long" although the source
        // operand class is rRegN — confirm the wording is intended.
 5993   match(Set dst (CastP2X src));
 5994 
 5995   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5996   ins_encode %{
 5997     if ($dst$$reg != $src$$reg) {
 5998       __ movptr($dst$$Register, $src$$Register);
 5999     }
 6000   %}
 6001   ins_pipe(ialu_reg_reg); // XXX
 6002 %}
 6003 
 6004 instruct castP2X(rRegL dst, rRegP src)
 6005 %{
        // Reinterprets a pointer register as a long (CastP2X). A register move
        // is emitted only when dst and src were assigned different registers.
 6006   match(Set dst (CastP2X src));
 6007 
 6008   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6009   ins_encode %{
 6010     if ($dst$$reg != $src$$reg) {
 6011       __ movptr($dst$$Register, $src$$Register);
 6012     }
 6013   %}
 6014   ins_pipe(ialu_reg_reg); // XXX
 6015 %}
 6016 
 6017 // Convert oop into int for vectors alignment masking
 6018 instruct convP2I(rRegI dst, rRegP src)
 6019 %{
 6020   match(Set dst (ConvL2I (CastP2X src)));
 6021 
 6022   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6023   ins_encode %{

12244   effect(DEF dst, USE src);
12245   ins_cost(100);
12246   format %{ "movd    $dst,$src\t# MoveI2F" %}
12247   ins_encode %{
12248     __ movdl($dst$$XMMRegister, $src$$Register);
12249   %}
12250   ins_pipe( pipe_slow );
12251 %}
12252 
12253 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Matches MoveL2D with a general-purpose long source and a regD
        // destination; the encoding is a single movdq from src to dst.
        // NOTE(review): the format text prints "movd" while the encoding calls
        // movdq — confirm the listing mnemonic is the intended one.
12254   match(Set dst (MoveL2D src));
12255   effect(DEF dst, USE src);
12256   ins_cost(100);
12257   format %{ "movd    $dst,$src\t# MoveL2D" %}
12258   ins_encode %{
12259      __ movdq($dst$$XMMRegister, $src$$Register);
12260   %}
12261   ins_pipe( pipe_slow );
12262 %}
12263 
12264 
12265 // Fast clearing of an array
12266 // Small non-constant length ClearArray for non-AVX512 targets.
12267 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12268                   Universe dummy, rFlagsReg cr)
12269 %{
        // ClearArray variant selected when the node is neither is_large() nor
        // word_copy_only() and UseAVX <= 2. cnt, base and val use the rcx_RegL,
        // rdi_RegP and rax_RegL operand classes and are consumed (USE_KILL);
        // tmp is a temporary and the flags register is killed.
12270   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12271   match(Set dummy (ClearArray (Binary cnt base) val));
12272   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12273 
        // NOTE(review): the UseXMMForObjInit branch of this listing addresses
        // memory through (rax) and names xmm0, while val is the rax operand and
        // base the rdi operand — the text may be stale; confirm against clear_mem.
12274   format %{ $$template
12275     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12276     $$emit$$"jg      LARGE\n\t"
12277     $$emit$$"dec     rcx\n\t"
12278     $$emit$$"js      DONE\t# Zero length\n\t"
12279     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12280     $$emit$$"dec     rcx\n\t"
12281     $$emit$$"jge     LOOP\n\t"
12282     $$emit$$"jmp     DONE\n\t"
12283     $$emit$$"# LARGE:\n\t"
12284     if (UseFastStosb) {
12285        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12286        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12287     } else if (UseXMMForObjInit) {
12288        $$emit$$"movdq   $tmp, $val\n\t"
12289        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12290        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12291        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12292        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12293        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12294        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12295        $$emit$$"add     0x40,rax\n\t"
12296        $$emit$$"# L_zero_64_bytes:\n\t"
12297        $$emit$$"sub     0x8,rcx\n\t"
12298        $$emit$$"jge     L_loop\n\t"
12299        $$emit$$"add     0x4,rcx\n\t"
12300        $$emit$$"jl      L_tail\n\t"
12301        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12302        $$emit$$"add     0x20,rax\n\t"
12303        $$emit$$"sub     0x4,rcx\n\t"
12304        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12305        $$emit$$"add     0x4,rcx\n\t"
12306        $$emit$$"jle     L_end\n\t"
12307        $$emit$$"dec     rcx\n\t"
12308        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12309        $$emit$$"vmovq   xmm0,(rax)\n\t"
12310        $$emit$$"add     0x8,rax\n\t"
12311        $$emit$$"dec     rcx\n\t"
12312        $$emit$$"jge     L_sloop\n\t"
12313        $$emit$$"# L_end:\n\t"
12314     } else {
12315        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12316     }
12317     $$emit$$"# DONE"
12318   %}
12319   ins_encode %{
          // The two trailing booleans follow is_large() and word_copy_only() of
          // the predicate (false, false here) — presumably clear_mem's matching
          // flags; confirm against the MacroAssembler declaration.
12320     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12321                  $tmp$$XMMRegister, false, false);
12322   %}
12323   ins_pipe(pipe_slow);
12324 %}
12325 
12326 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12327                             Universe dummy, rFlagsReg cr)
12328 %{
        // ClearArray variant selected when the node is not is_large() but is
        // word_copy_only(), and UseAVX <= 2. cnt, base and val use the rcx_RegL,
        // rdi_RegP and rax_RegL operand classes and are consumed (USE_KILL);
        // tmp is a temporary and the flags register is killed. Unlike rep_stos,
        // the listing has no UseFastStosb (byte store) branch.
12329   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12330   match(Set dummy (ClearArray (Binary cnt base) val));
12331   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12332 
        // NOTE(review): the UseXMMForObjInit branch of this listing addresses
        // memory through (rax) and names xmm0, while val is the rax operand and
        // base the rdi operand — the text may be stale; confirm against clear_mem.
12333   format %{ $$template
12334     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12335     $$emit$$"jg      LARGE\n\t"
12336     $$emit$$"dec     rcx\n\t"
12337     $$emit$$"js      DONE\t# Zero length\n\t"
12338     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12339     $$emit$$"dec     rcx\n\t"
12340     $$emit$$"jge     LOOP\n\t"
12341     $$emit$$"jmp     DONE\n\t"
12342     $$emit$$"# LARGE:\n\t"
12343     if (UseXMMForObjInit) {
12344        $$emit$$"movdq   $tmp, $val\n\t"
12345        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12346        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12347        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12348        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12349        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12350        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12351        $$emit$$"add     0x40,rax\n\t"
12352        $$emit$$"# L_zero_64_bytes:\n\t"
12353        $$emit$$"sub     0x8,rcx\n\t"
12354        $$emit$$"jge     L_loop\n\t"
12355        $$emit$$"add     0x4,rcx\n\t"
12356        $$emit$$"jl      L_tail\n\t"
12357        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12358        $$emit$$"add     0x20,rax\n\t"
12359        $$emit$$"sub     0x4,rcx\n\t"
12360        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12361        $$emit$$"add     0x4,rcx\n\t"
12362        $$emit$$"jle     L_end\n\t"
12363        $$emit$$"dec     rcx\n\t"
12364        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12365        $$emit$$"vmovq   xmm0,(rax)\n\t"
12366        $$emit$$"add     0x8,rax\n\t"
12367        $$emit$$"dec     rcx\n\t"
12368        $$emit$$"jge     L_sloop\n\t"
12369        $$emit$$"# L_end:\n\t"
12370     } else {
12371        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12372     }
12373     $$emit$$"# DONE"
12374   %}
12375   ins_encode %{
          // The two trailing booleans follow is_large() and word_copy_only() of
          // the predicate (false, true here) — presumably clear_mem's matching
          // flags; confirm against the MacroAssembler declaration.
12376     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12377                  $tmp$$XMMRegister, false, true);
12378   %}
12379   ins_pipe(pipe_slow);
12380 %}
12381 
12382 // Small non-constant length ClearArray for AVX512 targets.
12383 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12384                        Universe dummy, rFlagsReg cr)
12385 %{
12386   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12387   match(Set dummy (ClearArray (Binary cnt base) val));
12388   ins_cost(125);
12389   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12390 
12391   format %{ $$template
12392     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12393     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12394     $$emit$$"jg      LARGE\n\t"
12395     $$emit$$"dec     rcx\n\t"
12396     $$emit$$"js      DONE\t# Zero length\n\t"
12397     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12398     $$emit$$"dec     rcx\n\t"
12399     $$emit$$"jge     LOOP\n\t"
12400     $$emit$$"jmp     DONE\n\t"
12401     $$emit$$"# LARGE:\n\t"
12402     if (UseFastStosb) {
12403        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12404        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12405     } else if (UseXMMForObjInit) {
12406        $$emit$$"mov     rdi,rax\n\t"
12407        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12408        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12409        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12417        $$emit$$"jl      L_tail\n\t"
12418        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12419        $$emit$$"add     0x20,rax\n\t"
12420        $$emit$$"sub     0x4,rcx\n\t"
12421        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12422        $$emit$$"add     0x4,rcx\n\t"
12423        $$emit$$"jle     L_end\n\t"
12424        $$emit$$"dec     rcx\n\t"
12425        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12426        $$emit$$"vmovq   xmm0,(rax)\n\t"
12427        $$emit$$"add     0x8,rax\n\t"
12428        $$emit$$"dec     rcx\n\t"
12429        $$emit$$"jge     L_sloop\n\t"
12430        $$emit$$"# L_end:\n\t"
12431     } else {
12432        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12433     }
12434     $$emit$$"# DONE"
12435   %}
12436   ins_encode %{
12437     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12438                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
12439   %}
12440   ins_pipe(pipe_slow);
12441 %}
12442 
12443 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12444                                  Universe dummy, rFlagsReg cr)

12445 %{
12446   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12447   match(Set dummy (ClearArray (Binary cnt base) val));
12448   ins_cost(125);
12449   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12450 
12451   format %{ $$template
12452     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12453     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12454     $$emit$$"jg      LARGE\n\t"
12455     $$emit$$"dec     rcx\n\t"
12456     $$emit$$"js      DONE\t# Zero length\n\t"
12457     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12458     $$emit$$"dec     rcx\n\t"
12459     $$emit$$"jge     LOOP\n\t"
12460     $$emit$$"jmp     DONE\n\t"
12461     $$emit$$"# LARGE:\n\t"
12462     if (UseFastStosb) {
12463        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12464        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12465     } else if (UseXMMForObjInit) {
12466        $$emit$$"mov     rdi,rax\n\t"
12467        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12468        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12469        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12477        $$emit$$"jl      L_tail\n\t"
12478        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12479        $$emit$$"add     0x20,rax\n\t"
12480        $$emit$$"sub     0x4,rcx\n\t"
12481        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12482        $$emit$$"add     0x4,rcx\n\t"
12483        $$emit$$"jle     L_end\n\t"
12484        $$emit$$"dec     rcx\n\t"
12485        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12486        $$emit$$"vmovq   xmm0,(rax)\n\t"
12487        $$emit$$"add     0x8,rax\n\t"
12488        $$emit$$"dec     rcx\n\t"
12489        $$emit$$"jge     L_sloop\n\t"
12490        $$emit$$"# L_end:\n\t"
12491     } else {
12492        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12493     }
12494     $$emit$$"# DONE"
12495   %}
12496   ins_encode %{
12497     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12498                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
12499   %}
12500   ins_pipe(pipe_slow);
12501 %}
12502 
12503 // Large non-constant length ClearArray for non-AVX512 targets.
12504 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12505                         Universe dummy, rFlagsReg cr)
12506 %{
        // ClearArray variant selected when the node is is_large() and not
        // word_copy_only(), and UseAVX <= 2. cnt, base and val use the rcx_RegL,
        // rdi_RegP and rax_RegL operand classes and are consumed (USE_KILL);
        // tmp is a temporary and the flags register is killed. The listing has
        // no short-length loop, unlike the non-large variants.
12507   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12508   match(Set dummy (ClearArray (Binary cnt base) val));
12509   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12510 
        // NOTE(review): the UseXMMForObjInit branch of this listing addresses
        // memory through (rax) and names xmm0, while val is the rax operand and
        // base the rdi operand — the text may be stale; confirm against clear_mem.
12511   format %{ $$template
12512     if (UseFastStosb) {
12513        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12514        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12515     } else if (UseXMMForObjInit) {
12516        $$emit$$"movdq   $tmp, $val\n\t"
12517        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12518        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12519        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12520        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12521        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12522        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12523        $$emit$$"add     0x40,rax\n\t"
12524        $$emit$$"# L_zero_64_bytes:\n\t"
12525        $$emit$$"sub     0x8,rcx\n\t"
12526        $$emit$$"jge     L_loop\n\t"
12527        $$emit$$"add     0x4,rcx\n\t"
12528        $$emit$$"jl      L_tail\n\t"
12529        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12530        $$emit$$"add     0x20,rax\n\t"
12531        $$emit$$"sub     0x4,rcx\n\t"
12532        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12533        $$emit$$"add     0x4,rcx\n\t"
12534        $$emit$$"jle     L_end\n\t"
12535        $$emit$$"dec     rcx\n\t"
12536        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12537        $$emit$$"vmovq   xmm0,(rax)\n\t"
12538        $$emit$$"add     0x8,rax\n\t"
12539        $$emit$$"dec     rcx\n\t"
12540        $$emit$$"jge     L_sloop\n\t"
12541        $$emit$$"# L_end:\n\t"
12542     } else {
12543        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12544     }
12545   %}
12546   ins_encode %{
          // The two trailing booleans follow is_large() and word_copy_only() of
          // the predicate (true, false here) — presumably clear_mem's matching
          // flags; confirm against the MacroAssembler declaration.
12547     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12548                  $tmp$$XMMRegister, true, false);
12549   %}
12550   ins_pipe(pipe_slow);
12551 %}
12552 
12553 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12554                                   Universe dummy, rFlagsReg cr)
12555 %{
        // ClearArray variant selected when the node is both is_large() and
        // word_copy_only(), and UseAVX <= 2. cnt, base and val use the rcx_RegL,
        // rdi_RegP and rax_RegL operand classes and are consumed (USE_KILL);
        // tmp is a temporary and the flags register is killed. The listing has
        // neither a short-length loop nor a UseFastStosb branch.
12556   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12557   match(Set dummy (ClearArray (Binary cnt base) val));
12558   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12559 
        // NOTE(review): the UseXMMForObjInit branch of this listing addresses
        // memory through (rax) and names xmm0, while val is the rax operand and
        // base the rdi operand — the text may be stale; confirm against clear_mem.
12560   format %{ $$template
12561     if (UseXMMForObjInit) {
12562        $$emit$$"movdq   $tmp, $val\n\t"
12563        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12564        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12565        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12566        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12567        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12568        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12569        $$emit$$"add     0x40,rax\n\t"
12570        $$emit$$"# L_zero_64_bytes:\n\t"
12571        $$emit$$"sub     0x8,rcx\n\t"
12572        $$emit$$"jge     L_loop\n\t"
12573        $$emit$$"add     0x4,rcx\n\t"
12574        $$emit$$"jl      L_tail\n\t"
12575        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12576        $$emit$$"add     0x20,rax\n\t"
12577        $$emit$$"sub     0x4,rcx\n\t"
12578        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12579        $$emit$$"add     0x4,rcx\n\t"
12580        $$emit$$"jle     L_end\n\t"
12581        $$emit$$"dec     rcx\n\t"
12582        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12583        $$emit$$"vmovq   xmm0,(rax)\n\t"
12584        $$emit$$"add     0x8,rax\n\t"
12585        $$emit$$"dec     rcx\n\t"
12586        $$emit$$"jge     L_sloop\n\t"
12587        $$emit$$"# L_end:\n\t"
12588     } else {
12589        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12590     }
12591   %}
12592   ins_encode %{
          // The two trailing booleans follow is_large() and word_copy_only() of
          // the predicate (true, true here) — presumably clear_mem's matching
          // flags; confirm against the MacroAssembler declaration.
12593     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12594                  $tmp$$XMMRegister, true, true);
12595   %}
12596   ins_pipe(pipe_slow);
12597 %}
12598 
12599 // Large non-constant length ClearArray for AVX512 targets.
12600 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12601                              Universe dummy, rFlagsReg cr)
12602 %{
        // ClearArray variant selected when the node is is_large() and not
        // word_copy_only(), and UseAVX > 2. cnt, base and val use the rcx_RegL,
        // rdi_RegP and rax_RegL operand classes and are consumed (USE_KILL);
        // tmp (legRegD) and ktmp (kReg) are temporaries and the flags register
        // is killed.
12603   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12604   match(Set dummy (ClearArray (Binary cnt base) val));
12605   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12606 
        // NOTE(review): this listing prints "xorq rax, rax" / "vpxor ymm0" and
        // "mov rdi,rax", i.e. a zero fill, while the encoding passes val (the
        // rax operand) to clear_mem — the text may be stale; confirm.
12607   format %{ $$template
12608     if (UseFastStosb) {
12609        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12610        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12611        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12612     } else if (UseXMMForObjInit) {
12613        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12614        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12615        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12616        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12617        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12618        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12619        $$emit$$"add     0x40,rax\n\t"
12620        $$emit$$"# L_zero_64_bytes:\n\t"
12621        $$emit$$"sub     0x8,rcx\n\t"
12622        $$emit$$"jge     L_loop\n\t"
12623        $$emit$$"add     0x4,rcx\n\t"
12624        $$emit$$"jl      L_tail\n\t"
12625        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12626        $$emit$$"add     0x20,rax\n\t"
12627        $$emit$$"sub     0x4,rcx\n\t"
12628        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12629        $$emit$$"add     0x4,rcx\n\t"
12630        $$emit$$"jle     L_end\n\t"
12631        $$emit$$"dec     rcx\n\t"
12632        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12633        $$emit$$"vmovq   xmm0,(rax)\n\t"
12634        $$emit$$"add     0x8,rax\n\t"
12635        $$emit$$"dec     rcx\n\t"
12636        $$emit$$"jge     L_sloop\n\t"
12637        $$emit$$"# L_end:\n\t"
12638     } else {
12639        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12640        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12641     }
12642   %}
12643   ins_encode %{
          // The two booleans follow is_large() and word_copy_only() of the
          // predicate (true, false here) — presumably clear_mem's matching
          // flags; the k-register temp is passed as the last argument.
12644     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12645                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
12646   %}
12647   ins_pipe(pipe_slow);
12648 %}
12649 
12650 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12651                                        Universe dummy, rFlagsReg cr)

12652 %{
        // ClearArray variant selected when the node is both is_large() and
        // word_copy_only(), and UseAVX > 2. cnt, base and val use the rcx_RegL,
        // rdi_RegP and rax_RegL operand classes and are consumed (USE_KILL);
        // tmp (legRegD) and ktmp (kReg) are temporaries and the flags register
        // is killed.
12653   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12654   match(Set dummy (ClearArray (Binary cnt base) val));
12655   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12656 
        // NOTE(review): this listing prints "xorq rax, rax" / "vpxor ymm0" and
        // "mov rdi,rax", i.e. a zero fill, while the encoding passes val (the
        // rax operand) to clear_mem; it also keeps a UseFastStosb branch that
        // the non-EVEX word_copy variants do not list — confirm the text.
12657   format %{ $$template
12658     if (UseFastStosb) {
12659        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12660        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12661        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12662     } else if (UseXMMForObjInit) {
12663        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12664        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12665        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12666        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12667        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12668        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12669        $$emit$$"add     0x40,rax\n\t"
12670        $$emit$$"# L_zero_64_bytes:\n\t"
12671        $$emit$$"sub     0x8,rcx\n\t"
12672        $$emit$$"jge     L_loop\n\t"
12673        $$emit$$"add     0x4,rcx\n\t"
12674        $$emit$$"jl      L_tail\n\t"
12675        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12676        $$emit$$"add     0x20,rax\n\t"
12677        $$emit$$"sub     0x4,rcx\n\t"
12678        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12679        $$emit$$"add     0x4,rcx\n\t"
12680        $$emit$$"jle     L_end\n\t"
12681        $$emit$$"dec     rcx\n\t"
12682        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12683        $$emit$$"vmovq   xmm0,(rax)\n\t"
12684        $$emit$$"add     0x8,rax\n\t"
12685        $$emit$$"dec     rcx\n\t"
12686        $$emit$$"jge     L_sloop\n\t"
12687        $$emit$$"# L_end:\n\t"
12688     } else {
12689        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12690        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12691     }
12692   %}
12693   ins_encode %{
          // The two booleans follow is_large() and word_copy_only() of the
          // predicate (true, true here) — presumably clear_mem's matching
          // flags; the k-register temp is passed as the last argument.
12694     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12695                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
12696   %}
12697   ins_pipe(pipe_slow);
12698 %}
12699 
12700 // Small constant length ClearArray for AVX512 targets.
12701 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
12702 %{
        // ClearArray variant for a constant count (cnt is an immL operand).
        // Selected when the node is neither is_large() nor word_copy_only(),
        // MaxVectorSize >= 32 and AVX512VL is supported. val is consumed
        // (USE_KILL), tmp and ktmp are temporaries and the flags are killed;
        // base is not listed in the effect.
12703   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
12704             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
12705   match(Set dummy (ClearArray (Binary cnt base) val));
12706   ins_cost(100);
12707   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
12708   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12709   ins_encode %{
          // Calls clear_mem with the count as a compile-time constant.
12710     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12711   %}
12712   ins_pipe(pipe_slow);
12713 %}
12714 
12715 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12716                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12717 %{
        // StrComp for the LL encoding when AVX512VL+BW is not supported.
        // Both string pointers and both counts are consumed (USE_KILL), tmp1 is
        // a temporary and the flags are killed; the result uses rax_RegI.
12718   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12719   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12720   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12721 
12722   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12723   ins_encode %{
          // knoreg is passed as the last argument (presumably meaning that no
          // opmask register is available — confirm against string_compare).
12724     __ string_compare($str1$$Register, $str2$$Register,
12725                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12726                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12727   %}
12728   ins_pipe( pipe_slow );
12729 %}
12730 

14563 
14564   ins_cost(300);
14565   format %{ "call_leaf,runtime " %}
14566   ins_encode(clear_avx, Java_To_Runtime(meth));
14567   ins_pipe(pipe_slow);
14568 %}
14569 
14570 // Call runtime without safepoint and with vector arguments
14571 instruct CallLeafDirectVector(method meth)
14572 %{
        // Matches CallLeafVector. The encoding is Java_To_Runtime only, with no
        // clear_avx step in front of it; MachCallRuntimeNode::ret_addr_offset()
        // likewise skips clear_avx_size() for Op_CallLeafVector.
14573   match(CallLeafVector);
14574   effect(USE meth);
14575 
14576   ins_cost(300);
14577   format %{ "call_leaf,vector " %}
14578   ins_encode(Java_To_Runtime(meth));
14579   ins_pipe(pipe_slow);
14580 %}
14581 
14582 // Call runtime without safepoint
14583 // entry point is null, target holds the address to call
14584 instruct CallLeafNoFPInDirect(rRegP target)
14585 %{
        // Matches CallLeafNoFP when the call node has no entry point; the
        // address to call is taken from the target register and the encoding
        // is a single register-indirect call.
        // NOTE(review): unlike CallLeafNoFPDirect, no clear_avx step precedes
        // the call — confirm this is intended.
14586   predicate(n->as_Call()->entry_point() == nullptr);
14587   match(CallLeafNoFP target);
14588 
14589   ins_cost(300);
14590   format %{ "call_leaf_nofp,runtime indirect " %}
14591   ins_encode %{
14592      __ call($target$$Register);
14593   %}
14594 
14595   ins_pipe(pipe_slow);
14596 %}
14597 
14598 instruct CallLeafNoFPDirect(method meth)
14599 %{
        // Matches CallLeafNoFP when the call node has an entry point. The
        // encoding runs clear_avx followed by Java_To_Runtime(meth).
14600   predicate(n->as_Call()->entry_point() != nullptr);
14601   match(CallLeafNoFP);
14602   effect(USE meth);
14603 
14604   ins_cost(300);
14605   format %{ "call_leaf_nofp,runtime " %}
14606   ins_encode(clear_avx, Java_To_Runtime(meth));
14607   ins_pipe(pipe_slow);
14608 %}
14609 
14610 // Return Instruction
14611 // Remove the return address & jump to it.
14612 // Notice: We always emit a nop after a ret to make sure there is room
14613 // for safepoint patching
14614 instruct Ret()
14615 %{
14616   match(Return);
14617 
14618   format %{ "ret" %}
14619   ins_encode %{
14620     __ ret(0);
< prev index next >