< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  598 }
  599 
  600 // !!!!! Special hack to get all types of calls to specify the byte offset
  601 //       from the start of the call to the point where the return address
  602 //       will point.
      //
      // Static Java call: a fixed 5 bytes plus clear_avx_size(), i.e. the same
      // amount that CallStaticJavaDirectNode::compute_padding skips as
      // "vzeroupper" ahead of the call itself.
  603 int MachCallStaticJavaNode::ret_addr_offset()
  604 {
  605   int offset = 5; // 5 bytes from start of call to where return address points
  606   offset += clear_avx_size(); // bytes emitted ahead of the call instruction
  607   return offset;
  608 }
  609 
  610 int MachCallDynamicJavaNode::ret_addr_offset()
  611 {
        // Dynamic Java call: the call is preceded by a movq (compare the 11-byte
        // "movq instruction + call opcode byte" skip in
        // CallDynamicJavaDirectNode::compute_padding), hence the larger constant
        // than in the static case.
  612   int offset = 15; // 15 bytes from start of call to where return address points
  613   offset += clear_avx_size(); // plus the bytes compute_padding skips as "vzeroupper"
  614   return offset;
  615 }
  616 
  617 int MachCallRuntimeNode::ret_addr_offset() {




        // Runtime call: distance from the start of the emitted sequence to the
        // return address, for a movq of the target into r10 plus an indirect call.
  618   int offset = 13; // movq r10,#addr; callq (r10)
        // CallLeafVector is excluded: its instruct (CallLeafDirectVector) encodes
        // only Java_To_Runtime, without the clear_avx step used by the other
        // leaf-call instructs.
  619   if (this->ideal_Opcode() != Op_CallLeafVector) {
  620     offset += clear_avx_size();
  621   }
  622   return offset;
  623 }

  624 //
  625 // Compute padding required for nodes which need alignment
  626 //
  627 
  628 // The address of the call instruction needs to be 4-byte aligned to
  629 // ensure that it does not span a cache line so that it can be patched.
      //
      // Returns the number of padding bytes to insert at current_offset.
      // NOTE(review): the offset that is rounded up to alignment_required() is
      // the one just past the skipped vzeroupper bytes and the 1-byte call
      // opcode, i.e. the call's operand rather than the instruction start; the
      // wording above is looser than the code.
  630 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  631 {
  632   current_offset += clear_avx_size(); // skip vzeroupper
  633   current_offset += 1; // skip call opcode byte
  634   return align_up(current_offset, alignment_required()) - current_offset;
  635 }
  636 
  637 // The address of the call instruction needs to be 4-byte aligned to
  638 // ensure that it does not span a cache line so that it can be patched.
  639 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  640 {
  641   current_offset += clear_avx_size(); // skip vzeroupper
  642   current_offset += 11; // skip movq instruction + call opcode byte
  643   return align_up(current_offset, alignment_required()) - current_offset;

  829     st->print("\n\t");
  830     st->print("# stack alignment check");
  831 #endif
  832   }
  833   if (C->stub_function() != nullptr) {
  834     st->print("\n\t");
  835     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  836     st->print("\n\t");
  837     st->print("je      fast_entry\t");
  838     st->print("\n\t");
  839     st->print("call    #nmethod_entry_barrier_stub\t");
  840     st->print("\n\tfast_entry:");
  841   }
  842   st->cr();
  843 }
  844 #endif
  845 
  846 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: optional class-initialization barrier, the
        // verified entry (frame setup / stack bang), and bookkeeping on the
        // compile output (frame-complete offset, constant table base).
  847   Compile* C = ra_->C;
  848
  849   int framesize = C->output()->frame_size_in_bytes();
  850   int bangsize = C->output()->bang_size_in_bytes();
  851
  852   if (C->clinit_barrier_on_entry()) {
  853     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  854     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  855
  856     Label L_skip_barrier;
  857     Register klass = rscratch1;
  858
          // Load the holder class of the method being compiled and test it; the
          // fast path branches to L_skip_barrier, otherwise fall through to the
          // jump below.
  859     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  860     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
  861
  862     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  863
  864     __ bind(L_skip_barrier);

  865   }
  866
        // The bang size passed is 0 unless need_stack_bang(bangsize) holds; the
        // last argument tells verified_entry whether this compilation is a stub.
  867   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


  868
        // Record the code offset reached once verified_entry has been emitted.
  869   C->output()->set_frame_complete(__ offset());
  870
  871   if (C->has_mach_constant_base_node()) {
  872     // NOTE: We set the table base offset here because users might be
  873     // emitted before MachConstantBaseNode.
  874     ConstantTable& constant_table = C->output()->constant_table();
  875     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  876   }
  877 }
  878 
  879 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  880 {
        // No closed-form size for the prolog: defer to the generic
        // MachNode::size computation.
  881   return MachNode::size(ra_); // too many variables; just compute it
  882                               // the hard way
  883 }
  884 
  885 int MachPrologNode::reloc() const
  886 {
        // NOTE(review): presumably an estimate of the relocation entries this node
        // needs; the value returned here is 0 despite the "large enough" wording
        // shared with MachEpilogNode::reloc -- confirm against the base class.
  887   return 0; // a large enough number
  888 }
  889 
  890 //=============================================================================
  891 #ifndef PRODUCT
  892 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  893 {
  894   Compile* C = ra_->C;
  895   if (generate_vzeroupper(C)) {
  896     st->print("vzeroupper");
  897     st->cr(); st->print("\t");
  898   }
  899 
  900   int framesize = C->output()->frame_size_in_bytes();
  901   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  902   // Remove word for return adr already pushed
  903   // and RBP
  904   framesize -= 2*wordSize;

  911   st->print_cr("popq    rbp");
  912   if (do_polling() && C->is_method_compilation()) {
  913     st->print("\t");
  914     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  915                  "ja      #safepoint_stub\t"
  916                  "# Safepoint: poll for GC");
  917   }
  918 }
  919 #endif
  920 
  921 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  922 {
        // Emits the method epilog: optional vzeroupper, frame teardown
        // (addq rsp / popq rbp), optional reserved-stack check, and the
        // return safepoint poll.
  923   Compile* C = ra_->C;
  924
  925   if (generate_vzeroupper(C)) {
  926     // Clear upper bits of YMM registers when current compiled code uses
  927     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  928     __ vzeroupper();
  929   }
  930
  931   int framesize = C->output()->frame_size_in_bytes();
  932   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  933   // Remove word for return adr already pushed
  934   // and RBP
  935   framesize -= 2*wordSize;
  936
  937   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  938
        // Nothing to add back when the frame holds only the return address and RBP.
  939   if (framesize) {
  940     __ addq(rsp, framesize);
  941   }
  942
  943   __ popq(rbp);
  944
  945   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  946     __ reserved_stack_check();
  947   }
  948
  949   if (do_polling() && C->is_method_compilation()) {
          // During scratch emission (size measurement) no stub is registered and
          // the poll branches to a throwaway label instead.
  950     Label dummy_label;
  951     Label* code_stub = &dummy_label;
  952     if (!C->output()->in_scratch_emit_size()) {
            // Real emission: allocate the poll stub in the compile arena, keyed by
            // the current code offset, and branch to its entry.
  953       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  954       C->output()->add_stub(stub);
  955       code_stub = &stub->entry();
  956     }
  957     __ relocate(relocInfo::poll_return_type);
  958     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  959   }
  960 }
  961 
  962 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
  963 {
        // No closed-form size for the epilog: defer to the generic
        // MachNode::size computation.
  964   return MachNode::size(ra_); // too many variables; just compute it
  965                               // the hard way
  966 }
  967 
  968 int MachEpilogNode::reloc() const
  969 {
        // NOTE(review): presumably an upper bound on relocation entries for the
        // epilog (emit() issues one explicit relocate() for the return poll) --
        // confirm against the base class.
  970   return 2; // a large enough number
  971 }
  972 
  973 const Pipeline* MachEpilogNode::pipeline() const
  974 {
        // The epilog has no dedicated pipeline description; it returns whatever
        // MachNode::pipeline_class() provides.
  975   return MachNode::pipeline_class();
  976 }
  977 
  978 //=============================================================================
  979 
  980 enum RC {
        // NOTE(review): presumably coarse register/location classes ("rc" =
        // register class) with rc_bad as the invalid value; the users of this
        // enum are outside this view -- confirm there.
  981   rc_bad,
  982   rc_int,
  983   rc_kreg,
  984   rc_float,
  985   rc_stack
  986 };
  987 

 1549 #endif
 1550 
 1551 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1552 {
        // Loads the address rsp + offset into the register assigned to this node,
        // where offset is the stack offset of the first element of input mask 0.
 1553   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1554   int reg = ra_->get_encode(this);
 1555
 1556   __ lea(as_Register(reg), Address(rsp, offset));
 1557 }
 1558 
 1559 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1560 {
        // Byte size of the lea emitted by BoxLockNode::emit. Offsets below 0x80
        // use the shorter form (3 bytes less); register encodings above 15 cost
        // one extra byte for the REX2 prefix compared with REX.
 1561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1562   if (ra_->get_encode(this) > 15) {
 1563     return (offset < 0x80) ? 6 : 9; // REX2
 1564   } else {
 1565     return (offset < 0x80) ? 5 : 8; // REX
 1566   }
 1567 }
 1568 











































 1569 //=============================================================================
 1570 #ifndef PRODUCT
 1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1572 {
        // Debug listing for the unverified entry point: load the receiver's klass,
        // compare it with the speculated klass of the inline cache data, and
        // branch to the IC miss stub on mismatch. The operand width (movl/cmpl
        // vs. movq/cmpq) follows UseCompressedClassPointers; only the 32-bit
        // form is annotated as a compressed klass.
 1573   if (UseCompressedClassPointers) {
 1574     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1575     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1576   } else {
 1577     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1578     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1579   }
 1580   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1581 }
 1582 #endif
 1583 
 1584 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1585 {
        // Emits the inline cache check for the unverified entry point; the
        // sequence it stands for is the one MachUEPNode::format prints
        // (klass load, compare with speculated klass, jne to the IC miss stub).
 1586   __ ic_check(InteriorEntryAlignment);
 1587 }
 1588 
 1589 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1590 {
        // No closed-form size for the entry check: defer to the generic
        // MachNode::size computation.
 1591   return MachNode::size(ra_); // too many variables; just compute it
 1592                               // the hard way
 1593 }
 1594 
 1595 
 1596 //=============================================================================
 1597 
 1598 bool Matcher::supports_vector_calling_convention(void) {
        // The vector calling convention is available exactly when
        // EnableVectorSupport is set.
 1599   return EnableVectorSupport;
 1600 }
 1601 
 1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Vector results come back in XMM0. The low half of the pair is always
        // XMM0_num; the high half names the last slot used by the vector type:
        // XMM0d for VecX, XMM0h for VecY, XMM0p for VecZ, and XMM0b for any
        // other ideal_reg.
 1603   assert(EnableVectorSupport, "sanity");
 1604   int lo = XMM0_num;
 1605   int hi = XMM0b_num;
 1606   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1607   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1608   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1609   return OptoRegPair(hi, lo);
 1610 }
 1611 
 1612 // Is this branch offset short enough that a short branch can be used?
 1613 //
 1614 // NOTE: If the platform does not provide any short branch variants, then
 1615 //       this method should return false for offset 0.

 3045   %}
 3046 %}
 3047 
 3048 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Matches base + (ConvI2L(idx) << scale) + off and maps it onto a single
      // memory operand (base, index, scale, disp).
 3049 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3050 %{
 3051   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type found at n->in(2)->in(3)->in(1) has a
        // non-negative lower bound (presumably the ConvI2L of idx in the match
        // tree below -- confirm the input numbering).
 3052   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3053   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3054
 3055   op_cost(10);
 3056   format %{"[$reg + $off + $idx << $scale]" %}
 3057   interface(MEMORY_INTER) %{
 3058     base($reg);
 3059     index($idx);
 3060     scale($scale);
 3061     disp($off);
 3062   %}
 3063 %}
 3064 
















 3065 // Indirect Narrow Oop Plus Offset Operand
 3066 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3067 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Folds (DecodeN reg) + off into one address: R12 as base, the narrow oop
      // register as index scaled by 8, and off as displacement.
 3068 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Only valid when compressed oops are in use with a shift equal to
        // Address::times_8, matching the hard-coded scale(0x3) below.
 3069   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3070   constraint(ALLOC_IN_RC(ptr_reg));
 3071   match(AddP (DecodeN reg) off);
 3072
 3073   op_cost(10);
 3074   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3075   interface(MEMORY_INTER) %{
 3076     base(0xc); // R12
 3077     index($reg);
 3078     scale(0x3);
 3079     disp($off);
 3080   %}
 3081 %}
 3082 
 3083 // Indirect Memory Operand
 3084 operand indirectNarrow(rRegN reg)

 3391     equal(0x4, "e");
 3392     not_equal(0x5, "ne");
 3393     less(0x2, "b");
 3394     greater_equal(0x3, "ae");
 3395     less_equal(0x6, "be");
 3396     greater(0x7, "a");
 3397     overflow(0x0, "o");
 3398     no_overflow(0x1, "no");
 3399   %}
 3400 %}
 3401 
 3402 //----------OPERAND CLASSES----------------------------------------------------
 3403 // Operand Classes are groups of operands that are used to simplify
 3404 // instruction definitions by not requiring the AD writer to specify separate
 3405 // instructions for every form of operand when the instruction accepts
 3406 // multiple operand types with the same basic encoding and format.  The classic
 3407 // case of this is memory operands.
 3408
      // "memory" groups every addressing-mode operand listed below, including the
      // compressed-oop form and the *Narrow variants.
 3409 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3410                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3411                indCompressedOopOffset,
 3412                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3413                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3414                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3415 
 3416 //----------PIPELINE-----------------------------------------------------------
 3417 // Rules which define the behavior of the target architectures pipeline.
 3418 pipeline %{
 3419 
 3420 //----------ATTRIBUTES---------------------------------------------------------
 3421 attributes %{
 3422   variable_size_instructions;        // Variable size instructions
 3423   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3424   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 3425   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3426   instruction_fetch_units = 1;       // of 16 bytes
 3427 %}
 3428 
 3429 //----------RESOURCES----------------------------------------------------------
 3430 // Resources are the functional units available to the machine
 3431 

 5973   format %{ "MEMBAR-storestore (empty encoding)" %}
 5974   ins_encode( );
 5975   ins_pipe(empty);
 5976 %}
 5977 
 5978 //----------Move Instructions--------------------------------------------------
 5979 
 5980 instruct castX2P(rRegP dst, rRegL src)
 5981 %{
        // Reinterprets a long as a pointer: a plain register move, elided when
        // the allocator assigned dst and src the same register.
 5982   match(Set dst (CastX2P src));
 5983
 5984   format %{ "movq    $dst, $src\t# long->ptr" %}
 5985   ins_encode %{
 5986     if ($dst$$reg != $src$$reg) {
 5987       __ movptr($dst$$Register, $src$$Register);
 5988     }
 5989   %}
 5990   ins_pipe(ialu_reg_reg); // XXX
 5991 %}
 5992 


























 5993 instruct castP2X(rRegL dst, rRegP src)
 5994 %{
        // Reinterprets a pointer as a long: a plain register move, elided when
        // the allocator assigned dst and src the same register.
 5995   match(Set dst (CastP2X src));
 5996
 5997   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5998   ins_encode %{
 5999     if ($dst$$reg != $src$$reg) {
 6000       __ movptr($dst$$Register, $src$$Register);
 6001     }
 6002   %}
 6003   ins_pipe(ialu_reg_reg); // XXX
 6004 %}
 6005 
 6006 // Convert oop into int for vectors alignment masking
 6007 instruct convP2I(rRegI dst, rRegP src)
 6008 %{
 6009   match(Set dst (ConvL2I (CastP2X src)));
 6010 
 6011   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6012   ins_encode %{

12196   effect(DEF dst, USE src);
12197   ins_cost(100);
12198   format %{ "movd    $dst,$src\t# MoveI2F" %}
12199   ins_encode %{
12200     __ movdl($dst$$XMMRegister, $src$$Register);
12201   %}
12202   ins_pipe( pipe_slow );
12203 %}
12204 
12205 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves the bits of a long general-purpose register into an XMM register
        // (MoveL2D).
        // NOTE(review): the listing prints "movd" while the encoding calls
        // movdq() -- confirm which mnemonic the listing should show.
12206   match(Set dst (MoveL2D src));
12207   effect(DEF dst, USE src);
12208   ins_cost(100);
12209   format %{ "movd    $dst,$src\t# MoveL2D" %}
12210   ins_encode %{
12211      __ movdq($dst$$XMMRegister, $src$$Register);
12212   %}
12213   ins_pipe( pipe_slow );
12214 %}
12215 

12216 // Fast clearing of an array
12217 // Small non-constant length ClearArray for non-AVX512 targets.
12218 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12219                   Universe dummy, rFlagsReg cr)
12220 %{
12221   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12222   match(Set dummy (ClearArray cnt base));
12223   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































12224 
12225   format %{ $$template
12226     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12227     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12228     $$emit$$"jg      LARGE\n\t"
12229     $$emit$$"dec     rcx\n\t"
12230     $$emit$$"js      DONE\t# Zero length\n\t"
12231     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12232     $$emit$$"dec     rcx\n\t"
12233     $$emit$$"jge     LOOP\n\t"
12234     $$emit$$"jmp     DONE\n\t"
12235     $$emit$$"# LARGE:\n\t"
12236     if (UseFastStosb) {
12237        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12238        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12239     } else if (UseXMMForObjInit) {
12240        $$emit$$"mov     rdi,rax\n\t"
12241        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12242        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12243        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12251        $$emit$$"jl      L_tail\n\t"
12252        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12253        $$emit$$"add     0x20,rax\n\t"
12254        $$emit$$"sub     0x4,rcx\n\t"
12255        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12256        $$emit$$"add     0x4,rcx\n\t"
12257        $$emit$$"jle     L_end\n\t"
12258        $$emit$$"dec     rcx\n\t"
12259        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12260        $$emit$$"vmovq   xmm0,(rax)\n\t"
12261        $$emit$$"add     0x8,rax\n\t"
12262        $$emit$$"dec     rcx\n\t"
12263        $$emit$$"jge     L_sloop\n\t"
12264        $$emit$$"# L_end:\n\t"
12265     } else {
12266        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12267     }
12268     $$emit$$"# DONE"
12269   %}
12270   ins_encode %{
12271     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12272                  $tmp$$XMMRegister, false, knoreg);
12273   %}
12274   ins_pipe(pipe_slow);
12275 %}
12276 
12277 // Small non-constant length ClearArray for AVX512 targets.
12278 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12279                        Universe dummy, rFlagsReg cr)
12280 %{
12281   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12282   match(Set dummy (ClearArray cnt base));
12283   ins_cost(125);
12284   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12285 
12286   format %{ $$template
12287     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12288     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12289     $$emit$$"jg      LARGE\n\t"
12290     $$emit$$"dec     rcx\n\t"
12291     $$emit$$"js      DONE\t# Zero length\n\t"
12292     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12293     $$emit$$"dec     rcx\n\t"
12294     $$emit$$"jge     LOOP\n\t"
12295     $$emit$$"jmp     DONE\n\t"
12296     $$emit$$"# LARGE:\n\t"
12297     if (UseFastStosb) {
12298        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12299        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12300     } else if (UseXMMForObjInit) {
12301        $$emit$$"mov     rdi,rax\n\t"
12302        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12303        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12304        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12312        $$emit$$"jl      L_tail\n\t"
12313        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12314        $$emit$$"add     0x20,rax\n\t"
12315        $$emit$$"sub     0x4,rcx\n\t"
12316        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12317        $$emit$$"add     0x4,rcx\n\t"
12318        $$emit$$"jle     L_end\n\t"
12319        $$emit$$"dec     rcx\n\t"
12320        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12321        $$emit$$"vmovq   xmm0,(rax)\n\t"
12322        $$emit$$"add     0x8,rax\n\t"
12323        $$emit$$"dec     rcx\n\t"
12324        $$emit$$"jge     L_sloop\n\t"
12325        $$emit$$"# L_end:\n\t"
12326     } else {
12327        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12328     }
12329     $$emit$$"# DONE"
12330   %}
12331   ins_encode %{
12332     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12333                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
12334   %}
12335   ins_pipe(pipe_slow);
12336 %}
12337 
12338 // Large non-constant length ClearArray for non-AVX512 targets.
12339 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12340                         Universe dummy, rFlagsReg cr)
12341 %{
12342   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
12343   match(Set dummy (ClearArray cnt base));
12344   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































12345 
12346   format %{ $$template
12347     if (UseFastStosb) {
12348        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12349        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12350        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12351     } else if (UseXMMForObjInit) {
12352        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12353        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12354        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12355        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12356        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12357        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12358        $$emit$$"add     0x40,rax\n\t"
12359        $$emit$$"# L_zero_64_bytes:\n\t"
12360        $$emit$$"sub     0x8,rcx\n\t"
12361        $$emit$$"jge     L_loop\n\t"
12362        $$emit$$"add     0x4,rcx\n\t"
12363        $$emit$$"jl      L_tail\n\t"
12364        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12365        $$emit$$"add     0x20,rax\n\t"
12366        $$emit$$"sub     0x4,rcx\n\t"
12367        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12368        $$emit$$"add     0x4,rcx\n\t"
12369        $$emit$$"jle     L_end\n\t"
12370        $$emit$$"dec     rcx\n\t"
12371        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12372        $$emit$$"vmovq   xmm0,(rax)\n\t"
12373        $$emit$$"add     0x8,rax\n\t"
12374        $$emit$$"dec     rcx\n\t"
12375        $$emit$$"jge     L_sloop\n\t"
12376        $$emit$$"# L_end:\n\t"
12377     } else {
12378        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12379        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12380     }
12381   %}
12382   ins_encode %{
12383     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12384                  $tmp$$XMMRegister, true, knoreg);
12385   %}
12386   ins_pipe(pipe_slow);
12387 %}
12388 
12389 // Large non-constant length ClearArray for AVX512 targets.
      // Selected when UseAVX > 2 and the ClearArray node is marked large. cnt and
      // base are consumed and clobbered, rax (zero) and the flags are killed, and
      // an XMM and an opmask temporary are reserved. All code comes from
      // clear_mem(); the format template below only describes it for listings.
12390 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12391                              Universe dummy, rFlagsReg cr)
12392 %{
12393   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
12394   match(Set dummy (ClearArray cnt base));
12395   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12396
        // The listing shows one of three variants, chosen by UseFastStosb and
        // UseXMMForObjInit: rep stosb, a YMM store loop, or rep stosq.
12397   format %{ $$template
12398     if (UseFastStosb) {
12399        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12400        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12401        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12402     } else if (UseXMMForObjInit) {
12403        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12404        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12405        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12406        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12407        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12408        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12409        $$emit$$"add     0x40,rax\n\t"
12410        $$emit$$"# L_zero_64_bytes:\n\t"
12411        $$emit$$"sub     0x8,rcx\n\t"
12412        $$emit$$"jge     L_loop\n\t"
12413        $$emit$$"add     0x4,rcx\n\t"
12414        $$emit$$"jl      L_tail\n\t"
12415        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12416        $$emit$$"add     0x20,rax\n\t"
12417        $$emit$$"sub     0x4,rcx\n\t"
12418        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12419        $$emit$$"add     0x4,rcx\n\t"
12420        $$emit$$"jle     L_end\n\t"
12421        $$emit$$"dec     rcx\n\t"
12422        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12423        $$emit$$"vmovq   xmm0,(rax)\n\t"
12424        $$emit$$"add     0x8,rax\n\t"
12425        $$emit$$"dec     rcx\n\t"
12426        $$emit$$"jge     L_sloop\n\t"
12427        $$emit$$"# L_end:\n\t"
12428     } else {
12429        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12430        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12431     }
12432   %}
12433   ins_encode %{
          // 'true' is the flag the small (non-large) variants pass as false; the
          // opmask temporary is handed over where the non-EVEX variant passes
          // knoreg.
12434     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12435                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
12436   %}
12437   ins_pipe(pipe_slow);
12438 %}
12439 
12440 // Small constant length ClearArray for AVX512 targets.
      // The count is an immediate (immL), so it is passed to clear_mem() as a
      // constant rather than in a register. Requires a non-large ClearArray,
      // MaxVectorSize >= 32 and AVX512VL support.
12441 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
12442 %{
12443   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
12444   match(Set dummy (ClearArray cnt base));

12445   ins_cost(100);
        // base is not listed as killed here; tmp, zero and ktmp are temporaries
        // and the flags are clobbered.
12446   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
12447   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12448   ins_encode %{
12449    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12450   %}
12451   ins_pipe(pipe_slow);
12452 %}
12453 
12454 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12455                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12456 %{
        // StrComp for the LL encoding on targets without AVX512VL+BW. All four
        // string/count inputs are consumed and clobbered, the result is produced
        // in rax, and no opmask register is used (knoreg).
12457   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12458   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12459   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12460
12461   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12462   ins_encode %{
12463     __ string_compare($str1$$Register, $str2$$Register,
12464                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12465                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12466   %}
12467   ins_pipe( pipe_slow );
12468 %}
12469 

14275 
14276   ins_cost(300);
14277   format %{ "call_leaf,runtime " %}
14278   ins_encode(clear_avx, Java_To_Runtime(meth));
14279   ins_pipe(pipe_slow);
14280 %}
14281 
14282 // Call runtime without safepoint and with vector arguments
      // Unlike the other leaf-call instructs, the encoding has no clear_avx step;
      // MachCallRuntimeNode::ret_addr_offset() mirrors this by not adding
      // clear_avx_size() for Op_CallLeafVector.
14283 instruct CallLeafDirectVector(method meth)
14284 %{
14285   match(CallLeafVector);
14286   effect(USE meth);
14287
14288   ins_cost(300);
14289   format %{ "call_leaf,vector " %}
14290   ins_encode(Java_To_Runtime(meth));
14291   ins_pipe(pipe_slow);
14292 %}
14293 
14294 // Call runtime without safepoint















14295 instruct CallLeafNoFPDirect(method meth)
14296 %{

        // Matches CallLeafNoFP; the encoding runs clear_avx before the
        // Java_To_Runtime call sequence.
14297   match(CallLeafNoFP);
14298   effect(USE meth);
14299
14300   ins_cost(300);
14301   format %{ "call_leaf_nofp,runtime " %}
14302   ins_encode(clear_avx, Java_To_Runtime(meth));
14303   ins_pipe(pipe_slow);
14304 %}
14305 
14306 // Return Instruction
14307 // Remove the return address & jump to it.
14308 // Notice: We always emit a nop after a ret to make sure there is room
14309 // for safepoint patching
14310 instruct Ret()
14311 %{
14312   match(Return);
14313 
14314   format %{ "ret" %}
14315   ins_encode %{
14316     __ ret(0);

  598 }
  599 
  600 // !!!!! Special hack to get all types of calls to specify the byte offset
  601 //       from the start of the call to the point where the return address
  602 //       will point.
      //
      // Static Java call: a fixed 5 bytes plus clear_avx_size(), i.e. the same
      // amount that CallStaticJavaDirectNode::compute_padding skips as
      // "vzeroupper" ahead of the call itself.
  603 int MachCallStaticJavaNode::ret_addr_offset()
  604 {
  605   int offset = 5; // 5 bytes from start of call to where return address points
  606   offset += clear_avx_size(); // bytes emitted ahead of the call instruction
  607   return offset;
  608 }
  609 
  610 int MachCallDynamicJavaNode::ret_addr_offset()
  611 {
        // Dynamic Java call: the call is preceded by a movq (compare the 11-byte
        // "movq instruction + call opcode byte" skip in
        // CallDynamicJavaDirectNode::compute_padding), hence the larger constant
        // than in the static case.
  612   int offset = 15; // 15 bytes from start of call to where return address points
  613   offset += clear_avx_size(); // plus the bytes compute_padding skips as "vzeroupper"
  614   return offset;
  615 }
  616 
  617 int MachCallRuntimeNode::ret_addr_offset() {
        // A null entry point marks the indirect leaf call, which is a bare
        // register-indirect callq with nothing emitted before it.
  618   if (_entry_point == nullptr) {
  619     // CallLeafNoFPInDirect
  620     return 3; // callq (register)
  621   }
        // Otherwise: movq of the target into r10 followed by an indirect call,
        // plus clear_avx_size() for every opcode except CallLeafVector.
  622   int offset = 13; // movq r10,#addr; callq (r10)
  623   if (this->ideal_Opcode() != Op_CallLeafVector) {
  624     offset += clear_avx_size();
  625   }
  626   return offset;
  627 }
  628 
  629 //
  630 // Compute padding required for nodes which need alignment
  631 //
  632 
  633 // The address of the call instruction needs to be 4-byte aligned to
  634 // ensure that it does not span a cache line so that it can be patched.
      //
      // Returns the number of padding bytes to insert at current_offset.
      // NOTE(review): the offset that is rounded up to alignment_required() is
      // the one just past the skipped vzeroupper bytes and the 1-byte call
      // opcode, i.e. the call's operand rather than the instruction start; the
      // wording above is looser than the code.
  635 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  636 {
  637   current_offset += clear_avx_size(); // skip vzeroupper
  638   current_offset += 1; // skip call opcode byte
  639   return align_up(current_offset, alignment_required()) - current_offset;
  640 }
  641 
  642 // The address of the call instruction needs to be 4-byte aligned to
  643 // ensure that it does not span a cache line so that it can be patched.
  644 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  645 {
  646   current_offset += clear_avx_size(); // skip vzeroupper
  647   current_offset += 11; // skip movq instruction + call opcode byte
  648   return align_up(current_offset, alignment_required()) - current_offset;

  834     st->print("\n\t");
  835     st->print("# stack alignment check");
  836 #endif
  837   }
  838   if (C->stub_function() != nullptr) {
  839     st->print("\n\t");
  840     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  841     st->print("\n\t");
  842     st->print("je      fast_entry\t");
  843     st->print("\n\t");
  844     st->print("call    #nmethod_entry_barrier_stub\t");
  845     st->print("\n\tfast_entry:");
  846   }
  847   st->cr();
  848 }
  849 #endif
  850 
  851 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog. Frame setup is delegated to
        // verified_entry(C); the entry barrier and the verified-entry label
        // binding are handled here.
  852   Compile* C = ra_->C;
  853
  854   __ verified_entry(C);













  855
        // Stub compilations (stub_function() != nullptr) get no entry barrier.
  856   if (ra_->C->stub_function() == nullptr) {
  857     __ entry_barrier();
  858   }
  859
        // The label is bound only during real emission, not while the code is
        // being emitted into scratch space for size measurement.
  860   if (!Compile::current()->output()->in_scratch_emit_size()) {
  861     __ bind(*_verified_entry);
  862   }
  863
        // Record the code offset reached once the prolog has been emitted.
  864   C->output()->set_frame_complete(__ offset());
  865
  866   if (C->has_mach_constant_base_node()) {
  867     // NOTE: We set the table base offset here because users might be
  868     // emitted before MachConstantBaseNode.
  869     ConstantTable& constant_table = C->output()->constant_table();
  870     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  871   }
  872 }
  873 






  874 int MachPrologNode::reloc() const
  875 {
        // NOTE(review): presumably an estimate of the relocation entries this node
        // needs; the value returned here is 0 despite the "large enough" wording
        // shared with MachEpilogNode::reloc -- confirm against the base class.
  876   return 0; // a large enough number
  877 }
  878 
  879 //=============================================================================
  880 #ifndef PRODUCT
  881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  882 {
  883   Compile* C = ra_->C;
  884   if (generate_vzeroupper(C)) {
  885     st->print("vzeroupper");
  886     st->cr(); st->print("\t");
  887   }
  888 
  889   int framesize = C->output()->frame_size_in_bytes();
  890   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  891   // Remove word for return adr already pushed
  892   // and RBP
  893   framesize -= 2*wordSize;

  900   st->print_cr("popq    rbp");
  901   if (do_polling() && C->is_method_compilation()) {
  902     st->print("\t");
  903     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  904                  "ja      #safepoint_stub\t"
  905                  "# Safepoint: poll for GC");
  906   }
  907 }
  908 #endif
  909 
  910 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  911 {
        // Emits the method epilog: optional vzeroupper, frame removal via
        // remove_frame(), optional reserved-stack check, and the return
        // safepoint poll.
  912   Compile* C = ra_->C;
  913
  914   if (generate_vzeroupper(C)) {
  915     // Clear upper bits of YMM registers when current compiled code uses
  916     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  917     __ vzeroupper();
  918   }
  919
  920   // Subtract two words to account for return address and rbp
  921   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
        // The second argument forwards whether this compilation needs stack repair.
  922   __ remove_frame(initial_framesize, C->needs_stack_repair());










  923
  924   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  925     __ reserved_stack_check();
  926   }
  927
  928   if (do_polling() && C->is_method_compilation()) {
          // During scratch emission (size measurement) no stub is registered and
          // the poll branches to a throwaway label instead.
  929     Label dummy_label;
  930     Label* code_stub = &dummy_label;
  931     if (!C->output()->in_scratch_emit_size()) {
            // Real emission: allocate the poll stub in the compile arena, keyed by
            // the current code offset, and branch to its entry.
  932       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  933       C->output()->add_stub(stub);
  934       code_stub = &stub->entry();
  935     }
  936     __ relocate(relocInfo::poll_return_type);
  937     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  938   }
  939 }
  940 






  941 int MachEpilogNode::reloc() const
  942 {
        // Reports the relocation count for the epilog as the constant 2.
        // MachEpilogNode::emit issues one explicit relocate() for the return poll.
        // NOTE(review): 2 presumably leaves headroom as an upper bound -- confirm.
  943   return 2; // a large enough number
  944 }
  945 
  946 const Pipeline* MachEpilogNode::pipeline() const
  947 {
        // The epilog has no pipeline description of its own; it returns the
        // class-level pipeline provided by MachNode.
  948   return MachNode::pipeline_class();
  949 }
  950 
  951 //=============================================================================
  952 
  953 enum RC {
        // Coarse register-class tags.
        // NOTE(review): the meanings below are inferred from the enumerator
        // names only -- confirm against the code that consumes RC.
  954   rc_bad,    // presumably: no valid class
  955   rc_int,    // presumably: general-purpose register
  956   rc_kreg,   // presumably: opmask (k) register
  957   rc_float,  // presumably: XMM/vector register
  958   rc_stack   // presumably: stack slot
  959 };
  960 

 1522 #endif
 1523 
 1524 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1525 {
        // Loads an rsp-relative address into this node's allocated register.
        // The displacement comes from the first element of input register mask 0,
        // translated to a frame offset by the register allocator.
 1526   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1527   int reg = ra_->get_encode(this);
 1528 
 1529   __ lea(as_Register(reg), Address(rsp, offset));
 1530 }
 1531 
 1532 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1533 {
        // Byte size reported for this node. It depends on two inputs: whether the
        // frame offset is below 0x80, and whether the register encoding exceeds 15
        // (one extra byte; REX2 vs. REX prefix per the comments below).
        // NOTE(review): must presumably match what BoxLockNode::emit produces;
        // offset < 0x80 presumably selects an 8-bit displacement -- confirm.
 1534   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1535   if (ra_->get_encode(this) > 15) {
 1536     return (offset < 0x80) ? 6 : 9; // REX2
 1537   } else {
 1538     return (offset < 0x80) ? 5 : 8; // REX
 1539   }
 1540 }
 1541 
 1542 //=============================================================================
 1543 #ifndef PRODUCT
      // Non-PRODUCT builds only: prints just the node name, not the code that
      // MachVEPNode::emit generates.
 1544 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1545 {
 1546   st->print_cr("MachVEPNode");
 1547 }
 1548 #endif
 1549 
 1550 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1551 {
        // Emits one of two entry sequences depending on _verified, then pads the
        // emitted code with NOPs so its length is a multiple of 4 bytes.
 1552   CodeBuffer* cbuf = masm->code();
        // Remember the size before emitting so the padding below can be computed
        // from the number of bytes this node added.
 1553   uint insts_size = cbuf->insts_size();
 1554   if (!_verified) {
          // Unverified variant: only the inline-cache check is emitted.
 1555     __ ic_check(1);
 1556   } else {
 1557     // TODO 8284443 Avoid creation of temporary frame
          // For non-stub compilations: build a frame, run the entry barrier, and
          // remove the frame again before unpacking the arguments.
 1558     if (ra_->C->stub_function() == nullptr) {
 1559       __ verified_entry(ra_->C, 0);
 1560       __ entry_barrier();
 1561       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1562       __ remove_frame(initial_framesize, false);
 1563     }
 1564     // Unpack inline type args passed as oop and then jump to
 1565     // the verified entry point (skipping the unverified entry).
 1566     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1567     // Emit code for verified entry and save increment for stack repair on return
 1568     __ verified_entry(ra_->C, sp_inc);
          // While in_scratch_emit_size() is true, jump to a local placeholder label
          // rather than to the real _verified_entry label.
 1569     if (Compile::current()->output()->in_scratch_emit_size()) {
 1570       Label dummy_verified_entry;
 1571       __ jmp(dummy_verified_entry);
 1572     } else {
 1573       __ jmp(*_verified_entry);
 1574     }
 1575   }
 1576   /* WARNING these NOPs are critical so that verified entry point is properly
 1577      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
        // (bytes emitted by this node) & 3 gives the misalignment; the second mask
        // turns a result of 4 into 0.
 1578   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 1579   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1580   if (nops_cnt > 0) {
 1581     __ nop(nops_cnt);
 1582   }
 1583 }
 1584 
 1585 //=============================================================================
 1586 #ifndef PRODUCT
      // Non-PRODUCT builds only: prints a textual sketch of the inline-cache check.
      // The 32-bit (movl/cmpl) form is printed when UseCompressedClassPointers is
      // set, the 64-bit (movq/cmpq) form otherwise, followed by the miss branch.
 1587 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1588 {
 1589   if (UseCompressedClassPointers) {
 1590     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1591     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1592   } else {
          // Full-width klass pointer: the annotation must not say "compressed".
 1593     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1594     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1595   }
 1596   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1597 }
 1598 #endif
 1599 
 1600 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1601 {
        // The whole unverified entry point is the inline-cache check, emitted with
        // InteriorEntryAlignment as its argument.
 1602   __ ic_check(InteriorEntryAlignment);
 1603 }
 1604 







 1605 //=============================================================================
 1606 
 1607 bool Matcher::supports_vector_calling_convention(void) {
        // Availability is tied directly to the EnableVectorSupport flag.
 1608   return EnableVectorSupport;
 1609 }
 1610 
 1611 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Returns the register pair (hi, lo) for a vector return value.
        // The low slot is always XMM0_num. The high slot is XMM0b_num by default
        // and is widened to XMM0d_num / XMM0h_num / XMM0p_num for
        // Op_VecX / Op_VecY / Op_VecZ respectively.
        // Asserts that EnableVectorSupport is on.
 1612   assert(EnableVectorSupport, "sanity");
 1613   int lo = XMM0_num;
 1614   int hi = XMM0b_num;
 1615   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1616   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1617   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1618   return OptoRegPair(hi, lo);
 1619 }
 1620 
 1621 // Is this branch offset short enough that a short branch can be used?
 1622 //
 1623 // NOTE: If the platform does not provide any short branch variants, then
 1624 //       this method should return false for offset 0.

 3054   %}
 3055 %}
 3056 
 3057 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3058 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3059 %{
 3060   constraint(ALLOC_IN_RC(ptr_reg));
        // Matches only when the long type found at n->in(2)->in(3)->in(1) has a
        // non-negative lower bound (_lo >= 0).
        // NOTE(review): that path presumably reaches the (ConvI2L idx) node of the
        // match rule below -- confirm the input numbering.
 3061   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3062   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3063 
 3064   op_cost(10);
 3065   format %{"[$reg + $off + $idx << $scale]" %}
 3066   interface(MEMORY_INTER) %{
 3067     base($reg);
 3068     index($idx);
 3069     scale($scale);
 3070     disp($off);
 3071   %}
 3072 %}
 3073 
 3074 // Indirect Narrow Oop Operand
 3075 operand indCompressedOop(rRegN reg) %{
        // Usable only with compressed oops whose shift equals Address::times_8;
        // the interface below hard-codes the matching scale (0x3) and R12 as base.
 3076   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3077   constraint(ALLOC_IN_RC(ptr_reg));
 3078   match(DecodeN reg);
 3079 
 3080   op_cost(10);
 3081   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3082   interface(MEMORY_INTER) %{
 3083     base(0xc); // R12
 3084     index($reg);
 3085     scale(0x3);
 3086     disp(0x0);
 3087   %}
 3088 %}
 3089 
 3090 // Indirect Narrow Oop Plus Offset Operand
 3091 // Note: the x86 architecture doesn't support "scale * index + offset" without
 3092 // a base, so we can't free r12 even with CompressedOops::base() == nullptr.
 3093 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Same gating as the offset-less narrow oop operand: compressed oops with
        // a shift equal to Address::times_8. The constant offset becomes the
        // displacement.
 3094   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3095   constraint(ALLOC_IN_RC(ptr_reg));
 3096   match(AddP (DecodeN reg) off);
 3097 
 3098   op_cost(10);
 3099   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3100   interface(MEMORY_INTER) %{
 3101     base(0xc); // R12
 3102     index($reg);
 3103     scale(0x3);
 3104     disp($off);
 3105   %}
 3106 %}
 3107 
 3108 // Indirect Memory Operand
 3109 operand indirectNarrow(rRegN reg)

 3416     equal(0x4, "e");
 3417     not_equal(0x5, "ne");
 3418     less(0x2, "b");
 3419     greater_equal(0x3, "ae");
 3420     less_equal(0x6, "be");
 3421     greater(0x7, "a");
 3422     overflow(0x0, "o");
 3423     no_overflow(0x1, "no");
 3424   %}
 3425 %}
 3426 
 3427 //----------OPERAND CLASSES----------------------------------------------------
 3428 // Operand Classes are groups of operands that are used to simplify
 3429 // instruction definitions by not requiring the AD writer to specify separate
 3430 // instructions for every form of operand when the instruction accepts
 3431 // multiple operand types with the same basic encoding and format.  The classic
 3432 // case of this is memory operands.
 3433 
      // "memory" groups every addressing-mode operand listed below, including the
      // compressed-oop and *Narrow variants.
 3434 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3435                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3436                indCompressedOop, indCompressedOopOffset,
 3437                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3438                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3439                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3440 
 3441 //----------PIPELINE-----------------------------------------------------------
 3442 // Rules which define the behavior of the target architectures pipeline.
 3443 pipeline %{
 3444 
 3445 //----------ATTRIBUTES---------------------------------------------------------
 3446 attributes %{
 3447   variable_size_instructions;        // Variable size instructions
 3448   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3449   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 3450   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3451   instruction_fetch_units = 1;       // of 16 bytes
 3452 %}
 3453 
 3454 //----------RESOURCES----------------------------------------------------------
 3455 // Resources are the functional units available to the machine
 3456 

 5998   format %{ "MEMBAR-storestore (empty encoding)" %}
 5999   ins_encode( );
 6000   ins_pipe(empty);
 6001 %}
 6002 
 6003 //----------Move Instructions--------------------------------------------------
 6004 
 6005 instruct castX2P(rRegP dst, rRegL src)
 6006 %{
        // CastX2P: reinterpret a long register value as a pointer.
 6007   match(Set dst (CastX2P src));
 6008 
 6009   format %{ "movq    $dst, $src\t# long->ptr" %}
 6010   ins_encode %{
          // Nothing is emitted when source and destination share a register.
 6011     if ($dst$$reg != $src$$reg) {
 6012       __ movptr($dst$$Register, $src$$Register);
 6013     }
 6014   %}
 6015   ins_pipe(ialu_reg_reg); // XXX
 6016 %}
 6017 
 6018 instruct castI2N(rRegN dst, rRegI src)
 6019 %{
        // CastI2N: reinterpret an int register value as a narrow pointer.
 6020   match(Set dst (CastI2N src));
 6021 
        // The listing names the instruction the encoding emits (32-bit movl).
 6022   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 6023   ins_encode %{
          // Nothing is emitted when source and destination share a register.
 6024     if ($dst$$reg != $src$$reg) {
 6025       __ movl($dst$$Register, $src$$Register);
 6026     }
 6027   %}
 6028   ins_pipe(ialu_reg_reg); // XXX
 6029 %}
 6030 
 6031 instruct castN2X(rRegL dst, rRegN src)
 6032 %{
        // CastP2X whose input lives in a narrow-pointer register class (rRegN).
        // NOTE(review): the listing text says "ptr -> long" and the move is a
        // full-width movptr even though the source operand is narrow -- confirm
        // both are intended.
 6033   match(Set dst (CastP2X src));
 6034 
 6035   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6036   ins_encode %{
          // Nothing is emitted when source and destination share a register.
 6037     if ($dst$$reg != $src$$reg) {
 6038       __ movptr($dst$$Register, $src$$Register);
 6039     }
 6040   %}
 6041   ins_pipe(ialu_reg_reg); // XXX
 6042 %}
 6043 
 6044 instruct castP2X(rRegL dst, rRegP src)
 6045 %{
        // CastP2X: reinterpret a pointer register value as a long.
 6046   match(Set dst (CastP2X src));
 6047 
 6048   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6049   ins_encode %{
          // Nothing is emitted when source and destination share a register.
 6050     if ($dst$$reg != $src$$reg) {
 6051       __ movptr($dst$$Register, $src$$Register);
 6052     }
 6053   %}
 6054   ins_pipe(ialu_reg_reg); // XXX
 6055 %}
 6056 
 6057 // Convert oop into int for vectors alignment masking
 6058 instruct convP2I(rRegI dst, rRegP src)
 6059 %{
 6060   match(Set dst (ConvL2I (CastP2X src)));
 6061 
 6062   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6063   ins_encode %{

12247   effect(DEF dst, USE src);
12248   ins_cost(100);
12249   format %{ "movd    $dst,$src\t# MoveI2F" %}
12250   ins_encode %{
12251     __ movdl($dst$$XMMRegister, $src$$Register);
12252   %}
12253   ins_pipe( pipe_slow );
12254 %}
12255 
12256 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D: copies a long from a general-purpose register into a double
        // (XMM) register.
        // NOTE(review): the listing prints "movd" while the encoding calls movdq.
12257   match(Set dst (MoveL2D src));
12258   effect(DEF dst, USE src);
12259   ins_cost(100);
12260   format %{ "movd    $dst,$src\t# MoveL2D" %}
12261   ins_encode %{
12262      __ movdq($dst$$XMMRegister, $src$$Register);
12263   %}
12264   ins_pipe( pipe_slow );
12265 %}
12266 
12267 
12268 // Fast clearing of an array
12269 // Small non-constant length ClearArray for non-AVX512 targets.
12270 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12271                   Universe dummy, rFlagsReg cr)
12272 %{
        // Selected for ClearArray nodes that are neither is_large() nor
        // word_copy_only(), when UseAVX <= 2.
12273   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12274   match(Set dummy (ClearArray (Binary cnt base) val));
12275   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12276 
        // The listing below is descriptive text chosen by the UseFastStosb and
        // UseXMMForObjInit flags; the code itself comes from clear_mem().
12277   format %{ $$template
12278     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12279     $$emit$$"jg      LARGE\n\t"
12280     $$emit$$"dec     rcx\n\t"
12281     $$emit$$"js      DONE\t# Zero length\n\t"
12282     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12283     $$emit$$"dec     rcx\n\t"
12284     $$emit$$"jge     LOOP\n\t"
12285     $$emit$$"jmp     DONE\n\t"
12286     $$emit$$"# LARGE:\n\t"
12287     if (UseFastStosb) {
12288        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12289        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12290     } else if (UseXMMForObjInit) {
12291        $$emit$$"movdq   $tmp, $val\n\t"
12292        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12293        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12294        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12295        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12296        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12297        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12298        $$emit$$"add     0x40,rax\n\t"
12299        $$emit$$"# L_zero_64_bytes:\n\t"
12300        $$emit$$"sub     0x8,rcx\n\t"
12301        $$emit$$"jge     L_loop\n\t"
12302        $$emit$$"add     0x4,rcx\n\t"
12303        $$emit$$"jl      L_tail\n\t"
12304        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12305        $$emit$$"add     0x20,rax\n\t"
12306        $$emit$$"sub     0x4,rcx\n\t"
12307        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12308        $$emit$$"add     0x4,rcx\n\t"
12309        $$emit$$"jle     L_end\n\t"
12310        $$emit$$"dec     rcx\n\t"
12311        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12312        $$emit$$"vmovq   xmm0,(rax)\n\t"
12313        $$emit$$"add     0x8,rax\n\t"
12314        $$emit$$"dec     rcx\n\t"
12315        $$emit$$"jge     L_sloop\n\t"
12316        $$emit$$"# L_end:\n\t"
12317     } else {
12318        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12319     }
12320     $$emit$$"# DONE"
12321   %}
12322   ins_encode %{
          // The two trailing booleans line up with the predicate: is_large() is
          // false and word_copy_only() is false.
          // NOTE(review): confirm the parameter order against clear_mem().
12323     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12324                  $tmp$$XMMRegister, false, false);
12325   %}
12326   ins_pipe(pipe_slow);
12327 %}
12328 
12329 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12330                             Universe dummy, rFlagsReg cr)
12331 %{
        // Small non-constant length ClearArray for UseAVX <= 2 when the node is
        // word_copy_only(). The listing has no UseFastStosb (byte store) branch.
12332   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12333   match(Set dummy (ClearArray (Binary cnt base) val));
12334   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12335 
12336   format %{ $$template
12337     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12338     $$emit$$"jg      LARGE\n\t"
12339     $$emit$$"dec     rcx\n\t"
12340     $$emit$$"js      DONE\t# Zero length\n\t"
12341     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12342     $$emit$$"dec     rcx\n\t"
12343     $$emit$$"jge     LOOP\n\t"
12344     $$emit$$"jmp     DONE\n\t"
12345     $$emit$$"# LARGE:\n\t"
12346     if (UseXMMForObjInit) {
12347        $$emit$$"movdq   $tmp, $val\n\t"
12348        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12349        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12350        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12351        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12352        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12353        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12354        $$emit$$"add     0x40,rax\n\t"
12355        $$emit$$"# L_zero_64_bytes:\n\t"
12356        $$emit$$"sub     0x8,rcx\n\t"
12357        $$emit$$"jge     L_loop\n\t"
12358        $$emit$$"add     0x4,rcx\n\t"
12359        $$emit$$"jl      L_tail\n\t"
12360        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12361        $$emit$$"add     0x20,rax\n\t"
12362        $$emit$$"sub     0x4,rcx\n\t"
12363        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12364        $$emit$$"add     0x4,rcx\n\t"
12365        $$emit$$"jle     L_end\n\t"
12366        $$emit$$"dec     rcx\n\t"
12367        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12368        $$emit$$"vmovq   xmm0,(rax)\n\t"
12369        $$emit$$"add     0x8,rax\n\t"
12370        $$emit$$"dec     rcx\n\t"
12371        $$emit$$"jge     L_sloop\n\t"
12372        $$emit$$"# L_end:\n\t"
12373     } else {
12374        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12375     }
12376     $$emit$$"# DONE"
12377   %}
12378   ins_encode %{
          // Trailing booleans line up with the predicate: is_large() false,
          // word_copy_only() true.
          // NOTE(review): confirm the parameter order against clear_mem().
12379     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12380                  $tmp$$XMMRegister, false, true);
12381   %}
12382   ins_pipe(pipe_slow);
12383 %}
12384 
12385 // Small non-constant length ClearArray for AVX512 targets.
12386 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12387                        Universe dummy, rFlagsReg cr)
12388 %{
12389   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12390   match(Set dummy (ClearArray (Binary cnt base) val));
12391   ins_cost(125);
12392   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12393 
12394   format %{ $$template
12395     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12396     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12397     $$emit$$"jg      LARGE\n\t"
12398     $$emit$$"dec     rcx\n\t"
12399     $$emit$$"js      DONE\t# Zero length\n\t"
12400     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12401     $$emit$$"dec     rcx\n\t"
12402     $$emit$$"jge     LOOP\n\t"
12403     $$emit$$"jmp     DONE\n\t"
12404     $$emit$$"# LARGE:\n\t"
12405     if (UseFastStosb) {
12406        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12407        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12408     } else if (UseXMMForObjInit) {
12409        $$emit$$"mov     rdi,rax\n\t"
12410        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12411        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12412        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12420        $$emit$$"jl      L_tail\n\t"
12421        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12422        $$emit$$"add     0x20,rax\n\t"
12423        $$emit$$"sub     0x4,rcx\n\t"
12424        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12425        $$emit$$"add     0x4,rcx\n\t"
12426        $$emit$$"jle     L_end\n\t"
12427        $$emit$$"dec     rcx\n\t"
12428        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12429        $$emit$$"vmovq   xmm0,(rax)\n\t"
12430        $$emit$$"add     0x8,rax\n\t"
12431        $$emit$$"dec     rcx\n\t"
12432        $$emit$$"jge     L_sloop\n\t"
12433        $$emit$$"# L_end:\n\t"
12434     } else {
12435        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12436     }
12437     $$emit$$"# DONE"
12438   %}
12439   ins_encode %{
12440     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12441                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
12442   %}
12443   ins_pipe(pipe_slow);
12444 %}
12445 
12446 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12447                                  Universe dummy, rFlagsReg cr)

12448 %{
12449   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12450   match(Set dummy (ClearArray (Binary cnt base) val));
12451   ins_cost(125);
12452   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12453 
12454   format %{ $$template
12455     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12456     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12457     $$emit$$"jg      LARGE\n\t"
12458     $$emit$$"dec     rcx\n\t"
12459     $$emit$$"js      DONE\t# Zero length\n\t"
12460     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12461     $$emit$$"dec     rcx\n\t"
12462     $$emit$$"jge     LOOP\n\t"
12463     $$emit$$"jmp     DONE\n\t"
12464     $$emit$$"# LARGE:\n\t"
12465     if (UseFastStosb) {
12466        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12467        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12468     } else if (UseXMMForObjInit) {
12469        $$emit$$"mov     rdi,rax\n\t"
12470        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12471        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12472        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12480        $$emit$$"jl      L_tail\n\t"
12481        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12482        $$emit$$"add     0x20,rax\n\t"
12483        $$emit$$"sub     0x4,rcx\n\t"
12484        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12485        $$emit$$"add     0x4,rcx\n\t"
12486        $$emit$$"jle     L_end\n\t"
12487        $$emit$$"dec     rcx\n\t"
12488        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12489        $$emit$$"vmovq   xmm0,(rax)\n\t"
12490        $$emit$$"add     0x8,rax\n\t"
12491        $$emit$$"dec     rcx\n\t"
12492        $$emit$$"jge     L_sloop\n\t"
12493        $$emit$$"# L_end:\n\t"
12494     } else {
12495        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12496     }
12497     $$emit$$"# DONE"
12498   %}
12499   ins_encode %{
12500     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12501                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
12502   %}
12503   ins_pipe(pipe_slow);
12504 %}
12505 
12506 // Large non-constant length ClearArray for non-AVX512 targets.
12507 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12508                         Universe dummy, rFlagsReg cr)
12509 %{
        // Selected for is_large() ClearArray nodes that are not word_copy_only(),
        // when UseAVX <= 2. The listing has no short-length prologue.
12510   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12511   match(Set dummy (ClearArray (Binary cnt base) val));
12512   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12513 
12514   format %{ $$template
12515     if (UseFastStosb) {
12516        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12517        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12518     } else if (UseXMMForObjInit) {
12519        $$emit$$"movdq   $tmp, $val\n\t"
12520        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12521        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12522        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12523        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12524        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12525        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12526        $$emit$$"add     0x40,rax\n\t"
12527        $$emit$$"# L_zero_64_bytes:\n\t"
12528        $$emit$$"sub     0x8,rcx\n\t"
12529        $$emit$$"jge     L_loop\n\t"
12530        $$emit$$"add     0x4,rcx\n\t"
12531        $$emit$$"jl      L_tail\n\t"
12532        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12533        $$emit$$"add     0x20,rax\n\t"
12534        $$emit$$"sub     0x4,rcx\n\t"
12535        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12536        $$emit$$"add     0x4,rcx\n\t"
12537        $$emit$$"jle     L_end\n\t"
12538        $$emit$$"dec     rcx\n\t"
12539        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12540        $$emit$$"vmovq   xmm0,(rax)\n\t"
12541        $$emit$$"add     0x8,rax\n\t"
12542        $$emit$$"dec     rcx\n\t"
12543        $$emit$$"jge     L_sloop\n\t"
12544        $$emit$$"# L_end:\n\t"
12545     } else {
12546        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12547     }
12548   %}
12549   ins_encode %{
          // Trailing booleans line up with the predicate: is_large() true,
          // word_copy_only() false.
          // NOTE(review): confirm the parameter order against clear_mem().
12550     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12551                  $tmp$$XMMRegister, true, false);
12552   %}
12553   ins_pipe(pipe_slow);
12554 %}
12555 
12556 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12557                                   Universe dummy, rFlagsReg cr)
12558 %{
        // Large non-constant length ClearArray for UseAVX <= 2 when the node is
        // word_copy_only(). The listing has no UseFastStosb (byte store) branch.
12559   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12560   match(Set dummy (ClearArray (Binary cnt base) val));
12561   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12562 
12563   format %{ $$template
12564     if (UseXMMForObjInit) {
12565        $$emit$$"movdq   $tmp, $val\n\t"
12566        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12567        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12568        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12569        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12570        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12571        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12572        $$emit$$"add     0x40,rax\n\t"
12573        $$emit$$"# L_zero_64_bytes:\n\t"
12574        $$emit$$"sub     0x8,rcx\n\t"
12575        $$emit$$"jge     L_loop\n\t"
12576        $$emit$$"add     0x4,rcx\n\t"
12577        $$emit$$"jl      L_tail\n\t"
12578        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12579        $$emit$$"add     0x20,rax\n\t"
12580        $$emit$$"sub     0x4,rcx\n\t"
12581        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12582        $$emit$$"add     0x4,rcx\n\t"
12583        $$emit$$"jle     L_end\n\t"
12584        $$emit$$"dec     rcx\n\t"
12585        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12586        $$emit$$"vmovq   xmm0,(rax)\n\t"
12587        $$emit$$"add     0x8,rax\n\t"
12588        $$emit$$"dec     rcx\n\t"
12589        $$emit$$"jge     L_sloop\n\t"
12590        $$emit$$"# L_end:\n\t"
12591     } else {
12592        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12593     }
12594   %}
12595   ins_encode %{
          // Trailing booleans line up with the predicate: is_large() true,
          // word_copy_only() true.
          // NOTE(review): confirm the parameter order against clear_mem().
12596     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12597                  $tmp$$XMMRegister, true, true);
12598   %}
12599   ins_pipe(pipe_slow);
12600 %}
12601 
12602 // Large non-constant length ClearArray for AVX512 targets.
12603 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12604                              Universe dummy, rFlagsReg cr)
12605 %{
        // Selected for is_large() ClearArray nodes that are not word_copy_only(),
        // when UseAVX > 2. Takes an extra opmask temporary (ktmp).
12606   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12607   match(Set dummy (ClearArray (Binary cnt base) val));
12608   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12609 
        // NOTE(review): the listing prints "xorq rax, rax" and a vpxor of ymm0
        // although rax carries the $val input and the encoding passes $val/$tmp to
        // clear_mem() -- the listing text may be stale; confirm.
12610   format %{ $$template
12611     if (UseFastStosb) {
12612        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12613        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12614        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12615     } else if (UseXMMForObjInit) {
12616        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12617        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12618        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12619        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12620        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12621        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12622        $$emit$$"add     0x40,rax\n\t"
12623        $$emit$$"# L_zero_64_bytes:\n\t"
12624        $$emit$$"sub     0x8,rcx\n\t"
12625        $$emit$$"jge     L_loop\n\t"
12626        $$emit$$"add     0x4,rcx\n\t"
12627        $$emit$$"jl      L_tail\n\t"
12628        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12629        $$emit$$"add     0x20,rax\n\t"
12630        $$emit$$"sub     0x4,rcx\n\t"
12631        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12632        $$emit$$"add     0x4,rcx\n\t"
12633        $$emit$$"jle     L_end\n\t"
12634        $$emit$$"dec     rcx\n\t"
12635        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12636        $$emit$$"vmovq   xmm0,(rax)\n\t"
12637        $$emit$$"add     0x8,rax\n\t"
12638        $$emit$$"dec     rcx\n\t"
12639        $$emit$$"jge     L_sloop\n\t"
12640        $$emit$$"# L_end:\n\t"
12641     } else {
12642        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12643        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12644     }
12645   %}
12646   ins_encode %{
          // Trailing booleans line up with the predicate: is_large() true,
          // word_copy_only() false; the opmask temporary is passed last.
          // NOTE(review): confirm the parameter order against clear_mem().
12647     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12648                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
12649   %}
12650   ins_pipe(pipe_slow);
12651 %}
12652 
12653 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12654                                        Universe dummy, rFlagsReg cr)

12655 %{
        // Large non-constant length ClearArray for UseAVX > 2 when the node is
        // word_copy_only(). Takes an extra opmask temporary (ktmp).
12656   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12657   match(Set dummy (ClearArray (Binary cnt base) val));
12658   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12659 
        // NOTE(review): unlike the non-EVEX word_copy rules, this listing still has
        // a UseFastStosb (byte store) branch, and it prints "xorq rax, rax" although
        // rax carries the $val input -- the listing text may be stale; confirm.
12660   format %{ $$template
12661     if (UseFastStosb) {
12662        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12663        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12664        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12665     } else if (UseXMMForObjInit) {
12666        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12667        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12668        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12669        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12670        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12671        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12672        $$emit$$"add     0x40,rax\n\t"
12673        $$emit$$"# L_zero_64_bytes:\n\t"
12674        $$emit$$"sub     0x8,rcx\n\t"
12675        $$emit$$"jge     L_loop\n\t"
12676        $$emit$$"add     0x4,rcx\n\t"
12677        $$emit$$"jl      L_tail\n\t"
12678        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12679        $$emit$$"add     0x20,rax\n\t"
12680        $$emit$$"sub     0x4,rcx\n\t"
12681        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12682        $$emit$$"add     0x4,rcx\n\t"
12683        $$emit$$"jle     L_end\n\t"
12684        $$emit$$"dec     rcx\n\t"
12685        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12686        $$emit$$"vmovq   xmm0,(rax)\n\t"
12687        $$emit$$"add     0x8,rax\n\t"
12688        $$emit$$"dec     rcx\n\t"
12689        $$emit$$"jge     L_sloop\n\t"
12690        $$emit$$"# L_end:\n\t"
12691     } else {
12692        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12693        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12694     }
12695   %}
12696   ins_encode %{
          // Trailing booleans line up with the predicate: is_large() true,
          // word_copy_only() true; the opmask temporary is passed last.
          // NOTE(review): confirm the parameter order against clear_mem().
12697     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12698                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
12699   %}
12700   ins_pipe(pipe_slow);
12701 %}
12702 
12703 // Small constant length ClearArray for AVX512 targets.
12704 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
12705 %{
        // Constant-count form: cnt is an immediate (immL) and base may be any
        // pointer register. Requires a node that is neither is_large() nor
        // word_copy_only(), MaxVectorSize >= 32, and AVX512VL support.
12706   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
12707             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
12708   match(Set dummy (ClearArray (Binary cnt base) val));
12709   ins_cost(100);
12710   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
12711   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12712   ins_encode %{
          // Passes the count as a compile-time constant, unlike the register-count
          // rules.
12713     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12714   %}
12715   ins_pipe(pipe_slow);
12716 %}
12717 
12718 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12719                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12720 %{
        // StrComp with LL encoding on targets without AVX512VL+BW support.
        // All four inputs are pinned to fixed registers and are clobbered.
12721   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12722   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12723   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12724 
12725   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12726   ins_encode %{
          // knoreg is passed as the last argument because this rule has no opmask
          // operand.
12727     __ string_compare($str1$$Register, $str2$$Register,
12728                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12729                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12730   %}
12731   ins_pipe( pipe_slow );
12732 %}
12733 

14539 
14540   ins_cost(300);
14541   format %{ "call_leaf,runtime " %}
14542   ins_encode(clear_avx, Java_To_Runtime(meth));
14543   ins_pipe(pipe_slow);
14544 %}
14545 
14546 // Call runtime without safepoint and with vector arguments
14547 instruct CallLeafDirectVector(method meth)
14548 %{
14549   match(CallLeafVector);
14550   effect(USE meth);
14551 
14552   ins_cost(300);
14553   format %{ "call_leaf,vector " %}
14554   ins_encode(Java_To_Runtime(meth)); // no clear_avx here, unlike the other Java_To_Runtime leaf calls; MachCallRuntimeNode::ret_addr_offset() likewise omits clear_avx_size() for Op_CallLeafVector (presumably to keep the vector arguments intact; confirm)
14555   ins_pipe(pipe_slow);
14556 %}
14557 
14558 // Call runtime without safepoint
14559 // entry point is null, target holds the address to call
14560 instruct CallLeafNoFPInDirect(rRegP target)
14561 %{
14562   predicate(n->as_Call()->entry_point() == nullptr); // the non-null entry point case is matched by CallLeafNoFPDirect
14563   match(CallLeafNoFP target);
14564 
14565   ins_cost(300);
14566   format %{ "call_leaf_nofp,runtime indirect " %}
14567   ins_encode %{
14568      __ call($target$$Register); // indirect call through the register operand; this rule emits nothing else (no clear_avx encoding)
14569   %}
14570 
14571   ins_pipe(pipe_slow);
14572 %}
14573 
14574 instruct CallLeafNoFPDirect(method meth)
14575 %{
14576   predicate(n->as_Call()->entry_point() != nullptr); // CallLeafNoFP with a known entry point; the null entry point case is matched by CallLeafNoFPInDirect
14577   match(CallLeafNoFP);
14578   effect(USE meth);
14579 
14580   ins_cost(300);
14581   format %{ "call_leaf_nofp,runtime " %}
14582   ins_encode(clear_avx, Java_To_Runtime(meth)); // encodings are emitted in the listed order: clear_avx first, then the runtime call
14583   ins_pipe(pipe_slow);
14584 %}
14585 
14586 // Return Instruction
14587 // Remove the return address & jump to it.
14588 // Notice: We always emit a nop after a ret to make sure there is room
14589 // for safepoint patching
14590 instruct Ret()
14591 %{
14592   match(Return);
14593 
14594   format %{ "ret" %}
14595   ins_encode %{
14596     __ ret(0);
< prev index next >