< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  598 }
  599 
  600 // !!!!! Special hack to get all types of calls to specify the byte offset
  601 //       from the start of the call to the point where the return address
  602 //       will point.
  603 int MachCallStaticJavaNode::ret_addr_offset()
  604 {
        // Static Java call: a fixed 5-byte distance from the start of the call to
        // the return address, extended by whatever clear_avx_size() reports.
  605   int offset = 5; // 5 bytes from start of call to where return address points
        // NOTE(review): clear_avx_size() is presumably the size of an optional
        // vzeroupper emitted ahead of the call (0 when none) -- confirm.
  606   offset += clear_avx_size();
  607   return offset;
  608 }
  609 
  610 int MachCallDynamicJavaNode::ret_addr_offset()
  611 {
        // Dynamic Java call: a fixed 15-byte distance from the start of the call
        // sequence to the return address, extended by clear_avx_size().
  612   int offset = 15; // 15 bytes from start of call to where return address points
        // NOTE(review): clear_avx_size() is presumably the size of an optional
        // vzeroupper emitted ahead of the call (0 when none) -- confirm.
  613   offset += clear_avx_size();
  614   return offset;
  615 }
  616 
  617 int MachCallRuntimeNode::ret_addr_offset() {
        // Runtime call: 13 bytes for the "movq r10,#addr; callq (r10)" sequence.
        // clear_avx_size() is added for every ideal opcode except
        // Op_CallLeafVector.
        // NOTE(review): presumably vector leaf calls are emitted without the
        // AVX-clearing prefix, so the two must stay in sync -- confirm against
        // the CallLeafVector encoding.
  618   int offset = 13; // movq r10,#addr; callq (r10)
  619   if (this->ideal_Opcode() != Op_CallLeafVector) {
  620     offset += clear_avx_size();
  621   }
  622   return offset;
  623 }

  624 //
  625 // Compute padding required for nodes which need alignment
  626 //
  627 
  628 // The address of the call instruction needs to be 4-byte aligned to
  629 // ensure that it does not span a cache line so that it can be patched.
      //
      // Returns the number of padding bytes to insert at current_offset so that
      // the position just past the vzeroupper bytes and the one-byte call opcode
      // is a multiple of alignment_required().
  630 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  631 {
  632   current_offset += clear_avx_size(); // skip vzeroupper
  633   current_offset += 1; // skip call opcode byte
        // Distance from the adjusted offset up to the next aligned offset
        // (0 when it is already aligned).
  634   return align_up(current_offset, alignment_required()) - current_offset;
  635 }
  636 
  637 // The address of the call instruction needs to be 4-byte aligned to
  638 // ensure that it does not span a cache line so that it can be patched.
  639 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  640 {
  641   current_offset += clear_avx_size(); // skip vzeroupper
  642   current_offset += 11; // skip movq instruction + call opcode byte
  643   return align_up(current_offset, alignment_required()) - current_offset;

  829     st->print("\n\t");
  830     st->print("# stack alignment check");
  831 #endif
  832   }
  833   if (C->stub_function() != nullptr) {
  834     st->print("\n\t");
  835     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  836     st->print("\n\t");
  837     st->print("je      fast_entry\t");
  838     st->print("\n\t");
  839     st->print("call    #nmethod_entry_barrier_stub\t");
  840     st->print("\n\tfast_entry:");
  841   }
  842   st->cr();
  843 }
  844 #endif
  845 
  846 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: an optional class-initialization barrier, the
        // verified entry sequence, and then records the frame-complete offset and
        // (when a constant base node exists) the constant table base offset.
  847   Compile* C = ra_->C;
  848 
  849   int framesize = C->output()->frame_size_in_bytes();
  850   int bangsize = C->output()->bang_size_in_bytes();
  851 
  852   if (C->clinit_barrier_on_entry()) {
  853     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  854     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  855 
  856     Label L_skip_barrier;
  857     Register klass = rscratch1;
  858 
          // Load the holder class metadata and run the barrier; the barrier
          // branches to L_skip_barrier on its fast path, otherwise control falls
          // into the jump to the handle-wrong-method stub below.
  859     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  860     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
  861 
  862     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  863 
  864     __ bind(L_skip_barrier);

  865   }
  866 
        // bangsize is passed only when need_stack_bang() asks for it (0 otherwise);
        // the last argument is true exactly when C->stub_function() is set.
  867   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


  868 
        // Record the code offset reached once verified_entry() has been emitted.
  869   C->output()->set_frame_complete(__ offset());
  870 
  871   if (C->has_mach_constant_base_node()) {
  872     // NOTE: We set the table base offset here because users might be
  873     // emitted before MachConstantBaseNode.
  874     ConstantTable& constant_table = C->output()->constant_table();
  875     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  876   }
  877 }
  878 
  879 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  880 {
        // No closed-form size for the prolog: defer to the generic
        // MachNode::size() computation.
  881   return MachNode::size(ra_); // too many variables; just compute it
  882                               // the hard way
  883 }
  884 
  885 int MachPrologNode::reloc() const
  886 {
        // NOTE(review): the trailing comment says "large enough", yet the value
        // returned is 0 -- presumably the prolog reports no relocation entries;
        // confirm with the callers of reloc().
  887   return 0; // a large enough number
  888 }
  889 
  890 //=============================================================================
  891 #ifndef PRODUCT
  892 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  893 {
  894   Compile* C = ra_->C;
  895   if (generate_vzeroupper(C)) {
  896     st->print("vzeroupper");
  897     st->cr(); st->print("\t");
  898   }
  899 
  900   int framesize = C->output()->frame_size_in_bytes();
  901   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  902   // Remove word for return adr already pushed
  903   // and RBP
  904   framesize -= 2*wordSize;

  911   st->print_cr("popq    rbp");
  912   if (do_polling() && C->is_method_compilation()) {
  913     st->print("\t");
  914     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  915                  "ja      #safepoint_stub\t"
  916                  "# Safepoint: poll for GC");
  917   }
  918 }
  919 #endif
  920 
  921 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  922 {
        // Emits the method epilog: optional vzeroupper, frame teardown
        // (addq rsp / popq rbp), an optional reserved-stack check, and the return
        // safepoint poll when polling is enabled for a method compilation.
  923   Compile* C = ra_->C;
  924 
  925   if (generate_vzeroupper(C)) {
  926     // Clear upper bits of YMM registers when current compiled code uses
  927     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  928     __ vzeroupper();
  929   }
  930 
  931   int framesize = C->output()->frame_size_in_bytes();
  932   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  933   // Remove word for return adr already pushed
  934   // and RBP
  935   framesize -= 2*wordSize;
  936 
  937   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  938 
        // Skip the stack adjustment entirely when nothing remains besides the
        // return address and the saved rbp.
  939   if (framesize) {
  940     __ addq(rsp, framesize);
  941   }
  942 
  943   __ popq(rbp);
  944 
  945   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  946     __ reserved_stack_check();
  947   }
  948 
  949   if (do_polling() && C->is_method_compilation()) {
          // During scratch size measurement (in_scratch_emit_size) no stub is
          // allocated or registered; the poll then targets a local dummy label.
  950     Label dummy_label;
  951     Label* code_stub = &dummy_label;
  952     if (!C->output()->in_scratch_emit_size()) {
  953       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  954       C->output()->add_stub(stub);
  955       code_stub = &stub->entry();
  956     }
  957     __ relocate(relocInfo::poll_return_type);
  958     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  959   }
  960 }
  961 
  962 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
  963 {
        // No closed-form size for the epilog: defer to the generic
        // MachNode::size() computation.
  964   return MachNode::size(ra_); // too many variables; just compute it
  965                               // the hard way
  966 }
  967 
  968 int MachEpilogNode::reloc() const
  969 {
        // NOTE(review): presumably an upper bound on the relocation entries the
        // epilog can produce -- confirm with the callers of reloc().
  970   return 2; // a large enough number
  971 }
  972 
  973 const Pipeline* MachEpilogNode::pipeline() const
  974 {
        // The epilog has no pipeline description of its own; it returns the one
        // provided by MachNode::pipeline_class().
  975   return MachNode::pipeline_class();
  976 }
  977 
  978 //=============================================================================
  979 
  980 enum RC {
        // rc_bad is the first enumerator and therefore has the value 0.
        // NOTE(review): the remaining names suggest location classes (general
        // purpose register, opmask register, float/vector register, stack slot)
        // -- confirm against the users of this enum, which are outside this
        // excerpt.
  981   rc_bad,
  982   rc_int,
  983   rc_kreg,
  984   rc_float,
  985   rc_stack
  986 };
  987 

 1549 #endif
 1550 
 1551 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1552 {
        // Materializes the address rsp + offset in this node's allocated register.
        // The offset is derived from the first element of input register mask 0.
 1553   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1554   int reg = ra_->get_encode(this);
 1555 
 1556   __ lea(as_Register(reg), Address(rsp, offset));
 1557 }
 1558 
 1559 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1560 {
        // Size in bytes of the instruction produced by emit(): one byte more
        // when the register encoding is above 15, three bytes more when the offset
        // is 0x80 or larger.
        // NOTE(review): the 3-byte step is presumably an 8-bit versus 32-bit
        // displacement -- these constants must match what lea() really emits.
 1561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1562   if (ra_->get_encode(this) > 15) {
 1563     return (offset < 0x80) ? 6 : 9; // REX2
 1564   } else {
 1565     return (offset < 0x80) ? 5 : 8; // REX
 1566   }
 1567 }
 1568 











































 1569 //=============================================================================
 1570 #ifndef PRODUCT
 1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1572 {
        // Prints a textual rendering of the unverified-entry inline cache check:
        // load the receiver klass, compare it with the speculated klass held in the
        // inline cache data, and jump to the IC miss stub on mismatch.  The
        // 32-bit (movl/cmpl) form is printed with UseCompressedClassPointers,
        // the 64-bit (movq/cmpq) form otherwise.
 1573   if (UseCompressedClassPointers) {
 1574     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1575     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1576   } else {
          // Class pointers are not compressed on this path, so the annotation
          // must not call the loaded value a compressed klass.
 1577     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1578     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1579   }
 1580   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1581 }
 1582 #endif
 1583 
 1584 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1585 {
        // The whole unverified entry point is produced by ic_check(), which is
        // handed InteriorEntryAlignment.
 1586   __ ic_check(InteriorEntryAlignment);
 1587 }
 1588 
 1589 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1590 {
        // No closed-form size for the unverified entry point: defer to the
        // generic MachNode::size() computation.
 1591   return MachNode::size(ra_); // too many variables; just compute it
 1592                               // the hard way
 1593 }
 1594 
 1595 
 1596 //=============================================================================
 1597 
 1598 bool Matcher::supports_vector_calling_convention(void) {
        // The vector calling convention is reported as available exactly when the
        // EnableVectorSupport flag is set.
 1599   return EnableVectorSupport;
 1600 }
 1601 
 1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Builds the (hi, lo) register pair describing where a vector value is
        // returned.  lo is always XMM0_num; hi is selected by ideal_reg:
        //   Op_VecX -> XMM0d_num, Op_VecY -> XMM0h_num, Op_VecZ -> XMM0p_num,
        //   anything else -> XMM0b_num.
        // Asserts that EnableVectorSupport is on.
 1603   assert(EnableVectorSupport, "sanity");
 1604   int lo = XMM0_num;
 1605   int hi = XMM0b_num;
 1606   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1607   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1608   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1609   return OptoRegPair(hi, lo);
 1610 }
 1611 
 1612 // Is this branch offset short enough that a short branch can be used?
 1613 //
 1614 // NOTE: If the platform does not provide any short branch variants, then
 1615 //       this method should return false for offset 0.

 3047   %}
 3048 %}
 3049 
 3050 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Matches base + (ConvI2L(idx) << scale) + off and exposes it as a memory
      // operand with base, index, scale and displacement.
 3051 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3052 %{
 3053   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type found at n->in(2)->in(3)->in(1) has a
        // non-negative lower bound.
        // NOTE(review): that node is presumably the ConvI2L of idx, i.e. the
        // index is known to be non-negative -- confirm.
 3054   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3055   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3056 
 3057   op_cost(10);
 3058   format %{"[$reg + $off + $idx << $scale]" %}
 3059   interface(MEMORY_INTER) %{
 3060     base($reg);
 3061     index($idx);
 3062     scale($scale);
 3063     disp($off);
 3064   %}
 3065 %}
 3066 
















 3067 // Indirect Narrow Oop Plus Offset Operand
 3068 // Note: the x86 architecture doesn't support "scale * index + offset" without
 3069 // a base, so we can't free r12 even with CompressedOops::base() == nullptr.
 3070 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Only used when compressed oops are enabled with a shift equal to
        // Address::times_8, which is what the hard-coded scale(0x3) below
        // encodes.
 3071   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3072   constraint(ALLOC_IN_RC(ptr_reg));
 3073   match(AddP (DecodeN reg) off);
 3074 
 3075   op_cost(10);
 3076   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
        // The narrow oop register becomes the index; the base is fixed to R12.
 3077   interface(MEMORY_INTER) %{
 3078     base(0xc); // R12
 3079     index($reg);
 3080     scale(0x3);
 3081     disp($off);
 3082   %}
 3083 %}
 3084 
 3085 // Indirect Memory Operand
 3086 operand indirectNarrow(rRegN reg)

 3393     equal(0x4, "e");
 3394     not_equal(0x5, "ne");
 3395     less(0x2, "b");
 3396     greater_equal(0x3, "ae");
 3397     less_equal(0x6, "be");
 3398     greater(0x7, "a");
 3399     overflow(0x0, "o");
 3400     no_overflow(0x1, "no");
 3401   %}
 3402 %}
 3403 
 3404 //----------OPERAND CLASSES----------------------------------------------------
 3405 // Operand Classes are groups of operands that are used to simplify
 3406 // instruction definitions by not requiring the AD writer to specify separate
 3407 // instructions for every form of operand when the instruction accepts
 3408 // multiple operand types with the same basic encoding and format.  The classic
 3409 // case of this is memory operands.
 3410 
      // "memory" groups every addressing-mode operand listed below, including the
      // compressed-oop form and the *Narrow variants, so an instruction can take
      // any of them through one operand class.
 3411 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3412                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3413                indCompressedOopOffset,
 3414                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3415                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3416                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3417 
 3418 //----------PIPELINE-----------------------------------------------------------
 3419 // Rules which define the behavior of the target architectures pipeline.
 3420 pipeline %{
 3421 
 3422 //----------ATTRIBUTES---------------------------------------------------------
 3423 attributes %{
 3424   variable_size_instructions;        // Variable size instructions
 3425   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3426   instruction_unit_size = 1;         // The instruction unit is 1 byte long
 3427   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3428   instruction_fetch_units = 1;       // of 16 bytes
 3429 
 3430   // List of nop instructions
 3431   nops( MachNop );
 3432 %}
 3433 

 5934   format %{ "MEMBAR-storestore (empty encoding)" %}
 5935   ins_encode( );
 5936   ins_pipe(empty);
 5937 %}
 5938 
 5939 //----------Move Instructions--------------------------------------------------
 5940 
 5941 instruct castX2P(rRegP dst, rRegL src)
 5942 %{
        // Reinterprets a long as a pointer: a plain register-to-register move.
 5943   match(Set dst (CastX2P src));
 5944 
 5945   format %{ "movq    $dst, $src\t# long->ptr" %}
 5946   ins_encode %{
          // Nothing is emitted when dst and src were assigned the same register.
 5947     if ($dst$$reg != $src$$reg) {
 5948       __ movptr($dst$$Register, $src$$Register);
 5949     }
 5950   %}
 5951   ins_pipe(ialu_reg_reg); // XXX
 5952 %}
 5953 


























 5954 instruct castP2X(rRegL dst, rRegP src)
 5955 %{
        // Reinterprets a pointer as a long: a plain register-to-register move.
 5956   match(Set dst (CastP2X src));
 5957 
 5958   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5959   ins_encode %{
          // Nothing is emitted when dst and src were assigned the same register.
 5960     if ($dst$$reg != $src$$reg) {
 5961       __ movptr($dst$$Register, $src$$Register);
 5962     }
 5963   %}
 5964   ins_pipe(ialu_reg_reg); // XXX
 5965 %}
 5966 
 5967 // Convert oop into int for vectors alignment masking
 5968 instruct convP2I(rRegI dst, rRegP src)
 5969 %{
 5970   match(Set dst (ConvL2I (CastP2X src)));
 5971 
 5972   format %{ "movl    $dst, $src\t# ptr -> int" %}
 5973   ins_encode %{

12185   effect(DEF dst, USE src);
12186   ins_cost(100);
12187   format %{ "movd    $dst,$src\t# MoveI2F" %}
12188   ins_encode %{
12189     __ movdl($dst$$XMMRegister, $src$$Register);
12190   %}
12191   ins_pipe( pipe_slow );
12192 %}
12193 
12194 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves a long held in a general purpose register into an XMM register
        // for the MoveL2D ideal node.
12195   match(Set dst (MoveL2D src));
12196   effect(DEF dst, USE src);
12197   ins_cost(100);
        // NOTE(review): the format string prints "movd" while the encoding calls
        // movdq() -- confirm whether the printed mnemonic is intended.
12198   format %{ "movd    $dst,$src\t# MoveL2D" %}
12199   ins_encode %{
12200      __ movdq($dst$$XMMRegister, $src$$Register);
12201   %}
12202   ins_pipe( pipe_slow );
12203 %}
12204 

12205 // Fast clearing of an array
12206 // Small non-constant length ClearArray for non-AVX512 targets.
12207 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12208                   Universe dummy, rFlagsReg cr)
12209 %{
12210   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12211   match(Set dummy (ClearArray cnt base));
12212   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































12213 
12214   format %{ $$template
12215     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12216     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12217     $$emit$$"jg      LARGE\n\t"
12218     $$emit$$"dec     rcx\n\t"
12219     $$emit$$"js      DONE\t# Zero length\n\t"
12220     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12221     $$emit$$"dec     rcx\n\t"
12222     $$emit$$"jge     LOOP\n\t"
12223     $$emit$$"jmp     DONE\n\t"
12224     $$emit$$"# LARGE:\n\t"
12225     if (UseFastStosb) {
12226        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12227        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12228     } else if (UseXMMForObjInit) {
12229        $$emit$$"mov     rdi,rax\n\t"
12230        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12231        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12232        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12240        $$emit$$"jl      L_tail\n\t"
12241        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12242        $$emit$$"add     0x20,rax\n\t"
12243        $$emit$$"sub     0x4,rcx\n\t"
12244        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12245        $$emit$$"add     0x4,rcx\n\t"
12246        $$emit$$"jle     L_end\n\t"
12247        $$emit$$"dec     rcx\n\t"
12248        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12249        $$emit$$"vmovq   xmm0,(rax)\n\t"
12250        $$emit$$"add     0x8,rax\n\t"
12251        $$emit$$"dec     rcx\n\t"
12252        $$emit$$"jge     L_sloop\n\t"
12253        $$emit$$"# L_end:\n\t"
12254     } else {
12255        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12256     }
12257     $$emit$$"# DONE"
12258   %}
12259   ins_encode %{
12260     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12261                  $tmp$$XMMRegister, false, knoreg);
12262   %}
12263   ins_pipe(pipe_slow);
12264 %}
12265 
12266 // Small non-constant length ClearArray for AVX512 targets.
12267 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12268                        Universe dummy, rFlagsReg cr)
12269 %{
12270   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12271   match(Set dummy (ClearArray cnt base));
12272   ins_cost(125);
12273   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12274 
12275   format %{ $$template
12276     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12277     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12278     $$emit$$"jg      LARGE\n\t"
12279     $$emit$$"dec     rcx\n\t"
12280     $$emit$$"js      DONE\t# Zero length\n\t"
12281     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12282     $$emit$$"dec     rcx\n\t"
12283     $$emit$$"jge     LOOP\n\t"
12284     $$emit$$"jmp     DONE\n\t"
12285     $$emit$$"# LARGE:\n\t"
12286     if (UseFastStosb) {
12287        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12288        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12289     } else if (UseXMMForObjInit) {
12290        $$emit$$"mov     rdi,rax\n\t"
12291        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12292        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12293        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12301        $$emit$$"jl      L_tail\n\t"
12302        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12303        $$emit$$"add     0x20,rax\n\t"
12304        $$emit$$"sub     0x4,rcx\n\t"
12305        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12306        $$emit$$"add     0x4,rcx\n\t"
12307        $$emit$$"jle     L_end\n\t"
12308        $$emit$$"dec     rcx\n\t"
12309        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12310        $$emit$$"vmovq   xmm0,(rax)\n\t"
12311        $$emit$$"add     0x8,rax\n\t"
12312        $$emit$$"dec     rcx\n\t"
12313        $$emit$$"jge     L_sloop\n\t"
12314        $$emit$$"# L_end:\n\t"
12315     } else {
12316        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12317     }
12318     $$emit$$"# DONE"
12319   %}
12320   ins_encode %{
12321     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12322                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
12323   %}
12324   ins_pipe(pipe_slow);
12325 %}
12326 
12327 // Large non-constant length ClearArray for non-AVX512 targets.
      // The count, base and zero operands are pinned to rcx, rdi and rax by their
      // operand classes; cnt and base are consumed and clobbered (USE_KILL), tmp
      // is a scratch XMM register, and rax plus the flags are killed.
12328 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12329                         Universe dummy, rFlagsReg cr)
12330 %{
12331   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
12332   match(Set dummy (ClearArray cnt base));
12333   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
12334 
        // The template below prints one of three listings, chosen by
        // UseFastStosb and UseXMMForObjInit; it is display text only.
12335   format %{ $$template
12336     if (UseFastStosb) {
12337        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12338        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12339        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12340     } else if (UseXMMForObjInit) {
12341        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12342        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12343        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12344        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12345        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12346        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12347        $$emit$$"add     0x40,rax\n\t"
12348        $$emit$$"# L_zero_64_bytes:\n\t"
12349        $$emit$$"sub     0x8,rcx\n\t"
12350        $$emit$$"jge     L_loop\n\t"
12351        $$emit$$"add     0x4,rcx\n\t"
12352        $$emit$$"jl      L_tail\n\t"
12353        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12354        $$emit$$"add     0x20,rax\n\t"
12355        $$emit$$"sub     0x4,rcx\n\t"
12356        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12357        $$emit$$"add     0x4,rcx\n\t"
12358        $$emit$$"jle     L_end\n\t"
12359        $$emit$$"dec     rcx\n\t"
12360        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12361        $$emit$$"vmovq   xmm0,(rax)\n\t"
12362        $$emit$$"add     0x8,rax\n\t"
12363        $$emit$$"dec     rcx\n\t"
12364        $$emit$$"jge     L_sloop\n\t"
12365        $$emit$$"# L_end:\n\t"
12366     } else {
12367        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12368        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12369     }
12370   %}
12371   ins_encode %{
          // The real code comes from clear_mem(); no opmask register is supplied
          // (knoreg).
          // NOTE(review): the boolean argument is presumably the "is large"
          // flag matching the predicate -- confirm against clear_mem().
12372     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12373                  $tmp$$XMMRegister, true, knoreg);
12374   %}
12375   ins_pipe(pipe_slow);
12376 %}
12377 
12378 // Large non-constant length ClearArray for AVX512 targets.
      // The count, base and zero operands are pinned to rcx, rdi and rax by their
      // operand classes; cnt and base are consumed and clobbered (USE_KILL), tmp
      // and ktmp are scratch registers, and rax plus the flags are killed.
12379 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12380                              Universe dummy, rFlagsReg cr)
12381 %{
12382   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
12383   match(Set dummy (ClearArray cnt base));
12384   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12385 
        // The template below prints one of three listings, chosen by
        // UseFastStosb and UseXMMForObjInit; it is display text only.
12386   format %{ $$template
12387     if (UseFastStosb) {
12388        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12389        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12390        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12391     } else if (UseXMMForObjInit) {
12392        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12393        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12394        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12395        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12396        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12397        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12398        $$emit$$"add     0x40,rax\n\t"
12399        $$emit$$"# L_zero_64_bytes:\n\t"
12400        $$emit$$"sub     0x8,rcx\n\t"
12401        $$emit$$"jge     L_loop\n\t"
12402        $$emit$$"add     0x4,rcx\n\t"
12403        $$emit$$"jl      L_tail\n\t"
12404        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12405        $$emit$$"add     0x20,rax\n\t"
12406        $$emit$$"sub     0x4,rcx\n\t"
12407        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12408        $$emit$$"add     0x4,rcx\n\t"
12409        $$emit$$"jle     L_end\n\t"
12410        $$emit$$"dec     rcx\n\t"
12411        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12412        $$emit$$"vmovq   xmm0,(rax)\n\t"
12413        $$emit$$"add     0x8,rax\n\t"
12414        $$emit$$"dec     rcx\n\t"
12415        $$emit$$"jge     L_sloop\n\t"
12416        $$emit$$"# L_end:\n\t"
12417     } else {
12418        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12419        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12420     }
12421   %}
12422   ins_encode %{
          // The real code comes from clear_mem(), which here also receives the
          // temporary opmask register.
          // NOTE(review): the boolean argument is presumably the "is large"
          // flag matching the predicate -- confirm against clear_mem().
12423     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12424                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
12425   %}
12426   ins_pipe(pipe_slow);
12427 %}
12428 
12429 // Small constant length ClearArray for AVX512 targets.
      // The count is an immediate (immL), and unlike the rep_stos* forms the base
      // and zero operands are not pinned to rdi/rax.
12430 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
12431 %{
        // Requires a non-large ClearArray, vectors of at least 32 bytes and
        // AVX512VL support.
12432   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
12433   match(Set dummy (ClearArray cnt base));

12434   ins_cost(100);
12435   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
12436   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12437   ins_encode %{
          // Uses the clear_mem() overload that takes the count as a constant.
12438    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12439   %}
12440   ins_pipe(pipe_slow);
12441 %}
12442 
12443 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12444                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12445 %{
        // StrComp for the LL encoding on targets without AVX512VL+BW.  The
        // operands are pinned to fixed registers by their operand classes, and
        // both strings and both counts are clobbered (USE_KILL).
12446   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12447   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12448   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12449 
12450   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12451   ins_encode %{
          // No opmask register is supplied on this path (knoreg).
12452     __ string_compare($str1$$Register, $str2$$Register,
12453                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12454                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12455   %}
12456   ins_pipe( pipe_slow );
12457 %}
12458 

14291 
14292   ins_cost(300);
14293   format %{ "call_leaf,runtime " %}
14294   ins_encode(clear_avx, Java_To_Runtime(meth));
14295   ins_pipe(pipe_slow);
14296 %}
14297 
14298 // Call runtime without safepoint and with vector arguments
14299 instruct CallLeafDirectVector(method meth)
14300 %{
14301   match(CallLeafVector);
14302   effect(USE meth);
14303 
14304   ins_cost(300);
14305   format %{ "call_leaf,vector " %}
        // The encoding consists of Java_To_Runtime only; no clear_avx step is
        // listed.
        // NOTE(review): presumably vector argument registers must survive up to
        // the call -- confirm.
14306   ins_encode(Java_To_Runtime(meth));
14307   ins_pipe(pipe_slow);
14308 %}
14309 
14310 // Call runtime without safepoint
14311 instruct CallLeafNoFPDirect(method meth)
14312 %{
14313   match(CallLeafNoFP);
14314   effect(USE meth);
14315 
14316   ins_cost(300);
14317   format %{ "call_leaf_nofp,runtime " %}
        // Two encoding steps, in order: clear_avx, then Java_To_Runtime.
14318   ins_encode(clear_avx, Java_To_Runtime(meth));
14319   ins_pipe(pipe_slow);
14320 %}
14321 
14322 // Return Instruction
14323 // Remove the return address & jump to it.
14324 // Notice: We always emit a nop after a ret to make sure there is room
14325 // for safepoint patching
14326 instruct Ret()
14327 %{
14328   match(Return);
14329 
14330   format %{ "ret" %}
14331   ins_encode %{
14332     __ ret(0);

  598 }
  599 
  600 // !!!!! Special hack to get all types of calls to specify the byte offset
  601 //       from the start of the call to the point where the return address
  602 //       will point.
  603 int MachCallStaticJavaNode::ret_addr_offset()
  604 {
        // Static Java call: a fixed 5-byte distance from the start of the call to
        // the return address, extended by whatever clear_avx_size() reports.
  605   int offset = 5; // 5 bytes from start of call to where return address points
        // NOTE(review): clear_avx_size() is presumably the size of an optional
        // vzeroupper emitted ahead of the call (0 when none) -- confirm.
  606   offset += clear_avx_size();
  607   return offset;
  608 }
  609 
  610 int MachCallDynamicJavaNode::ret_addr_offset()
  611 {
        // Dynamic Java call: a fixed 15-byte distance from the start of the call
        // sequence to the return address, extended by clear_avx_size().
  612   int offset = 15; // 15 bytes from start of call to where return address points
        // NOTE(review): clear_avx_size() is presumably the size of an optional
        // vzeroupper emitted ahead of the call (0 when none) -- confirm.
  613   offset += clear_avx_size();
  614   return offset;
  615 }
  616 
  617 int MachCallRuntimeNode::ret_addr_offset() {
        // Two shapes of runtime call are distinguished here:
        //  - no static entry point (_entry_point == nullptr): a 3-byte indirect
        //    call through a register, returned as-is with no clear_avx_size()
        //    adjustment;
        //  - otherwise: 13 bytes for "movq r10,#addr; callq (r10)", plus
        //    clear_avx_size() for every ideal opcode except Op_CallLeafVector.
  618   if (_entry_point == nullptr) {
  619     // CallLeafNoFPInDirect
  620     return 3; // callq (register)
  621   }
  622   int offset = 13; // movq r10,#addr; callq (r10)
  623   if (this->ideal_Opcode() != Op_CallLeafVector) {
  624     offset += clear_avx_size();
  625   }
  626   return offset;
  627 }
  628 
  629 //
  630 // Compute padding required for nodes which need alignment
  631 //
  632 
  633 // The address of the call instruction needs to be 4-byte aligned to
  634 // ensure that it does not span a cache line so that it can be patched.
      //
      // Returns the number of padding bytes to insert at current_offset so that
      // the position just past the vzeroupper bytes and the one-byte call opcode
      // is a multiple of alignment_required().
  635 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  636 {
  637   current_offset += clear_avx_size(); // skip vzeroupper
  638   current_offset += 1; // skip call opcode byte
        // Distance from the adjusted offset up to the next aligned offset
        // (0 when it is already aligned).
  639   return align_up(current_offset, alignment_required()) - current_offset;
  640 }
  641 
  642 // The address of the call instruction needs to be 4-byte aligned to
  643 // ensure that it does not span a cache line so that it can be patched.
  644 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  645 {
  646   current_offset += clear_avx_size(); // skip vzeroupper
  647   current_offset += 11; // skip movq instruction + call opcode byte
  648   return align_up(current_offset, alignment_required()) - current_offset;

  834     st->print("\n\t");
  835     st->print("# stack alignment check");
  836 #endif
  837   }
  838   if (C->stub_function() != nullptr) {
  839     st->print("\n\t");
  840     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  841     st->print("\n\t");
  842     st->print("je      fast_entry\t");
  843     st->print("\n\t");
  844     st->print("call    #nmethod_entry_barrier_stub\t");
  845     st->print("\n\tfast_entry:");
  846   }
  847   st->cr();
  848 }
  849 #endif
  850 
  851 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: the verified entry sequence, the entry barrier
        // (skipped when C->stub_function() is set), the binding of the
        // verified-entry label, and then the frame-complete and constant-table
        // bookkeeping.
  852   Compile* C = ra_->C;
  853 
  854   __ verified_entry(C);
  855 
  856   if (ra_->C->stub_function() == nullptr) {
  857     __ entry_barrier();
  858   }
  859 
        // The label is bound only for real emission, not while the node is being
        // emitted into scratch space for size measurement.
  860   if (!Compile::current()->output()->in_scratch_emit_size()) {
  861     __ bind(*_verified_entry);
  862   }
  863 
        // Record the code offset reached once the entry code has been emitted.
  864   C->output()->set_frame_complete(__ offset());
  865 
  866   if (C->has_mach_constant_base_node()) {
  867     // NOTE: We set the table base offset here because users might be
  868     // emitted before MachConstantBaseNode.
  869     ConstantTable& constant_table = C->output()->constant_table();
  870     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  871   }
  872 }
  873 






  874 int MachPrologNode::reloc() const
  875 {
        // NOTE(review): the trailing comment says "large enough", yet the value
        // returned is 0 -- presumably the prolog reports no relocation entries;
        // confirm with the callers of reloc().
  876   return 0; // a large enough number
  877 }
  878 
  879 //=============================================================================
  880 #ifndef PRODUCT
  881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  882 {
  883   Compile* C = ra_->C;
  884   if (generate_vzeroupper(C)) {
  885     st->print("vzeroupper");
  886     st->cr(); st->print("\t");
  887   }
  888 
  889   int framesize = C->output()->frame_size_in_bytes();
  890   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  891   // Remove word for return adr already pushed
  892   // and RBP
  893   framesize -= 2*wordSize;

  900   st->print_cr("popq    rbp");
  901   if (do_polling() && C->is_method_compilation()) {
  902     st->print("\t");
  903     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  904                  "ja      #safepoint_stub\t"
  905                  "# Safepoint: poll for GC");
  906   }
  907 }
  908 #endif
  909 
  910 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  911 {
        // Emits the method epilog: optional vzeroupper, frame removal through
        // remove_frame(), an optional reserved-stack check, and the return
        // safepoint poll when polling is enabled for a method compilation.
  912   Compile* C = ra_->C;
  913 
  914   if (generate_vzeroupper(C)) {
  915     // Clear upper bits of YMM registers when current compiled code uses
  916     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  917     __ vzeroupper();
  918   }
  919 
  920   // Subtract two words to account for return address and rbp
  921   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
        // remove_frame() is also told whether the compilation needs stack repair.
  922   __ remove_frame(initial_framesize, C->needs_stack_repair());
  923 
  924   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  925     __ reserved_stack_check();
  926   }
  927 
  928   if (do_polling() && C->is_method_compilation()) {
          // During scratch size measurement (in_scratch_emit_size) no stub is
          // allocated or registered; the poll then targets a local dummy label.
  929     Label dummy_label;
  930     Label* code_stub = &dummy_label;
  931     if (!C->output()->in_scratch_emit_size()) {
  932       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  933       C->output()->add_stub(stub);
  934       code_stub = &stub->entry();
  935     }
  936     __ relocate(relocInfo::poll_return_type);
  937     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  938   }
  939 }
  940 






  941 int MachEpilogNode::reloc() const
  942 {
      // Presumably an upper bound on the relocation entries the epilog may add
      // (emit() issues one explicit relocate() for the return poll) -- TODO confirm.
  943   return 2; // a large enough number
  944 }
  945 
  946 const Pipeline* MachEpilogNode::pipeline() const
  947 {
      // The epilog has no pipeline description of its own; it returns whatever
      // MachNode::pipeline_class() provides.
  948   return MachNode::pipeline_class();
  949 }
  950 
  951 //=============================================================================
  952 
      // Register-class tags. NOTE(review): member meanings below are inferred
      // from the names only; the users of this enum are outside this excerpt.
  953 enum RC {
  954   rc_bad,    // presumably: no valid class
  955   rc_int,    // presumably: general-purpose register
  956   rc_kreg,   // presumably: opmask (k) register
  957   rc_float,  // presumably: XMM/vector register
  958   rc_stack   // presumably: stack slot
  959 };
  960 

 1522 #endif
 1523 
 1524 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1525 {
      // Emits "lea reg, [rsp + offset]": offset is what reg2offset() reports for
      // the first element of input register mask 0, and reg is the encoding the
      // register allocator assigned to this node.
 1526   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1527   int reg = ra_->get_encode(this);
 1528 
 1529   __ lea(as_Register(reg), Address(rsp, offset));
 1530 }
 1531 
 1532 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1533 {
      // Byte size of the lea produced by emit(). Offsets below 0x80 take the
      // 3-byte-shorter form (presumably disp8 instead of disp32), and register
      // encodings above 15 cost one extra byte for the REX2 prefix.
 1534   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1535   if (ra_->get_encode(this) > 15) {
 1536     return (offset < 0x80) ? 6 : 9; // REX2
 1537   } else {
 1538     return (offset < 0x80) ? 5 : 8; // REX
 1539   }
 1540 }
 1541 
 1542 //=============================================================================
 1543 #ifndef PRODUCT
      // Non-product builds only: prints just the node name, not the emitted
      // instruction sequence.
 1544 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1545 {
 1546   st->print_cr("MachVEPNode");
 1547 }
 1548 #endif
 1549 
 1550 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1551 {
      // Emits an entry point in front of the verified entry. When !_verified it
      // is just an inline-cache check; otherwise it unpacks inline-type arguments,
      // builds the frame and jumps to _verified_entry. In both cases the code is
      // padded with nops so that this node's size is a multiple of 4 bytes.
 1552   CodeBuffer* cbuf = masm->code();
      // Code size before this node emits anything; used for the padding below.
 1553   uint insts_size = cbuf->insts_size();
 1554   if (!_verified) {
 1555     __ ic_check(1);
 1556   } else {
 1557     // TODO 8284443 Avoid creation of temporary frame
 1558     if (ra_->C->stub_function() == nullptr) {
      // Temporary frame: set up, run the entry barrier, then tear down again.
 1559       __ verified_entry(ra_->C, 0);
 1560       __ entry_barrier();
 1561       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1562       __ remove_frame(initial_framesize, false);
 1563     }
 1564     // Unpack inline type args passed as oop and then jump to
 1565     // the verified entry point (skipping the unverified entry).
 1566     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1567     // Emit code for verified entry and save increment for stack repair on return
 1568     __ verified_entry(ra_->C, sp_inc);
      // During scratch size measurement the jump targets a throwaway label
      // instead of *_verified_entry.
 1569     if (Compile::current()->output()->in_scratch_emit_size()) {
 1570       Label dummy_verified_entry;
 1571       __ jmp(dummy_verified_entry);
 1572     } else {
 1573       __ jmp(*_verified_entry);
 1574     }
 1575   }
 1576   /* WARNING these NOPs are critical so that verified entry point is properly
 1577      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
      // Bytes emitted so far by this node, rounded up to the next multiple of 4.
 1578   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 1579   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1580   if (nops_cnt > 0) {
 1581     __ nop(nops_cnt);
 1582   }
 1583 }
 1584 
 1585 //=============================================================================
 1586 #ifndef PRODUCT
      // Non-product builds only: prints a textual description of the inline-cache
      // check at the unverified entry point. The klass is loaded and compared as
      // 32 bits when UseCompressedClassPointers is set and as 64 bits otherwise;
      // the annotation on the load line names which form is being printed.
 1587 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1588 {
 1589   if (UseCompressedClassPointers) {
 1590     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1591     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1592   } else {
 1593     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1594     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1595   }
 1596   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1597 }
 1598 #endif
 1599 
 1600 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1601 {
      // The whole unverified entry point is the inline-cache check, emitted with
      // InteriorEntryAlignment passed as its argument (presumably the alignment
      // of the code that follows -- TODO confirm against ic_check()).
 1602   __ ic_check(InteriorEntryAlignment);
 1603 }
 1604 







 1605 //=============================================================================
 1606 
      // The vector calling convention is available exactly when
      // EnableVectorSupport is set.
 1607 bool Matcher::supports_vector_calling_convention(void) {
 1608   return EnableVectorSupport;
 1609 }
 1610 
      // Returns the register pair holding a vector return value. The low half is
      // always XMM0_num; the high half defaults to XMM0b_num and is widened to
      // XMM0d_num / XMM0h_num / XMM0p_num for Op_VecX / Op_VecY / Op_VecZ.
      // Asserts that EnableVectorSupport is set.
 1611 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1612   assert(EnableVectorSupport, "sanity");
 1613   int lo = XMM0_num;
 1614   int hi = XMM0b_num;
 1615   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1616   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1617   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1618   return OptoRegPair(hi, lo);
 1619 }
 1620 
 1621 // Is this branch offset short enough that a short branch can be used?
 1622 //
 1623 // NOTE: If the platform does not provide any short branch variants, then
 1624 //       this method should return false for offset 0.

 3056   %}
 3057 %}
 3058 
 3059 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Address form: base $reg + (int index $idx converted to long, shifted left
      // by $scale) + 32-bit displacement $off. The predicate walks from the outer
      // AddP to the long-typed input of the shift and only accepts it when that
      // type's lower bound (_lo) is >= 0, i.e. the index is known non-negative.
 3060 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3061 %{
 3062   constraint(ALLOC_IN_RC(ptr_reg));
 3063   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3064   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3065 
 3066   op_cost(10);
 3067   format %{"[$reg + $off + $idx << $scale]" %}
 3068   interface(MEMORY_INTER) %{
 3069     base($reg);
 3070     index($idx);
 3071     scale($scale);
 3072     disp($off);
 3073   %}
 3074 %}
 3075 
 3076 // Indirect Narrow Oop Operand
      // Folds a DecodeN into the addressing mode as [R12 + $reg << 3]. Only
      // usable when compressed oops are enabled and the oop shift equals
      // Address::times_8, which is what the fixed scale of 3 encodes.
 3077 operand indCompressedOop(rRegN reg) %{
 3078   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3079   constraint(ALLOC_IN_RC(ptr_reg));
 3080   match(DecodeN reg);
 3081 
 3082   op_cost(10);
 3083   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3084   interface(MEMORY_INTER) %{
 3085     base(0xc); // R12
 3086     index($reg);
 3087     scale(0x3);
 3088     disp(0x0);
 3089   %}
 3090 %}
 3091 
 3092 // Indirect Narrow Oop Plus Offset Operand
 3093 // Note: the x86 architecture doesn't support "scale * index + offset" without
 3094 // a base, so we can't free r12 even with CompressedOops::base() == nullptr.
      // Same as the plain narrow-oop form but with the 32-bit displacement $off
      // folded in: [R12 + $reg << 3 + $off].
 3095 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 3096   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3097   constraint(ALLOC_IN_RC(ptr_reg));
 3098   match(AddP (DecodeN reg) off);
 3099 
 3100   op_cost(10);
 3101   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3102   interface(MEMORY_INTER) %{
 3103     base(0xc); // R12
 3104     index($reg);
 3105     scale(0x3);
 3106     disp($off);
 3107   %}
 3108 %}
 3109 
 3110 // Indirect Memory Operand
 3111 operand indirectNarrow(rRegN reg)

 3418     equal(0x4, "e");
 3419     not_equal(0x5, "ne");
 3420     less(0x2, "b");
 3421     greater_equal(0x3, "ae");
 3422     less_equal(0x6, "be");
 3423     greater(0x7, "a");
 3424     overflow(0x0, "o");
 3425     no_overflow(0x1, "no");
 3426   %}
 3427 %}
 3428 
 3429 //----------OPERAND CLASSES----------------------------------------------------
 3430 // Operand Classes are groups of operands that are used to simplify
 3431 // instruction definitions by not requiring the AD writer to specify separate
 3432 // instructions for every form of operand when the instruction accepts
 3433 // multiple operand types with the same basic encoding and format.  The classic
 3434 // case of this is memory operands.
 3435 
      // "memory" groups every addressing-mode operand listed below, including the
      // compressed-oop and *Narrow variants, so that a single rule taking a
      // memory operand accepts any of them.
 3436 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3437                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3438                indCompressedOop, indCompressedOopOffset,
 3439                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3440                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3441                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3442 
 3443 //----------PIPELINE-----------------------------------------------------------
 3444 // Rules which define the behavior of the target architectures pipeline.
 3445 pipeline %{
 3446 
 3447 //----------ATTRIBUTES---------------------------------------------------------
 3448 attributes %{
 3449   variable_size_instructions;        // Variable size instructions
 3450   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3451   instruction_unit_size = 1;         // Instruction size granularity is 1 byte
 3452   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3453   instruction_fetch_units = 1;       // of 16 bytes
 3454 
 3455   // List of nop instructions
 3456   nops( MachNop );
 3457 %}
 3458 

 5959   format %{ "MEMBAR-storestore (empty encoding)" %}
 5960   ins_encode( );
 5961   ins_pipe(empty);
 5962 %}
 5963 
 5964 //----------Move Instructions--------------------------------------------------
 5965 
 5966 instruct castX2P(rRegP dst, rRegL src)
 5967 %{
      // Reinterprets a long as a pointer (CastX2P). Emits a register-to-register
      // move, or nothing at all when dst and src were allocated the same register.
 5968   match(Set dst (CastX2P src));
 5969 
 5970   format %{ "movq    $dst, $src\t# long->ptr" %}
 5971   ins_encode %{
 5972     if ($dst$$reg != $src$$reg) {
 5973       __ movptr($dst$$Register, $src$$Register);
 5974     }
 5975   %}
 5976   ins_pipe(ialu_reg_reg); // XXX
 5977 %}
 5978 
 5979 instruct castI2N(rRegN dst, rRegI src)
 5980 %{
      // Reinterprets an int as a narrow pointer (CastI2N). Emits a 32-bit
      // register-to-register move, or nothing when dst and src share a register.
      // The format mnemonic is movl to match the instruction actually emitted.
 5981   match(Set dst (CastI2N src));
 5982 
 5983   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 5984   ins_encode %{
 5985     if ($dst$$reg != $src$$reg) {
 5986       __ movl($dst$$Register, $src$$Register);
 5987     }
 5988   %}
 5989   ins_pipe(ialu_reg_reg); // XXX
 5990 %}
 5991 
 5992 instruct castN2X(rRegL dst, rRegN src)
 5993 %{
      // CastP2X whose input lives in a narrow-pointer register. Emits a
      // register-to-register move, or nothing when dst and src share a register.
      // The format text says "narrow ptr" so that listings can tell this rule
      // apart from castP2X, which handles the full-width pointer case.
 5994   match(Set dst (CastP2X src));
 5995 
 5996   format %{ "movq    $dst, $src\t# narrow ptr -> long" %}
 5997   ins_encode %{
 5998     if ($dst$$reg != $src$$reg) {
 5999       __ movptr($dst$$Register, $src$$Register);
 6000     }
 6001   %}
 6002   ins_pipe(ialu_reg_reg); // XXX
 6003 %}
 6004 
 6005 instruct castP2X(rRegL dst, rRegP src)
 6006 %{
      // Reinterprets a pointer as a long (CastP2X). Emits a register-to-register
      // move, or nothing at all when dst and src were allocated the same register.
 6007   match(Set dst (CastP2X src));
 6008 
 6009   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6010   ins_encode %{
 6011     if ($dst$$reg != $src$$reg) {
 6012       __ movptr($dst$$Register, $src$$Register);
 6013     }
 6014   %}
 6015   ins_pipe(ialu_reg_reg); // XXX
 6016 %}
 6017 
 6018 // Convert oop into int for vectors alignment masking
 6019 instruct convP2I(rRegI dst, rRegP src)
 6020 %{
 6021   match(Set dst (ConvL2I (CastP2X src)));
 6022 
 6023   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6024   ins_encode %{

12236   effect(DEF dst, USE src);
12237   ins_cost(100);
12238   format %{ "movd    $dst,$src\t# MoveI2F" %}
12239   ins_encode %{
12240     __ movdl($dst$$XMMRegister, $src$$Register);
12241   %}
12242   ins_pipe( pipe_slow );
12243 %}
12244 
12245 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
      // MoveL2D between registers: moves a long general-purpose register into a
      // double (XMM) register via movdq().
      // NOTE(review): the format prints "movd" while the encoding calls movdq();
      // confirm which mnemonic the listing should show.
12246   match(Set dst (MoveL2D src));
12247   effect(DEF dst, USE src);
12248   ins_cost(100);
12249   format %{ "movd    $dst,$src\t# MoveL2D" %}
12250   ins_encode %{
12251      __ movdq($dst$$XMMRegister, $src$$Register);
12252   %}
12253   ins_pipe( pipe_slow );
12254 %}
12255 
12256 
12257 // Fast clearing of an array
12258 // Small non-constant length ClearArray for non-AVX512 targets.
      // Selected when the ClearArray node is neither is_large() nor
      // word_copy_only() and UseAVX <= 2. cnt/base/val are pinned to rcx/rdi/rax
      // and are consumed (USE_KILL); tmp is an XMM temporary; flags are killed.
      // The code itself comes from clear_mem() with both trailing flags false,
      // mirroring the two predicate terms; the format template only describes it.
      // NOTE(review): the XMM part of the template addresses memory through
      // (rax) and names xmm0, although rax carries $val and the temporary is
      // $tmp -- the text looks stale, confirm against clear_mem().
12259 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12260                   Universe dummy, rFlagsReg cr)
12261 %{
12262   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12263   match(Set dummy (ClearArray (Binary cnt base) val));
12264   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12265 
12266   format %{ $$template
12267     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12268     $$emit$$"jg      LARGE\n\t"
12269     $$emit$$"dec     rcx\n\t"
12270     $$emit$$"js      DONE\t# Zero length\n\t"
12271     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12272     $$emit$$"dec     rcx\n\t"
12273     $$emit$$"jge     LOOP\n\t"
12274     $$emit$$"jmp     DONE\n\t"
12275     $$emit$$"# LARGE:\n\t"
12276     if (UseFastStosb) {
12277        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12278        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12279     } else if (UseXMMForObjInit) {
12280        $$emit$$"movdq   $tmp, $val\n\t"
12281        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12282        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12283        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12284        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12285        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12286        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12287        $$emit$$"add     0x40,rax\n\t"
12288        $$emit$$"# L_zero_64_bytes:\n\t"
12289        $$emit$$"sub     0x8,rcx\n\t"
12290        $$emit$$"jge     L_loop\n\t"
12291        $$emit$$"add     0x4,rcx\n\t"
12292        $$emit$$"jl      L_tail\n\t"
12293        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12294        $$emit$$"add     0x20,rax\n\t"
12295        $$emit$$"sub     0x4,rcx\n\t"
12296        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12297        $$emit$$"add     0x4,rcx\n\t"
12298        $$emit$$"jle     L_end\n\t"
12299        $$emit$$"dec     rcx\n\t"
12300        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12301        $$emit$$"vmovq   xmm0,(rax)\n\t"
12302        $$emit$$"add     0x8,rax\n\t"
12303        $$emit$$"dec     rcx\n\t"
12304        $$emit$$"jge     L_sloop\n\t"
12305        $$emit$$"# L_end:\n\t"
12306     } else {
12307        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12308     }
12309     $$emit$$"# DONE"
12310   %}
12311   ins_encode %{
12312     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12313                  $tmp$$XMMRegister, false, false);
12314   %}
12315   ins_pipe(pipe_slow);
12316 %}
12317 
12318 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12319                             Universe dummy, rFlagsReg cr)
12320 %{
      // Small non-constant length ClearArray for UseAVX <= 2 when the node is
      // word_copy_only(). Same operands and effects as rep_stos; the template
      // has no "rep stosb" alternative, and clear_mem() is called with the
      // trailing flags false/true, mirroring the two predicate terms.
12321   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12322   match(Set dummy (ClearArray (Binary cnt base) val));
12323   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12324 
12325   format %{ $$template
12326     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12327     $$emit$$"jg      LARGE\n\t"
12328     $$emit$$"dec     rcx\n\t"
12329     $$emit$$"js      DONE\t# Zero length\n\t"
12330     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12331     $$emit$$"dec     rcx\n\t"
12332     $$emit$$"jge     LOOP\n\t"
12333     $$emit$$"jmp     DONE\n\t"
12334     $$emit$$"# LARGE:\n\t"
12335     if (UseXMMForObjInit) {
12336        $$emit$$"movdq   $tmp, $val\n\t"
12337        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12338        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12339        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12340        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12341        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12342        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12343        $$emit$$"add     0x40,rax\n\t"
12344        $$emit$$"# L_zero_64_bytes:\n\t"
12345        $$emit$$"sub     0x8,rcx\n\t"
12346        $$emit$$"jge     L_loop\n\t"
12347        $$emit$$"add     0x4,rcx\n\t"
12348        $$emit$$"jl      L_tail\n\t"
12349        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12350        $$emit$$"add     0x20,rax\n\t"
12351        $$emit$$"sub     0x4,rcx\n\t"
12352        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12353        $$emit$$"add     0x4,rcx\n\t"
12354        $$emit$$"jle     L_end\n\t"
12355        $$emit$$"dec     rcx\n\t"
12356        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12357        $$emit$$"vmovq   xmm0,(rax)\n\t"
12358        $$emit$$"add     0x8,rax\n\t"
12359        $$emit$$"dec     rcx\n\t"
12360        $$emit$$"jge     L_sloop\n\t"
12361        $$emit$$"# L_end:\n\t"
12362     } else {
12363        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12364     }
12365     $$emit$$"# DONE"
12366   %}
12367   ins_encode %{
12368     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12369                  $tmp$$XMMRegister, false, true);
12370   %}
12371   ins_pipe(pipe_slow);
12372 %}
12373 
12374 // Small non-constant length ClearArray for AVX512 targets.
12375 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12376                        Universe dummy, rFlagsReg cr)
12377 %{
12378   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12379   match(Set dummy (ClearArray (Binary cnt base) val));
12380   ins_cost(125);
12381   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12382 
12383   format %{ $$template
12384     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12385     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12386     $$emit$$"jg      LARGE\n\t"
12387     $$emit$$"dec     rcx\n\t"
12388     $$emit$$"js      DONE\t# Zero length\n\t"
12389     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12390     $$emit$$"dec     rcx\n\t"
12391     $$emit$$"jge     LOOP\n\t"
12392     $$emit$$"jmp     DONE\n\t"
12393     $$emit$$"# LARGE:\n\t"
12394     if (UseFastStosb) {
12395        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12396        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12397     } else if (UseXMMForObjInit) {
12398        $$emit$$"mov     rdi,rax\n\t"
12399        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12400        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12401        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12409        $$emit$$"jl      L_tail\n\t"
12410        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12411        $$emit$$"add     0x20,rax\n\t"
12412        $$emit$$"sub     0x4,rcx\n\t"
12413        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12414        $$emit$$"add     0x4,rcx\n\t"
12415        $$emit$$"jle     L_end\n\t"
12416        $$emit$$"dec     rcx\n\t"
12417        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12418        $$emit$$"vmovq   xmm0,(rax)\n\t"
12419        $$emit$$"add     0x8,rax\n\t"
12420        $$emit$$"dec     rcx\n\t"
12421        $$emit$$"jge     L_sloop\n\t"
12422        $$emit$$"# L_end:\n\t"
12423     } else {
12424        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12425     }
12426     $$emit$$"# DONE"
12427   %}
12428   ins_encode %{
12429     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12430                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
12431   %}
12432   ins_pipe(pipe_slow);
12433 %}
12434 
12435 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12436                                  Universe dummy, rFlagsReg cr)

12437 %{
12438   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12439   match(Set dummy (ClearArray (Binary cnt base) val));
12440   ins_cost(125);
12441   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12442 
12443   format %{ $$template
12444     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12445     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12446     $$emit$$"jg      LARGE\n\t"
12447     $$emit$$"dec     rcx\n\t"
12448     $$emit$$"js      DONE\t# Zero length\n\t"
12449     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12450     $$emit$$"dec     rcx\n\t"
12451     $$emit$$"jge     LOOP\n\t"
12452     $$emit$$"jmp     DONE\n\t"
12453     $$emit$$"# LARGE:\n\t"
12454     if (UseFastStosb) {
12455        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12456        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12457     } else if (UseXMMForObjInit) {
12458        $$emit$$"mov     rdi,rax\n\t"
12459        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12460        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12461        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12469        $$emit$$"jl      L_tail\n\t"
12470        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12471        $$emit$$"add     0x20,rax\n\t"
12472        $$emit$$"sub     0x4,rcx\n\t"
12473        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12474        $$emit$$"add     0x4,rcx\n\t"
12475        $$emit$$"jle     L_end\n\t"
12476        $$emit$$"dec     rcx\n\t"
12477        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12478        $$emit$$"vmovq   xmm0,(rax)\n\t"
12479        $$emit$$"add     0x8,rax\n\t"
12480        $$emit$$"dec     rcx\n\t"
12481        $$emit$$"jge     L_sloop\n\t"
12482        $$emit$$"# L_end:\n\t"
12483     } else {
12484        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12485     }
12486     $$emit$$"# DONE"
12487   %}
12488   ins_encode %{
12489     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12490                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
12491   %}
12492   ins_pipe(pipe_slow);
12493 %}
12494 
12495 // Large non-constant length ClearArray for non-AVX512 targets.
      // Selected when the ClearArray node is is_large() but not word_copy_only()
      // and UseAVX <= 2. cnt/base/val are pinned to rcx/rdi/rax and consumed
      // (USE_KILL); tmp is an XMM temporary; flags are killed. Unlike the small
      // variants the template has no short-length loop in front. clear_mem() is
      // called with the trailing flags true/false, mirroring the predicate terms.
12496 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12497                         Universe dummy, rFlagsReg cr)
12498 %{
12499   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12500   match(Set dummy (ClearArray (Binary cnt base) val));
12501   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12502 
12503   format %{ $$template
12504     if (UseFastStosb) {
12505        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12506        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12507     } else if (UseXMMForObjInit) {
12508        $$emit$$"movdq   $tmp, $val\n\t"
12509        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12510        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12511        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12512        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12513        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12514        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12515        $$emit$$"add     0x40,rax\n\t"
12516        $$emit$$"# L_zero_64_bytes:\n\t"
12517        $$emit$$"sub     0x8,rcx\n\t"
12518        $$emit$$"jge     L_loop\n\t"
12519        $$emit$$"add     0x4,rcx\n\t"
12520        $$emit$$"jl      L_tail\n\t"
12521        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12522        $$emit$$"add     0x20,rax\n\t"
12523        $$emit$$"sub     0x4,rcx\n\t"
12524        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12525        $$emit$$"add     0x4,rcx\n\t"
12526        $$emit$$"jle     L_end\n\t"
12527        $$emit$$"dec     rcx\n\t"
12528        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12529        $$emit$$"vmovq   xmm0,(rax)\n\t"
12530        $$emit$$"add     0x8,rax\n\t"
12531        $$emit$$"dec     rcx\n\t"
12532        $$emit$$"jge     L_sloop\n\t"
12533        $$emit$$"# L_end:\n\t"
12534     } else {
12535        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12536     }
12537   %}
12538   ins_encode %{
12539     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12540                  $tmp$$XMMRegister, true, false);
12541   %}
12542   ins_pipe(pipe_slow);
12543 %}
12544 
12545 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12546                                   Universe dummy, rFlagsReg cr)
12547 %{
      // Large non-constant length ClearArray for UseAVX <= 2 when the node is
      // word_copy_only(). Same operands and effects as rep_stos_large; the
      // template has no "rep stosb" alternative, and clear_mem() is called with
      // the trailing flags true/true, mirroring the two predicate terms.
12548   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12549   match(Set dummy (ClearArray (Binary cnt base) val));
12550   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12551 
12552   format %{ $$template
12553     if (UseXMMForObjInit) {
12554        $$emit$$"movdq   $tmp, $val\n\t"
12555        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12556        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12557        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12558        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12559        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12560        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12561        $$emit$$"add     0x40,rax\n\t"
12562        $$emit$$"# L_zero_64_bytes:\n\t"
12563        $$emit$$"sub     0x8,rcx\n\t"
12564        $$emit$$"jge     L_loop\n\t"
12565        $$emit$$"add     0x4,rcx\n\t"
12566        $$emit$$"jl      L_tail\n\t"
12567        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12568        $$emit$$"add     0x20,rax\n\t"
12569        $$emit$$"sub     0x4,rcx\n\t"
12570        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12571        $$emit$$"add     0x4,rcx\n\t"
12572        $$emit$$"jle     L_end\n\t"
12573        $$emit$$"dec     rcx\n\t"
12574        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12575        $$emit$$"vmovq   xmm0,(rax)\n\t"
12576        $$emit$$"add     0x8,rax\n\t"
12577        $$emit$$"dec     rcx\n\t"
12578        $$emit$$"jge     L_sloop\n\t"
12579        $$emit$$"# L_end:\n\t"
12580     } else {
12581        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12582     }
12583   %}
12584   ins_encode %{
12585     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12586                  $tmp$$XMMRegister, true, true);
12587   %}
12588   ins_pipe(pipe_slow);
12589 %}
12590 
12591 // Large non-constant length ClearArray for AVX512 targets.
      // Selected when the ClearArray node is is_large() but not word_copy_only()
      // and UseAVX > 2. In addition to the rcx/rdi/rax operands it reserves a
      // legacy-range XMM temporary and an opmask temporary (ktmp), both passed to
      // clear_mem() together with the trailing flags true/false.
      // NOTE(review): the template prints "xorq rax, rax" and uses ymm0/(rax),
      // although rax carries the input $val and the temporary is $tmp -- the
      // text looks stale, confirm against clear_mem().
12592 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12593                              Universe dummy, rFlagsReg cr)
12594 %{
12595   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12596   match(Set dummy (ClearArray (Binary cnt base) val));
12597   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12598 
12599   format %{ $$template
12600     if (UseFastStosb) {
12601        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12602        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12603        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12604     } else if (UseXMMForObjInit) {
12605        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12606        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12607        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12608        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12609        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12610        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12611        $$emit$$"add     0x40,rax\n\t"
12612        $$emit$$"# L_zero_64_bytes:\n\t"
12613        $$emit$$"sub     0x8,rcx\n\t"
12614        $$emit$$"jge     L_loop\n\t"
12615        $$emit$$"add     0x4,rcx\n\t"
12616        $$emit$$"jl      L_tail\n\t"
12617        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12618        $$emit$$"add     0x20,rax\n\t"
12619        $$emit$$"sub     0x4,rcx\n\t"
12620        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12621        $$emit$$"add     0x4,rcx\n\t"
12622        $$emit$$"jle     L_end\n\t"
12623        $$emit$$"dec     rcx\n\t"
12624        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12625        $$emit$$"vmovq   xmm0,(rax)\n\t"
12626        $$emit$$"add     0x8,rax\n\t"
12627        $$emit$$"dec     rcx\n\t"
12628        $$emit$$"jge     L_sloop\n\t"
12629        $$emit$$"# L_end:\n\t"
12630     } else {
12631        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12632        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12633     }
12634   %}
12635   ins_encode %{
12636     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12637                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
12638   %}
12639   ins_pipe(pipe_slow);
12640 %}
12641 
12642 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12643                                        Universe dummy, rFlagsReg cr)

12644 %{
      // Large non-constant length ClearArray for UseAVX > 2 when the node is
      // word_copy_only(). Same operands and effects as rep_stos_large_evex;
      // clear_mem() is called with the trailing flags true/true plus ktmp.
      // NOTE(review): unlike the non-EVEX word-copy rules, this template still
      // lists a UseFastStosb "rep stosb" alternative, and it prints
      // "xorq rax, rax" although rax carries $val -- confirm the text is current.
12645   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12646   match(Set dummy (ClearArray (Binary cnt base) val));
12647   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12648 
12649   format %{ $$template
12650     if (UseFastStosb) {
12651        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12652        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12653        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12654     } else if (UseXMMForObjInit) {
12655        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12656        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12657        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12658        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12659        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12660        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12661        $$emit$$"add     0x40,rax\n\t"
12662        $$emit$$"# L_zero_64_bytes:\n\t"
12663        $$emit$$"sub     0x8,rcx\n\t"
12664        $$emit$$"jge     L_loop\n\t"
12665        $$emit$$"add     0x4,rcx\n\t"
12666        $$emit$$"jl      L_tail\n\t"
12667        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12668        $$emit$$"add     0x20,rax\n\t"
12669        $$emit$$"sub     0x4,rcx\n\t"
12670        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12671        $$emit$$"add     0x4,rcx\n\t"
12672        $$emit$$"jle     L_end\n\t"
12673        $$emit$$"dec     rcx\n\t"
12674        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12675        $$emit$$"vmovq   xmm0,(rax)\n\t"
12676        $$emit$$"add     0x8,rax\n\t"
12677        $$emit$$"dec     rcx\n\t"
12678        $$emit$$"jge     L_sloop\n\t"
12679        $$emit$$"# L_end:\n\t"
12680     } else {
12681        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12682        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12683     }
12684   %}
12685   ins_encode %{
12686     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12687                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
12688   %}
12689   ins_pipe(pipe_slow);
12690 %}
12691 
12692 // Small constant length ClearArray for AVX512 targets.
      // The count is an immediate (immL) and the base may be any pointer
      // register. Selected when the node is neither is_large() nor
      // word_copy_only(), MaxVectorSize >= 32 and AVX512VL is supported. val is
      // pinned to rax and consumed; tmp (XMM) and ktmp (opmask) are temporaries.
      // Calls the constant-count form of clear_mem().
12693 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
12694 %{
12695   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
12696             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
12697   match(Set dummy (ClearArray (Binary cnt base) val));
12698   ins_cost(100);
12699   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
12700   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12701   ins_encode %{
12702     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12703   %}
12704   ins_pipe(pipe_slow);
12705 %}
12706 
12707 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12708                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12709 %{
      // StrComp for the LL encoding on targets without AVX512VL+BW. The string
      // pointers and counts are pinned to rdi/rsi and rcx/rdx and are consumed
      // (USE_KILL); the result is produced in rax; tmp1 is an XMM temporary and
      // flags are killed. No opmask register is used (knoreg is passed).
12710   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12711   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12712   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12713 
12714   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12715   ins_encode %{
12716     __ string_compare($str1$$Register, $str2$$Register,
12717                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12718                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12719   %}
12720   ins_pipe( pipe_slow );
12721 %}
12722 

14555 
14556   ins_cost(300);
14557   format %{ "call_leaf,runtime " %}
14558   ins_encode(clear_avx, Java_To_Runtime(meth));
14559   ins_pipe(pipe_slow);
14560 %}
14561 
14562 // Call runtime without safepoint and with vector arguments
      // Unlike the other leaf-call rules here, the encoding has no clear_avx
      // step; MachCallRuntimeNode::ret_addr_offset() correspondingly skips
      // clear_avx_size() for Op_CallLeafVector.
14563 instruct CallLeafDirectVector(method meth)
14564 %{
14565   match(CallLeafVector);
14566   effect(USE meth);
14567 
14568   ins_cost(300);
14569   format %{ "call_leaf,vector " %}
14570   ins_encode(Java_To_Runtime(meth));
14571   ins_pipe(pipe_slow);
14572 %}
14573 
14574 // Call runtime without safepoint
14575 // entry point is null, target holds the address to call
      // Selected by the predicate when the call node has no entry point; the
      // encoding is a single indirect call through the $target register, with no
      // clear_avx step.
14576 instruct CallLeafNoFPInDirect(rRegP target)
14577 %{
14578   predicate(n->as_Call()->entry_point() == nullptr);
14579   match(CallLeafNoFP target);
14580 
14581   ins_cost(300);
14582   format %{ "call_leaf_nofp,runtime indirect " %}
14583   ins_encode %{
14584      __ call($target$$Register);
14585   %}
14586 
14587   ins_pipe(pipe_slow);
14588 %}
14589 
14590 instruct CallLeafNoFPDirect(method meth)
14591 %{
      // CallLeafNoFP with a known entry point (the predicate requires a non-null
      // entry_point()). Encoded as clear_avx followed by Java_To_Runtime, the
      // same sequence the "call_leaf,runtime" rule above uses.
14592   predicate(n->as_Call()->entry_point() != nullptr);
14593   match(CallLeafNoFP);
14594   effect(USE meth);
14595 
14596   ins_cost(300);
14597   format %{ "call_leaf_nofp,runtime " %}
14598   ins_encode(clear_avx, Java_To_Runtime(meth));
14599   ins_pipe(pipe_slow);
14600 %}
14601 
14602 // Return Instruction
14603 // Remove the return address & jump to it.
14604 // Notice: We always emit a nop after a ret to make sure there is room
14605 // for safepoint patching
14606 instruct Ret()
14607 %{
14608   match(Return);
14609 
14610   format %{ "ret" %}
14611   ins_encode %{
14612     __ ret(0);
< prev index next >