< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  586 }
  587 
  588 // !!!!! Special hack to get all types of calls to specify the byte offset
  589 //       from the start of the call to the point where the return address
  590 //       will point.
      //
      // Static Java call: the result is the fixed 5-byte distance plus whatever
      // clear_avx_size() reports (the compute_padding() routines in this file
      // annotate that same amount as the vzeroupper to skip).
  591 int MachCallStaticJavaNode::ret_addr_offset()
  592 {
  593   int offset = 5; // 5 bytes from start of call to where return address points
  594   offset += clear_avx_size();
  595   return offset;
  596 }
  597 
  598 int MachCallDynamicJavaNode::ret_addr_offset()
  599 {
        // Dynamic Java call: a fixed 15 bytes, with the clear_avx_size() bytes
        // added on top. The matching compute_padding() skips 11 bytes described
        // as "movq instruction + call opcode byte".
  600   int offset = 15; // 15 bytes from start of call to where return address points
  601   offset += clear_avx_size();
  602   return offset;
  603 }
  604 
  605 int MachCallRuntimeNode::ret_addr_offset() {
        // Runtime call: 13 bytes for the movq/callq pair noted below. The
        // clear_avx_size() bytes are added for every ideal opcode except
        // Op_CallLeafVector; CallLeafDirectVector's ins_encode lists only
        // Java_To_Runtime, while the other leaf calls list clear_avx first.




  606   int offset = 13; // movq r10,#addr; callq (r10)
  607   if (this->ideal_Opcode() != Op_CallLeafVector) {
  608     offset += clear_avx_size();
  609   }
  610   return offset;
  611 }

  612 //
  613 // Compute padding required for nodes which need alignment
  614 //
  615 
  616 // The address of the call instruction needs to be 4-byte aligned to
  617 // ensure that it does not span a cache line so that it can be patched.
      //
      // Returns the number of padding bytes to insert at current_offset: the
      // position reached after skipping clear_avx_size() bytes and one opcode
      // byte is rounded up with align_up() to alignment_required(), and the
      // distance to that rounded position is the result.
  618 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  619 {
  620   current_offset += clear_avx_size(); // skip vzeroupper
  621   current_offset += 1; // skip call opcode byte
  622   return align_up(current_offset, alignment_required()) - current_offset;
  623 }
  624 
  625 // The address of the call instruction needs to be 4-byte aligned to
  626 // ensure that it does not span a cache line so that it can be patched.
  627 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  628 {
  629   current_offset += clear_avx_size(); // skip vzeroupper
  630   current_offset += 11; // skip movq instruction + call opcode byte
  631   return align_up(current_offset, alignment_required()) - current_offset;

  817     st->print("\n\t");
  818     st->print("# stack alignment check");
  819 #endif
  820   }
  821   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
  822     st->print("\n\t");
  823     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  824     st->print("\n\t");
  825     st->print("je      fast_entry\t");
  826     st->print("\n\t");
  827     st->print("call    #nmethod_entry_barrier_stub\t");
  828     st->print("\n\tfast_entry:");
  829   }
  830   st->cr();
  831 }
  832 #endif
  833 
  834 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the prolog: an optional class-initialization barrier, the
        // verified entry sequence, and then bookkeeping on C->output()
        // (frame-complete offset, constant table base offset).
  835   Compile* C = ra_->C;
  836 
  837   int framesize = C->output()->frame_size_in_bytes();
  838   int bangsize = C->output()->bang_size_in_bytes();
  839 
  840   if (C->clinit_barrier_on_entry()) {
  841     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  842     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  843 
  844     Label L_skip_barrier;
  845     Register klass = rscratch1;
  846 
          // Load the holder class of the compiled method and run the barrier
          // on it; the fast path continues at L_skip_barrier.
  847     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  848     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  849 
  850     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  851 
  852     __ bind(L_skip_barrier);

  853   }
  854 
        // The bang size is passed as 0 when need_stack_bang() says no bang is
        // required; the last argument is true for stub compilations.
  855   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


  856 
        // Record the code offset reached once the entry sequence is emitted.
  857   C->output()->set_frame_complete(__ offset());
  858 
  859   if (C->has_mach_constant_base_node()) {
  860     // NOTE: We set the table base offset here because users might be
  861     // emitted before MachConstantBaseNode.
  862     ConstantTable& constant_table = C->output()->constant_table();
  863     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  864   }
  865 }
  866 
  867 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  868 {
        // No hand-computed byte count here: the prolog length depends on
        // several conditions in emit(), so the generic MachNode::size() is
        // asked to work it out.
  869   return MachNode::size(ra_); // too many variables; just compute it
  870                               // the hard way
  871 }
  872 
  873 int MachPrologNode::reloc() const
  874 {
        // NOTE(review): the trailing comment says "large enough" yet the value
        // is 0, while MachEpilogNode::reloc() returns 2 with the same comment.
        // Presumably this is a relocation-entry estimate; confirm 0 is intended.
  875   return 0; // a large enough number
  876 }
  877 
  878 //=============================================================================
  879 #ifndef PRODUCT
  880 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  881 {
  882   Compile* C = ra_->C;
  883   if (generate_vzeroupper(C)) {
  884     st->print("vzeroupper");
  885     st->cr(); st->print("\t");
  886   }
  887 
  888   int framesize = C->output()->frame_size_in_bytes();
  889   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  890   // Remove word for return adr already pushed
  891   // and RBP
  892   framesize -= 2*wordSize;

  899   st->print_cr("popq    rbp");
  900   if (do_polling() && C->is_method_compilation()) {
  901     st->print("\t");
  902     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  903                  "ja      #safepoint_stub\t"
  904                  "# Safepoint: poll for GC");
  905   }
  906 }
  907 #endif
  908 
  909 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  910 {
        // Emits the epilog: optional vzeroupper, stack pointer adjustment,
        // restore of rbp, optional reserved-stack check, and the return
        // safepoint poll.
  911   Compile* C = ra_->C;
  912 
  913   if (generate_vzeroupper(C)) {
  914     // Clear upper bits of YMM registers when current compiled code uses
  915     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  916     __ vzeroupper();
  917   }
  918 
  919   int framesize = C->output()->frame_size_in_bytes();
  920   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  921   // Remove word for return adr already pushed
  922   // and RBP
  923   framesize -= 2*wordSize;
  924 
  925   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  926 
        // Nothing to add back when the frame holds only the two words
        // subtracted above.
  927   if (framesize) {
  928     __ addq(rsp, framesize);
  929   }
  930 
  931   __ popq(rbp);
  932 
  933   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  934     __ reserved_stack_check();
  935   }
  936 
  937   if (do_polling() && C->is_method_compilation()) {
          // When in_scratch_emit_size() is true no stub is allocated and the
          // poll targets the local dummy label; otherwise a
          // C2SafepointPollStub is created at the current offset, registered
          // with C->output(), and its entry becomes the poll target.
  938     Label dummy_label;
  939     Label* code_stub = &dummy_label;
  940     if (!C->output()->in_scratch_emit_size()) {
  941       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  942       C->output()->add_stub(stub);
  943       code_stub = &stub->entry();
  944     }
  945     __ relocate(relocInfo::poll_return_type);
  946     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  947   }
  948 }
  949 
  950 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
  951 {
        // The epilog length depends on several conditions in emit(), so the
        // generic MachNode::size() is asked to work it out.
  952   return MachNode::size(ra_); // too many variables; just compute it
  953                               // the hard way
  954 }
  955 
  956 int MachEpilogNode::reloc() const
  957 {
        // Presumably an upper bound on relocation entries for the epilog;
        // emit() issues one explicit relocate(poll_return_type). TODO confirm
        // what the second entry covers.
  958   return 2; // a large enough number
  959 }
  960 
  961 const Pipeline* MachEpilogNode::pipeline() const
  962 {
        // No epilog-specific pipeline description: the value comes straight
        // from MachNode::pipeline_class().
  963   return MachNode::pipeline_class();
  964 }
  965 
  966 //=============================================================================
  967 
  968 enum RC {
        // Presumably coarse register-class tags (invalid, general-purpose,
        // opmask "k" register, floating-point/vector, stack slot) used by the
        // code that follows; the uses are outside this view, so verify there.
  969   rc_bad,
  970   rc_int,
  971   rc_kreg,
  972   rc_float,
  973   rc_stack
  974 };
  975 

 1533   int reg = ra_->get_reg_first(this);
 1534   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1535             Matcher::regName[reg], offset);
 1536 }
 1537 #endif
 1538 
 1539 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1540 {
        // Emits a single lea that loads rsp + offset into this node's
        // register. The offset comes from reg2offset() applied to the first
        // element of the input register mask; the destination is the
        // allocator's encoding for this node.
 1541   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1542   int reg = ra_->get_encode(this);
 1543 
 1544   __ lea(as_Register(reg), Address(rsp, offset));
 1545 }
 1546 
 1547 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1548 {
        // Byte size of the lea emitted by BoxLockNode::emit(): 5 when the
        // stack offset is below 0x80, 8 otherwise. Presumably the split is
        // between an 8-bit and a 32-bit displacement encoding; confirm against
        // the assembler.
 1549   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1550   return (offset < 0x80) ? 5 : 8; // REX
 1551 }
 1552 











































 1553 //=============================================================================
 1554 #ifndef PRODUCT
 1555 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1556 {
        // Prints a textual rendering of the unverified-entry inline cache
        // check: load the receiver's klass into rscratch1, compare it with the
        // speculated klass held at [rax + ...], and branch to the IC miss stub
        // on mismatch. The 32-bit (movl/cmpl) form is printed when
        // UseCompressedClassPointers is set, the 64-bit (movq/cmpq) form
        // otherwise; only the 32-bit load is labelled as a compressed klass.
 1557   if (UseCompressedClassPointers) {
 1558     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1559     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1560   } else {
 1561     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1562     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1563   }
 1564   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1565 }
 1566 #endif
 1567 
 1568 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1569 {
        // The whole unverified entry point is produced by the assembler's
        // ic_check(), which is handed InteriorEntryAlignment.
 1570   __ ic_check(InteriorEntryAlignment);
 1571 }
 1572 
 1573 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1574 {
        // No hard-coded byte count: the generic MachNode::size() is asked to
        // work out the length.
 1575   return MachNode::size(ra_); // too many variables; just compute it
 1576                               // the hard way
 1577 }
 1578 
 1579 
 1580 //=============================================================================
 1581 
 1582 bool Matcher::supports_vector_calling_convention(void) {
        // True only when both EnableVectorSupport and UseVectorStubs are set;
        // vector_return_value() asserts the same pair of flags.
 1583   if (EnableVectorSupport && UseVectorStubs) {
 1584     return true;
 1585   }
 1586   return false;
 1587 }
 1588 
 1589 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Returns the (hi, lo) register pair for a vector result. The low
        // half is always XMM0_num; the high half is picked by ideal_reg:
        // Op_VecX -> XMM0d_num, Op_VecY -> XMM0h_num, Op_VecZ -> XMM0p_num,
        // and any other value keeps the default XMM0b_num.
 1590   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1591   int lo = XMM0_num;
 1592   int hi = XMM0b_num;
 1593   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1594   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1595   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1596   return OptoRegPair(hi, lo);
 1597 }
 1598 
 1599 // Is this branch offset short enough that a short branch can be used?

 3042   %}
 3043 %}
 3044 
 3045 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Matches base + (ConvI2L(idx) << scale) + off. The predicate accepts the
      // match only when the long type reached via n->in(2)->in(3)->in(1) has a
      // lower bound (_lo) >= 0; presumably that node is the ConvI2L of the
      // index -- confirm against the AddP input layout.
 3046 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3047 %{
 3048   constraint(ALLOC_IN_RC(ptr_reg));
 3049   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3050   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3051 
 3052   op_cost(10);
 3053   format %{"[$reg + $off + $idx << $scale]" %}
 3054   interface(MEMORY_INTER) %{
 3055     base($reg);
 3056     index($idx);
 3057     scale($scale);
 3058     disp($off);
 3059   %}
 3060 %}
 3061 
















 3062 // Indirect Narrow Oop Plus Offset Operand
 3063 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3064 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Only used when UseCompressedOops is on and the compressed-oop shift
      // equals Address::times_8. The narrow oop register becomes the index
      // with a fixed scale of 3, and the base is hard-wired to encoding 0xc
      // (R12), so DecodeN followed by an offset add folds into one address.
 3065 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 3066   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3067   constraint(ALLOC_IN_RC(ptr_reg));
 3068   match(AddP (DecodeN reg) off);
 3069 
 3070   op_cost(10);
 3071   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3072   interface(MEMORY_INTER) %{
 3073     base(0xc); // R12
 3074     index($reg);
 3075     scale(0x3);
 3076     disp($off);
 3077   %}
 3078 %}
 3079 
 3080 // Indirect Memory Operand
 3081 operand indirectNarrow(rRegN reg)

 3388     equal(0x4, "e");
 3389     not_equal(0x5, "ne");
 3390     less(0x2, "b");
 3391     greater_equal(0x3, "ae");
 3392     less_equal(0x6, "be");
 3393     greater(0x7, "a");
 3394     overflow(0x0, "o");
 3395     no_overflow(0x1, "no");
 3396   %}
 3397 %}
 3398 
 3399 //----------OPERAND CLASSES----------------------------------------------------
 3400 // Operand Classes are groups of operands that are used to simplify
 3401 // instruction definitions by not requiring the AD writer to specify separate
 3402 // instructions for every form of operand when the instruction accepts
 3403 // multiple operand types with the same basic encoding and format.  The classic
 3404 // case of this is memory operands.
 3405 
      // "memory" collects every addressing-mode operand listed below: the
      // plain forms, indCompressedOopOffset, and the *Narrow variants.
 3406 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3407                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3408                indCompressedOopOffset,
 3409                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3410                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3411                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3412 
 3413 //----------PIPELINE-----------------------------------------------------------
 3414 // Rules which define the behavior of the target architectures pipeline.
 3415 pipeline %{
 3416 
 3417 //----------ATTRIBUTES---------------------------------------------------------
 3418 attributes %{
 3419   variable_size_instructions;        // Instructions vary in size
 3420   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3421   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 3422   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3423   instruction_fetch_units = 1;       // of 16 bytes
 3424 
 3425   // List of nop instructions
 3426   nops( MachNop );
 3427 %}
 3428 

 5896   format %{ "MEMBAR-storestore (empty encoding)" %}
 5897   ins_encode( );
 5898   ins_pipe(empty);
 5899 %}
 5900 
 5901 //----------Move Instructions--------------------------------------------------
 5902 
 5903 instruct castX2P(rRegP dst, rRegL src)
 5904 %{
        // Reinterprets a long register as a pointer (CastX2P). Only a register
        // move is emitted, and it is skipped entirely when the allocator put
        // dst and src in the same register.
 5905   match(Set dst (CastX2P src));
 5906 
 5907   format %{ "movq    $dst, $src\t# long->ptr" %}
 5908   ins_encode %{
 5909     if ($dst$$reg != $src$$reg) {
 5910       __ movptr($dst$$Register, $src$$Register);
 5911     }
 5912   %}
 5913   ins_pipe(ialu_reg_reg); // XXX
 5914 %}
 5915 













 5916 instruct castP2X(rRegL dst, rRegP src)
 5917 %{
        // Reinterprets a pointer register as a long (CastP2X). Only a register
        // move is emitted, and it is skipped entirely when the allocator put
        // dst and src in the same register.
 5918   match(Set dst (CastP2X src));
 5919 
 5920   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5921   ins_encode %{
 5922     if ($dst$$reg != $src$$reg) {
 5923       __ movptr($dst$$Register, $src$$Register);
 5924     }
 5925   %}
 5926   ins_pipe(ialu_reg_reg); // XXX
 5927 %}
 5928 
 5929 // Convert oop into int for vectors alignment masking
 5930 instruct convP2I(rRegI dst, rRegP src)
 5931 %{
 5932   match(Set dst (ConvL2I (CastP2X src)));
 5933 
 5934   format %{ "movl    $dst, $src\t# ptr -> int" %}
 5935   ins_encode %{

10444   effect(DEF dst, USE src);
10445   ins_cost(100);
10446   format %{ "movd    $dst,$src\t# MoveI2F" %}
10447   ins_encode %{
10448     __ movdl($dst$$XMMRegister, $src$$Register);
10449   %}
10450   ins_pipe( pipe_slow );
10451 %}
10452 
10453 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D between registers: copies the long register src into the
        // XMM register dst with the assembler's movdq().
        // NOTE(review): the format string prints "movd" although the encoding
        // calls movdq -- confirm which mnemonic the listing should show.
10454   match(Set dst (MoveL2D src));
10455   effect(DEF dst, USE src);
10456   ins_cost(100);
10457   format %{ "movd    $dst,$src\t# MoveL2D" %}
10458   ins_encode %{
10459      __ movdq($dst$$XMMRegister, $src$$Register);
10460   %}
10461   ins_pipe( pipe_slow );
10462 %}
10463 

10464 // Fast clearing of an array
10465 // Small non-constant length ClearArray for non-AVX512 targets.
10466 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10467                   Universe dummy, rFlagsReg cr)
10468 %{
10469   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
10470   match(Set dummy (ClearArray cnt base));
10471   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































10472 
10473   format %{ $$template
10474     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10475     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10476     $$emit$$"jg      LARGE\n\t"
10477     $$emit$$"dec     rcx\n\t"
10478     $$emit$$"js      DONE\t# Zero length\n\t"
10479     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10480     $$emit$$"dec     rcx\n\t"
10481     $$emit$$"jge     LOOP\n\t"
10482     $$emit$$"jmp     DONE\n\t"
10483     $$emit$$"# LARGE:\n\t"
10484     if (UseFastStosb) {
10485        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10486        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10487     } else if (UseXMMForObjInit) {
10488        $$emit$$"mov     rdi,rax\n\t"
10489        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10490        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10491        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10499        $$emit$$"jl      L_tail\n\t"
10500        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10501        $$emit$$"add     0x20,rax\n\t"
10502        $$emit$$"sub     0x4,rcx\n\t"
10503        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10504        $$emit$$"add     0x4,rcx\n\t"
10505        $$emit$$"jle     L_end\n\t"
10506        $$emit$$"dec     rcx\n\t"
10507        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10508        $$emit$$"vmovq   xmm0,(rax)\n\t"
10509        $$emit$$"add     0x8,rax\n\t"
10510        $$emit$$"dec     rcx\n\t"
10511        $$emit$$"jge     L_sloop\n\t"
10512        $$emit$$"# L_end:\n\t"
10513     } else {
10514        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10515     }
10516     $$emit$$"# DONE"
10517   %}
10518   ins_encode %{
10519     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10520                  $tmp$$XMMRegister, false, knoreg);
10521   %}
10522   ins_pipe(pipe_slow);
10523 %}
10524 
10525 // Small non-constant length ClearArray for AVX512 targets.
10526 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10527                        Universe dummy, rFlagsReg cr)
10528 %{
10529   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
10530   match(Set dummy (ClearArray cnt base));
10531   ins_cost(125);
10532   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10533 
10534   format %{ $$template
10535     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10536     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10537     $$emit$$"jg      LARGE\n\t"
10538     $$emit$$"dec     rcx\n\t"
10539     $$emit$$"js      DONE\t# Zero length\n\t"
10540     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10541     $$emit$$"dec     rcx\n\t"
10542     $$emit$$"jge     LOOP\n\t"
10543     $$emit$$"jmp     DONE\n\t"
10544     $$emit$$"# LARGE:\n\t"
10545     if (UseFastStosb) {
10546        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10547        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10548     } else if (UseXMMForObjInit) {
10549        $$emit$$"mov     rdi,rax\n\t"
10550        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10551        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10552        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10560        $$emit$$"jl      L_tail\n\t"
10561        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10562        $$emit$$"add     0x20,rax\n\t"
10563        $$emit$$"sub     0x4,rcx\n\t"
10564        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10565        $$emit$$"add     0x4,rcx\n\t"
10566        $$emit$$"jle     L_end\n\t"
10567        $$emit$$"dec     rcx\n\t"
10568        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10569        $$emit$$"vmovq   xmm0,(rax)\n\t"
10570        $$emit$$"add     0x8,rax\n\t"
10571        $$emit$$"dec     rcx\n\t"
10572        $$emit$$"jge     L_sloop\n\t"
10573        $$emit$$"# L_end:\n\t"
10574     } else {
10575        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10576     }
10577     $$emit$$"# DONE"
10578   %}
10579   ins_encode %{
10580     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10581                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
10582   %}
10583   ins_pipe(pipe_slow);
10584 %}
10585 
10586 // Large non-constant length ClearArray for non-AVX512 targets.
      // Selected when UseAVX <= 2 and the ClearArray node reports is_large().
      // cnt and base use the rcx_RegL / rdi_RegP operand classes and are both
      // consumed and clobbered (USE_KILL); tmp is a scratch XMM register; zero
      // (rax_RegI) and the flags are killed. The format template mirrors the
      // UseFastStosb / UseXMMForObjInit / default choices for the listing only;
      // the code itself comes from clear_mem(), called with the boolean
      // argument set to true and knoreg in the mask-register position.
10587 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10588                         Universe dummy, rFlagsReg cr)
10589 %{
10590   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
10591   match(Set dummy (ClearArray cnt base));
10592   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































10593 
10594   format %{ $$template
10595     if (UseFastStosb) {
10596        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10597        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10598        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10599     } else if (UseXMMForObjInit) {
10600        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10601        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10602        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10603        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10604        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10605        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10606        $$emit$$"add     0x40,rax\n\t"
10607        $$emit$$"# L_zero_64_bytes:\n\t"
10608        $$emit$$"sub     0x8,rcx\n\t"
10609        $$emit$$"jge     L_loop\n\t"
10610        $$emit$$"add     0x4,rcx\n\t"
10611        $$emit$$"jl      L_tail\n\t"
10612        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10613        $$emit$$"add     0x20,rax\n\t"
10614        $$emit$$"sub     0x4,rcx\n\t"
10615        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10616        $$emit$$"add     0x4,rcx\n\t"
10617        $$emit$$"jle     L_end\n\t"
10618        $$emit$$"dec     rcx\n\t"
10619        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10620        $$emit$$"vmovq   xmm0,(rax)\n\t"
10621        $$emit$$"add     0x8,rax\n\t"
10622        $$emit$$"dec     rcx\n\t"
10623        $$emit$$"jge     L_sloop\n\t"
10624        $$emit$$"# L_end:\n\t"
10625     } else {
10626        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10627        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10628     }
10629   %}
10630   ins_encode %{
10631     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10632                  $tmp$$XMMRegister, true, knoreg);
10633   %}
10634   ins_pipe(pipe_slow);
10635 %}
10636 
10637 // Large non-constant length ClearArray for AVX512 targets.
      // Selected when UseAVX > 2 and the ClearArray node reports is_large().
      // Same operand roles as rep_stos_large, except that tmp uses legRegD and
      // an extra kReg temporary (ktmp) is reserved and handed to clear_mem()
      // where the non-EVEX variant passes knoreg. The format template is
      // listing text only; the code itself comes from clear_mem().
10638 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10639                              Universe dummy, rFlagsReg cr)
10640 %{
10641   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
10642   match(Set dummy (ClearArray cnt base));
10643   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10644 
10645   format %{ $$template
10646     if (UseFastStosb) {
10647        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10648        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10649        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10650     } else if (UseXMMForObjInit) {
10651        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10652        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10653        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10654        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10655        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10656        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10657        $$emit$$"add     0x40,rax\n\t"
10658        $$emit$$"# L_zero_64_bytes:\n\t"
10659        $$emit$$"sub     0x8,rcx\n\t"
10660        $$emit$$"jge     L_loop\n\t"
10661        $$emit$$"add     0x4,rcx\n\t"
10662        $$emit$$"jl      L_tail\n\t"
10663        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10664        $$emit$$"add     0x20,rax\n\t"
10665        $$emit$$"sub     0x4,rcx\n\t"
10666        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10667        $$emit$$"add     0x4,rcx\n\t"
10668        $$emit$$"jle     L_end\n\t"
10669        $$emit$$"dec     rcx\n\t"
10670        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10671        $$emit$$"vmovq   xmm0,(rax)\n\t"
10672        $$emit$$"add     0x8,rax\n\t"
10673        $$emit$$"dec     rcx\n\t"
10674        $$emit$$"jge     L_sloop\n\t"
10675        $$emit$$"# L_end:\n\t"
10676     } else {
10677        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10678        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10679     }
10680   %}
10681   ins_encode %{
10682     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10683                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
10684   %}
10685   ins_pipe(pipe_slow);
10686 %}
10687 
10688 // Small constant length ClearArray for AVX512 targets.
      // The count is an immediate (immL) rather than a register. Selected when
      // the ClearArray node is not is_large(), MaxVectorSize >= 32 and
      // VM_Version::supports_avx512vl() holds. base is only read; tmp, zero
      // and ktmp are scratch registers and the flags are killed. The constant
      // count is passed straight to clear_mem().
10689 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
10690 %{
10691   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
10692   match(Set dummy (ClearArray cnt base));

10693   ins_cost(100);
10694   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
10695   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
10696   ins_encode %{
10697    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
10698   %}
10699   ins_pipe(pipe_slow);
10700 %}
10701 
10702 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10703                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
10704 %{
        // StrComp for the StrIntrinsicNode::LL encoding (the format text calls
        // it "byte[]"), used only when VM_Version::supports_avx512vlbw() is
        // false. Both string pointers and both counts are consumed and
        // clobbered; tmp1 is a scratch XMM register; flags are killed. knoreg
        // is passed to string_compare() in the mask-register position.
10705   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
10706   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10707   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10708 
10709   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10710   ins_encode %{
10711     __ string_compare($str1$$Register, $str2$$Register,
10712                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10713                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
10714   %}
10715   ins_pipe( pipe_slow );
10716 %}
10717 

12478 
12479   ins_cost(300);
12480   format %{ "call_leaf,runtime " %}
12481   ins_encode(clear_avx, Java_To_Runtime(meth));
12482   ins_pipe(pipe_slow);
12483 %}
12484 
12485 // Call runtime without safepoint and with vector arguments
      // Unlike the neighbouring leaf-call instructs, the encoding here does not
      // list clear_avx before Java_To_Runtime; MachCallRuntimeNode::
      // ret_addr_offset() correspondingly adds no clear_avx_size() bytes for
      // Op_CallLeafVector.
12486 instruct CallLeafDirectVector(method meth)
12487 %{
12488   match(CallLeafVector);
12489   effect(USE meth);
12490 
12491   ins_cost(300);
12492   format %{ "call_leaf,vector " %}
12493   ins_encode(Java_To_Runtime(meth));
12494   ins_pipe(pipe_slow);
12495 %}
12496 
12497 // Call runtime without safepoint















12498 instruct CallLeafNoFPDirect(method meth)
12499 %{
        // Matches CallLeafNoFP. The encoding runs clear_avx first and then
        // Java_To_Runtime on the target method.

12500   match(CallLeafNoFP);
12501   effect(USE meth);
12502 
12503   ins_cost(300);
12504   format %{ "call_leaf_nofp,runtime " %}
12505   ins_encode(clear_avx, Java_To_Runtime(meth));
12506   ins_pipe(pipe_slow);
12507 %}
12508 
12509 // Return Instruction
12510 // Remove the return address & jump to it.
12511 // Notice: We always emit a nop after a ret to make sure there is room
12512 // for safepoint patching
12513 instruct Ret()
12514 %{
12515   match(Return);
12516 
12517   format %{ "ret" %}
12518   ins_encode %{
12519     __ ret(0);

  586 }
  587 
  588 // !!!!! Special hack to get all types of calls to specify the byte offset
  589 //       from the start of the call to the point where the return address
  590 //       will point.
      //
      // Static Java call: the result is the fixed 5-byte distance plus whatever
      // clear_avx_size() reports (the compute_padding() routines in this file
      // annotate that same amount as the vzeroupper to skip).
  591 int MachCallStaticJavaNode::ret_addr_offset()
  592 {
  593   int offset = 5; // 5 bytes from start of call to where return address points
  594   offset += clear_avx_size();
  595   return offset;
  596 }
  597 
  598 int MachCallDynamicJavaNode::ret_addr_offset()
  599 {
        // Dynamic Java call: a fixed 15 bytes, with the clear_avx_size() bytes
        // added on top. The matching compute_padding() skips 11 bytes described
        // as "movq instruction + call opcode byte".
  600   int offset = 15; // 15 bytes from start of call to where return address points
  601   offset += clear_avx_size();
  602   return offset;
  603 }
  604 
  605 int MachCallRuntimeNode::ret_addr_offset() {
        // Two shapes of runtime call are handled. With no entry point
        // recorded (_entry_point == nullptr) the call goes through a register
        // and the offset is 3 bytes, with no clear_avx_size() added. Otherwise
        // the movq/callq pair takes 13 bytes, plus the clear_avx_size() bytes
        // for every ideal opcode except Op_CallLeafVector.
  606   if (_entry_point == nullptr) {
  607     // CallLeafNoFPInDirect
  608     return 3; // callq (register)
  609   }
  610   int offset = 13; // movq r10,#addr; callq (r10)
  611   if (this->ideal_Opcode() != Op_CallLeafVector) {
  612     offset += clear_avx_size();
  613   }
  614   return offset;
  615 }
  616 
  617 //
  618 // Compute padding required for nodes which need alignment
  619 //
  620 
  621 // The address of the call instruction needs to be 4-byte aligned to
  622 // ensure that it does not span a cache line so that it can be patched.
      //
      // Returns the number of padding bytes to insert at current_offset: the
      // position reached after skipping clear_avx_size() bytes and one opcode
      // byte is rounded up with align_up() to alignment_required(), and the
      // distance to that rounded position is the result.
  623 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  624 {
  625   current_offset += clear_avx_size(); // skip vzeroupper
  626   current_offset += 1; // skip call opcode byte
  627   return align_up(current_offset, alignment_required()) - current_offset;
  628 }
  629 
  630 // The address of the call instruction needs to be 4-byte aligned to
  631 // ensure that it does not span a cache line so that it can be patched.
  632 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  633 {
  634   current_offset += clear_avx_size(); // skip vzeroupper
  635   current_offset += 11; // skip movq instruction + call opcode byte
  636   return align_up(current_offset, alignment_required()) - current_offset;

  822     st->print("\n\t");
  823     st->print("# stack alignment check");
  824 #endif
  825   }
  826   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
  827     st->print("\n\t");
  828     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  829     st->print("\n\t");
  830     st->print("je      fast_entry\t");
  831     st->print("\n\t");
  832     st->print("call    #nmethod_entry_barrier_stub\t");
  833     st->print("\n\tfast_entry:");
  834   }
  835   st->cr();
  836 }
  837 #endif
  838 
  839 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the prolog via verified_entry(C), adds the entry barrier for
        // non-stub compilations, binds the _verified_entry label, and then
        // updates bookkeeping on C->output().
  840   Compile* C = ra_->C;
  841 
  842   __ verified_entry(C);













  843 
        // Stub compilations (stub_function() != nullptr) get no entry barrier
        // here. ra_->C is the same object as the local C above.
  844   if (ra_->C->stub_function() == nullptr) {
  845     __ entry_barrier();
  846   }
  847 
        // The label is bound only when in_scratch_emit_size() is false.
  848   if (!Compile::current()->output()->in_scratch_emit_size()) {
  849     __ bind(*_verified_entry);
  850   }
  851 
        // Record the code offset reached once the entry sequence is emitted.
  852   C->output()->set_frame_complete(__ offset());
  853 
  854   if (C->has_mach_constant_base_node()) {
  855     // NOTE: We set the table base offset here because users might be
  856     // emitted before MachConstantBaseNode.
  857     ConstantTable& constant_table = C->output()->constant_table();
  858     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  859   }
  860 }
  861 






  862 int MachPrologNode::reloc() const
  863 {
        // NOTE(review): the trailing comment says "large enough" yet the value
        // is 0, while MachEpilogNode::reloc() returns 2 with the same comment.
        // Presumably this is a relocation-entry estimate; confirm 0 is intended.
  864   return 0; // a large enough number
  865 }
  866 
  867 //=============================================================================
  868 #ifndef PRODUCT
  869 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  870 {
  871   Compile* C = ra_->C;
  872   if (generate_vzeroupper(C)) {
  873     st->print("vzeroupper");
  874     st->cr(); st->print("\t");
  875   }
  876 
  877   int framesize = C->output()->frame_size_in_bytes();
  878   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  879   // Remove word for return adr already pushed
  880   // and RBP
  881   framesize -= 2*wordSize;

  888   st->print_cr("popq    rbp");
  889   if (do_polling() && C->is_method_compilation()) {
  890     st->print("\t");
  891     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  892                  "ja      #safepoint_stub\t"
  893                  "# Safepoint: poll for GC");
  894   }
  895 }
  896 #endif
  897 
  898 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  899 {
        // Emits the epilog: optional vzeroupper, frame removal through
        // remove_frame(), optional reserved-stack check, and the return
        // safepoint poll.
  900   Compile* C = ra_->C;
  901 
  902   if (generate_vzeroupper(C)) {
  903     // Clear upper bits of YMM registers when current compiled code uses
  904     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  905     __ vzeroupper();
  906   }
  907 
  908   // Subtract two words to account for return address and rbp
  909   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
        // Frame teardown is delegated to the assembler, which is also told
        // whether the compilation needs stack repair.
  910   __ remove_frame(initial_framesize, C->needs_stack_repair());










  911 
  912   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  913     __ reserved_stack_check();
  914   }
  915 
  916   if (do_polling() && C->is_method_compilation()) {
          // When in_scratch_emit_size() is true no stub is allocated and the
          // poll targets the local dummy label; otherwise a
          // C2SafepointPollStub is created at the current offset, registered
          // with C->output(), and its entry becomes the poll target.
  917     Label dummy_label;
  918     Label* code_stub = &dummy_label;
  919     if (!C->output()->in_scratch_emit_size()) {
  920       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  921       C->output()->add_stub(stub);
  922       code_stub = &stub->entry();
  923     }
  924     __ relocate(relocInfo::poll_return_type);
  925     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  926   }
  927 }
  928 






  929 int MachEpilogNode::reloc() const
  930 {
        // Reports a constant 2 for the epilog node.
        // presumably an upper bound on the relocation entries the epilog may
        // emit -- TODO confirm against the callers of reloc().
  931   return 2; // a large enough number
  932 }
  933 
  934 const Pipeline* MachEpilogNode::pipeline() const
  935 {
        // The epilog has no pipeline description of its own; it reports
        // whatever MachNode::pipeline_class() returns.
  936   return MachNode::pipeline_class();
  937 }
  938 
  939 //=============================================================================
  940 
  941 enum RC {
        // NOTE(review): the names suggest a coarse register/location class
        // (invalid, general-purpose, opmask "k" register, floating-point/vector,
        // stack slot) -- the users of this enum are not visible here; confirm.
  942   rc_bad,
  943   rc_int,
  944   rc_kreg,
  945   rc_float,
  946   rc_stack
  947 };
  948 

 1506   int reg = ra_->get_reg_first(this);
 1507   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1508             Matcher::regName[reg], offset);
 1509 }
 1510 #endif
 1511 
 1512 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1513 {
        // Emits "lea reg, [rsp + offset]".
        // offset: stack offset of the first element of this node's input mask 0.
        // reg:    the encoding the register allocator assigned to this node.
 1514   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1515   int reg = ra_->get_encode(this);
 1516 
 1517   __ lea(as_Register(reg), Address(rsp, offset));
 1518 }
 1519 
 1520 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1521 {
        // Returns 5 when the stack offset is below 0x80 and 8 otherwise.
        // presumably the byte size of the lea produced by emit() with an 8-bit
        // versus 32-bit displacement -- TODO confirm against the assembler.
 1522   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1523   return (offset < 0x80) ? 5 : 8; // REX
 1524 }
 1525 
 1526 //=============================================================================
 1527 #ifndef PRODUCT
 1528 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1529 {
        // Non-PRODUCT builds only: prints just the node name followed by a
        // newline; the emitted instructions are not listed.
 1530   st->print_cr("MachVEPNode");
 1531 }
 1532 #endif
 1533 
 1534 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1535 {
        // Emits one of two entry sequences, then pads with nops:
        //  - !_verified: an inline cache check (ic_check(1)).
        //  - _verified:  unpack inline type arguments, emit the verified-entry
        //                code and jump to _verified_entry.
        // insts_size records the buffer size on entry so the number of bytes
        // emitted here can be computed for the trailing alignment padding.
 1536   CodeBuffer* cbuf = masm->code();
 1537   uint insts_size = cbuf->insts_size();
 1538   if (!_verified) {
 1539     __ ic_check(1);
 1540   } else {
 1541     // TODO 8284443 Avoid creation of temporary frame
 1542     if (ra_->C->stub_function() == nullptr) {
            // Non-stub compilations: emit verified_entry with a stack increment
            // of 0 around the entry barrier and remove that frame right away.
 1543       __ verified_entry(ra_->C, 0);
 1544       __ entry_barrier();
 1545       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1546       __ remove_frame(initial_framesize, false);
 1547     }
 1548     // Unpack inline type args passed as oop and then jump to
 1549     // the verified entry point (skipping the unverified entry).
 1550     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1551     // Emit code for verified entry and save increment for stack repair on return
 1552     __ verified_entry(ra_->C, sp_inc);
 1553     if (Compile::current()->output()->in_scratch_emit_size()) {
            // While in_scratch_emit_size() is true the jump targets a local
            // label that is never bound, instead of _verified_entry.
 1554       Label dummy_verified_entry;
 1555       __ jmp(dummy_verified_entry);
 1556     } else {
 1557       __ jmp(*_verified_entry);
 1558     }
 1559   }
 1560   /* WARNING these NOPs are critical so that verified entry point is properly
 1561      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
        // Bytes emitted by this node, rounded up to the next multiple of 4.
 1562   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 1563   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1564   if (nops_cnt > 0) {
 1565     __ nop(nops_cnt);
 1566   }
 1567 }
 1568 
 1569 //=============================================================================
 1570 #ifndef PRODUCT
 1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1572 {
        // Non-PRODUCT builds only: prints a textual rendering of the inline
        // cache check. With UseCompressedClassPointers the listing uses 32-bit
        // movl/cmpl, otherwise 64-bit movq/cmpq; both end with the jne to the
        // ic-miss stub.
 1573   if (UseCompressedClassPointers) {
 1574     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1575     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1576   } else {
          // This branch is the uncompressed case, so the annotation must not
          // claim a compressed klass.
 1577     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1578     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1579   }
 1580   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1581 }
 1582 #endif
 1583 
 1584 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1585 {
        // Delegates to the macro assembler's ic_check, passing
        // InteriorEntryAlignment as its argument.
 1586   __ ic_check(InteriorEntryAlignment);
 1587 }
 1588 







 1589 //=============================================================================
 1590 
 1591 bool Matcher::supports_vector_calling_convention(void) {
        // True only when both EnableVectorSupport and UseVectorStubs are set.
 1592   if (EnableVectorSupport && UseVectorStubs) {
 1593     return true;
 1594   }
 1595   return false;
 1596 }
 1597 
 1598 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Returns the register pair for a vector return value. The low half is
        // always XMM0_num; the high half depends on ideal_reg:
        //   Op_VecX -> XMM0d_num, Op_VecY -> XMM0h_num, Op_VecZ -> XMM0p_num,
        //   anything else -> XMM0b_num.
        // Asserts that EnableVectorSupport and UseVectorStubs are both set.
 1599   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1600   int lo = XMM0_num;
 1601   int hi = XMM0b_num;
 1602   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1603   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1604   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1605   return OptoRegPair(hi, lo);
 1606 }
 1607 
 1608 // Is this branch offset short enough that a short branch can be used?

 3051   %}
 3052 %}
 3053 
 3054 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Matches AddP(AddP(reg, LShiftL(ConvI2L(idx), scale)), off) and exposes it
      // as base=$reg, index=$idx, scale=$scale, disp=$off.
      // The predicate walks n->in(2)->in(3)->in(1) and requires the lower bound
      // (_lo) of that node's long type to be >= 0.
      // presumably that node is the ConvI2L of the index -- TODO confirm.
 3055 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3056 %{
 3057   constraint(ALLOC_IN_RC(ptr_reg));
 3058   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3059   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3060 
 3061   op_cost(10);
 3062   format %{"[$reg + $off + $idx << $scale]" %}
 3063   interface(MEMORY_INTER) %{
 3064     base($reg);
 3065     index($idx);
 3066     scale($scale);
 3067     disp($off);
 3068   %}
 3069 %}
 3070 
 3071 // Indirect Narrow Oop Operand
      // Matches DecodeN(reg) as a memory operand with base R12 (encoding 0xc),
      // index $reg, scale 3 and displacement 0. Only available when
      // UseCompressedOops is set and CompressedOops::shift() equals
      // Address::times_8.
 3072 operand indCompressedOop(rRegN reg) %{
 3073   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3074   constraint(ALLOC_IN_RC(ptr_reg));
 3075   match(DecodeN reg);
 3076 
 3077   op_cost(10);
 3078   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3079   interface(MEMORY_INTER) %{
 3080     base(0xc); // R12
 3081     index($reg);
 3082     scale(0x3);
 3083     disp(0x0);
 3084   %}
 3085 %}
 3086 
 3087 // Indirect Narrow Oop Plus Offset Operand
 3088 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3089 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Matches AddP(DecodeN(reg), off) as a memory operand with base R12
      // (encoding 0xc), index $reg, scale 3 and displacement $off. Only available
      // when UseCompressedOops is set and CompressedOops::shift() equals
      // Address::times_8.
 3090 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 3091   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3092   constraint(ALLOC_IN_RC(ptr_reg));
 3093   match(AddP (DecodeN reg) off);
 3094 
 3095   op_cost(10);
 3096   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3097   interface(MEMORY_INTER) %{
 3098     base(0xc); // R12
 3099     index($reg);
 3100     scale(0x3);
 3101     disp($off);
 3102   %}
 3103 %}
 3104 
 3105 // Indirect Memory Operand
 3106 operand indirectNarrow(rRegN reg)

 3413     equal(0x4, "e");
 3414     not_equal(0x5, "ne");
 3415     less(0x2, "b");
 3416     greater_equal(0x3, "ae");
 3417     less_equal(0x6, "be");
 3418     greater(0x7, "a");
 3419     overflow(0x0, "o");
 3420     no_overflow(0x1, "no");
 3421   %}
 3422 %}
 3423 
 3424 //----------OPERAND CLASSES----------------------------------------------------
 3425 // Operand Classes are groups of operands that are used to simplify
 3426 // instruction definitions by not requiring the AD writer to specify separate
 3427 // instructions for every form of operand when the instruction accepts
 3428 // multiple operand types with the same basic encoding and format.  The classic
 3429 // case of this is memory operands.
 3430 
      // "memory" groups every addressing-mode operand listed below: the plain
      // forms, the two compressed-oop forms, and the *Narrow forms.
 3431 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3432                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3433                indCompressedOop, indCompressedOopOffset,
 3434                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3435                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3436                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3437 
 3438 //----------PIPELINE-----------------------------------------------------------
 3439 // Rules which define the behavior of the target architectures pipeline.
 3440 pipeline %{
 3441 
 3442 //----------ATTRIBUTES---------------------------------------------------------
 3443 attributes %{
 3444   variable_size_instructions;        // Variable size instructions
 3445   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3446   instruction_unit_size = 1;         // Instruction size is counted in units of 1 byte
 3447   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3448   instruction_fetch_units = 1;       // of 16 bytes
 3449 
 3450   // List of nop instructions
 3451   nops( MachNop );
 3452 %}
 3453 

 5921   format %{ "MEMBAR-storestore (empty encoding)" %}
 5922   ins_encode( );
 5923   ins_pipe(empty);
 5924 %}
 5925 
 5926 //----------Move Instructions--------------------------------------------------
 5927 
 5928 instruct castX2P(rRegP dst, rRegL src)
 5929 %{
        // CastX2P (long -> pointer): a plain register move, skipped entirely
        // when dst and src were allocated to the same register.
 5930   match(Set dst (CastX2P src));
 5931 
 5932   format %{ "movq    $dst, $src\t# long->ptr" %}
 5933   ins_encode %{
 5934     if ($dst$$reg != $src$$reg) {
 5935       __ movptr($dst$$Register, $src$$Register);
 5936     }
 5937   %}
 5938   ins_pipe(ialu_reg_reg); // XXX
 5939 %}
 5940 
 5941 instruct castN2X(rRegL dst, rRegN src)
 5942 %{
        // CastP2X whose source operand is a narrow-oop register (rRegN): a plain
        // register move, skipped when dst and src share a register.
        // NOTE(review): the format text is identical to castP2X ("ptr -> long"),
        // so the two rules cannot be told apart in printed output -- confirm
        // whether that is intended.
 5943   match(Set dst (CastP2X src));
 5944 
 5945   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5946   ins_encode %{
 5947     if ($dst$$reg != $src$$reg) {
 5948       __ movptr($dst$$Register, $src$$Register);
 5949     }
 5950   %}
 5951   ins_pipe(ialu_reg_reg); // XXX
 5952 %}
 5953 
 5954 instruct castP2X(rRegL dst, rRegP src)
 5955 %{
        // CastP2X (pointer -> long): a plain register move, skipped entirely
        // when dst and src were allocated to the same register.
 5956   match(Set dst (CastP2X src));
 5957 
 5958   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5959   ins_encode %{
 5960     if ($dst$$reg != $src$$reg) {
 5961       __ movptr($dst$$Register, $src$$Register);
 5962     }
 5963   %}
 5964   ins_pipe(ialu_reg_reg); // XXX
 5965 %}
 5966 
 5967 // Convert oop into int for vectors alignment masking
 5968 instruct convP2I(rRegI dst, rRegP src)
 5969 %{
 5970   match(Set dst (ConvL2I (CastP2X src)));
 5971 
 5972   format %{ "movl    $dst, $src\t# ptr -> int" %}
 5973   ins_encode %{

10482   effect(DEF dst, USE src);
10483   ins_cost(100);
10484   format %{ "movd    $dst,$src\t# MoveI2F" %}
10485   ins_encode %{
10486     __ movdl($dst$$XMMRegister, $src$$Register);
10487   %}
10488   ins_pipe( pipe_slow );
10489 %}
10490 
10491 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D between registers: copies the long register $src into the
        // double register $dst via movdq.
        // NOTE(review): the format text says "movd" while the encoding calls
        // movdq -- confirm which mnemonic the listing should show.
10492   match(Set dst (MoveL2D src));
10493   effect(DEF dst, USE src);
10494   ins_cost(100);
10495   format %{ "movd    $dst,$src\t# MoveL2D" %}
10496   ins_encode %{
10497      __ movdq($dst$$XMMRegister, $src$$Register);
10498   %}
10499   ins_pipe( pipe_slow );
10500 %}
10501 
10502 
10503 // Fast clearing of an array
10504 // Small non-constant length ClearArray for non-AVX512 targets.
      // Selected for ClearArray nodes that are neither is_large() nor
      // word_copy_only() when UseAVX <= 2. cnt (rcx), base (rdi) and val (rax)
      // are consumed and killed, tmp is a scratch XMM register and the flags are
      // killed. The encoding is a single clear_mem call whose two trailing
      // booleans are (false, false); presumably is_large / word_copy_only --
      // confirm against clear_mem's signature.
      // NOTE(review): the format template below is listing text only and names
      // fixed registers (rax, xmm0) in places -- confirm it still mirrors what
      // clear_mem emits.
10505 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10506                   Universe dummy, rFlagsReg cr)
10507 %{
10508   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10509   match(Set dummy (ClearArray (Binary cnt base) val));
10510   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10511 
10512   format %{ $$template
10513     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10514     $$emit$$"jg      LARGE\n\t"
10515     $$emit$$"dec     rcx\n\t"
10516     $$emit$$"js      DONE\t# Zero length\n\t"
10517     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10518     $$emit$$"dec     rcx\n\t"
10519     $$emit$$"jge     LOOP\n\t"
10520     $$emit$$"jmp     DONE\n\t"
10521     $$emit$$"# LARGE:\n\t"
10522     if (UseFastStosb) {
10523        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10524        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10525     } else if (UseXMMForObjInit) {
10526        $$emit$$"movdq   $tmp, $val\n\t"
10527        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10528        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10529        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10530        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10531        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10532        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10533        $$emit$$"add     0x40,rax\n\t"
10534        $$emit$$"# L_zero_64_bytes:\n\t"
10535        $$emit$$"sub     0x8,rcx\n\t"
10536        $$emit$$"jge     L_loop\n\t"
10537        $$emit$$"add     0x4,rcx\n\t"
10538        $$emit$$"jl      L_tail\n\t"
10539        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10540        $$emit$$"add     0x20,rax\n\t"
10541        $$emit$$"sub     0x4,rcx\n\t"
10542        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10543        $$emit$$"add     0x4,rcx\n\t"
10544        $$emit$$"jle     L_end\n\t"
10545        $$emit$$"dec     rcx\n\t"
10546        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10547        $$emit$$"vmovq   xmm0,(rax)\n\t"
10548        $$emit$$"add     0x8,rax\n\t"
10549        $$emit$$"dec     rcx\n\t"
10550        $$emit$$"jge     L_sloop\n\t"
10551        $$emit$$"# L_end:\n\t"
10552     } else {
10553        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10554     }
10555     $$emit$$"# DONE"
10556   %}
10557   ins_encode %{
10558     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10559                  $tmp$$XMMRegister, false, false);
10560   %}
10561   ins_pipe(pipe_slow);
10562 %}
10563 
10564 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10565                             Universe dummy, rFlagsReg cr)
10566 %{
        // Small non-constant length ClearArray for UseAVX <= 2 when the node
        // reports word_copy_only(). The operands and effects match the plain
        // small rule; the format template has no UseFastStosb (byte store)
        // branch. The encoding is a single clear_mem call whose two trailing
        // booleans are (false, true); presumably is_large / word_copy_only --
        // confirm against clear_mem's signature.
10567   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10568   match(Set dummy (ClearArray (Binary cnt base) val));
10569   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10570 
10571   format %{ $$template
10572     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10573     $$emit$$"jg      LARGE\n\t"
10574     $$emit$$"dec     rcx\n\t"
10575     $$emit$$"js      DONE\t# Zero length\n\t"
10576     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10577     $$emit$$"dec     rcx\n\t"
10578     $$emit$$"jge     LOOP\n\t"
10579     $$emit$$"jmp     DONE\n\t"
10580     $$emit$$"# LARGE:\n\t"
10581     if (UseXMMForObjInit) {
10582        $$emit$$"movdq   $tmp, $val\n\t"
10583        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10584        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10585        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10586        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10587        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10588        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10589        $$emit$$"add     0x40,rax\n\t"
10590        $$emit$$"# L_zero_64_bytes:\n\t"
10591        $$emit$$"sub     0x8,rcx\n\t"
10592        $$emit$$"jge     L_loop\n\t"
10593        $$emit$$"add     0x4,rcx\n\t"
10594        $$emit$$"jl      L_tail\n\t"
10595        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10596        $$emit$$"add     0x20,rax\n\t"
10597        $$emit$$"sub     0x4,rcx\n\t"
10598        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10599        $$emit$$"add     0x4,rcx\n\t"
10600        $$emit$$"jle     L_end\n\t"
10601        $$emit$$"dec     rcx\n\t"
10602        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10603        $$emit$$"vmovq   xmm0,(rax)\n\t"
10604        $$emit$$"add     0x8,rax\n\t"
10605        $$emit$$"dec     rcx\n\t"
10606        $$emit$$"jge     L_sloop\n\t"
10607        $$emit$$"# L_end:\n\t"
10608     } else {
10609        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10610     }
10611     $$emit$$"# DONE"
10612   %}
10613   ins_encode %{
10614     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10615                  $tmp$$XMMRegister, false, true);
10616   %}
10617   ins_pipe(pipe_slow);
10618 %}
10619 
10620 // Small non-constant length ClearArray for AVX512 targets.
10621 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10622                        Universe dummy, rFlagsReg cr)
10623 %{
10624   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10625   match(Set dummy (ClearArray (Binary cnt base) val));
10626   ins_cost(125);
10627   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10628 
10629   format %{ $$template
10630     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10631     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10632     $$emit$$"jg      LARGE\n\t"
10633     $$emit$$"dec     rcx\n\t"
10634     $$emit$$"js      DONE\t# Zero length\n\t"
10635     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10636     $$emit$$"dec     rcx\n\t"
10637     $$emit$$"jge     LOOP\n\t"
10638     $$emit$$"jmp     DONE\n\t"
10639     $$emit$$"# LARGE:\n\t"
10640     if (UseFastStosb) {
10641        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10642        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10643     } else if (UseXMMForObjInit) {
10644        $$emit$$"mov     rdi,rax\n\t"
10645        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10646        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10647        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10655        $$emit$$"jl      L_tail\n\t"
10656        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10657        $$emit$$"add     0x20,rax\n\t"
10658        $$emit$$"sub     0x4,rcx\n\t"
10659        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10660        $$emit$$"add     0x4,rcx\n\t"
10661        $$emit$$"jle     L_end\n\t"
10662        $$emit$$"dec     rcx\n\t"
10663        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10664        $$emit$$"vmovq   xmm0,(rax)\n\t"
10665        $$emit$$"add     0x8,rax\n\t"
10666        $$emit$$"dec     rcx\n\t"
10667        $$emit$$"jge     L_sloop\n\t"
10668        $$emit$$"# L_end:\n\t"
10669     } else {
10670        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10671     }
10672     $$emit$$"# DONE"
10673   %}
10674   ins_encode %{
10675     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10676                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
10677   %}
10678   ins_pipe(pipe_slow);
10679 %}
10680 
10681 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10682                                  Universe dummy, rFlagsReg cr)

10683 %{
10684   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10685   match(Set dummy (ClearArray (Binary cnt base) val));
10686   ins_cost(125);
10687   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10688 
10689   format %{ $$template
10690     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10691     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10692     $$emit$$"jg      LARGE\n\t"
10693     $$emit$$"dec     rcx\n\t"
10694     $$emit$$"js      DONE\t# Zero length\n\t"
10695     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10696     $$emit$$"dec     rcx\n\t"
10697     $$emit$$"jge     LOOP\n\t"
10698     $$emit$$"jmp     DONE\n\t"
10699     $$emit$$"# LARGE:\n\t"
10700     if (UseFastStosb) {
10701        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10702        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10703     } else if (UseXMMForObjInit) {
10704        $$emit$$"mov     rdi,rax\n\t"
10705        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10706        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10707        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10715        $$emit$$"jl      L_tail\n\t"
10716        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10717        $$emit$$"add     0x20,rax\n\t"
10718        $$emit$$"sub     0x4,rcx\n\t"
10719        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10720        $$emit$$"add     0x4,rcx\n\t"
10721        $$emit$$"jle     L_end\n\t"
10722        $$emit$$"dec     rcx\n\t"
10723        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10724        $$emit$$"vmovq   xmm0,(rax)\n\t"
10725        $$emit$$"add     0x8,rax\n\t"
10726        $$emit$$"dec     rcx\n\t"
10727        $$emit$$"jge     L_sloop\n\t"
10728        $$emit$$"# L_end:\n\t"
10729     } else {
10730        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10731     }
10732     $$emit$$"# DONE"
10733   %}
10734   ins_encode %{
10735     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10736                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
10737   %}
10738   ins_pipe(pipe_slow);
10739 %}
10740 
10741 // Large non-constant length ClearArray for non-AVX512 targets.
      // Selected for ClearArray nodes that are is_large() but not
      // word_copy_only() when UseAVX <= 2. cnt (rcx), base (rdi) and val (rax)
      // are consumed and killed, tmp is a scratch XMM register and the flags are
      // killed. The format template has no short-length prelude. The encoding is
      // a single clear_mem call whose two trailing booleans are (true, false);
      // presumably is_large / word_copy_only -- confirm against clear_mem's
      // signature.
10742 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10743                         Universe dummy, rFlagsReg cr)
10744 %{
10745   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10746   match(Set dummy (ClearArray (Binary cnt base) val));
10747   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10748 
10749   format %{ $$template
10750     if (UseFastStosb) {
10751        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10752        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10753     } else if (UseXMMForObjInit) {
10754        $$emit$$"movdq   $tmp, $val\n\t"
10755        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10756        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10757        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10758        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10759        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10760        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10761        $$emit$$"add     0x40,rax\n\t"
10762        $$emit$$"# L_zero_64_bytes:\n\t"
10763        $$emit$$"sub     0x8,rcx\n\t"
10764        $$emit$$"jge     L_loop\n\t"
10765        $$emit$$"add     0x4,rcx\n\t"
10766        $$emit$$"jl      L_tail\n\t"
10767        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10768        $$emit$$"add     0x20,rax\n\t"
10769        $$emit$$"sub     0x4,rcx\n\t"
10770        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10771        $$emit$$"add     0x4,rcx\n\t"
10772        $$emit$$"jle     L_end\n\t"
10773        $$emit$$"dec     rcx\n\t"
10774        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10775        $$emit$$"vmovq   xmm0,(rax)\n\t"
10776        $$emit$$"add     0x8,rax\n\t"
10777        $$emit$$"dec     rcx\n\t"
10778        $$emit$$"jge     L_sloop\n\t"
10779        $$emit$$"# L_end:\n\t"
10780     } else {
10781        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10782     }
10783   %}
10784   ins_encode %{
10785     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10786                  $tmp$$XMMRegister, true, false);
10787   %}
10788   ins_pipe(pipe_slow);
10789 %}
10790 
10791 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10792                                   Universe dummy, rFlagsReg cr)
10793 %{
        // Large non-constant length ClearArray for UseAVX <= 2 when the node
        // reports word_copy_only(). The format template has no UseFastStosb
        // (byte store) branch. The encoding is a single clear_mem call whose two
        // trailing booleans are (true, true); presumably is_large /
        // word_copy_only -- confirm against clear_mem's signature.
10794   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10795   match(Set dummy (ClearArray (Binary cnt base) val));
10796   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10797 
10798   format %{ $$template
10799     if (UseXMMForObjInit) {
10800        $$emit$$"movdq   $tmp, $val\n\t"
10801        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10802        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10803        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10804        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10805        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10806        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10807        $$emit$$"add     0x40,rax\n\t"
10808        $$emit$$"# L_zero_64_bytes:\n\t"
10809        $$emit$$"sub     0x8,rcx\n\t"
10810        $$emit$$"jge     L_loop\n\t"
10811        $$emit$$"add     0x4,rcx\n\t"
10812        $$emit$$"jl      L_tail\n\t"
10813        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10814        $$emit$$"add     0x20,rax\n\t"
10815        $$emit$$"sub     0x4,rcx\n\t"
10816        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10817        $$emit$$"add     0x4,rcx\n\t"
10818        $$emit$$"jle     L_end\n\t"
10819        $$emit$$"dec     rcx\n\t"
10820        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10821        $$emit$$"vmovq   xmm0,(rax)\n\t"
10822        $$emit$$"add     0x8,rax\n\t"
10823        $$emit$$"dec     rcx\n\t"
10824        $$emit$$"jge     L_sloop\n\t"
10825        $$emit$$"# L_end:\n\t"
10826     } else {
10827        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10828     }
10829   %}
10830   ins_encode %{
10831     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10832                  $tmp$$XMMRegister, true, true);
10833   %}
10834   ins_pipe(pipe_slow);
10835 %}
10836 
10837 // Large non-constant length ClearArray for AVX512 targets.
      // Selected for ClearArray nodes that are is_large() but not
      // word_copy_only() when UseAVX > 2. In addition to the XMM scratch register
      // it reserves an opmask scratch register (ktmp) that is passed to
      // clear_mem; the two trailing booleans are (true, false), presumably
      // is_large / word_copy_only -- confirm against clear_mem's signature.
      // NOTE(review): the format template prints "xorq rax, rax" and hard-codes
      // ymm0 although val (rax) is an input passed to clear_mem and the scratch
      // register is $tmp -- the listing text looks stale; confirm.
10838 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10839                              Universe dummy, rFlagsReg cr)
10840 %{
10841   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10842   match(Set dummy (ClearArray (Binary cnt base) val));
10843   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10844 
10845   format %{ $$template
10846     if (UseFastStosb) {
10847        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10848        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10849        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10850     } else if (UseXMMForObjInit) {
10851        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10852        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10853        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10854        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10855        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10856        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10857        $$emit$$"add     0x40,rax\n\t"
10858        $$emit$$"# L_zero_64_bytes:\n\t"
10859        $$emit$$"sub     0x8,rcx\n\t"
10860        $$emit$$"jge     L_loop\n\t"
10861        $$emit$$"add     0x4,rcx\n\t"
10862        $$emit$$"jl      L_tail\n\t"
10863        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10864        $$emit$$"add     0x20,rax\n\t"
10865        $$emit$$"sub     0x4,rcx\n\t"
10866        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10867        $$emit$$"add     0x4,rcx\n\t"
10868        $$emit$$"jle     L_end\n\t"
10869        $$emit$$"dec     rcx\n\t"
10870        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10871        $$emit$$"vmovq   xmm0,(rax)\n\t"
10872        $$emit$$"add     0x8,rax\n\t"
10873        $$emit$$"dec     rcx\n\t"
10874        $$emit$$"jge     L_sloop\n\t"
10875        $$emit$$"# L_end:\n\t"
10876     } else {
10877        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10878        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10879     }
10880   %}
10881   ins_encode %{
10882     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10883                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
10884   %}
10885   ins_pipe(pipe_slow);
10886 %}
10887 
10888 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10889                                        Universe dummy, rFlagsReg cr)

10890 %{
        // Large non-constant length ClearArray for UseAVX > 2 when the node
        // reports word_copy_only(). Reserves an XMM and an opmask scratch
        // register; the encoding is a single clear_mem call whose two trailing
        // booleans are (true, true), presumably is_large / word_copy_only --
        // confirm against clear_mem's signature.
        // NOTE(review): the format template still lists a UseFastStosb byte-store
        // path and prints "xorq rax, rax" although val (rax) is an input passed
        // to clear_mem -- the listing text looks stale; confirm.
10891   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10892   match(Set dummy (ClearArray (Binary cnt base) val));
10893   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10894 
10895   format %{ $$template
10896     if (UseFastStosb) {
10897        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10898        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10899        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10900     } else if (UseXMMForObjInit) {
10901        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10902        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10903        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10904        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10905        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10906        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10907        $$emit$$"add     0x40,rax\n\t"
10908        $$emit$$"# L_zero_64_bytes:\n\t"
10909        $$emit$$"sub     0x8,rcx\n\t"
10910        $$emit$$"jge     L_loop\n\t"
10911        $$emit$$"add     0x4,rcx\n\t"
10912        $$emit$$"jl      L_tail\n\t"
10913        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10914        $$emit$$"add     0x20,rax\n\t"
10915        $$emit$$"sub     0x4,rcx\n\t"
10916        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10917        $$emit$$"add     0x4,rcx\n\t"
10918        $$emit$$"jle     L_end\n\t"
10919        $$emit$$"dec     rcx\n\t"
10920        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10921        $$emit$$"vmovq   xmm0,(rax)\n\t"
10922        $$emit$$"add     0x8,rax\n\t"
10923        $$emit$$"dec     rcx\n\t"
10924        $$emit$$"jge     L_sloop\n\t"
10925        $$emit$$"# L_end:\n\t"
10926     } else {
10927        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10928        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10929     }
10930   %}
10931   ins_encode %{
10932     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10933                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
10934   %}
10935   ins_pipe(pipe_slow);
10936 %}
10937 
10938 // Small constant length ClearArray for AVX512 targets.
      // Selected when the count is an immediate (immL), the node is neither
      // is_large() nor word_copy_only(), MaxVectorSize >= 32 and the CPU reports
      // avx512vl. Only val (rax) and the flags are killed; base may live in any
      // pointer register. The constant count is handed directly to clear_mem.
10939 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
10940 %{
10941   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
10942             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
10943   match(Set dummy (ClearArray (Binary cnt base) val));
10944   ins_cost(100);
10945   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
10946   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
10947   ins_encode %{
10948     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
10949   %}
10950   ins_pipe(pipe_slow);
10951 %}
10952 
10953 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10954                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
10955 %{
        // StrComp with encoding LL on CPUs without avx512vlbw. Both string
        // pointers and both counts are consumed and killed, tmp1 is a scratch
        // XMM register and the flags are killed. knoreg is passed as the last
        // argument of string_compare; presumably meaning "no opmask register"
        // -- TODO confirm.
10956   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
10957   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10958   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10959 
10960   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10961   ins_encode %{
10962     __ string_compare($str1$$Register, $str2$$Register,
10963                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10964                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
10965   %}
10966   ins_pipe( pipe_slow );
10967 %}
10968 

12729 
12730   ins_cost(300);
12731   format %{ "call_leaf,runtime " %}
12732   ins_encode(clear_avx, Java_To_Runtime(meth));
12733   ins_pipe(pipe_slow);
12734 %}
12735 
12736 // Call runtime without safepoint and with vector arguments
      // The encoding is Java_To_Runtime alone, with no clear_avx step in front.
      // This agrees with MachCallRuntimeNode::ret_addr_offset(), which adds
      // clear_avx_size() only when the ideal opcode is not Op_CallLeafVector.
12737 instruct CallLeafDirectVector(method meth)
12738 %{
12739   match(CallLeafVector);
12740   effect(USE meth);
12741 
12742   ins_cost(300);
12743   format %{ "call_leaf,vector " %}
12744   ins_encode(Java_To_Runtime(meth));
12745   ins_pipe(pipe_slow);
12746 %}
12747 
12748 // Call runtime without safepoint
12749 // entry point is null, target holds the address to call
      // Selected for CallLeafNoFP nodes whose entry_point() is nullptr; the
      // encoding is a single indirect call through the $target register.
12750 instruct CallLeafNoFPInDirect(rRegP target)
12751 %{
12752   predicate(n->as_Call()->entry_point() == nullptr);
12753   match(CallLeafNoFP target);
12754 
12755   ins_cost(300);
12756   format %{ "call_leaf_nofp,runtime indirect " %}
12757   ins_encode %{
12758      __ call($target$$Register);
12759   %}
12760 
12761   ins_pipe(pipe_slow);
12762 %}
12763 
12764 instruct CallLeafNoFPDirect(method meth)
12765 %{
        // CallLeafNoFP with a known (non-null) entry point: the encoding runs
        // clear_avx first and then Java_To_Runtime on the target method.
12766   predicate(n->as_Call()->entry_point() != nullptr);
12767   match(CallLeafNoFP);
12768   effect(USE meth);
12769 
12770   ins_cost(300);
12771   format %{ "call_leaf_nofp,runtime " %}
12772   ins_encode(clear_avx, Java_To_Runtime(meth));
12773   ins_pipe(pipe_slow);
12774 %}
12775 
12776 // Return Instruction
12777 // Remove the return address & jump to it.
12778 // Notice: We always emit a nop after a ret to make sure there is room
12779 // for safepoint patching
12780 instruct Ret()
12781 %{
12782   match(Return);
12783 
12784   format %{ "ret" %}
12785   ins_encode %{
12786     __ ret(0);
< prev index next >