< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  598 }
  599 
  600 // !!!!! Special hack to get all types of calls to specify the byte offset
  601 //       from the start of the call to the point where the return address
  602 //       will point.
  603 int MachCallStaticJavaNode::ret_addr_offset()
  604 {
        // Byte distance from the start of this call node's code to the point
        // its return address refers to: a fixed 5 bytes plus clear_avx_size().
  605   int offset = 5; // 5 bytes from start of call to where return address points
        // Account for the code that precedes the call; compute_padding in this
        // file labels the same quantity "skip vzeroupper".
  606   offset += clear_avx_size();
  607   return offset;
  608 }
  609 
  610 int MachCallDynamicJavaNode::ret_addr_offset()
  611 {
        // Same contract as the static-call variant, but with a fixed part of
        // 15 bytes. CallDynamicJavaDirectNode::compute_padding skips
        // "movq instruction + call opcode byte" (11 bytes) for this kind of call.
  612   int offset = 15; // 15 bytes from start of call to where return address points
        // Add the size of the code emitted ahead of the call ("vzeroupper").
  613   offset += clear_avx_size();
  614   return offset;
  615 }
  616 
  617 int MachCallRuntimeNode::ret_addr_offset() {




        // Return-address offset for runtime calls: 13 bytes for the
        // movq/callq pair, plus clear_avx_size() for every ideal opcode
        // except Op_CallLeafVector.
  618   int offset = 13; // movq r10,#addr; callq (r10)
        // CallLeafDirectVector encodes only Java_To_Runtime (no clear_avx
        // step), so nothing extra precedes the call in that case.
  619   if (this->ideal_Opcode() != Op_CallLeafVector) {
  620     offset += clear_avx_size();
  621   }
  622   return offset;
  623 }

  624 //
  625 // Compute padding required for nodes which need alignment
  626 //
  627 
  628 // The address of the call instruction needs to be 4-byte aligned to
  629 // ensure that it does not span a cache line so that it can be patched.
      // Returns the number of padding bytes needed at current_offset. Note that
      // the offset actually rounded up to alignment_required() is the one just
      // past the vzeroupper (if any) and the one-byte call opcode, i.e. the
      // position where the call's operand bytes begin.
  630 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  631 {
  632   current_offset += clear_avx_size(); // skip vzeroupper
  633   current_offset += 1; // skip call opcode byte
        // Distance from the adjusted offset up to the next aligned offset.
  634   return align_up(current_offset, alignment_required()) - current_offset;
  635 }
  636 
  637 // The address of the call instruction needs to be 4-byte aligned to
  638 // ensure that it does not span a cache line so that it can be patched.
  639 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  640 {
  641   current_offset += clear_avx_size(); // skip vzeroupper
  642   current_offset += 11; // skip movq instruction + call opcode byte
  643   return align_up(current_offset, alignment_required()) - current_offset;

  829     st->print("\n\t");
  830     st->print("# stack alignment check");
  831 #endif
  832   }
  833   if (C->stub_function() != nullptr) {
  834     st->print("\n\t");
  835     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  836     st->print("\n\t");
  837     st->print("je      fast_entry\t");
  838     st->print("\n\t");
  839     st->print("call    #nmethod_entry_barrier_stub\t");
  840     st->print("\n\tfast_entry:");
  841   }
  842   st->cr();
  843 }
  844 #endif
  845 
  846 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: an optional class-initialization barrier,
        // the verified entry sequence, and bookkeeping for the frame-complete
        // offset and the constant table base.
  847   Compile* C = ra_->C;
  848 
  849   int framesize = C->output()->frame_size_in_bytes();
  850   int bangsize = C->output()->bang_size_in_bytes();
  851 
  852   if (C->clinit_barrier_on_entry()) {
  853     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  854     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  855 
  856     Label L_skip_barrier;
  857     Register klass = rscratch1;
  858 
          // Load the holder class metadata and run the barrier; the fast path
          // continues at L_skip_barrier, everything else falls through to the
          // jump into the handle-wrong-method stub.
  859     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  860     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
  861 
  862     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  863 
  864     __ bind(L_skip_barrier);

  865   }
  866 
        // The second argument is the bang size, or 0 when no stack bang is
        // needed; the last argument is true for stub compilations.
  867   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


  868 
        // Record the code offset reached once the entry sequence is emitted.
  869   C->output()->set_frame_complete(__ offset());
  870 
  871   if (C->has_mach_constant_base_node()) {
  872     // NOTE: We set the table base offset here because users might be
  873     // emitted before MachConstantBaseNode.
  874     ConstantTable& constant_table = C->output()->constant_table();
  875     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  876   }
  877 }
  878 
  879 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  880 {
        // No closed-form size for the prolog: delegate to the generic
        // MachNode::size computation.
  881   return MachNode::size(ra_); // too many variables; just compute it
  882                               // the hard way
  883 }
  884 
  885 int MachPrologNode::reloc() const
  886 {
        // NOTE(review): the trailing comment says "large enough", yet the value
        // is 0 (MachEpilogNode::reloc returns 2). Presumably this is a
        // relocation-count estimate -- confirm against the callers.
  887   return 0; // a large enough number
  888 }
  889 
  890 //=============================================================================
  891 #ifndef PRODUCT
  892 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  893 {
  894   Compile* C = ra_->C;
  895   if (generate_vzeroupper(C)) {
  896     st->print("vzeroupper");
  897     st->cr(); st->print("\t");
  898   }
  899 
  900   int framesize = C->output()->frame_size_in_bytes();
  901   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  902   // Remove word for return adr already pushed
  903   // and RBP
  904   framesize -= 2*wordSize;

  911   st->print_cr("popq    rbp");
  912   if (do_polling() && C->is_method_compilation()) {
  913     st->print("\t");
  914     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  915                  "ja      #safepoint_stub\t"
  916                  "# Safepoint: poll for GC");
  917   }
  918 }
  919 #endif
  920 
  921 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  922 {
        // Emits the method epilog: optional vzeroupper, frame teardown
        // (addq rsp / popq rbp), optional reserved-stack check, and the
        // return safepoint poll.
  923   Compile* C = ra_->C;
  924 
  925   if (generate_vzeroupper(C)) {
  926     // Clear upper bits of YMM registers when current compiled code uses
  927     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  928     __ vzeroupper();
  929   }
  930 
  931   int framesize = C->output()->frame_size_in_bytes();
  932   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  933   // Remove word for return adr already pushed
  934   // and RBP
  935   framesize -= 2*wordSize;
  936 
  937   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  938 
        // Skip the stack adjustment entirely when nothing is left to pop.
  939   if (framesize) {
  940     __ addq(rsp, framesize);
  941   }
  942 
  943   __ popq(rbp);
  944 
  945   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  946     __ reserved_stack_check();
  947   }
  948 
  949   if (do_polling() && C->is_method_compilation()) {
          // During scratch size estimation no stub is registered; a local
          // dummy label stands in as the poll's branch target.
  950     Label dummy_label;
  951     Label* code_stub = &dummy_label;
  952     if (!C->output()->in_scratch_emit_size()) {
  953       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  954       C->output()->add_stub(stub);
  955       code_stub = &stub->entry();
  956     }
  957     __ relocate(relocInfo::poll_return_type);
  958     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  959   }
  960 }
  961 
  962 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
  963 {
        // The epilog length depends on several conditions (see emit), so the
        // generic MachNode::size computation is used instead of a formula.
  964   return MachNode::size(ra_); // too many variables; just compute it
  965                               // the hard way
  966 }
  967 
  968 int MachEpilogNode::reloc() const
  969 {
        // Fixed estimate of 2. emit() issues one explicit relocate() for the
        // return poll -- presumably this is an upper bound on relocation
        // entries; confirm against the callers.
  970   return 2; // a large enough number
  971 }
  972 
  973 const Pipeline* MachEpilogNode::pipeline() const
  974 {
        // The epilog has no dedicated pipeline description; it returns
        // whatever MachNode::pipeline_class() provides.
  975   return MachNode::pipeline_class();
  976 }
  977 
  978 //=============================================================================
  979 
  980 enum RC {
        // NOTE(review): judging by the enumerator names these classify where a
        // value lives (integer register, k-register, float register, stack
        // slot), with rc_bad as the invalid case. The users of this enum are
        // not visible in this excerpt -- confirm.
  981   rc_bad,
  982   rc_int,
  983   rc_kreg,
  984   rc_float,
  985   rc_stack
  986 };
  987 

 1549 #endif
 1550 
 1551 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1552 {
        // Materializes the address of this node's stack slot: emits
        // "lea reg, [rsp + offset]", where offset comes from the first element
        // of input register mask 0 and reg is this node's allocated encoding.
 1553   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1554   int reg = ra_->get_encode(this);
 1555 
 1556   __ lea(as_Register(reg), Address(rsp, offset));
 1557 }
 1558 
 1559 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1560 {
        // Byte size of the lea emitted by BoxLockNode::emit. It depends on the
        // prefix required by the destination encoding (REX2 when the encoding
        // is above 15, REX otherwise) and on whether the offset is below 0x80
        // -- presumably the 8-bit vs 32-bit displacement form (3 bytes
        // difference); confirm against the assembler.
 1561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1562   if (ra_->get_encode(this) > 15) {
 1563     return (offset < 0x80) ? 6 : 9; // REX2
 1564   } else {
 1565     return (offset < 0x80) ? 5 : 8; // REX
 1566   }
 1567 }
 1568 











































 1569 //=============================================================================
 1570 #ifndef PRODUCT
 1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1572 {
        // Debug listing of the unverified entry point: load the receiver's
        // klass into rscratch1, compare it with the speculated klass held in
        // the inline-cache data, and branch to the IC miss stub on mismatch.
        // The 32-bit (movl/cmpl) form is printed when class pointers are
        // compressed, the 64-bit (movq/cmpq) form otherwise.
 1573   if (UseCompressedClassPointers) {
 1574     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1575     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1576   } else {
          // Full-width klass pointer: the annotation must not claim it is
          // compressed.
 1577     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1578     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1579   }
 1580   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1581 }
 1582 #endif
 1583 
 1584 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1585 {
        // The whole unverified-entry sequence is produced by the macro
        // assembler's ic_check, which is handed InteriorEntryAlignment.
 1586   __ ic_check(InteriorEntryAlignment);
 1587 }
 1588 
 1589 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1590 {
        // No fixed size is assumed for the entry check; fall back to the
        // generic MachNode::size computation.
 1591   return MachNode::size(ra_); // too many variables; just compute it
 1592                               // the hard way
 1593 }
 1594 
 1595 
 1596 //=============================================================================
 1597 
 1598 bool Matcher::supports_vector_calling_convention(void) {
        // The vector calling convention is available exactly when the
        // EnableVectorSupport flag is set.
 1599   return EnableVectorSupport;
 1600 }
 1601 
 1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Vector results are returned in XMM0. The low half of the pair is
        // always XMM0_num; the high half selects the last slot used, which
        // grows with the vector kind (VecX -> XMM0d, VecY -> XMM0h,
        // VecZ -> XMM0p) and is XMM0b for any other ideal_reg.
 1603   assert(EnableVectorSupport, "sanity");
 1604   int lo = XMM0_num;
 1605   int hi = XMM0b_num;
 1606   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1607   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1608   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
        // Note the argument order: high slot first, then low slot.
 1609   return OptoRegPair(hi, lo);
 1610 }
 1611 
 1612 // Is this branch offset short enough that a short branch can be used?
 1613 //
 1614 // NOTE: If the platform does not provide any short branch variants, then
 1615 //       this method should return false for offset 0.

 3050   %}
 3051 %}
 3052 
 3053 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3054 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3055 %{
        // Memory operand of the form base + (int index << scale) + 32-bit
        // displacement, for an int index that is converted to long.
 3056   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type found at in(2)->in(3)->in(1) has a
        // non-negative lower bound -- presumably that node is the ConvI2L of
        // the match rule, i.e. the index is known to be >= 0; confirm.
 3057   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3058   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3059 
 3060   op_cost(10);
 3061   format %{"[$reg + $off + $idx << $scale]" %}
 3062   interface(MEMORY_INTER) %{
 3063     base($reg);
 3064     index($idx);
 3065     scale($scale);
 3066     disp($off);
 3067   %}
 3068 %}
 3069 
















 3070 // Indirect Narrow Oop Plus Offset Operand
 3071 // Note: the x86 architecture doesn't support "scale * index + offset" without
 3072 // a base, so we can't free r12 even with CompressedOops::base() == nullptr.
 3073 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Folds the decode of a narrow oop into the address: the narrow oop
        // register is used as the index with scale 3 and R12 as the base.
        // Applies only when compressed oops are on and the shift is times_8.
 3074   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3075   constraint(ALLOC_IN_RC(ptr_reg));
 3076   match(AddP (DecodeN reg) off);
 3077 
 3078   op_cost(10);
 3079   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3080   interface(MEMORY_INTER) %{
 3081     base(0xc); // R12
 3082     index($reg);
 3083     scale(0x3);
 3084     disp($off);
 3085   %}
 3086 %}
 3087 
 3088 // Indirect Memory Operand
 3089 operand indirectNarrow(rRegN reg)

 3396     equal(0x4, "e");
 3397     not_equal(0x5, "ne");
 3398     less(0x2, "b");
 3399     greater_equal(0x3, "ae");
 3400     less_equal(0x6, "be");
 3401     greater(0x7, "a");
 3402     overflow(0x0, "o");
 3403     no_overflow(0x1, "no");
 3404   %}
 3405 %}
 3406 
 3407 //----------OPERAND CLASSES----------------------------------------------------
 3408 // Operand Classes are groups of operands that are used to simplify
 3409 // instruction definitions by not requiring the AD writer to specify separate
 3410 // instructions for every form of operand when the instruction accepts
 3411 // multiple operand types with the same basic encoding and format.  The classic
 3412 // case of this is memory operands.
 3413 
      // Every memory operand form accepted by instructions that take a
      // "memory" operand: the plain forms, the compressed-oop form, and the
      // *Narrow variants.
 3414 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3415                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3416                indCompressedOopOffset,
 3417                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3418                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3419                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3420 
 3421 //----------PIPELINE-----------------------------------------------------------
 3422 // Rules which define the behavior of the target architecture's pipeline.
 3423 pipeline %{
 3424 
 3425 //----------ATTRIBUTES---------------------------------------------------------
 3426 attributes %{
 3427   variable_size_instructions;        // Variable size instructions
 3428   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3429   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 3430   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3431   instruction_fetch_units = 1;       // of 16 bytes
 3432 
 3433   // List of nop instructions
 3434   nops( MachNop );
 3435 %}
 3436 

 5981   format %{ "MEMBAR-storestore (empty encoding)" %}
 5982   ins_encode( );
 5983   ins_pipe(empty);
 5984 %}
 5985 
 5986 //----------Move Instructions--------------------------------------------------
 5987 
 5988 instruct castX2P(rRegP dst, rRegL src)
 5989 %{
        // Reinterprets a long as a pointer: a plain register move.
 5990   match(Set dst (CastX2P src));
 5991 
 5992   format %{ "movq    $dst, $src\t# long->ptr" %}
 5993   ins_encode %{
          // Nothing is emitted when source and destination were allocated
          // to the same register.
 5994     if ($dst$$reg != $src$$reg) {
 5995       __ movptr($dst$$Register, $src$$Register);
 5996     }
 5997   %}
 5998   ins_pipe(ialu_reg_reg); // XXX
 5999 %}
 6000 


























 6001 instruct castP2X(rRegL dst, rRegP src)
 6002 %{
        // Reinterprets a pointer as a long: a plain register move.
 6003   match(Set dst (CastP2X src));
 6004 
 6005   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6006   ins_encode %{
          // Nothing is emitted when source and destination were allocated
          // to the same register.
 6007     if ($dst$$reg != $src$$reg) {
 6008       __ movptr($dst$$Register, $src$$Register);
 6009     }
 6010   %}
 6011   ins_pipe(ialu_reg_reg); // XXX
 6012 %}
 6013 
 6014 // Convert oop into int for vectors alignment masking
 6015 instruct convP2I(rRegI dst, rRegP src)
 6016 %{
 6017   match(Set dst (ConvL2I (CastP2X src)));
 6018 
 6019   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6020   ins_encode %{

12112   effect(DEF dst, USE src);
12113   ins_cost(100);
12114   format %{ "movd    $dst,$src\t# MoveI2F" %}
12115   ins_encode %{
12116     __ movdl($dst$$XMMRegister, $src$$Register);
12117   %}
12118   ins_pipe( pipe_slow );
12119 %}
12120 
12121 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves the bits of a long general-purpose register into a double
        // (XMM) register, as matched by MoveL2D.
12122   match(Set dst (MoveL2D src));
12123   effect(DEF dst, USE src);
12124   ins_cost(100);
        // NOTE(review): the listing prints "movd" while the encoding calls
        // movdq; the MoveI2F rule just above pairs "movd" with movdl.
12125   format %{ "movd    $dst,$src\t# MoveL2D" %}
12126   ins_encode %{
12127      __ movdq($dst$$XMMRegister, $src$$Register);
12128   %}
12129   ins_pipe( pipe_slow );
12130 %}
12131 

12132 // Fast clearing of an array
12133 // Small non-constant length ClearArray for non-AVX512 targets.
12134 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12135                   Universe dummy, rFlagsReg cr)
12136 %{
12137   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12138   match(Set dummy (ClearArray cnt base));
12139   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































12140 
12141   format %{ $$template
12142     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12143     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12144     $$emit$$"jg      LARGE\n\t"
12145     $$emit$$"dec     rcx\n\t"
12146     $$emit$$"js      DONE\t# Zero length\n\t"
12147     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12148     $$emit$$"dec     rcx\n\t"
12149     $$emit$$"jge     LOOP\n\t"
12150     $$emit$$"jmp     DONE\n\t"
12151     $$emit$$"# LARGE:\n\t"
12152     if (UseFastStosb) {
12153        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12154        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12155     } else if (UseXMMForObjInit) {
12156        $$emit$$"mov     rdi,rax\n\t"
12157        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12158        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12159        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12167        $$emit$$"jl      L_tail\n\t"
12168        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12169        $$emit$$"add     0x20,rax\n\t"
12170        $$emit$$"sub     0x4,rcx\n\t"
12171        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12172        $$emit$$"add     0x4,rcx\n\t"
12173        $$emit$$"jle     L_end\n\t"
12174        $$emit$$"dec     rcx\n\t"
12175        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12176        $$emit$$"vmovq   xmm0,(rax)\n\t"
12177        $$emit$$"add     0x8,rax\n\t"
12178        $$emit$$"dec     rcx\n\t"
12179        $$emit$$"jge     L_sloop\n\t"
12180        $$emit$$"# L_end:\n\t"
12181     } else {
12182        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12183     }
12184     $$emit$$"# DONE"
12185   %}
12186   ins_encode %{
12187     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12188                  $tmp$$XMMRegister, false, knoreg);
12189   %}
12190   ins_pipe(pipe_slow);
12191 %}
12192 
12193 // Small non-constant length ClearArray for AVX512 targets.
12194 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12195                        Universe dummy, rFlagsReg cr)
12196 %{
12197   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12198   match(Set dummy (ClearArray cnt base));
12199   ins_cost(125);
12200   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12201 
12202   format %{ $$template
12203     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12204     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12205     $$emit$$"jg      LARGE\n\t"
12206     $$emit$$"dec     rcx\n\t"
12207     $$emit$$"js      DONE\t# Zero length\n\t"
12208     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12209     $$emit$$"dec     rcx\n\t"
12210     $$emit$$"jge     LOOP\n\t"
12211     $$emit$$"jmp     DONE\n\t"
12212     $$emit$$"# LARGE:\n\t"
12213     if (UseFastStosb) {
12214        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12215        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12216     } else if (UseXMMForObjInit) {
12217        $$emit$$"mov     rdi,rax\n\t"
12218        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12219        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12220        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12228        $$emit$$"jl      L_tail\n\t"
12229        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12230        $$emit$$"add     0x20,rax\n\t"
12231        $$emit$$"sub     0x4,rcx\n\t"
12232        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12233        $$emit$$"add     0x4,rcx\n\t"
12234        $$emit$$"jle     L_end\n\t"
12235        $$emit$$"dec     rcx\n\t"
12236        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12237        $$emit$$"vmovq   xmm0,(rax)\n\t"
12238        $$emit$$"add     0x8,rax\n\t"
12239        $$emit$$"dec     rcx\n\t"
12240        $$emit$$"jge     L_sloop\n\t"
12241        $$emit$$"# L_end:\n\t"
12242     } else {
12243        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12244     }
12245     $$emit$$"# DONE"
12246   %}
12247   ins_encode %{
12248     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12249                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
12250   %}
12251   ins_pipe(pipe_slow);
12252 %}
12253 
12254 // Large non-constant length ClearArray for non-AVX512 targets.
12255 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12256                         Universe dummy, rFlagsReg cr)
12257 %{
        // Selected for ClearArray nodes that report is_large() when UseAVX <= 2.
12258   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
12259   match(Set dummy (ClearArray cnt base));
        // cnt (rcx) and base (rdi) are consumed and clobbered, tmp is scratch,
        // and zero (rax) and the flags are killed.
12260   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































12261 
        // Debug listing only: prints one of three sketches depending on
        // UseFastStosb / UseXMMForObjInit (rep stosb, a 64-byte ymm store loop
        // with tail handling, or rep stosq). The emitted code comes from
        // clear_mem below.
12262   format %{ $$template
12263     if (UseFastStosb) {
12264        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12265        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12266        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12267     } else if (UseXMMForObjInit) {
12268        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12269        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12270        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12271        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12272        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12273        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12274        $$emit$$"add     0x40,rax\n\t"
12275        $$emit$$"# L_zero_64_bytes:\n\t"
12276        $$emit$$"sub     0x8,rcx\n\t"
12277        $$emit$$"jge     L_loop\n\t"
12278        $$emit$$"add     0x4,rcx\n\t"
12279        $$emit$$"jl      L_tail\n\t"
12280        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12281        $$emit$$"add     0x20,rax\n\t"
12282        $$emit$$"sub     0x4,rcx\n\t"
12283        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12284        $$emit$$"add     0x4,rcx\n\t"
12285        $$emit$$"jle     L_end\n\t"
12286        $$emit$$"dec     rcx\n\t"
12287        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12288        $$emit$$"vmovq   xmm0,(rax)\n\t"
12289        $$emit$$"add     0x8,rax\n\t"
12290        $$emit$$"dec     rcx\n\t"
12291        $$emit$$"jge     L_sloop\n\t"
12292        $$emit$$"# L_end:\n\t"
12293     } else {
12294        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12295        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12296     }
12297   %}
        // The boolean argument is true here and false in the small-array rules
        // (rep_stos / rep_stos_evex) -- presumably the "is large" flag; confirm
        // against clear_mem. knoreg is passed because this rule has no k-register.
12298   ins_encode %{
12299     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12300                  $tmp$$XMMRegister, true, knoreg);
12301   %}
12302   ins_pipe(pipe_slow);
12303 %}
12304 
12305 // Large non-constant length ClearArray for AVX512 targets.
12306 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12307                              Universe dummy, rFlagsReg cr)
12308 %{
        // Selected for ClearArray nodes that report is_large() when UseAVX > 2.
        // Differs from rep_stos_large by using a legRegD temp and an extra
        // k-register temp that is passed on to clear_mem.
12309   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
12310   match(Set dummy (ClearArray cnt base));
        // cnt (rcx) and base (rdi) are consumed and clobbered, tmp and ktmp are
        // scratch, and zero (rax) and the flags are killed.
12311   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12312 
        // Debug listing only: same three sketches as rep_stos_large, chosen by
        // UseFastStosb / UseXMMForObjInit. The emitted code comes from
        // clear_mem below.
12313   format %{ $$template
12314     if (UseFastStosb) {
12315        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12316        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12317        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12318     } else if (UseXMMForObjInit) {
12319        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12320        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12321        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12322        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12323        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12324        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12325        $$emit$$"add     0x40,rax\n\t"
12326        $$emit$$"# L_zero_64_bytes:\n\t"
12327        $$emit$$"sub     0x8,rcx\n\t"
12328        $$emit$$"jge     L_loop\n\t"
12329        $$emit$$"add     0x4,rcx\n\t"
12330        $$emit$$"jl      L_tail\n\t"
12331        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12332        $$emit$$"add     0x20,rax\n\t"
12333        $$emit$$"sub     0x4,rcx\n\t"
12334        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12335        $$emit$$"add     0x4,rcx\n\t"
12336        $$emit$$"jle     L_end\n\t"
12337        $$emit$$"dec     rcx\n\t"
12338        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12339        $$emit$$"vmovq   xmm0,(rax)\n\t"
12340        $$emit$$"add     0x8,rax\n\t"
12341        $$emit$$"dec     rcx\n\t"
12342        $$emit$$"jge     L_sloop\n\t"
12343        $$emit$$"# L_end:\n\t"
12344     } else {
12345        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12346        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12347     }
12348   %}
        // The boolean argument is true here and false in the small-array rules
        // -- presumably the "is large" flag; confirm against clear_mem.
12349   ins_encode %{
12350     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12351                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
12352   %}
12353   ins_pipe(pipe_slow);
12354 %}
12355 
12356 // Small constant length ClearArray for AVX512 targets.
12357 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
12358 %{
        // ClearArray with a compile-time constant count (immL). Requires a
        // non-large node, MaxVectorSize >= 32 and AVX512VL support.
12359   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
12360   match(Set dummy (ClearArray cnt base));

12361   ins_cost(100);
        // Unlike the register-count rules, base is not killed here; tmp, zero
        // and ktmp are scratch and the flags are killed.
12362   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
12363   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12364   ins_encode %{
          // Calls the clear_mem overload that takes the count as a constant.
12365    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12366   %}
12367   ins_pipe(pipe_slow);
12368 %}
12369 
12370 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12371                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12372 %{
        // StrComp intrinsic for the LL encoding on targets without AVX512VL+BW.
12373   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12374   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
        // Both string pointers and both counts are consumed and clobbered;
        // tmp1 is scratch and the flags are killed.
12375   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12376 
12377   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12378   ins_encode %{
          // knoreg: this rule reserves no k-register.
12379     __ string_compare($str1$$Register, $str2$$Register,
12380                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12381                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12382   %}
12383   ins_pipe( pipe_slow );
12384 %}
12385 

14218 
14219   ins_cost(300);
14220   format %{ "call_leaf,runtime " %}
14221   ins_encode(clear_avx, Java_To_Runtime(meth));
14222   ins_pipe(pipe_slow);
14223 %}
14224 
14225 // Call runtime without safepoint and with vector arguments
14226 instruct CallLeafDirectVector(method meth)
14227 %{
14228   match(CallLeafVector);
14229   effect(USE meth);
14230 
14231   ins_cost(300);
14232   format %{ "call_leaf,vector " %}
        // Unlike the other leaf-call rules there is no clear_avx step here;
        // MachCallRuntimeNode::ret_addr_offset correspondingly skips
        // clear_avx_size() for Op_CallLeafVector.
14233   ins_encode(Java_To_Runtime(meth));
14234   ins_pipe(pipe_slow);
14235 %}
14236 
14237 // Call runtime without safepoint















14238 instruct CallLeafNoFPDirect(method meth)
14239 %{

14240   match(CallLeafNoFP);
14241   effect(USE meth);
14242 
14243   ins_cost(300);
14244   format %{ "call_leaf_nofp,runtime " %}
        // clear_avx is encoded ahead of the runtime call; its size is what
        // MachCallRuntimeNode::ret_addr_offset adds via clear_avx_size().
14245   ins_encode(clear_avx, Java_To_Runtime(meth));
14246   ins_pipe(pipe_slow);
14247 %}
14248 
14249 // Return Instruction
14250 // Remove the return address & jump to it.
14251 // Notice: We always emit a nop after a ret to make sure there is room
14252 // for safepoint patching
14253 instruct Ret()
14254 %{
14255   match(Return);
14256 
14257   format %{ "ret" %}
14258   ins_encode %{
14259     __ ret(0);

  598 }
  599 
  600 // !!!!! Special hack to get all types of calls to specify the byte offset
  601 //       from the start of the call to the point where the return address
  602 //       will point.
  603 int MachCallStaticJavaNode::ret_addr_offset()
  604 {
        // Byte distance from the start of this call node's code to the point
        // its return address refers to: a fixed 5 bytes plus clear_avx_size().
  605   int offset = 5; // 5 bytes from start of call to where return address points
        // Account for the code that precedes the call; compute_padding in this
        // file labels the same quantity "skip vzeroupper".
  606   offset += clear_avx_size();
  607   return offset;
  608 }
  609 
  610 int MachCallDynamicJavaNode::ret_addr_offset()
  611 {
        // Same contract as the static-call variant, but with a fixed part of
        // 15 bytes. CallDynamicJavaDirectNode::compute_padding skips
        // "movq instruction + call opcode byte" (11 bytes) for this kind of call.
  612   int offset = 15; // 15 bytes from start of call to where return address points
        // Add the size of the code emitted ahead of the call ("vzeroupper").
  613   offset += clear_avx_size();
  614   return offset;
  615 }
  616 
  617 int MachCallRuntimeNode::ret_addr_offset() {
        // Return-address offset for runtime calls. A node without an entry
        // point is an indirect call through a register (3 bytes, nothing
        // else added). Otherwise it is the 13-byte movq/callq pair plus
        // clear_avx_size(), except for Op_CallLeafVector.
  618   if (_entry_point == nullptr) {
  619     // CallLeafNoFPInDirect
  620     return 3; // callq (register)
  621   }
  622   int offset = 13; // movq r10,#addr; callq (r10)
        // Vector leaf calls get no clear_avx_size() added.
  623   if (this->ideal_Opcode() != Op_CallLeafVector) {
  624     offset += clear_avx_size();
  625   }
  626   return offset;
  627 }
  628 
  629 //
  630 // Compute padding required for nodes which need alignment
  631 //
  632 
  633 // The address of the call instruction needs to be 4-byte aligned to
  634 // ensure that it does not span a cache line so that it can be patched.
      // Returns the number of padding bytes needed at current_offset. Note that
      // the offset actually rounded up to alignment_required() is the one just
      // past the vzeroupper (if any) and the one-byte call opcode, i.e. the
      // position where the call's operand bytes begin.
  635 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  636 {
  637   current_offset += clear_avx_size(); // skip vzeroupper
  638   current_offset += 1; // skip call opcode byte
        // Distance from the adjusted offset up to the next aligned offset.
  639   return align_up(current_offset, alignment_required()) - current_offset;
  640 }
  641 
  642 // The address of the call instruction needs to be 4-byte aligned to
  643 // ensure that it does not span a cache line so that it can be patched.
  644 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  645 {
  646   current_offset += clear_avx_size(); // skip vzeroupper
  647   current_offset += 11; // skip movq instruction + call opcode byte
  648   return align_up(current_offset, alignment_required()) - current_offset;

  834     st->print("\n\t");
  835     st->print("# stack alignment check");
  836 #endif
  837   }
  838   if (C->stub_function() != nullptr) {
  839     st->print("\n\t");
  840     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  841     st->print("\n\t");
  842     st->print("je      fast_entry\t");
  843     st->print("\n\t");
  844     st->print("call    #nmethod_entry_barrier_stub\t");
  845     st->print("\n\tfast_entry:");
  846   }
  847   st->cr();
  848 }
  849 #endif
  850 
  851 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: the verified entry sequence, the entry
        // barrier for non-stub compilations, the binding of the verified-entry
        // label, and bookkeeping for the frame-complete offset and the
        // constant table base.
  852   Compile* C = ra_->C;
  853 
  854   __ verified_entry(C);













  855 
        // Stub compilations (stub_function() set) get no entry barrier.
        // Uses the local C, which already holds ra_->C.
  856   if (C->stub_function() == nullptr) {
  857     __ entry_barrier();
  858   }
  859 
        // The label is bound only during real emission, not while the node is
        // being emitted into the scratch buffer for size estimation.
  860   if (!Compile::current()->output()->in_scratch_emit_size()) {
  861     __ bind(*_verified_entry);
  862   }
  863 
        // Record the code offset reached once the entry sequence is emitted.
  864   C->output()->set_frame_complete(__ offset());
  865 
  866   if (C->has_mach_constant_base_node()) {
  867     // NOTE: We set the table base offset here because users might be
  868     // emitted before MachConstantBaseNode.
  869     ConstantTable& constant_table = C->output()->constant_table();
  870     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  871   }
  872 }
  873 






  874 int MachPrologNode::reloc() const
  875 {
        // NOTE(review): the trailing comment says "large enough", yet the value
        // is 0 (MachEpilogNode::reloc returns 2). Presumably this is a
        // relocation-count estimate -- confirm against the callers.
  876   return 0; // a large enough number
  877 }
  878 
  879 //=============================================================================
  880 #ifndef PRODUCT
  881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  882 {
  883   Compile* C = ra_->C;
  884   if (generate_vzeroupper(C)) {
  885     st->print("vzeroupper");
  886     st->cr(); st->print("\t");
  887   }
  888 
  889   int framesize = C->output()->frame_size_in_bytes();
  890   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  891   // Remove word for return adr already pushed
  892   // and RBP
  893   framesize -= 2*wordSize;

  900   st->print_cr("popq    rbp");
  901   if (do_polling() && C->is_method_compilation()) {
  902     st->print("\t");
  903     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  904                  "ja      #safepoint_stub\t"
  905                  "# Safepoint: poll for GC");
  906   }
  907 }
  908 #endif
  909 
  910 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  911 {
        // Emits the method epilog: optional vzeroupper, frame removal via
        // remove_frame, optional reserved-stack check, and the return
        // safepoint poll.
  912   Compile* C = ra_->C;
  913 
  914   if (generate_vzeroupper(C)) {
  915     // Clear upper bits of YMM registers when current compiled code uses
  916     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  917     __ vzeroupper();
  918   }
  919 
  920   // Subtract two words to account for return address and rbp
  921   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
        // Frame teardown is delegated to the macro assembler, which is also
        // told whether this compilation needs stack repair.
  922   __ remove_frame(initial_framesize, C->needs_stack_repair());










  923 
  924   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  925     __ reserved_stack_check();
  926   }
  927 
  928   if (do_polling() && C->is_method_compilation()) {
          // During scratch size estimation no stub is registered; a local
          // dummy label stands in as the poll's branch target.
  929     Label dummy_label;
  930     Label* code_stub = &dummy_label;
  931     if (!C->output()->in_scratch_emit_size()) {
  932       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  933       C->output()->add_stub(stub);
  934       code_stub = &stub->entry();
  935     }
  936     __ relocate(relocInfo::poll_return_type);
  937     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  938   }
  939 }
  940 






  941 int MachEpilogNode::reloc() const
  942 {
        // reloc() for the epilog node is a constant 2 in this file.
        // NOTE(review): presumably an upper bound on the relocation entries the
        // epilog can produce (emit() issues one poll_return relocation) -- confirm.
  943   return 2; // a large enough number
  944 }
  945 
  946 const Pipeline* MachEpilogNode::pipeline() const
  947 {
        // The epilog has no pipeline description of its own; it returns whatever
        // MachNode::pipeline_class() provides.
  948   return MachNode::pipeline_class();
  949 }
  950 
  951 //=============================================================================
  952 
  953 enum RC {
        // NOTE(review): presumably register-class tags (bad/unknown, general
        // purpose, opmask "k" register, floating point/vector, stack slot) used
        // by code outside this excerpt -- confirm against the users of RC.
  954   rc_bad,
  955   rc_int,
  956   rc_kreg,
  957   rc_float,
  958   rc_stack
  959 };
  960 

 1522 #endif
 1523 
 1524 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1525 {
        // Emits a single lea that loads the address rsp + offset into the
        // register assigned to this node. The offset is reg2offset() of the
        // first element of input 0's register mask.
 1526   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1527   int reg = ra_->get_encode(this);
 1528
 1529   __ lea(as_Register(reg), Address(rsp, offset));
 1530 }
 1531 
 1532 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1533 {
        // Returns a byte size chosen from two inputs:
        //   - the destination register encoding (> 15 selects the REX2 sizes),
        //   - the stack offset (< 0x80 selects the shorter form; presumably
        //     because the displacement then fits in 8 bits -- confirm).
        // NOTE(review): this must stay in sync with the lea emitted by
        // BoxLockNode::emit().
 1534   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1535   if (ra_->get_encode(this) > 15) {
 1536     return (offset < 0x80) ? 6 : 9; // REX2
 1537   } else {
 1538     return (offset < 0x80) ? 5 : 8; // REX
 1539   }
 1540 }
 1541 
 1542 //=============================================================================
 1543 #ifndef PRODUCT
 1544 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1545 {
        // Non-PRODUCT builds only: prints just the node name; the instructions
        // produced by MachVEPNode::emit() are not listed.
 1546   st->print_cr("MachVEPNode");
 1547 }
 1548 #endif
 1549 
 1550 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1551 {
        // Emits one of two entry sequences, then pads with nops:
        //   - !_verified: only an inline-cache check (ic_check(1)).
        //   - _verified:  unpacks inline-type arguments, emits the verified
        //                 entry code and jumps to *_verified_entry.
        // insts_size records the code size before emission so the number of
        // bytes emitted here can be computed for the padding at the end.
 1552   CodeBuffer* cbuf = masm->code();
 1553   uint insts_size = cbuf->insts_size();
 1554   if (!_verified) {
 1555     __ ic_check(1);
 1556   } else {
 1557     // TODO 8284443 Avoid creation of temporary frame
 1558     if (ra_->C->stub_function() == nullptr) {
            // Build a frame, run the entry barrier, then tear the frame down
            // again (this is the temporary frame the TODO above refers to).
 1559       __ verified_entry(ra_->C, 0);
 1560       __ entry_barrier();
 1561       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1562       __ remove_frame(initial_framesize, false);
 1563     }
 1564     // Unpack inline type args passed as oop and then jump to
 1565     // the verified entry point (skipping the unverified entry).
 1566     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1567     // Emit code for verified entry and save increment for stack repair on return
 1568     __ verified_entry(ra_->C, sp_inc);
          // During scratch emission (size measurement) the jump targets a local
          // dummy label instead of *_verified_entry.
 1569     if (Compile::current()->output()->in_scratch_emit_size()) {
 1570       Label dummy_verified_entry;
 1571       __ jmp(dummy_verified_entry);
 1572     } else {
 1573       __ jmp(*_verified_entry);
 1574     }
 1575   }
 1576   /* WARNING these NOPs are critical so that verified entry point is properly
 1577      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
        // Pad the bytes emitted by this node up to a multiple of 4.
 1578   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 1579   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1580   if (nops_cnt > 0) {
 1581     __ nop(nops_cnt);
 1582   }
 1583 }
 1584 
 1585 //=============================================================================
 1586 #ifndef PRODUCT
 1587 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1588 {
        // Non-PRODUCT builds only: prints a textual rendering of the inline
        // cache check at the unverified entry point: load the receiver klass,
        // compare it with the speculated klass, and branch to the IC miss stub
        // on mismatch. With UseCompressedClassPointers the load/compare are
        // printed as 32-bit (movl/cmpl), otherwise as 64-bit (movq/cmpq).
        // The 64-bit load is annotated "# klass": it reads an uncompressed
        // klass pointer, so it must not be labelled "compressed klass".
 1589   if (UseCompressedClassPointers) {
 1590     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1591     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1592   } else {
 1593     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1594     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1595   }
 1596   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1597 }
 1598 #endif
 1599 
 1600 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1601 {
        // The whole unverified entry point is the inline-cache check; it is
        // emitted by ic_check(), which receives InteriorEntryAlignment
        // (presumably the alignment required after the check -- confirm).
 1602   __ ic_check(InteriorEntryAlignment);
 1603 }
 1604 







 1605 //=============================================================================
 1606 
 1607 bool Matcher::supports_vector_calling_convention(void) {
        // Availability is controlled solely by the EnableVectorSupport flag.
 1608   return EnableVectorSupport;
 1609 }
 1610 
 1611 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Returns the XMM0-based register pair for a vector return value.
        // The low half is always XMM0_num. The high half depends on ideal_reg:
        //   Op_VecX -> XMM0d_num, Op_VecY -> XMM0h_num, Op_VecZ -> XMM0p_num,
        //   anything else -> XMM0b_num.
        // Asserts that EnableVectorSupport is set.
 1612   assert(EnableVectorSupport, "sanity");
 1613   int lo = XMM0_num;
 1614   int hi = XMM0b_num;
 1615   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1616   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1617   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1618   return OptoRegPair(hi, lo);
 1619 }
 1620 
 1621 // Is this branch offset short enough that a short branch can be used?
 1622 //
 1623 // NOTE: If the platform does not provide any short branch variants, then
 1624 //       this method should return false for offset 0.

 3059   %}
 3060 %}
 3061 
 3062 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3063 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3064 %{
        // Memory operand for reg + (ConvI2L(idx) << scale) + off.
        // The predicate requires a long type whose lower bound (_lo) is >= 0.
        // NOTE(review): n->in(2)->in(3)->in(1) presumably walks from the outer
        // AddP to the ConvI2L node of the match rule, i.e. the int index is
        // known to be non-negative -- confirm the input numbering.
 3065   constraint(ALLOC_IN_RC(ptr_reg));
 3066   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3067   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3068
 3069   op_cost(10);
 3070   format %{"[$reg + $off + $idx << $scale]" %}
 3071   interface(MEMORY_INTER) %{
 3072     base($reg);
 3073     index($idx);
 3074     scale($scale);
 3075     disp($off);
 3076   %}
 3077 %}
 3078 
 3079 // Indirect Narrow Oop Operand
 3080 operand indCompressedOop(rRegN reg) %{
        // Folds a DecodeN of a narrow-oop register into the addressing mode
        // [R12 + reg << 3]. Only used when compressed oops are enabled and the
        // compressed-oop shift equals Address::times_8, which is what makes the
        // fixed scale of 3 below valid.
 3081   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3082   constraint(ALLOC_IN_RC(ptr_reg));
 3083   match(DecodeN reg);
 3084
 3085   op_cost(10);
 3086   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3087   interface(MEMORY_INTER) %{
 3088     base(0xc); // R12
 3089     index($reg);
 3090     scale(0x3);
 3091     disp(0x0);
 3092   %}
 3093 %}
 3094 
 3095 // Indirect Narrow Oop Plus Offset Operand
 3096 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3097 // we can't free r12 even with CompressedOops::base() == nullptr.
 3098 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Same as indCompressedOop but with a 32-bit displacement: matches
        // (DecodeN reg) + off and addresses it as [R12 + reg << 3 + off].
        // Requires compressed oops with a shift equal to Address::times_8.
 3099   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3100   constraint(ALLOC_IN_RC(ptr_reg));
 3101   match(AddP (DecodeN reg) off);
 3102
 3103   op_cost(10);
 3104   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3105   interface(MEMORY_INTER) %{
 3106     base(0xc); // R12
 3107     index($reg);
 3108     scale(0x3);
 3109     disp($off);
 3110   %}
 3111 %}
 3112 
 3113 // Indirect Memory Operand
 3114 operand indirectNarrow(rRegN reg)

 3421     equal(0x4, "e");
 3422     not_equal(0x5, "ne");
 3423     less(0x2, "b");
 3424     greater_equal(0x3, "ae");
 3425     less_equal(0x6, "be");
 3426     greater(0x7, "a");
 3427     overflow(0x0, "o");
 3428     no_overflow(0x1, "no");
 3429   %}
 3430 %}
 3431 
 3432 //----------OPERAND CLASSES----------------------------------------------------
 3433 // Operand Classes are groups of operands that are used to simplify
 3434 // instruction definitions by not requiring the AD writer to specify separate
 3435 // instructions for every form of operand when the instruction accepts
 3436 // multiple operand types with the same basic encoding and format.  The classic
 3437 // case of this is memory operands.
 3438
      // "memory" groups every addressing-mode operand listed below: the plain
      // pointer forms, the two compressed-oop forms, and the *Narrow variants.
 3439 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3440                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3441                indCompressedOop, indCompressedOopOffset,
 3442                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3443                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3444                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3445 
 3446 //----------PIPELINE-----------------------------------------------------------
 3447 // Rules which define the behavior of the target architectures pipeline.
 3448 pipeline %{
 3449 
 3450 //----------ATTRIBUTES---------------------------------------------------------
 3451 attributes %{
 3452   variable_size_instructions;        // Instructions are variable size
 3453   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3454   instruction_unit_size = 1;         // Instruction unit size is 1 byte
 3455   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3456   instruction_fetch_units = 1;       // of 16 bytes
 3457
 3458   // List of nop instructions
 3459   nops( MachNop );
 3460 %}
 3461 

 6006   format %{ "MEMBAR-storestore (empty encoding)" %}
 6007   ins_encode( );
 6008   ins_pipe(empty);
 6009 %}
 6010 
 6011 //----------Move Instructions--------------------------------------------------
 6012 
 6013 instruct castX2P(rRegP dst, rRegL src)
 6014 %{
        // CastX2P (long -> pointer): a plain register copy via movptr, skipped
        // entirely when dst and src were allocated to the same register.
 6015   match(Set dst (CastX2P src));
 6016
 6017   format %{ "movq    $dst, $src\t# long->ptr" %}
 6018   ins_encode %{
 6019     if ($dst$$reg != $src$$reg) {
 6020       __ movptr($dst$$Register, $src$$Register);
 6021     }
 6022   %}
 6023   ins_pipe(ialu_reg_reg); // XXX
 6024 %}
 6025 
 6026 instruct castI2N(rRegN dst, rRegI src)
 6027 %{
        // CastI2N (int -> narrow pointer): a 32-bit register copy via movl,
        // skipped when dst and src were allocated to the same register.
        // The format text names movl so the listing matches the emitted move.
 6028   match(Set dst (CastI2N src));
 6029
 6030   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 6031   ins_encode %{
 6032     if ($dst$$reg != $src$$reg) {
 6033       __ movl($dst$$Register, $src$$Register);
 6034     }
 6035   %}
 6036   ins_pipe(ialu_reg_reg); // XXX
 6037 %}
 6038 
 6039 instruct castN2X(rRegL dst, rRegN src)
 6040 %{
        // CastP2X with a narrow-oop (rRegN) source: a register copy via movptr,
        // skipped when dst and src were allocated to the same register.
        // NOTE(review): the format text says "ptr -> long" although the source
        // operand is a narrow pointer -- confirm the wording is intended.
 6041   match(Set dst (CastP2X src));
 6042
 6043   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6044   ins_encode %{
 6045     if ($dst$$reg != $src$$reg) {
 6046       __ movptr($dst$$Register, $src$$Register);
 6047     }
 6048   %}
 6049   ins_pipe(ialu_reg_reg); // XXX
 6050 %}
 6051 
 6052 instruct castP2X(rRegL dst, rRegP src)
 6053 %{
        // CastP2X (pointer -> long): a plain register copy via movptr, skipped
        // when dst and src were allocated to the same register.
 6054   match(Set dst (CastP2X src));
 6055
 6056   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6057   ins_encode %{
 6058     if ($dst$$reg != $src$$reg) {
 6059       __ movptr($dst$$Register, $src$$Register);
 6060     }
 6061   %}
 6062   ins_pipe(ialu_reg_reg); // XXX
 6063 %}
 6064 
 6065 // Convert oop into int for vectors alignment masking
 6066 instruct convP2I(rRegI dst, rRegP src)
 6067 %{
 6068   match(Set dst (ConvL2I (CastP2X src)));
 6069 
 6070   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6071   ins_encode %{

12163   effect(DEF dst, USE src);
12164   ins_cost(100);
12165   format %{ "movd    $dst,$src\t# MoveI2F" %}
12166   ins_encode %{
12167     __ movdl($dst$$XMMRegister, $src$$Register);
12168   %}
12169   ins_pipe( pipe_slow );
12170 %}
12171 
12172 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D: moves a long general-purpose register into an XMM register
        // using the assembler's movdq().
        // NOTE(review): the format text prints "movd" while the encoding calls
        // movdq() -- confirm which mnemonic the listing should show.
12173   match(Set dst (MoveL2D src));
12174   effect(DEF dst, USE src);
12175   ins_cost(100);
12176   format %{ "movd    $dst,$src\t# MoveL2D" %}
12177   ins_encode %{
12178      __ movdq($dst$$XMMRegister, $src$$Register);
12179   %}
12180   ins_pipe( pipe_slow );
12181 %}
12182 
12183 
12184 // Fast clearing of an array
12185 // Small non-constant length ClearArray for non-AVX512 targets.
12186 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12187                   Universe dummy, rFlagsReg cr)
12188 %{
        // Selected when the ClearArray node is neither is_large() nor
        // word_copy_only() and UseAVX <= 2.
        // cnt (rcx), base (rdi) and val (rax) are consumed and clobbered; tmp is
        // a scratch XMM register; the flags are killed.
        // The code itself comes from clear_mem(); the format template below is
        // descriptive text only.
        // NOTE(review): the two trailing booleans passed to clear_mem are
        // presumably (is_large, word_copy_only) -- confirm against its signature.
12189   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12190   match(Set dummy (ClearArray (Binary cnt base) val));
12191   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12192
12193   format %{ $$template
12194     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12195     $$emit$$"jg      LARGE\n\t"
12196     $$emit$$"dec     rcx\n\t"
12197     $$emit$$"js      DONE\t# Zero length\n\t"
12198     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12199     $$emit$$"dec     rcx\n\t"
12200     $$emit$$"jge     LOOP\n\t"
12201     $$emit$$"jmp     DONE\n\t"
12202     $$emit$$"# LARGE:\n\t"
12203     if (UseFastStosb) {
12204        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12205        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12206     } else if (UseXMMForObjInit) {
12207        $$emit$$"movdq   $tmp, $val\n\t"
12208        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12209        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12210        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12211        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12212        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12213        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12214        $$emit$$"add     0x40,rax\n\t"
12215        $$emit$$"# L_zero_64_bytes:\n\t"
12216        $$emit$$"sub     0x8,rcx\n\t"
12217        $$emit$$"jge     L_loop\n\t"
12218        $$emit$$"add     0x4,rcx\n\t"
12219        $$emit$$"jl      L_tail\n\t"
12220        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12221        $$emit$$"add     0x20,rax\n\t"
12222        $$emit$$"sub     0x4,rcx\n\t"
12223        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12224        $$emit$$"add     0x4,rcx\n\t"
12225        $$emit$$"jle     L_end\n\t"
12226        $$emit$$"dec     rcx\n\t"
12227        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12228        $$emit$$"vmovq   xmm0,(rax)\n\t"
12229        $$emit$$"add     0x8,rax\n\t"
12230        $$emit$$"dec     rcx\n\t"
12231        $$emit$$"jge     L_sloop\n\t"
12232        $$emit$$"# L_end:\n\t"
12233     } else {
12234        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12235     }
12236     $$emit$$"# DONE"
12237   %}
12238   ins_encode %{
12239     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12240                  $tmp$$XMMRegister, false, false);
12241   %}
12242   ins_pipe(pipe_slow);
12243 %}
12244 
12245 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12246                             Universe dummy, rFlagsReg cr)
12247 %{
        // Variant of rep_stos for ClearArray nodes that are word_copy_only():
        // selected when the node is not is_large(), is word_copy_only(), and
        // UseAVX <= 2. The format template has no UseFastStosb (byte store)
        // branch, and clear_mem() is called with (false, true) as its trailing
        // booleans.
        // NOTE(review): those booleans are presumably (is_large,
        // word_copy_only) -- confirm against the clear_mem signature.
12248   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12249   match(Set dummy (ClearArray (Binary cnt base) val));
12250   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12251
12252   format %{ $$template
12253     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12254     $$emit$$"jg      LARGE\n\t"
12255     $$emit$$"dec     rcx\n\t"
12256     $$emit$$"js      DONE\t# Zero length\n\t"
12257     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12258     $$emit$$"dec     rcx\n\t"
12259     $$emit$$"jge     LOOP\n\t"
12260     $$emit$$"jmp     DONE\n\t"
12261     $$emit$$"# LARGE:\n\t"
12262     if (UseXMMForObjInit) {
12263        $$emit$$"movdq   $tmp, $val\n\t"
12264        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12265        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12266        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12267        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12268        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12269        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12270        $$emit$$"add     0x40,rax\n\t"
12271        $$emit$$"# L_zero_64_bytes:\n\t"
12272        $$emit$$"sub     0x8,rcx\n\t"
12273        $$emit$$"jge     L_loop\n\t"
12274        $$emit$$"add     0x4,rcx\n\t"
12275        $$emit$$"jl      L_tail\n\t"
12276        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12277        $$emit$$"add     0x20,rax\n\t"
12278        $$emit$$"sub     0x4,rcx\n\t"
12279        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12280        $$emit$$"add     0x4,rcx\n\t"
12281        $$emit$$"jle     L_end\n\t"
12282        $$emit$$"dec     rcx\n\t"
12283        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12284        $$emit$$"vmovq   xmm0,(rax)\n\t"
12285        $$emit$$"add     0x8,rax\n\t"
12286        $$emit$$"dec     rcx\n\t"
12287        $$emit$$"jge     L_sloop\n\t"
12288        $$emit$$"# L_end:\n\t"
12289     } else {
12290        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12291     }
12292     $$emit$$"# DONE"
12293   %}
12294   ins_encode %{
12295     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12296                  $tmp$$XMMRegister, false, true);
12297   %}
12298   ins_pipe(pipe_slow);
12299 %}
12300 
12301 // Small non-constant length ClearArray for AVX512 targets.
12302 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12303                        Universe dummy, rFlagsReg cr)
12304 %{
12305   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12306   match(Set dummy (ClearArray (Binary cnt base) val));
12307   ins_cost(125);
12308   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12309 
12310   format %{ $$template
12311     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12312     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12313     $$emit$$"jg      LARGE\n\t"
12314     $$emit$$"dec     rcx\n\t"
12315     $$emit$$"js      DONE\t# Zero length\n\t"
12316     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12317     $$emit$$"dec     rcx\n\t"
12318     $$emit$$"jge     LOOP\n\t"
12319     $$emit$$"jmp     DONE\n\t"
12320     $$emit$$"# LARGE:\n\t"
12321     if (UseFastStosb) {
12322        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12323        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12324     } else if (UseXMMForObjInit) {
12325        $$emit$$"mov     rdi,rax\n\t"
12326        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12327        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12328        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12336        $$emit$$"jl      L_tail\n\t"
12337        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12338        $$emit$$"add     0x20,rax\n\t"
12339        $$emit$$"sub     0x4,rcx\n\t"
12340        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12341        $$emit$$"add     0x4,rcx\n\t"
12342        $$emit$$"jle     L_end\n\t"
12343        $$emit$$"dec     rcx\n\t"
12344        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12345        $$emit$$"vmovq   xmm0,(rax)\n\t"
12346        $$emit$$"add     0x8,rax\n\t"
12347        $$emit$$"dec     rcx\n\t"
12348        $$emit$$"jge     L_sloop\n\t"
12349        $$emit$$"# L_end:\n\t"
12350     } else {
12351        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12352     }
12353     $$emit$$"# DONE"
12354   %}
12355   ins_encode %{
12356     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12357                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
12358   %}
12359   ins_pipe(pipe_slow);
12360 %}
12361 
12362 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12363                                  Universe dummy, rFlagsReg cr)

12364 %{
12365   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12366   match(Set dummy (ClearArray (Binary cnt base) val));
12367   ins_cost(125);
12368   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12369 
12370   format %{ $$template
12371     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12372     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12373     $$emit$$"jg      LARGE\n\t"
12374     $$emit$$"dec     rcx\n\t"
12375     $$emit$$"js      DONE\t# Zero length\n\t"
12376     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12377     $$emit$$"dec     rcx\n\t"
12378     $$emit$$"jge     LOOP\n\t"
12379     $$emit$$"jmp     DONE\n\t"
12380     $$emit$$"# LARGE:\n\t"
12381     if (UseFastStosb) {
12382        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12383        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12384     } else if (UseXMMForObjInit) {
12385        $$emit$$"mov     rdi,rax\n\t"
12386        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12387        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12388        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12396        $$emit$$"jl      L_tail\n\t"
12397        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12398        $$emit$$"add     0x20,rax\n\t"
12399        $$emit$$"sub     0x4,rcx\n\t"
12400        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12401        $$emit$$"add     0x4,rcx\n\t"
12402        $$emit$$"jle     L_end\n\t"
12403        $$emit$$"dec     rcx\n\t"
12404        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12405        $$emit$$"vmovq   xmm0,(rax)\n\t"
12406        $$emit$$"add     0x8,rax\n\t"
12407        $$emit$$"dec     rcx\n\t"
12408        $$emit$$"jge     L_sloop\n\t"
12409        $$emit$$"# L_end:\n\t"
12410     } else {
12411        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12412     }
12413     $$emit$$"# DONE"
12414   %}
12415   ins_encode %{
12416     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12417                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
12418   %}
12419   ins_pipe(pipe_slow);
12420 %}
12421 
12422 // Large non-constant length ClearArray for non-AVX512 targets.
12423 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12424                         Universe dummy, rFlagsReg cr)
12425 %{
        // Selected when the ClearArray node is is_large(), not word_copy_only(),
        // and UseAVX <= 2. Unlike the small variants, the format template has no
        // short-length loop (no InitArrayShortSize comparison).
        // clear_mem() is called with (true, false) as its trailing booleans.
        // NOTE(review): those booleans are presumably (is_large,
        // word_copy_only) -- confirm against the clear_mem signature.
12426   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12427   match(Set dummy (ClearArray (Binary cnt base) val));
12428   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12429
12430   format %{ $$template
12431     if (UseFastStosb) {
12432        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12433        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12434     } else if (UseXMMForObjInit) {
12435        $$emit$$"movdq   $tmp, $val\n\t"
12436        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12437        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12438        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12439        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12440        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12441        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12442        $$emit$$"add     0x40,rax\n\t"
12443        $$emit$$"# L_zero_64_bytes:\n\t"
12444        $$emit$$"sub     0x8,rcx\n\t"
12445        $$emit$$"jge     L_loop\n\t"
12446        $$emit$$"add     0x4,rcx\n\t"
12447        $$emit$$"jl      L_tail\n\t"
12448        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12449        $$emit$$"add     0x20,rax\n\t"
12450        $$emit$$"sub     0x4,rcx\n\t"
12451        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12452        $$emit$$"add     0x4,rcx\n\t"
12453        $$emit$$"jle     L_end\n\t"
12454        $$emit$$"dec     rcx\n\t"
12455        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12456        $$emit$$"vmovq   xmm0,(rax)\n\t"
12457        $$emit$$"add     0x8,rax\n\t"
12458        $$emit$$"dec     rcx\n\t"
12459        $$emit$$"jge     L_sloop\n\t"
12460        $$emit$$"# L_end:\n\t"
12461     } else {
12462        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12463     }
12464   %}
12465   ins_encode %{
12466     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12467                  $tmp$$XMMRegister, true, false);
12468   %}
12469   ins_pipe(pipe_slow);
12470 %}
12471 
12472 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12473                                   Universe dummy, rFlagsReg cr)
12474 %{
        // Variant of rep_stos_large for ClearArray nodes that are
        // word_copy_only(): selected when the node is is_large(), is
        // word_copy_only(), and UseAVX <= 2. The format template has no
        // UseFastStosb (byte store) branch, and clear_mem() is called with
        // (true, true) as its trailing booleans.
        // NOTE(review): those booleans are presumably (is_large,
        // word_copy_only) -- confirm against the clear_mem signature.
12475   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12476   match(Set dummy (ClearArray (Binary cnt base) val));
12477   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12478
12479   format %{ $$template
12480     if (UseXMMForObjInit) {
12481        $$emit$$"movdq   $tmp, $val\n\t"
12482        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12483        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12484        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12485        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12486        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12487        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12488        $$emit$$"add     0x40,rax\n\t"
12489        $$emit$$"# L_zero_64_bytes:\n\t"
12490        $$emit$$"sub     0x8,rcx\n\t"
12491        $$emit$$"jge     L_loop\n\t"
12492        $$emit$$"add     0x4,rcx\n\t"
12493        $$emit$$"jl      L_tail\n\t"
12494        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12495        $$emit$$"add     0x20,rax\n\t"
12496        $$emit$$"sub     0x4,rcx\n\t"
12497        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12498        $$emit$$"add     0x4,rcx\n\t"
12499        $$emit$$"jle     L_end\n\t"
12500        $$emit$$"dec     rcx\n\t"
12501        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12502        $$emit$$"vmovq   xmm0,(rax)\n\t"
12503        $$emit$$"add     0x8,rax\n\t"
12504        $$emit$$"dec     rcx\n\t"
12505        $$emit$$"jge     L_sloop\n\t"
12506        $$emit$$"# L_end:\n\t"
12507     } else {
12508        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12509     }
12510   %}
12511   ins_encode %{
12512     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12513                  $tmp$$XMMRegister, true, true);
12514   %}
12515   ins_pipe(pipe_slow);
12516 %}
12517 
12518 // Large non-constant length ClearArray for AVX512 targets.
12519 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12520                              Universe dummy, rFlagsReg cr)
12521 %{
        // Selected when the ClearArray node is is_large(), not word_copy_only(),
        // and UseAVX > 2. Compared with rep_stos_large it additionally reserves
        // an opmask register (ktmp) and passes it to clear_mem().
        // NOTE(review): the format template prints "xorq rax, rax" and uses
        // ymm0, while the encoding passes $val and $tmp to clear_mem() -- the
        // text may be stale relative to the emitted code; confirm.
12522   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12523   match(Set dummy (ClearArray (Binary cnt base) val));
12524   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12525
12526   format %{ $$template
12527     if (UseFastStosb) {
12528        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12529        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12530        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12531     } else if (UseXMMForObjInit) {
12532        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12533        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12534        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12535        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12536        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12537        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12538        $$emit$$"add     0x40,rax\n\t"
12539        $$emit$$"# L_zero_64_bytes:\n\t"
12540        $$emit$$"sub     0x8,rcx\n\t"
12541        $$emit$$"jge     L_loop\n\t"
12542        $$emit$$"add     0x4,rcx\n\t"
12543        $$emit$$"jl      L_tail\n\t"
12544        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12545        $$emit$$"add     0x20,rax\n\t"
12546        $$emit$$"sub     0x4,rcx\n\t"
12547        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12548        $$emit$$"add     0x4,rcx\n\t"
12549        $$emit$$"jle     L_end\n\t"
12550        $$emit$$"dec     rcx\n\t"
12551        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12552        $$emit$$"vmovq   xmm0,(rax)\n\t"
12553        $$emit$$"add     0x8,rax\n\t"
12554        $$emit$$"dec     rcx\n\t"
12555        $$emit$$"jge     L_sloop\n\t"
12556        $$emit$$"# L_end:\n\t"
12557     } else {
12558        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12559        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12560     }
12561   %}
12562   ins_encode %{
12563     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12564                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
12565   %}
12566   ins_pipe(pipe_slow);
12567 %}
12568 
12569 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12570                                        Universe dummy, rFlagsReg cr)

12571 %{
        // Variant of rep_stos_large_evex for ClearArray nodes that are
        // word_copy_only(): selected when the node is is_large(), is
        // word_copy_only(), and UseAVX > 2. clear_mem() is called with
        // (true, true) as its trailing booleans plus the opmask register ktmp.
        // NOTE(review): the format template still contains a UseFastStosb
        // (byte store) branch, unlike the non-EVEX word_copy variants in this
        // file -- confirm whether that text reflects what clear_mem() emits.
12572   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12573   match(Set dummy (ClearArray (Binary cnt base) val));
12574   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12575
12576   format %{ $$template
12577     if (UseFastStosb) {
12578        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12579        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12580        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12581     } else if (UseXMMForObjInit) {
12582        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12583        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12584        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12585        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12586        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12587        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12588        $$emit$$"add     0x40,rax\n\t"
12589        $$emit$$"# L_zero_64_bytes:\n\t"
12590        $$emit$$"sub     0x8,rcx\n\t"
12591        $$emit$$"jge     L_loop\n\t"
12592        $$emit$$"add     0x4,rcx\n\t"
12593        $$emit$$"jl      L_tail\n\t"
12594        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12595        $$emit$$"add     0x20,rax\n\t"
12596        $$emit$$"sub     0x4,rcx\n\t"
12597        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12598        $$emit$$"add     0x4,rcx\n\t"
12599        $$emit$$"jle     L_end\n\t"
12600        $$emit$$"dec     rcx\n\t"
12601        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12602        $$emit$$"vmovq   xmm0,(rax)\n\t"
12603        $$emit$$"add     0x8,rax\n\t"
12604        $$emit$$"dec     rcx\n\t"
12605        $$emit$$"jge     L_sloop\n\t"
12606        $$emit$$"# L_end:\n\t"
12607     } else {
12608        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12609        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12610     }
12611   %}
12612   ins_encode %{
12613     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12614                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
12615   %}
12616   ins_pipe(pipe_slow);
12617 %}
12618 
12619 // Small constant length ClearArray for AVX512 targets.
12620 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
12621 %{
        // ClearArray with a compile-time constant count (immL cnt). Selected
        // when the node is neither is_large() nor word_copy_only(), and
        // MaxVectorSize >= 32 with AVX512VL support. Here base may be any
        // pointer register, and the count is handed to clear_mem() as a
        // constant rather than in rcx. val (rax) is clobbered; tmp and ktmp
        // are scratch registers.
12622   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
12623             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
12624   match(Set dummy (ClearArray (Binary cnt base) val));
12625   ins_cost(100);
12626   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
12627   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12628   ins_encode %{
12629     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12630   %}
12631   ins_pipe(pipe_slow);
12632 %}
12633 
12634 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12635                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12636 %{
        // StrComp for the LL encoding on CPUs without AVX512VL+BW support.
        // Both string pointers and both counts are consumed and clobbered; tmp1
        // is a scratch XMM register; the flags are killed. No opmask register
        // is used, so knoreg is passed to string_compare().
12637   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12638   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12639   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12640
12641   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12642   ins_encode %{
12643     __ string_compare($str1$$Register, $str2$$Register,
12644                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12645                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12646   %}
12647   ins_pipe( pipe_slow );
12648 %}
12649 

14482 
14483   ins_cost(300);
14484   format %{ "call_leaf,runtime " %}
14485   ins_encode(clear_avx, Java_To_Runtime(meth));
14486   ins_pipe(pipe_slow);
14487 %}
14488 
14489 // Call runtime without safepoint and with vector arguments
14490 instruct CallLeafDirectVector(method meth)
14491 %{
        // Unlike CallLeafNoFPDirect, the encoding here has no clear_avx step.
        // This matches MachCallRuntimeNode::ret_addr_offset(), which does not
        // add clear_avx_size() for Op_CallLeafVector.
14492   match(CallLeafVector);
14493   effect(USE meth);
14494
14495   ins_cost(300);
14496   format %{ "call_leaf,vector " %}
14497   ins_encode(Java_To_Runtime(meth));
14498   ins_pipe(pipe_slow);
14499 %}
14500 
14501 // Call runtime without safepoint
14502 // entry point is null, target holds the address to call
14503 instruct CallLeafNoFPInDirect(rRegP target)
14504 %{
        // Selected only when the call node has no static entry point; the
        // encoding is a single indirect call through the $target register.
        // NOTE(review): no clear_avx step is emitted here, unlike
        // CallLeafNoFPDirect -- confirm that is intended.
14505   predicate(n->as_Call()->entry_point() == nullptr);
14506   match(CallLeafNoFP target);
14507
14508   ins_cost(300);
14509   format %{ "call_leaf_nofp,runtime indirect " %}
14510   ins_encode %{
14511      __ call($target$$Register);
14512   %}
14513
14514   ins_pipe(pipe_slow);
14515 %}
14516 
14517 instruct CallLeafNoFPDirect(method meth)
14518 %{
        // CallLeafNoFP with a known entry point (the complement of
        // CallLeafNoFPInDirect's predicate). Encoded as the clear_avx encoding
        // followed by Java_To_Runtime(meth).
14519   predicate(n->as_Call()->entry_point() != nullptr);
14520   match(CallLeafNoFP);
14521   effect(USE meth);
14522
14523   ins_cost(300);
14524   format %{ "call_leaf_nofp,runtime " %}
14525   ins_encode(clear_avx, Java_To_Runtime(meth));
14526   ins_pipe(pipe_slow);
14527 %}
14528 
14529 // Return Instruction
14530 // Remove the return address & jump to it.
14531 // Notice: We always emit a nop after a ret to make sure there is room
14532 // for safepoint patching
14533 instruct Ret()
14534 %{
14535   match(Return);
14536 
14537   format %{ "ret" %}
14538   ins_encode %{
14539     __ ret(0);
< prev index next >