< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
        // Byte distance from the start of this node's code to the return address
        // of the static Java call it emits.
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size(); // plus the bytes compute_padding() skips as "vzeroupper"
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
        // Byte distance from the start of this node's code to the return address
        // of the dynamic (inline-cache) Java call it emits.
        // NOTE(review): 15 presumably covers the movq that precedes the call plus
        // the call itself (compute_padding() skips 11 bytes for "movq instruction +
        // call opcode byte") -- confirm against the emitted ic_call sequence.
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size(); // plus the bytes compute_padding() skips as "vzeroupper"
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




        // Byte distance from the start of this node's code to the return address
        // of the runtime call.
 1652   int offset = 13; // movq r10,#addr; callq (r10)
        // CallLeafVector is the one runtime call whose encoding in this file
        // (CallLeafDirectVector) does not list clear_avx, so no bytes are added
        // for it; every other runtime call adds clear_avx_size().
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
      // More precisely, the offset that gets rounded up to alignment_required()
      // is the one just past the leading vzeroupper bytes and the call opcode
      // byte, i.e. the bytes that follow the opcode. The return value is the
      // number of padding bytes to insert in front of the node.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1879     st->print("\n\t");
 1880     st->print("# stack alignment check");
 1881 #endif
 1882   }
 1883   if (C->stub_function() != nullptr) {
 1884     st->print("\n\t");
 1885     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1886     st->print("\n\t");
 1887     st->print("je      fast_entry\t");
 1888     st->print("\n\t");
 1889     st->print("call    #nmethod_entry_barrier_stub\t");
 1890     st->print("\n\tfast_entry:");
 1891   }
 1892   st->cr();
 1893 }
 1894 #endif
 1895 
 1896 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: an optional class-initialization barrier, the
        // verified entry sequence, and bookkeeping for the frame-complete offset
        // and the constant table base.
 1897   Compile* C = ra_->C;
 1898 
 1899   int framesize = C->output()->frame_size_in_bytes();
 1900   int bangsize = C->output()->bang_size_in_bytes();
 1901 
 1902   if (C->clinit_barrier_on_entry()) {
          // Load the holder class into rscratch1 and test it with clinit_barrier:
          // the fast path branches to L_skip_barrier, anything else falls through
          // to the jump into the handle-wrong-method stub.
 1903     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1904     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1905 
 1906     Label L_skip_barrier;
 1907     Register klass = rscratch1;
 1908 
 1909     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1910     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1911 
 1912     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1913 
 1914     __ bind(L_skip_barrier);
 1915   }
 1916 
        // The bang size is passed as 0 when need_stack_bang() reports that no
        // stack bang is required; the last argument is true only when compiling
        // a stub (stub_function() is non-null).
 1917   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


 1918 
        // Record the code offset reached after the entry sequence as the
        // frame-complete offset.
 1919   C->output()->set_frame_complete(__ offset());
 1920 
 1921   if (C->has_mach_constant_base_node()) {
 1922     // NOTE: We set the table base offset here because users might be
 1923     // emitted before MachConstantBaseNode.
 1924     ConstantTable& constant_table = C->output()->constant_table();
 1925     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1926   }
 1927 }
 1928 
 1929 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1930 {
        // No closed-form size is attempted for the prolog; the size is obtained
        // by delegating to the generic MachNode::size().
 1931   return MachNode::size(ra_); // too many variables; just compute it
 1932                               // the hard way
 1933 }
 1934 
 1935 int MachPrologNode::reloc() const
 1936 {
        // NOTE(review): the trailing comment matches the wording used on
        // MachEpilogNode::reloc() (which returns 2), yet the value returned here
        // is 0 -- confirm whether the prolog really needs no relocation entries
        // or whether the comment is stale.
 1937   return 0; // a large enough number
 1938 }
 1939 
 1940 //=============================================================================
 1941 #ifndef PRODUCT
 1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1943 {
 1944   Compile* C = ra_->C;
 1945   if (generate_vzeroupper(C)) {
 1946     st->print("vzeroupper");
 1947     st->cr(); st->print("\t");
 1948   }
 1949 
 1950   int framesize = C->output()->frame_size_in_bytes();
 1951   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1952   // Remove word for return adr already pushed
 1953   // and RBP

 1961   st->print_cr("popq    rbp");
 1962   if (do_polling() && C->is_method_compilation()) {
 1963     st->print("\t");
 1964     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1965                  "ja      #safepoint_stub\t"
 1966                  "# Safepoint: poll for GC");
 1967   }
 1968 }
 1969 #endif
 1970 
 1971 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1972 {
        // Emits the method epilog: optional vzeroupper, stack frame teardown,
        // optional reserved-stack check, and the return safepoint poll.
 1973   Compile* C = ra_->C;
 1974 
 1975   if (generate_vzeroupper(C)) {
 1976     // Clear upper bits of YMM registers when current compiled code uses
 1977     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1978     __ vzeroupper();
 1979   }
 1980 
 1981   int framesize = C->output()->frame_size_in_bytes();
 1982   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1983   // Remove word for return adr already pushed
 1984   // and RBP
 1985   framesize -= 2*wordSize;
 1986 
 1987   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1988 
        // Skip the stack-pointer adjustment entirely when nothing beyond the
        // return address and rbp was allocated.
 1989   if (framesize) {
 1990     __ addq(rsp, framesize);
 1991   }
 1992 
 1993   __ popq(rbp);
 1994 
 1995   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1996     __ reserved_stack_check();
 1997   }
 1998 
 1999   if (do_polling() && C->is_method_compilation()) {
          // While emitting into the scratch buffer for size estimation, poll
          // against a throwaway label instead of registering a stub; a real
          // C2SafepointPollStub (keyed by the current code offset) is created
          // and registered only for the actual emission.
 2000     Label dummy_label;
 2001     Label* code_stub = &dummy_label;
 2002     if (!C->output()->in_scratch_emit_size()) {
 2003       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 2004       C->output()->add_stub(stub);
 2005       code_stub = &stub->entry();
 2006     }
 2007     __ relocate(relocInfo::poll_return_type);
 2008     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2009   }
 2010 }
 2011 
 2012 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2013 {
 2014   return MachNode::size(ra_); // too many variables; just compute it
 2015                               // the hard way
 2016 }
 2017 
 2018 int MachEpilogNode::reloc() const
 2019 {
        // Upper bound on relocation entries for the epilog. emit() issues one
        // explicit relocate(poll_return_type) call.
        // NOTE(review): whether the second slot is needed depends on helpers
        // not visible here -- confirm.
 2020   return 2; // a large enough number
 2021 }
 2022 
 2023 const Pipeline* MachEpilogNode::pipeline() const
 2024 {
 2025   return MachNode::pipeline_class();
 2026 }
 2027 
 2028 //=============================================================================
 2029 
 2030 enum RC {
 2031   rc_bad,
 2032   rc_int,
 2033   rc_kreg,
 2034   rc_float,
 2035   rc_stack
 2036 };
 2037 

 2599 #endif
 2600 
 2601 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
        // Materializes a stack address: the stack offset is derived from the first
        // element of this node's input register mask, and rsp + offset is loaded
        // (via lea) into the register the allocator assigned to this node.
 2603   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2604   int reg = ra_->get_encode(this);
 2605 
 2606   __ lea(as_Register(reg), Address(rsp, offset));
 2607 }
 2608 
 2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2610 {
        // Size in bytes of the lea emitted by BoxLockNode::emit(). Destination
        // encodings above 15 cost one extra byte (REX2 instead of REX), and
        // offsets of 0x80 or more cost three extra bytes.
        // NOTE(review): the 3-byte step presumably reflects an 8-bit versus
        // 32-bit displacement -- confirm against the assembler.
 2611   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2612   if (ra_->get_encode(this) > 15) {
 2613     return (offset < 0x80) ? 6 : 9; // REX2
 2614   } else {
 2615     return (offset < 0x80) ? 5 : 8; // REX
 2616   }
 2617 }
 2618 











































 2619 //=============================================================================
 2620 #ifndef PRODUCT
 2621 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2622 {
 2623   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2624   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2625   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2626 }
 2627 #endif
 2628 
 2629 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2630 {
 2631   __ ic_check(InteriorEntryAlignment);
 2632 }
 2633 
 2634 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2635 {
 2636   return MachNode::size(ra_); // too many variables; just compute it
 2637                               // the hard way
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
        // True when mdef's flags have the Flag_ndd_demotable_opr1 bit set.
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
        // True when mdef's flags have the Flag_ndd_demotable_opr2 bit set.
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2653 }
 2654 
 2655 #ifdef ASSERT
      // Debug-build-only helper: true when either of the two ndd-demotable flag
      // bits (opr1 or opr2) is set on mdef.
 2656 static bool is_ndd_demotable(const MachNode* mdef) {
 2657   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2658 }
 2659 #endif

 4593     }
 4594     __ post_call_nop();
 4595   %}
 4596 
 4597   enc_class Java_Dynamic_Call(method meth) %{
 4598     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4599     __ post_call_nop();
 4600   %}
 4601 
 4602   enc_class call_epilog %{
 4603     if (VerifyStackAtCalls) {
 4604       // Check that stack depth is unchanged: find majik cookie on stack
 4605       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4606       Label L;
 4607       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4608       __ jccb(Assembler::equal, L);
 4609       // Die if stack mismatch
 4610       __ int3();
 4611       __ bind(L);
 4612     }

































 4613   %}
 4614 
 4615 %}
 4616 
 4617 //----------FRAME--------------------------------------------------------------
 4618 // Definition of frame structure and management information.
 4619 //
 4620 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4621 //                             |   (to get allocators register number
 4622 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4623 //  r   CALLER     |        |
 4624 //  o     |        +--------+      pad to even-align allocators stack-slot
 4625 //  w     V        |  pad0  |        numbers; owned by CALLER
 4626 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4627 //  h     ^        |   in   |  5
 4628 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4629 //  |     |        |        |  3
 4630 //  |     |        +--------+
 4631 //  V     |        | old out|      Empty on Intel, window on Sparc
 4632 //        |    old |preserve|      Must be even aligned.

 5771   %}
 5772 %}
 5773 
 5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5775 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5776 %{
 5777   constraint(ALLOC_IN_RC(ptr_reg));
        // Following the match rule below, n->in(2) is the inner AddP, its in(3)
        // the LShiftL and that node's in(1) the ConvI2L; the operand only applies
        // when the lower bound of the ConvI2L's long type is non-negative, i.e.
        // the converted index is known to be >= 0.
 5778   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5779   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5780 
 5781   op_cost(10);
 5782   format %{"[$reg + $off + $idx << $scale]" %}
 5783   interface(MEMORY_INTER) %{
 5784     base($reg);
 5785     index($idx);
 5786     scale($scale);
 5787     disp($off);
 5788   %}
 5789 %}
 5790 
















 5791 // Indirect Narrow Oop Plus Offset Operand
 5792 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5793 // we can't free r12 even with CompressedOops::base() == nullptr.
 5794 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5795   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5796   constraint(ALLOC_IN_RC(ptr_reg));
 5797   match(AddP (DecodeN reg) off);
 5798 
 5799   op_cost(10);
 5800   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5801   interface(MEMORY_INTER) %{
 5802     base(0xc); // R12
 5803     index($reg);
 5804     scale(0x3);
 5805     disp($off);
 5806   %}
 5807 %}
 5808 
 5809 // Indirect Memory Operand
 5810 operand indirectNarrow(rRegN reg)

 6280 %}
 6281 
 6282 // Replaces legVec during post-selection cleanup. See above.
 6283 operand legVecZ() %{
 6284   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6285   match(VecZ);
 6286 
 6287   format %{ %}
 6288   interface(REG_INTER);
 6289 %}
 6290 
 6291 //----------OPERAND CLASSES----------------------------------------------------
 6292 // Operand Classes are groups of operands that are used as to simplify
 6293 // instruction definitions by not requiring the AD writer to specify separate
 6294 // instructions for every form of operand when the instruction accepts
 6295 // multiple operand types with the same basic encoding and format.  The classic
 6296 // case of this is memory operands.
 6297 
 6298 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6299                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6300                indCompressedOopOffset,
 6301                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6302                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6303                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6304 
 6305 //----------PIPELINE-----------------------------------------------------------
 6306 // Rules which define the behavior of the target architectures pipeline.
 6307 pipeline %{
 6308 
 6309 //----------ATTRIBUTES---------------------------------------------------------
 6310 attributes %{
 6311   variable_size_instructions;        // Variable size instructions
 6312   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6313   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 6314   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6315   instruction_fetch_units = 1;       // of 16 bytes
 6316 %}
 6317 
 6318 //----------RESOURCES----------------------------------------------------------
 6319 // Resources are the functional units available to the machine
 6320 

 8915   format %{ "MEMBAR-storestore (empty encoding)" %}
 8916   ins_encode( );
 8917   ins_pipe(empty);
 8918 %}
 8919 
 8920 //----------Move Instructions--------------------------------------------------
 8921 
 8922 instruct castX2P(rRegP dst, rRegL src)
 8923 %{
        // Reinterprets a long value as a pointer (CastX2P) with a plain
        // register-to-register move.
 8924   match(Set dst (CastX2P src));
 8925 
 8926   format %{ "movq    $dst, $src\t# long->ptr" %}
 8927   ins_encode %{
          // Nothing is emitted when source and destination were allocated to the
          // same register.
 8928     if ($dst$$reg != $src$$reg) {
 8929       __ movptr($dst$$Register, $src$$Register);
 8930     }
 8931   %}
 8932   ins_pipe(ialu_reg_reg); // XXX
 8933 %}
 8934 


























 8935 instruct castP2X(rRegL dst, rRegP src)
 8936 %{
        // Reinterprets a pointer as a long value (CastP2X) with a plain
        // register-to-register move.
 8937   match(Set dst (CastP2X src));
 8938 
 8939   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8940   ins_encode %{
          // Nothing is emitted when source and destination were allocated to the
          // same register.
 8941     if ($dst$$reg != $src$$reg) {
 8942       __ movptr($dst$$Register, $src$$Register);
 8943     }
 8944   %}
 8945   ins_pipe(ialu_reg_reg); // XXX
 8946 %}
 8947 
 8948 // Convert oop into int for vectors alignment masking
 8949 instruct convP2I(rRegI dst, rRegP src)
 8950 %{
 8951   match(Set dst (ConvL2I (CastP2X src)));
 8952 
 8953   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8954   ins_encode %{

15202   effect(DEF dst, USE src);
15203   ins_cost(100);
15204   format %{ "movd    $dst,$src\t# MoveI2F" %}
15205   ins_encode %{
15206     __ movdl($dst$$XMMRegister, $src$$Register);
15207   %}
15208   ins_pipe( pipe_slow );
15209 %}
15210 
15211 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves a long held in a general-purpose register into an XMM register
        // (MoveL2D).
15212   match(Set dst (MoveL2D src));
15213   effect(DEF dst, USE src);
15214   ins_cost(100);
        // NOTE(review): the printed mnemonic is "movd" while the encoding below
        // calls movdq -- the format text may deserve an update.
15215   format %{ "movd    $dst,$src\t# MoveL2D" %}
15216   ins_encode %{
15217      __ movdq($dst$$XMMRegister, $src$$Register);
15218   %}
15219   ins_pipe( pipe_slow );
15220 %}
15221 

15222 // Fast clearing of an array
15223 // Small non-constant length ClearArray for non-AVX512 targets.
15224 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15225                   Universe dummy, rFlagsReg cr)
15226 %{
15227   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15228   match(Set dummy (ClearArray cnt base));
15229   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































15230 
15231   format %{ $$template
15232     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15233     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15234     $$emit$$"jg      LARGE\n\t"
15235     $$emit$$"dec     rcx\n\t"
15236     $$emit$$"js      DONE\t# Zero length\n\t"
15237     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15238     $$emit$$"dec     rcx\n\t"
15239     $$emit$$"jge     LOOP\n\t"
15240     $$emit$$"jmp     DONE\n\t"
15241     $$emit$$"# LARGE:\n\t"
15242     if (UseFastStosb) {
15243        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15244        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15245     } else if (UseXMMForObjInit) {
15246        $$emit$$"mov     rdi,rax\n\t"
15247        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15248        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15249        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15257        $$emit$$"jl      L_tail\n\t"
15258        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15259        $$emit$$"add     0x20,rax\n\t"
15260        $$emit$$"sub     0x4,rcx\n\t"
15261        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15262        $$emit$$"add     0x4,rcx\n\t"
15263        $$emit$$"jle     L_end\n\t"
15264        $$emit$$"dec     rcx\n\t"
15265        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15266        $$emit$$"vmovq   xmm0,(rax)\n\t"
15267        $$emit$$"add     0x8,rax\n\t"
15268        $$emit$$"dec     rcx\n\t"
15269        $$emit$$"jge     L_sloop\n\t"
15270        $$emit$$"# L_end:\n\t"
15271     } else {
15272        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15273     }
15274     $$emit$$"# DONE"
15275   %}
15276   ins_encode %{
15277     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15278                  $tmp$$XMMRegister, false, knoreg);
15279   %}
15280   ins_pipe(pipe_slow);
15281 %}
15282 
15283 // Small non-constant length ClearArray for AVX512 targets.
15284 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15285                        Universe dummy, rFlagsReg cr)
15286 %{
15287   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15288   match(Set dummy (ClearArray cnt base));
15289   ins_cost(125);
15290   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15291 
15292   format %{ $$template
15293     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15294     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15295     $$emit$$"jg      LARGE\n\t"
15296     $$emit$$"dec     rcx\n\t"
15297     $$emit$$"js      DONE\t# Zero length\n\t"
15298     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15299     $$emit$$"dec     rcx\n\t"
15300     $$emit$$"jge     LOOP\n\t"
15301     $$emit$$"jmp     DONE\n\t"
15302     $$emit$$"# LARGE:\n\t"
15303     if (UseFastStosb) {
15304        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15305        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15306     } else if (UseXMMForObjInit) {
15307        $$emit$$"mov     rdi,rax\n\t"
15308        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15309        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15310        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15318        $$emit$$"jl      L_tail\n\t"
15319        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15320        $$emit$$"add     0x20,rax\n\t"
15321        $$emit$$"sub     0x4,rcx\n\t"
15322        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15323        $$emit$$"add     0x4,rcx\n\t"
15324        $$emit$$"jle     L_end\n\t"
15325        $$emit$$"dec     rcx\n\t"
15326        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15327        $$emit$$"vmovq   xmm0,(rax)\n\t"
15328        $$emit$$"add     0x8,rax\n\t"
15329        $$emit$$"dec     rcx\n\t"
15330        $$emit$$"jge     L_sloop\n\t"
15331        $$emit$$"# L_end:\n\t"
15332     } else {
15333        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15334     }
15335     $$emit$$"# DONE"
15336   %}
15337   ins_encode %{
15338     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15339                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15340   %}
15341   ins_pipe(pipe_slow);
15342 %}
15343 
15344 // Large non-constant length ClearArray for non-AVX512 targets.
15345 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15346                         Universe dummy, rFlagsReg cr)
15347 %{
15348   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15349   match(Set dummy (ClearArray cnt base));
15350   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































15351 
15352   format %{ $$template
15353     if (UseFastStosb) {
15354        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15355        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15356        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15357     } else if (UseXMMForObjInit) {
15358        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15359        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15360        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15361        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15362        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15363        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15364        $$emit$$"add     0x40,rax\n\t"
15365        $$emit$$"# L_zero_64_bytes:\n\t"
15366        $$emit$$"sub     0x8,rcx\n\t"
15367        $$emit$$"jge     L_loop\n\t"
15368        $$emit$$"add     0x4,rcx\n\t"
15369        $$emit$$"jl      L_tail\n\t"
15370        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15371        $$emit$$"add     0x20,rax\n\t"
15372        $$emit$$"sub     0x4,rcx\n\t"
15373        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15374        $$emit$$"add     0x4,rcx\n\t"
15375        $$emit$$"jle     L_end\n\t"
15376        $$emit$$"dec     rcx\n\t"
15377        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15378        $$emit$$"vmovq   xmm0,(rax)\n\t"
15379        $$emit$$"add     0x8,rax\n\t"
15380        $$emit$$"dec     rcx\n\t"
15381        $$emit$$"jge     L_sloop\n\t"
15382        $$emit$$"# L_end:\n\t"
15383     } else {
15384        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15385        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15386     }
15387   %}
15388   ins_encode %{
15389     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15390                  $tmp$$XMMRegister, true, knoreg);
15391   %}
15392   ins_pipe(pipe_slow);
15393 %}
15394 
15395 // Large non-constant length ClearArray for AVX512 targets.
15396 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15397                              Universe dummy, rFlagsReg cr)
15398 %{
15399   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15400   match(Set dummy (ClearArray cnt base));
15401   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15402 
15403   format %{ $$template
15404     if (UseFastStosb) {
15405        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15406        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15407        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15408     } else if (UseXMMForObjInit) {
15409        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15410        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15411        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15412        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15413        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15414        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15415        $$emit$$"add     0x40,rax\n\t"
15416        $$emit$$"# L_zero_64_bytes:\n\t"
15417        $$emit$$"sub     0x8,rcx\n\t"
15418        $$emit$$"jge     L_loop\n\t"
15419        $$emit$$"add     0x4,rcx\n\t"
15420        $$emit$$"jl      L_tail\n\t"
15421        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15422        $$emit$$"add     0x20,rax\n\t"
15423        $$emit$$"sub     0x4,rcx\n\t"
15424        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15425        $$emit$$"add     0x4,rcx\n\t"
15426        $$emit$$"jle     L_end\n\t"
15427        $$emit$$"dec     rcx\n\t"
15428        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15429        $$emit$$"vmovq   xmm0,(rax)\n\t"
15430        $$emit$$"add     0x8,rax\n\t"
15431        $$emit$$"dec     rcx\n\t"
15432        $$emit$$"jge     L_sloop\n\t"
15433        $$emit$$"# L_end:\n\t"
15434     } else {
15435        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15436        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15437     }
15438   %}
15439   ins_encode %{
15440     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15441                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15442   %}
15443   ins_pipe(pipe_slow);
15444 %}
15445 
15446 // Small constant length ClearArray for AVX512 targets.
15447 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15448 %{
15449   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15450   match(Set dummy (ClearArray cnt base));

15451   ins_cost(100);
15452   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15453   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15454   ins_encode %{
15455    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15456   %}
15457   ins_pipe(pipe_slow);
15458 %}
15459 
15460 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15461                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15462 %{
15463   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15464   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15465   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15466 
15467   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15468   ins_encode %{
15469     __ string_compare($str1$$Register, $str2$$Register,
15470                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15471                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15472   %}
15473   ins_pipe( pipe_slow );
15474 %}
15475 

17313   effect(USE meth);
17314 
17315   ins_cost(300);
17316   format %{ "call_leaf,runtime " %}
17317   ins_encode(clear_avx, Java_To_Runtime(meth));
17318   ins_pipe(pipe_slow);
17319 %}
17320 
17321 // Call runtime without safepoint and with vector arguments
17322 instruct CallLeafDirectVector(method meth)
17323 %{
17324   match(CallLeafVector);
17325   effect(USE meth);
17326 
17327   ins_cost(300);
17328   format %{ "call_leaf,vector " %}
        // Unlike the other leaf-call encodings in this file, no clear_avx step is
        // listed here; MachCallRuntimeNode::ret_addr_offset() correspondingly
        // adds no clear_avx_size() for Op_CallLeafVector.
17329   ins_encode(Java_To_Runtime(meth));
17330   ins_pipe(pipe_slow);
17331 %}
17332 
















17333 // Call runtime without safepoint
17334 instruct CallLeafNoFPDirect(method meth)
17335 %{

17336   match(CallLeafNoFP);
17337   effect(USE meth);
17338 
17339   ins_cost(300);
17340   format %{ "call_leaf_nofp,runtime " %}
17341   ins_encode(clear_avx, Java_To_Runtime(meth));
17342   ins_pipe(pipe_slow);
17343 %}
17344 
17345 // Return Instruction
17346 // Remove the return address & jump to it.
17347 // Notice: We always emit a nop after a ret to make sure there is room
17348 // for safepoint patching
17349 instruct Ret()
17350 %{
17351   match(Return);
17352 
17353   format %{ "ret" %}
17354   ins_encode %{
17355     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
        // Byte distance from the start of this node's code to the return address
        // of the runtime call.
 1652   if (_entry_point == nullptr) {
          // Without a fixed entry point the call goes through a register; this
          // path returns before any clear_avx_size() is added.
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
        // CallLeafVector is the one runtime call whose encoding in this file
        // (CallLeafDirectVector) does not list clear_avx, so no bytes are added
        // for it.
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1884     st->print("\n\t");
 1885     st->print("# stack alignment check");
 1886 #endif
 1887   }
 1888   if (C->stub_function() != nullptr) {
 1889     st->print("\n\t");
 1890     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1891     st->print("\n\t");
 1892     st->print("je      fast_entry\t");
 1893     st->print("\n\t");
 1894     st->print("call    #nmethod_entry_barrier_stub\t");
 1895     st->print("\n\tfast_entry:");
 1896   }
 1897   st->cr();
 1898 }
 1899 #endif
 1900 
 1901 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: the verified entry sequence, an entry barrier
        // for non-stub compilations, the binding of the verified-entry label, and
        // bookkeeping for the frame-complete offset and the constant table base.
 1902   Compile* C = ra_->C;
 1903 
 1904   __ verified_entry(C);





 1905 
        // The entry barrier is emitted only when this is not a stub compilation.
        // NOTE(review): ra_->C here and Compile::current() below presumably
        // denote the same object as the local C -- confirm.
 1906   if (ra_->C->stub_function() == nullptr) {
 1907     __ entry_barrier();







 1908   }
 1909 
        // The _verified_entry label is bound only during real emission, not while
        // emitting into the scratch buffer for size estimation.
 1910   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1911     __ bind(*_verified_entry);
 1912   }
 1913 
        // Record the code offset reached so far as the frame-complete offset.
 1914   C->output()->set_frame_complete(__ offset());
 1915 
 1916   if (C->has_mach_constant_base_node()) {
 1917     // NOTE: We set the table base offset here because users might be
 1918     // emitted before MachConstantBaseNode.
 1919     ConstantTable& constant_table = C->output()->constant_table();
 1920     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1921   }
 1922 }
 1923 





 1924 
 1925 int MachPrologNode::reloc() const
 1926 {
        // NOTE(review): the trailing comment says "large enough" yet the value is 0,
        // while MachEpilogNode::reloc() returns 2 with the same wording. Presumably
        // this is a count of relocation entries — confirm that 0 is intended here.
 1927   return 0; // a large enough number
 1928 }
 1929 
 1930 //=============================================================================
 1931 #ifndef PRODUCT
 1932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1933 {
 1934   Compile* C = ra_->C;
 1935   if (generate_vzeroupper(C)) {
 1936     st->print("vzeroupper");
 1937     st->cr(); st->print("\t");
 1938   }
 1939 
 1940   int framesize = C->output()->frame_size_in_bytes();
 1941   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1942   // Remove word for return adr already pushed
 1943   // and RBP

 1951   st->print_cr("popq    rbp");
 1952   if (do_polling() && C->is_method_compilation()) {
 1953     st->print("\t");
 1954     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1955                  "ja      #safepoint_stub\t"
 1956                  "# Safepoint: poll for GC");
 1957   }
 1958 }
 1959 #endif
 1960 
 1961 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1962 {
        // Emits the method epilog: optional vzeroupper, frame removal, optional
        // reserved-stack check, and (for method compilations that poll) the
        // return safepoint poll together with its out-of-line stub.
 1963   Compile* C = ra_->C;
 1964 
 1965   if (generate_vzeroupper(C)) {
 1966     // Clear upper bits of YMM registers when current compiled code uses
 1967     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1968     __ vzeroupper();
 1969   }
 1970 
 1971   // Subtract two words to account for return address and rbp
 1972   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1973   __ remove_frame(initial_framesize, C->needs_stack_repair());










 1974 
 1975   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1976     __ reserved_stack_check();
 1977   }
 1978 
 1979   if (do_polling() && C->is_method_compilation()) {
          // While in_scratch_emit_size() is true no stub is registered and the
          // poll branches to a local dummy label instead.
 1980     Label dummy_label;
 1981     Label* code_stub = &dummy_label;
 1982     if (!C->output()->in_scratch_emit_size()) {
 1983       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1984       C->output()->add_stub(stub);
 1985       code_stub = &stub->entry();
 1986     }
 1987     __ relocate(relocInfo::poll_return_type);
 1988     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1989   }
 1990 }
 1991 






 1992 int MachEpilogNode::reloc() const
 1993 {
        // Fixed value. MachEpilogNode::emit() itself issues one explicit
        // relocate(relocInfo::poll_return_type) when the return poll is emitted.
 1994   return 2; // a large enough number
 1995 }
 1996 
 1997 const Pipeline* MachEpilogNode::pipeline() const
 1998 {
        // No epilog-specific pipeline description: returns MachNode::pipeline_class().
 1999   return MachNode::pipeline_class();
 2000 }
 2001 
 2002 //=============================================================================
 2003 
 2004 enum RC {
        // NOTE(review): presumably register-class categories (general purpose,
        // opmask, float/vector, stack slot) with rc_bad as the invalid value —
        // the users of this enum are outside this view; confirm there.
 2005   rc_bad,
 2006   rc_int,
 2007   rc_kreg,
 2008   rc_float,
 2009   rc_stack
 2010 };
 2011 

 2573 #endif
 2574 
 2575 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2576 {
        // Loads the address rsp + offset into the register allocated to this node,
        // where offset is the stack offset of the first element of in_RegMask(0).
 2577   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2578   int reg = ra_->get_encode(this);
 2579 
 2580   __ lea(as_Register(reg), Address(rsp, offset));
 2581 }
 2582 
 2583 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2584 {
 2585   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2586   // Length with a REX prefix: 5 bytes when disp is below 0x80, otherwise 8.
 2587   const uint rex_len = (disp < 0x80) ? 5 : 8;
 2588   // Register encodings above 15 take the REX2 form, which is one byte longer.
 2589   const uint rex2_extra = (ra_->get_encode(this) > 15) ? 1 : 0;
 2590   return rex_len + rex2_extra;
 2591 }
 2592 
 2593 //=============================================================================
 2594 #ifndef PRODUCT
 2595 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2596 {
        // Non-PRODUCT builds only: prints just the node name, not the
        // instructions that MachVEPNode::emit() generates.
 2597   st->print_cr("MachVEPNode");
 2598 }
 2599 #endif
 2600 
 2601 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
        // Emits either an inline-cache check (when !_verified) or the code that
        // unpacks inline type arguments and jumps to the verified entry, then
        // pads what this node emitted with nops to a multiple of 4 bytes.
 2603   CodeBuffer* cbuf = masm->code();
        // Code size before this node emits anything; used below for the padding.
 2604   uint insts_size = cbuf->insts_size();
 2605   if (!_verified) {
 2606     __ ic_check(1);
 2607   } else {
 2608     // TODO 8284443 Avoid creation of temporary frame
 2609     if (ra_->C->stub_function() == nullptr) {
 2610       __ verified_entry(ra_->C, 0);
 2611       __ entry_barrier();
 2612       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2613       __ remove_frame(initial_framesize, false);
 2614     }
 2615     // Unpack inline type args passed as oop and then jump to
 2616     // the verified entry point (skipping the unverified entry).
 2617     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2618     // Emit code for verified entry and save increment for stack repair on return
 2619     __ verified_entry(ra_->C, sp_inc);
 2620     if (Compile::current()->output()->in_scratch_emit_size()) {
            // The real label is not used while in_scratch_emit_size() is true;
            // jump to a throwaway label instead.
 2621       Label dummy_verified_entry;
 2622       __ jmp(dummy_verified_entry);
 2623     } else {
 2624       __ jmp(*_verified_entry);
 2625     }
 2626   }
 2627   /* WARNING these NOPs are critical so that verified entry point is properly
 2628      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
        // (4 - emitted_bytes % 4) % 4 nops: zero when already a multiple of 4.
 2629   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2630   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2631   if (nops_cnt > 0) {
 2632     __ nop(nops_cnt);
 2633   }
 2634 }
 2635 
 2636 //=============================================================================
 2637 #ifndef PRODUCT
 2638 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2639 {
        // Non-PRODUCT builds only: prints a fixed three-line description of the
        // inline cache check (klass load, compare, jump to the IC miss stub).
 2640   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2641   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2642   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2643 }
 2644 #endif
 2645 
 2646 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2647 {
        // Emits the inline cache check, passing InteriorEntryAlignment as the
        // argument of ic_check().
 2648   __ ic_check(InteriorEntryAlignment);
 2649 }
 2650 






 2651 
 2652 //=============================================================================
 2653 
 2654 bool Matcher::supports_vector_calling_convention(void) {
        // Determined solely by the EnableVectorSupport flag.
 2655   return EnableVectorSupport;
 2656 }
 2657 
 2658 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
        // True when mdef's flags have Node::PD::Flag_ndd_demotable_opr1 set.
 2659   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2660 }
 2661 
 2662 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
        // True when mdef's flags have Node::PD::Flag_ndd_demotable_opr2 set.
 2663   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2664 }
 2665 
 2666 #ifdef ASSERT
 2667 static bool is_ndd_demotable(const MachNode* mdef) {
        // ASSERT builds only: true when either the opr1 or the opr2 demotable
        // flag is set on mdef.
 2668   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2669 }
 2670 #endif

 4604     }
 4605     __ post_call_nop();
 4606   %}
 4607 
 4608   enc_class Java_Dynamic_Call(method meth) %{
          // Emits an inline-cache call to $meth, passing the resolved method
          // index, followed by the post-call nop.
 4609     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4610     __ post_call_nop();
 4611   %}
 4612 
 4613   enc_class call_epilog %{
          // Code emitted after a call: an optional stack-depth check
          // (VerifyStackAtCalls) and, for calls returning an inline type as
          // fields, initialization of the null-marker projection and cleanup of rax.
 4614     if (VerifyStackAtCalls) {
 4615       // Check that stack depth is unchanged: find majik cookie on stack
 4616       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4617       Label L;
 4618       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4619       __ jccb(Assembler::equal, L);
 4620       // Die if stack mismatch
 4621       __ int3();
 4622       __ bind(L);
 4623     }
 4624     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4625       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4626       // Search for the corresponding projection, get the register and emit code that initializes it.
 4627       uint con = (tf()->range_cc()->cnt() - 1);
 4628       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4629         ProjNode* proj = fast_out(i)->as_Proj();
 4630         if (proj->_con == con) {
 4631           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4632           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4633           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
                // A stack-allocated projection is computed in rscratch1 and stored below.
 4634           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4635           __ testq(rax, rax);
 4636           __ setb(Assembler::notZero, toReg);
 4637           __ movzbl(toReg, toReg);
 4638           if (reg->is_stack()) {
 4639             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4640             __ movq(Address(rsp, st_off), toReg);
 4641           }
 4642           break;
 4643         }
 4644       }
 4645       if (return_value_is_used()) {
 4646         // An inline type is returned as fields in multiple registers.
 4647         // Rax either contains an oop if the inline type is buffered or a pointer
 4648         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4649         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4650         // rax &= (rax & 1) - 1
 4651         __ movptr(rscratch1, rax);
 4652         __ andptr(rscratch1, 0x1);
 4653         __ subptr(rscratch1, 0x1);
 4654         __ andptr(rax, rscratch1);
 4655       }
 4656     }
 4657   %}
 4658 
 4659 %}
 4660 
 4661 //----------FRAME--------------------------------------------------------------
 4662 // Definition of frame structure and management information.
 4663 //
 4664 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4665 //                             |   (to get allocators register number
 4666 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4667 //  r   CALLER     |        |
 4668 //  o     |        +--------+      pad to even-align allocators stack-slot
 4669 //  w     V        |  pad0  |        numbers; owned by CALLER
 4670 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4671 //  h     ^        |   in   |  5
 4672 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4673 //  |     |        |        |  3
 4674 //  |     |        +--------+
 4675 //  V     |        | old out|      Empty on Intel, window on Sparc
 4676 //        |    old |preserve|      Must be even aligned.

 5815   %}
 5816 %}
 5817 
 5818 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // The predicate walks from the outer AddP through its in(2)/in(3)/in(1)
      // inputs (per the match rule: inner AddP, LShiftL, ConvI2L) and requires
      // the long type found there to have a non-negative lower bound.
 5819 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5820 %{
 5821   constraint(ALLOC_IN_RC(ptr_reg));
 5822   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5823   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5824 
 5825   op_cost(10);
 5826   format %{"[$reg + $off + $idx << $scale]" %}
 5827   interface(MEMORY_INTER) %{
 5828     base($reg);
 5829     index($idx);
 5830     scale($scale);
 5831     disp($off);
 5832   %}
 5833 %}
 5834 
 5835 // Indirect Narrow Oop Operand
      // Folds a DecodeN into the address as [R12 + reg << 3]; only available
      // with UseCompressedOops and a compressed-oop shift equal to times_8.
 5836 operand indCompressedOop(rRegN reg) %{
 5837   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5838   constraint(ALLOC_IN_RC(ptr_reg));
 5839   match(DecodeN reg);
 5840 
 5841   op_cost(10);
 5842   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5843   interface(MEMORY_INTER) %{
 5844     base(0xc); // R12
 5845     index($reg);
 5846     scale(0x3);
 5847     disp(0x0);
 5848   %}
 5849 %}
 5850 
 5851 // Indirect Narrow Oop Plus Offset Operand
 5852 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5853 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Same addressing form as indCompressedOop with $off as the displacement;
      // guarded by the same UseCompressedOops / times_8 shift predicate.
 5854 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5855   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5856   constraint(ALLOC_IN_RC(ptr_reg));
 5857   match(AddP (DecodeN reg) off);
 5858 
 5859   op_cost(10);
 5860   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5861   interface(MEMORY_INTER) %{
 5862     base(0xc); // R12
 5863     index($reg);
 5864     scale(0x3);
 5865     disp($off);
 5866   %}
 5867 %}
 5868 
 5869 // Indirect Memory Operand
 5870 operand indirectNarrow(rRegN reg)

 6340 %}
 6341 
 6342 // Replaces legVec during post-selection cleanup. See above.
      // VecZ operand allocated from the vectorz_reg_legacy register class;
      // the format is intentionally empty.
 6343 operand legVecZ() %{
 6344   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6345   match(VecZ);
 6346 
 6347   format %{ %}
 6348   interface(REG_INTER);
 6349 %}
 6350 
 6351 //----------OPERAND CLASSES----------------------------------------------------
 6352 // Operand Classes are groups of operands that are used to simplify
 6353 // instruction definitions by not requiring the AD writer to specify separate
 6354 // instructions for every form of operand when the instruction accepts
 6355 // multiple operand types with the same basic encoding and format.  The classic
 6356 // case of this is memory operands.
 6357 
      // "memory" groups the plain, compressed-oop and narrow-base addressing
      // operands listed below.
 6358 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6359                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6360                indCompressedOop, indCompressedOopOffset,
 6361                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6362                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6363                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6364 
 6365 //----------PIPELINE-----------------------------------------------------------
 6366 // Rules which define the behavior of the target architectures pipeline.
 6367 pipeline %{
 6368 
 6369 //----------ATTRIBUTES---------------------------------------------------------
 6370 attributes %{
 6371   variable_size_instructions;        // Variable size instructions
 6372   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6373   instruction_unit_size = 1;         // An instruction is 1 byte long
 6374   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6375   instruction_fetch_units = 1;       // of 16 bytes
 6376 %}
 6377 
 6378 //----------RESOURCES----------------------------------------------------------
 6379 // Resources are the functional units available to the machine
 6380 

 8975   format %{ "MEMBAR-storestore (empty encoding)" %}
 8976   ins_encode( );
 8977   ins_pipe(empty);
 8978 %}
 8979 
 8980 //----------Move Instructions--------------------------------------------------
 8981 
 8982 instruct castX2P(rRegP dst, rRegL src)
 8983 %{
        // long -> pointer cast: a register move, emitted only when dst and src
        // were allocated to different registers.
 8984   match(Set dst (CastX2P src));
 8985 
 8986   format %{ "movq    $dst, $src\t# long->ptr" %}
 8987   ins_encode %{
 8988     if ($dst$$reg != $src$$reg) {
 8989       __ movptr($dst$$Register, $src$$Register);
 8990     }
 8991   %}
 8992   ins_pipe(ialu_reg_reg); // XXX
 8993 %}
 8994 
 8995 instruct castI2N(rRegN dst, rRegI src)
 8996 %{
        // int -> narrow pointer cast: a 32-bit register move, emitted only when
        // dst and src were allocated to different registers. The format names
        // movl so the printed text matches the instruction actually emitted.
 8997   match(Set dst (CastI2N src));
 8998 
 8999   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 9000   ins_encode %{
 9001     if ($dst$$reg != $src$$reg) {
 9002       __ movl($dst$$Register, $src$$Register);
 9003     }
 9004   %}
 9005   ins_pipe(ialu_reg_reg); // XXX
 9006 %}
 9007 
 9008 instruct castN2X(rRegL dst, rRegN src)
 9009 %{
        // CastP2X whose input lives in a narrow-pointer register: a register
        // move, emitted only when dst and src were allocated differently.
        // The format says "narrow ptr" so the printed text is distinguishable
        // from castP2X, which matches the same ideal node with an rRegP source.
 9010   match(Set dst (CastP2X src));
 9011 
 9012   format %{ "movq    $dst, $src\t# narrow ptr -> long" %}
 9013   ins_encode %{
 9014     if ($dst$$reg != $src$$reg) {
 9015       __ movptr($dst$$Register, $src$$Register);
 9016     }
 9017   %}
 9018   ins_pipe(ialu_reg_reg); // XXX
 9019 %}
 9020 
 9021 instruct castP2X(rRegL dst, rRegP src)
 9022 %{
        // pointer -> long cast: a register move, emitted only when dst and src
        // were allocated to different registers.
 9023   match(Set dst (CastP2X src));
 9024 
 9025   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9026   ins_encode %{
 9027     if ($dst$$reg != $src$$reg) {
 9028       __ movptr($dst$$Register, $src$$Register);
 9029     }
 9030   %}
 9031   ins_pipe(ialu_reg_reg); // XXX
 9032 %}
 9033 
 9034 // Convert oop into int for vectors alignment masking
 9035 instruct convP2I(rRegI dst, rRegP src)
 9036 %{
 9037   match(Set dst (ConvL2I (CastP2X src)));
 9038 
 9039   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9040   ins_encode %{

15288   effect(DEF dst, USE src);
15289   ins_cost(100);
15290   format %{ "movd    $dst,$src\t# MoveI2F" %}
15291   ins_encode %{
15292     __ movdl($dst$$XMMRegister, $src$$Register);
15293   %}
15294   ins_pipe( pipe_slow );
15295 %}
15296 
15297 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves a long from a general register into a double (XMM) register
        // via movdq; dst is only defined, src only used.
15298   match(Set dst (MoveL2D src));
15299   effect(DEF dst, USE src);
15300   ins_cost(100);
15301   format %{ "movd    $dst,$src\t# MoveL2D" %}
15302   ins_encode %{
15303      __ movdq($dst$$XMMRegister, $src$$Register);
15304   %}
15305   ins_pipe( pipe_slow );
15306 %}
15307 
15308 
15309 // Fast clearing of an array
15310 // Small non-constant length ClearArray for non-AVX512 targets.
      // Selected for ClearArray nodes that are neither is_large() nor
      // word_copy_only() when UseAVX <= 2. cnt, base and val are fixed to
      // rcx, rdi and rax and are killed, as are the flags. The format text is
      // only a printed description; the code itself is produced by clear_mem().
15311 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15312                   Universe dummy, rFlagsReg cr)
15313 %{
15314   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15315   match(Set dummy (ClearArray (Binary cnt base) val));
15316   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15317 
15318   format %{ $$template
15319     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15320     $$emit$$"jg      LARGE\n\t"
15321     $$emit$$"dec     rcx\n\t"
15322     $$emit$$"js      DONE\t# Zero length\n\t"
15323     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15324     $$emit$$"dec     rcx\n\t"
15325     $$emit$$"jge     LOOP\n\t"
15326     $$emit$$"jmp     DONE\n\t"
15327     $$emit$$"# LARGE:\n\t"
15328     if (UseFastStosb) {
15329        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15330        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15331     } else if (UseXMMForObjInit) {
15332        $$emit$$"movdq   $tmp, $val\n\t"
15333        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15334        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15335        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15336        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15337        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15338        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15339        $$emit$$"add     0x40,rax\n\t"
15340        $$emit$$"# L_zero_64_bytes:\n\t"
15341        $$emit$$"sub     0x8,rcx\n\t"
15342        $$emit$$"jge     L_loop\n\t"
15343        $$emit$$"add     0x4,rcx\n\t"
15344        $$emit$$"jl      L_tail\n\t"
15345        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15346        $$emit$$"add     0x20,rax\n\t"
15347        $$emit$$"sub     0x4,rcx\n\t"
15348        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15349        $$emit$$"add     0x4,rcx\n\t"
15350        $$emit$$"jle     L_end\n\t"
15351        $$emit$$"dec     rcx\n\t"
15352        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15353        $$emit$$"vmovq   xmm0,(rax)\n\t"
15354        $$emit$$"add     0x8,rax\n\t"
15355        $$emit$$"dec     rcx\n\t"
15356        $$emit$$"jge     L_sloop\n\t"
15357        $$emit$$"# L_end:\n\t"
15358     } else {
15359        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15360     }
15361     $$emit$$"# DONE"
15362   %}
15363   ins_encode %{
          // The two trailing booleans are false here: this is the rule for the
          // !is_large(), !word_copy_only() case selected by the predicate.
15364     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15365                  $tmp$$XMMRegister, false, false);
15366   %}
15367   ins_pipe(pipe_slow);
15368 %}
15369 
15370 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15371                             Universe dummy, rFlagsReg cr)
15372 %{
        // Small (!is_large()) ClearArray for UseAVX <= 2 when the node is
        // word_copy_only(). Unlike rep_stos, the printed format has no
        // UseFastStosb (rep stosb) branch. The format text is only a printed
        // description; the code itself is produced by clear_mem().
15373   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15374   match(Set dummy (ClearArray (Binary cnt base) val));
15375   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15376 
15377   format %{ $$template
15378     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15379     $$emit$$"jg      LARGE\n\t"
15380     $$emit$$"dec     rcx\n\t"
15381     $$emit$$"js      DONE\t# Zero length\n\t"
15382     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15383     $$emit$$"dec     rcx\n\t"
15384     $$emit$$"jge     LOOP\n\t"
15385     $$emit$$"jmp     DONE\n\t"
15386     $$emit$$"# LARGE:\n\t"
15387     if (UseXMMForObjInit) {
15388        $$emit$$"movdq   $tmp, $val\n\t"
15389        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15390        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15391        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15392        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15393        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15394        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15395        $$emit$$"add     0x40,rax\n\t"
15396        $$emit$$"# L_zero_64_bytes:\n\t"
15397        $$emit$$"sub     0x8,rcx\n\t"
15398        $$emit$$"jge     L_loop\n\t"
15399        $$emit$$"add     0x4,rcx\n\t"
15400        $$emit$$"jl      L_tail\n\t"
15401        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15402        $$emit$$"add     0x20,rax\n\t"
15403        $$emit$$"sub     0x4,rcx\n\t"
15404        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15405        $$emit$$"add     0x4,rcx\n\t"
15406        $$emit$$"jle     L_end\n\t"
15407        $$emit$$"dec     rcx\n\t"
15408        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15409        $$emit$$"vmovq   xmm0,(rax)\n\t"
15410        $$emit$$"add     0x8,rax\n\t"
15411        $$emit$$"dec     rcx\n\t"
15412        $$emit$$"jge     L_sloop\n\t"
15413        $$emit$$"# L_end:\n\t"
15414     } else {
15415        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15416     }
15417     $$emit$$"# DONE"
15418   %}
15419   ins_encode %{
          // Trailing booleans (false, true) mirror the predicate:
          // !is_large() and word_copy_only().
15420     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15421                  $tmp$$XMMRegister, false, true);
15422   %}
15423   ins_pipe(pipe_slow);
15424 %}
15425 
15426 // Small non-constant length ClearArray for AVX512 targets.
15427 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15428                        Universe dummy, rFlagsReg cr)
15429 %{
15430   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15431   match(Set dummy (ClearArray (Binary cnt base) val));
15432   ins_cost(125);
15433   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15434 
15435   format %{ $$template
15436     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15437     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15438     $$emit$$"jg      LARGE\n\t"
15439     $$emit$$"dec     rcx\n\t"
15440     $$emit$$"js      DONE\t# Zero length\n\t"
15441     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15442     $$emit$$"dec     rcx\n\t"
15443     $$emit$$"jge     LOOP\n\t"
15444     $$emit$$"jmp     DONE\n\t"
15445     $$emit$$"# LARGE:\n\t"
15446     if (UseFastStosb) {
15447        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15448        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15449     } else if (UseXMMForObjInit) {
15450        $$emit$$"mov     rdi,rax\n\t"
15451        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15452        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15453        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15461        $$emit$$"jl      L_tail\n\t"
15462        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15463        $$emit$$"add     0x20,rax\n\t"
15464        $$emit$$"sub     0x4,rcx\n\t"
15465        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15466        $$emit$$"add     0x4,rcx\n\t"
15467        $$emit$$"jle     L_end\n\t"
15468        $$emit$$"dec     rcx\n\t"
15469        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15470        $$emit$$"vmovq   xmm0,(rax)\n\t"
15471        $$emit$$"add     0x8,rax\n\t"
15472        $$emit$$"dec     rcx\n\t"
15473        $$emit$$"jge     L_sloop\n\t"
15474        $$emit$$"# L_end:\n\t"
15475     } else {
15476        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15477     }
15478     $$emit$$"# DONE"
15479   %}
15480   ins_encode %{
15481     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15482                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15483   %}
15484   ins_pipe(pipe_slow);
15485 %}
15486 
15487 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15488                                  Universe dummy, rFlagsReg cr)

15489 %{
15490   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15491   match(Set dummy (ClearArray (Binary cnt base) val));
15492   ins_cost(125);
15493   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15494 
15495   format %{ $$template
15496     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15497     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15498     $$emit$$"jg      LARGE\n\t"
15499     $$emit$$"dec     rcx\n\t"
15500     $$emit$$"js      DONE\t# Zero length\n\t"
15501     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15502     $$emit$$"dec     rcx\n\t"
15503     $$emit$$"jge     LOOP\n\t"
15504     $$emit$$"jmp     DONE\n\t"
15505     $$emit$$"# LARGE:\n\t"
15506     if (UseFastStosb) {
15507        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15508        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15509     } else if (UseXMMForObjInit) {
15510        $$emit$$"mov     rdi,rax\n\t"
15511        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15512        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15513        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15521        $$emit$$"jl      L_tail\n\t"
15522        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15523        $$emit$$"add     0x20,rax\n\t"
15524        $$emit$$"sub     0x4,rcx\n\t"
15525        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15526        $$emit$$"add     0x4,rcx\n\t"
15527        $$emit$$"jle     L_end\n\t"
15528        $$emit$$"dec     rcx\n\t"
15529        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15530        $$emit$$"vmovq   xmm0,(rax)\n\t"
15531        $$emit$$"add     0x8,rax\n\t"
15532        $$emit$$"dec     rcx\n\t"
15533        $$emit$$"jge     L_sloop\n\t"
15534        $$emit$$"# L_end:\n\t"
15535     } else {
15536        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15537     }
15538     $$emit$$"# DONE"
15539   %}
15540   ins_encode %{
15541     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15542                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15543   %}
15544   ins_pipe(pipe_slow);
15545 %}
15546 
15547 // Large non-constant length ClearArray for non-AVX512 targets.
      // Selected for is_large(), !word_copy_only() ClearArray nodes when
      // UseAVX <= 2. The printed format has no short-array prologue. The format
      // text is only a printed description; the code is produced by clear_mem().
15548 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15549                         Universe dummy, rFlagsReg cr)
15550 %{
15551   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15552   match(Set dummy (ClearArray (Binary cnt base) val));
15553   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15554 
15555   format %{ $$template
15556     if (UseFastStosb) {
15557        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15558        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15559     } else if (UseXMMForObjInit) {
15560        $$emit$$"movdq   $tmp, $val\n\t"
15561        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15562        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15563        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15564        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15565        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15566        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15567        $$emit$$"add     0x40,rax\n\t"
15568        $$emit$$"# L_zero_64_bytes:\n\t"
15569        $$emit$$"sub     0x8,rcx\n\t"
15570        $$emit$$"jge     L_loop\n\t"
15571        $$emit$$"add     0x4,rcx\n\t"
15572        $$emit$$"jl      L_tail\n\t"
15573        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15574        $$emit$$"add     0x20,rax\n\t"
15575        $$emit$$"sub     0x4,rcx\n\t"
15576        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15577        $$emit$$"add     0x4,rcx\n\t"
15578        $$emit$$"jle     L_end\n\t"
15579        $$emit$$"dec     rcx\n\t"
15580        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15581        $$emit$$"vmovq   xmm0,(rax)\n\t"
15582        $$emit$$"add     0x8,rax\n\t"
15583        $$emit$$"dec     rcx\n\t"
15584        $$emit$$"jge     L_sloop\n\t"
15585        $$emit$$"# L_end:\n\t"
15586     } else {
15587        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15588     }
15589   %}
15590   ins_encode %{
          // Trailing booleans (true, false) mirror the predicate:
          // is_large() and !word_copy_only().
15591     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15592                  $tmp$$XMMRegister, true, false);
15593   %}
15594   ins_pipe(pipe_slow);
15595 %}
15596 
15597 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15598                                   Universe dummy, rFlagsReg cr)
15599 %{
        // Large (is_large()) ClearArray for UseAVX <= 2 when the node is
        // word_copy_only(). Unlike rep_stos_large, the printed format has no
        // UseFastStosb (rep stosb) branch. The format text is only a printed
        // description; the code itself is produced by clear_mem().
15600   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15601   match(Set dummy (ClearArray (Binary cnt base) val));
15602   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15603 
15604   format %{ $$template
15605     if (UseXMMForObjInit) {
15606        $$emit$$"movdq   $tmp, $val\n\t"
15607        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15608        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15609        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15610        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15611        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15612        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15613        $$emit$$"add     0x40,rax\n\t"
15614        $$emit$$"# L_zero_64_bytes:\n\t"
15615        $$emit$$"sub     0x8,rcx\n\t"
15616        $$emit$$"jge     L_loop\n\t"
15617        $$emit$$"add     0x4,rcx\n\t"
15618        $$emit$$"jl      L_tail\n\t"
15619        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15620        $$emit$$"add     0x20,rax\n\t"
15621        $$emit$$"sub     0x4,rcx\n\t"
15622        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15623        $$emit$$"add     0x4,rcx\n\t"
15624        $$emit$$"jle     L_end\n\t"
15625        $$emit$$"dec     rcx\n\t"
15626        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15627        $$emit$$"vmovq   xmm0,(rax)\n\t"
15628        $$emit$$"add     0x8,rax\n\t"
15629        $$emit$$"dec     rcx\n\t"
15630        $$emit$$"jge     L_sloop\n\t"
15631        $$emit$$"# L_end:\n\t"
15632     } else {
15633        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15634     }
15635   %}
15636   ins_encode %{
          // Trailing booleans (true, true) mirror the predicate:
          // is_large() and word_copy_only().
15637     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15638                  $tmp$$XMMRegister, true, true);
15639   %}
15640   ins_pipe(pipe_slow);
15641 %}
15642 
15643 // Large non-constant length ClearArray for AVX512 targets.
      // Selected for is_large(), !word_copy_only() ClearArray nodes when
      // UseAVX > 2; additionally takes an opmask temporary (ktmp) that is
      // passed to clear_mem(). The format text is only a printed description.
      // NOTE(review): the format still prints "xorq rax, rax" and ymm0 stores
      // although rax is the val input of this rule — confirm the text is current.
15644 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15645                              Universe dummy, rFlagsReg cr)
15646 %{
15647   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15648   match(Set dummy (ClearArray (Binary cnt base) val));
15649   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15650 
15651   format %{ $$template
15652     if (UseFastStosb) {
15653        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15654        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15655        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15656     } else if (UseXMMForObjInit) {
15657        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15658        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15659        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15660        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15661        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15662        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15663        $$emit$$"add     0x40,rax\n\t"
15664        $$emit$$"# L_zero_64_bytes:\n\t"
15665        $$emit$$"sub     0x8,rcx\n\t"
15666        $$emit$$"jge     L_loop\n\t"
15667        $$emit$$"add     0x4,rcx\n\t"
15668        $$emit$$"jl      L_tail\n\t"
15669        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15670        $$emit$$"add     0x20,rax\n\t"
15671        $$emit$$"sub     0x4,rcx\n\t"
15672        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15673        $$emit$$"add     0x4,rcx\n\t"
15674        $$emit$$"jle     L_end\n\t"
15675        $$emit$$"dec     rcx\n\t"
15676        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15677        $$emit$$"vmovq   xmm0,(rax)\n\t"
15678        $$emit$$"add     0x8,rax\n\t"
15679        $$emit$$"dec     rcx\n\t"
15680        $$emit$$"jge     L_sloop\n\t"
15681        $$emit$$"# L_end:\n\t"
15682     } else {
15683        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15684        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15685     }
15686   %}
15687   ins_encode %{
          // Trailing booleans (true, false) mirror the predicate:
          // is_large() and !word_copy_only().
15688     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15689                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15690   %}
15691   ins_pipe(pipe_slow);
15692 %}
15693 
15694 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15695                                        Universe dummy, rFlagsReg cr)

15696 %{
        // Large non-constant length ClearArray for AVX512 targets, for nodes that
        // report word_copy_only(). The predicate differs from rep_stos_large_evex
        // only in the sign of the word_copy_only() test, so the two rules are
        // mutually exclusive.
        //
        // cnt, base and val are pinned by their register classes and USE_KILLed;
        // tmp and ktmp are scratch (TEMP); flags are killed.
        //
        // The format template below is print-only text. The code is emitted by
        // clear_mem(), called here with (true, true) as its boolean arguments.
        // NOTE(review): those presumably mean is_large / word_copy_only -- confirm
        // against MacroAssembler::clear_mem.
        // NOTE(review): the format text is identical to rep_stos_large_evex
        // (including the UseFastStosb "rep stosb" branch and "xorq rax, rax") --
        // confirm it still describes what is emitted for word-copy-only clears
        // with val supplied in rax.
15697   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15698   match(Set dummy (ClearArray (Binary cnt base) val));
15699   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15700 
15701   format %{ $$template
15702     if (UseFastStosb) {
15703        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15704        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15705        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15706     } else if (UseXMMForObjInit) {
15707        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15708        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15709        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15710        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15711        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15712        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15713        $$emit$$"add     0x40,rax\n\t"
15714        $$emit$$"# L_zero_64_bytes:\n\t"
15715        $$emit$$"sub     0x8,rcx\n\t"
15716        $$emit$$"jge     L_loop\n\t"
15717        $$emit$$"add     0x4,rcx\n\t"
15718        $$emit$$"jl      L_tail\n\t"
15719        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15720        $$emit$$"add     0x20,rax\n\t"
15721        $$emit$$"sub     0x4,rcx\n\t"
15722        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15723        $$emit$$"add     0x4,rcx\n\t"
15724        $$emit$$"jle     L_end\n\t"
15725        $$emit$$"dec     rcx\n\t"
15726        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15727        $$emit$$"vmovq   xmm0,(rax)\n\t"
15728        $$emit$$"add     0x8,rax\n\t"
15729        $$emit$$"dec     rcx\n\t"
15730        $$emit$$"jge     L_sloop\n\t"
15731        $$emit$$"# L_end:\n\t"
15732     } else {
15733        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15734        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15735     }
15736   %}
15737   ins_encode %{
15738     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15739                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15740   %}
15741   ins_pipe(pipe_slow);
15742 %}
15743 
15744 // Small constant length ClearArray for AVX512 targets.
      //
      // Selected when the ClearArray node is neither is_large() nor
      // word_copy_only(), MaxVectorSize >= 32 and the CPU reports AVX512VL.
      // cnt is an immediate (immL) and is handed to clear_mem() as a constant, so
      // this uses the constant-count overload rather than the register-count one
      // used by the rep_stos_large_* rules. base may be any pointer register and
      // is not listed in the effect clause; val is pinned by rax_RegL and
      // USE_KILLed; tmp and ktmp are scratch; flags are killed.
15745 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15746 %{
15747   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15748             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15749   match(Set dummy (ClearArray (Binary cnt base) val));
15750   ins_cost(100);
15751   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15752   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15753   ins_encode %{
15754     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15755   %}
15756   ins_pipe(pipe_slow);
15757 %}
15758 
15759 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15760                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15761 %{
        // StrComp for the StrIntrinsicNode::LL encoding, used only when the CPU
        // does not report AVX512VL+BW (see the predicate).
        //
        // Both string pointers and both counts are pinned by their register
        // classes and USE_KILLed; result is pinned by rax_RegI; tmp1 is an XMM
        // scratch register; flags are killed.
        //
        // knoreg is passed as the last argument to string_compare().
        // NOTE(review): presumably "no opmask register", matching the
        // non-AVX512 predicate -- confirm against MacroAssembler::string_compare.
15762   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15763   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15764   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15765 
15766   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15767   ins_encode %{
15768     __ string_compare($str1$$Register, $str2$$Register,
15769                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15770                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15771   %}
15772   ins_pipe( pipe_slow );
15773 %}
15774 

17612   effect(USE meth);
17613 
17614   ins_cost(300);
17615   format %{ "call_leaf,runtime " %}
17616   ins_encode(clear_avx, Java_To_Runtime(meth));
17617   ins_pipe(pipe_slow);
17618 %}
17619 
17620 // Call runtime without safepoint and with vector arguments
      //
      // Matches CallLeafVector. Unlike the neighbouring leaf-call rules, which
      // encode clear_avx before Java_To_Runtime, this rule encodes only
      // Java_To_Runtime(meth). That agrees with
      // MachCallRuntimeNode::ret_addr_offset(), which adds clear_avx_size() only
      // when ideal_Opcode() != Op_CallLeafVector.
17621 instruct CallLeafDirectVector(method meth)
17622 %{
17623   match(CallLeafVector);
17624   effect(USE meth);
17625 
17626   ins_cost(300);
17627   format %{ "call_leaf,vector " %}
17628   ins_encode(Java_To_Runtime(meth));
17629   ins_pipe(pipe_slow);
17630 %}
17631 
17632 // Call runtime without safepoint
17633 // entry point is null, target holds the address to call
      //
      // Matches CallLeafNoFP with an explicit target operand, and only when the
      // call node's entry_point() is nullptr (CallLeafNoFPDirect covers the
      // non-null case). The encoding is a single call through the target
      // register; no clear_avx is encoded here.
      //
      // NOTE(review): the visible body of MachCallRuntimeNode::ret_addr_offset()
      // starts from 13 bytes ("movq r10,#addr; callq (r10)") plus
      // clear_avx_size(), which does not describe this register-indirect call --
      // confirm that the return-address offset is handled for the
      // null-entry-point form.
17634 instruct CallLeafNoFPInDirect(rRegP target)
17635 %{
17636   predicate(n->as_Call()->entry_point() == nullptr);
17637   match(CallLeafNoFP target);
17638 
17639   ins_cost(300);
17640   format %{ "call_leaf_nofp,runtime indirect " %}
17641   ins_encode %{
17642      __ call($target$$Register);
17643   %}
17644 
17645   ins_pipe(pipe_slow);
17646 %}
17647 
17648 // Call runtime without safepoint
      //
      // Matches CallLeafNoFP when the call node's entry_point() is non-null; the
      // null case is handled by CallLeafNoFPInDirect. The encoding runs clear_avx
      // and then Java_To_Runtime(meth).
17649 instruct CallLeafNoFPDirect(method meth)
17650 %{
17651   predicate(n->as_Call()->entry_point() != nullptr);
17652   match(CallLeafNoFP);
17653   effect(USE meth);
17654 
17655   ins_cost(300);
17656   format %{ "call_leaf_nofp,runtime " %}
17657   ins_encode(clear_avx, Java_To_Runtime(meth));
17658   ins_pipe(pipe_slow);
17659 %}
17660 
17661 // Return Instruction
17662 // Remove the return address & jump to it.
17663 // Notice: We always emit a nop after a ret to make sure there is room
17664 // for safepoint patching
17665 instruct Ret()
17666 %{
17667   match(Return);
17668 
17669   format %{ "ret" %}
17670   ins_encode %{
17671     __ ret(0);
< prev index next >