< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
int MachCallRuntimeNode::ret_addr_offset() {
  int offset = 13; // movq r10,#addr; callq (r10)
  // CallLeafVector calls are not preceded by a vzeroupper, so only the
  // other runtime call kinds account for clear_avx.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  // Pad so that the patched word following the opcode is 4-byte aligned.
  return align_up(current_offset, alignment_required()) - current_offset;
}
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1879     st->print("\n\t");
 1880     st->print("# stack alignment check");
 1881 #endif
 1882   }
 1883   if (C->stub_function() != nullptr) {
 1884     st->print("\n\t");
 1885     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1886     st->print("\n\t");
 1887     st->print("je      fast_entry\t");
 1888     st->print("\n\t");
 1889     st->print("call    #nmethod_entry_barrier_stub\t");
 1890     st->print("\n\tfast_entry:");
 1891   }
 1892   st->cr();
 1893 }
 1894 #endif
 1895 
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Class-initialization barrier: check the holder class and re-enter the
    // VM via the wrong-method stub if it is not yet fully initialized.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Frame setup; stack bang only when needed, entry barrier for stub compilations.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1928 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  // Prologue length depends on too many inputs (stack bang, clinit barrier,
  // stub entry) to predict; measure the actually emitted bytes instead.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 1934 
// Upper bound on relocation entries the prologue needs.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
 1939 
 1940 //=============================================================================
 1941 #ifndef PRODUCT
 1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1943 {
 1944   Compile* C = ra_->C;
 1945   if (generate_vzeroupper(C)) {
 1946     st->print("vzeroupper");
 1947     st->cr(); st->print("\t");
 1948   }
 1949 
 1950   int framesize = C->output()->frame_size_in_bytes();
 1951   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1952   // Remove word for return adr already pushed
 1953   // and RBP

 1961   st->print_cr("popq    rbp");
 1962   if (do_polling() && C->is_method_compilation()) {
 1963     st->print("\t");
 1964     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1965                  "ja      #safepoint_stub\t"
 1966                  "# Safepoint: poll for GC");
 1967   }
 1968 }
 1969 #endif
 1970 
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-poll safepoint check; the out-of-line stub is created lazily
    // and skipped entirely during the scratch (sizing) emission pass.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
 2011 
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  // Like the prologue, the epilogue is too variable to size statically.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 2017 
// Upper bound on relocation entries the epilogue needs.
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
 2022 
// Use the default pipeline description for the epilogue.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
 2027 
 2028 //=============================================================================
 2029 
// Classification of a value's location, used when deciding how to move
// (spill/copy) a value between locations.
enum RC {
  rc_bad,    // no valid location
  rc_int,    // general-purpose register
  rc_kreg,   // k (opmask) register
  rc_float,  // xmm register
  rc_stack   // stack slot
};
 2037 

 2599 #endif
 2600 
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  // Materialize the address of this node's stack slot (the lock box)
  // into the register the allocator assigned to this node.
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
 2608 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  // Encoded size of the lea emitted by BoxLockNode::emit: short (disp8)
  // vs long (disp32) displacement, and a REX2 prefix when the destination
  // is an extended register (encoding > 15).
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
 2618 












































 2619 //=============================================================================
 2620 #ifndef PRODUCT
// Debug printout of the unverified entry point: load the receiver's
// compressed klass, compare against the inline cache's speculated klass,
// and jump to the ic-miss stub on mismatch.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
}
 2627 #endif
 2628 
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  // Emit the inline-cache check, padded to the interior entry alignment.
  __ ic_check(InteriorEntryAlignment);
}
 2633 
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  // Alignment padding makes the size emission-point dependent; measure it.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 2639 
 2640 
 2641 //=============================================================================
 2642 
// Vector calling convention is available whenever Vector API support is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
 2646 
// True if this node's flags mark source operand 1 as NDD-demotable
// (presumably APX "new data destination" demotion — flag set elsewhere).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
 2650 
// True if this node's flags mark source operand 2 as NDD-demotable.
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
 2654 
 2655 #ifdef ASSERT
// Assert-only helper: either source operand is NDD-demotable.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
 2659 #endif

 4593     }
 4594     __ post_call_nop();
 4595   %}
 4596 
  enc_class Java_Dynamic_Call(method meth) %{
    // Inline-cache (virtual/interface) call with the resolved method index,
    // followed by the standard post-call nop emitted after calls here.
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 4601 
  enc_class call_epilog %{
    // Verification-only: emits nothing unless VerifyStackAtCalls is set.
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
 4614 
 4615 %}
 4616 
 4617 //----------FRAME--------------------------------------------------------------
 4618 // Definition of frame structure and management information.
 4619 //
 4620 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4621 //                             |   (to get allocators register number
 4622 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4623 //  r   CALLER     |        |
 4624 //  o     |        +--------+      pad to even-align allocators stack-slot
 4625 //  w     V        |  pad0  |        numbers; owned by CALLER
 4626 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4627 //  h     ^        |   in   |  5
 4628 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4629 //  |     |        |        |  3
 4630 //  |     |        +--------+
 4631 //  V     |        | old out|      Empty on Intel, window on Sparc
 4632 //        |    old |preserve|      Must be even aligned.

 5771   %}
 5772 %}
 5773 
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the converted index is known non-negative, so using
  // the 32-bit index register directly in the address is safe.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 5790 
















// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when compressed oops use an 8-byte shift (scale times_8).
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 5808 
 5809 // Indirect Memory Operand
 5810 operand indirectNarrow(rRegN reg)

 6280 %}
 6281 
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector constrained to the legacy vector register class.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6290 
 6291 //----------OPERAND CLASSES----------------------------------------------------
 6292 // Operand Classes are groups of operands that are used as to simplify
 6293 // instruction definitions by not requiring the AD writer to specify separate
 6294 // instructions for every form of operand when the instruction accepts
 6295 // multiple operand types with the same basic encoding and format.  The classic
 6296 // case of this is memory operands.
 6297 
// All memory addressing modes, both plain-pointer and narrow-oop forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6304 
 6305 //----------PIPELINE-----------------------------------------------------------
 6306 // Rules which define the behavior of the target architectures pipeline.
 6307 pipeline %{
 6308 
 6309 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // x86 instructions vary in size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
 6317 
 6318 //----------RESOURCES----------------------------------------------------------
 6319 // Resources are the functional units available to the machine
 6320 

 8915   format %{ "MEMBAR-storestore (empty encoding)" %}
 8916   ins_encode( );
 8917   ins_pipe(empty);
 8918 %}
 8919 
 8920 //----------Move Instructions--------------------------------------------------
 8921 
// Reinterpret a word-sized integer as a pointer.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    // No code needed when the allocator assigned src and dst the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8934 


























// Reinterpret a pointer as a word-sized integer.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // No code needed when the allocator assigned src and dst the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8947 
 8948 // Convert oop into int for vectors alignment masking
 8949 instruct convP2I(rRegI dst, rRegP src)
 8950 %{
 8951   match(Set dst (ConvL2I (CastP2X src)));
 8952 
 8953   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8954   ins_encode %{

15206   effect(DEF dst, USE src);
15207   ins_cost(100);
15208   format %{ "movd    $dst,$src\t# MoveI2F" %}
15209   ins_encode %{
15210     __ movdl($dst$$XMMRegister, $src$$Register);
15211   %}
15212   ins_pipe( pipe_slow );
15213 %}
15214 
// Bitwise move of a long GPR into a double XMM register (no conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15225 

15226 // Fast clearing of an array
15227 // Small non-constant length ClearArray for non-AVX512 targets.
15228 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15229                   Universe dummy, rFlagsReg cr)
15230 %{
15231   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15232   match(Set dummy (ClearArray cnt base));
15233   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































15234 
15235   format %{ $$template
15236     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15237     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15238     $$emit$$"jg      LARGE\n\t"
15239     $$emit$$"dec     rcx\n\t"
15240     $$emit$$"js      DONE\t# Zero length\n\t"
15241     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15242     $$emit$$"dec     rcx\n\t"
15243     $$emit$$"jge     LOOP\n\t"
15244     $$emit$$"jmp     DONE\n\t"
15245     $$emit$$"# LARGE:\n\t"
15246     if (UseFastStosb) {
15247        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15248        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15249     } else if (UseXMMForObjInit) {
15250        $$emit$$"mov     rdi,rax\n\t"
15251        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15252        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15253        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15261        $$emit$$"jl      L_tail\n\t"
15262        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15263        $$emit$$"add     0x20,rax\n\t"
15264        $$emit$$"sub     0x4,rcx\n\t"
15265        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15266        $$emit$$"add     0x4,rcx\n\t"
15267        $$emit$$"jle     L_end\n\t"
15268        $$emit$$"dec     rcx\n\t"
15269        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15270        $$emit$$"vmovq   xmm0,(rax)\n\t"
15271        $$emit$$"add     0x8,rax\n\t"
15272        $$emit$$"dec     rcx\n\t"
15273        $$emit$$"jge     L_sloop\n\t"
15274        $$emit$$"# L_end:\n\t"
15275     } else {
15276        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15277     }
15278     $$emit$$"# DONE"
15279   %}
15280   ins_encode %{
15281     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15282                  $tmp$$XMMRegister, false, knoreg);
15283   %}
15284   ins_pipe(pipe_slow);
15285 %}
15286 
15287 // Small non-constant length ClearArray for AVX512 targets.
15288 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15289                        Universe dummy, rFlagsReg cr)
15290 %{
15291   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15292   match(Set dummy (ClearArray cnt base));
15293   ins_cost(125);
15294   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15295 
15296   format %{ $$template
15297     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15298     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15299     $$emit$$"jg      LARGE\n\t"
15300     $$emit$$"dec     rcx\n\t"
15301     $$emit$$"js      DONE\t# Zero length\n\t"
15302     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15303     $$emit$$"dec     rcx\n\t"
15304     $$emit$$"jge     LOOP\n\t"
15305     $$emit$$"jmp     DONE\n\t"
15306     $$emit$$"# LARGE:\n\t"
15307     if (UseFastStosb) {
15308        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15309        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15310     } else if (UseXMMForObjInit) {
15311        $$emit$$"mov     rdi,rax\n\t"
15312        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15313        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15314        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15322        $$emit$$"jl      L_tail\n\t"
15323        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15324        $$emit$$"add     0x20,rax\n\t"
15325        $$emit$$"sub     0x4,rcx\n\t"
15326        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15327        $$emit$$"add     0x4,rcx\n\t"
15328        $$emit$$"jle     L_end\n\t"
15329        $$emit$$"dec     rcx\n\t"
15330        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15331        $$emit$$"vmovq   xmm0,(rax)\n\t"
15332        $$emit$$"add     0x8,rax\n\t"
15333        $$emit$$"dec     rcx\n\t"
15334        $$emit$$"jge     L_sloop\n\t"
15335        $$emit$$"# L_end:\n\t"
15336     } else {
15337        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15338     }
15339     $$emit$$"# DONE"
15340   %}
15341   ins_encode %{
15342     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15343                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15344   %}
15345   ins_pipe(pipe_slow);
15346 %}
15347 
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects clear_mem's large-array path (the small variants pass
    // false); knoreg — no AVX-512 opmask register is used here.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15398 
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Large-array path ('true'), with an opmask temp available on AVX-512.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15449 
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Length is an immediate; requires AVX512VL and >= 32-byte vectors.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15463 
// String compare, both operands Latin-1 (byte[]) — the LL encoding.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15479 

17317   effect(USE meth);
17318 
17319   ins_cost(300);
17320   format %{ "call_leaf,runtime " %}
17321   ins_encode(clear_avx, Java_To_Runtime(meth));
17322   ins_pipe(pipe_slow);
17323 %}
17324 
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Note: no clear_avx here, unlike the other leaf calls — presumably
  // because vector arguments live in the upper register bits.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17336 
















// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // vzeroupper (clear_avx) before transitioning to runtime code.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17348 
17349 // Return Instruction
17350 // Remove the return address & jump to it.
17351 // Notice: We always emit a nop after a ret to make sure there is room
17352 // for safepoint patching
17353 instruct Ret()
17354 %{
17355   match(Return);
17356 
17357   format %{ "ret" %}
17358   ins_encode %{
17359     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
int MachCallStaticJavaNode::ret_addr_offset()
{
  int offset = 5; // 5 bytes from start of call to where return address points
  // Account for a possible vzeroupper emitted ahead of the call.
  offset += clear_avx_size();
  return offset;
}
 1643 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  int offset = 15; // 15 bytes from start of call to where return address points
  // Account for a possible vzeroupper emitted ahead of the call sequence.
  offset += clear_avx_size();
  return offset;
}
 1650 
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // CallLeafVector calls are not preceded by a vzeroupper.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  // Pad so that the patched word following the opcode is 4-byte aligned.
  return align_up(current_offset, alignment_required()) - current_offset;
}
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1884     st->print("\n\t");
 1885     st->print("# stack alignment check");
 1886 #endif
 1887   }
 1888   if (C->stub_function() != nullptr) {
 1889     st->print("\n\t");
 1890     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1891     st->print("\n\t");
 1892     st->print("je      fast_entry\t");
 1893     st->print("\n\t");
 1894     st->print("call    #nmethod_entry_barrier_stub\t");
 1895     st->print("\n\tfast_entry:");
 1896   }
 1897   st->cr();
 1898 }
 1899 #endif
 1900 
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Only non-stub compilations get the nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // Don't bind the label during the scratch (sizing) emission pass.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1923 





 1924 
 1925 int MachPrologNode::reloc() const
 1926 {
        // Upper bound on relocation entries this node may emit.
 1927   return 0; // a large enough number
 1928 }
 1929 
 1930 //=============================================================================
 1931 #ifndef PRODUCT
 1932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1933 {
 1934   Compile* C = ra_->C;
 1935   if (generate_vzeroupper(C)) {
 1936     st->print("vzeroupper");
 1937     st->cr(); st->print("\t");
 1938   }
 1939 
 1940   int framesize = C->output()->frame_size_in_bytes();
 1941   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1942   // Remove word for return adr already pushed
 1943   // and RBP

 1951   st->print_cr("popq    rbp");
 1952   if (do_polling() && C->is_method_compilation()) {
 1953     st->print("\t");
 1954     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1955                  "ja      #safepoint_stub\t"
 1956                  "# Safepoint: poll for GC");
 1957   }
 1958 }
 1959 #endif
 1960 
 1961 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1962 {
 1963   Compile* C = ra_->C;
 1964 
 1965   if (generate_vzeroupper(C)) {
 1966     // Clear upper bits of YMM registers when current compiled code uses
 1967     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1968     __ vzeroupper();
 1969   }
 1970 
 1971   // Subtract two words to account for return address and rbp
 1972   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
        // needs_stack_repair(): the frame may have been extended for
        // scalarized inline-type args and must be torn down accordingly.
 1973   __ remove_frame(initial_framesize, C->needs_stack_repair());










 1974 
        // Reserved stack area check, only for methods that actually use it.
 1975   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1976     __ reserved_stack_check();
 1977   }
 1978 
        // Return-point safepoint poll. The out-of-line poll stub is created
        // lazily; during the scratch sizing pass a dummy label stands in so
        // no stub is registered.
 1979   if (do_polling() && C->is_method_compilation()) {
 1980     Label dummy_label;
 1981     Label* code_stub = &dummy_label;
 1982     if (!C->output()->in_scratch_emit_size()) {
 1983       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1984       C->output()->add_stub(stub);
 1985       code_stub = &stub->entry();
 1986     }
 1987     __ relocate(relocInfo::poll_return_type);
 1988     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1989   }
 1990 }
 1991 






 1992 int MachEpilogNode::reloc() const
 1993 {
        // Upper bound on relocation entries (e.g. the return poll) this node
        // may emit.
 1994   return 2; // a large enough number
 1995 }
 1996 
 1997 const Pipeline* MachEpilogNode::pipeline() const
 1998 {
        // Use the generic MachNode pipeline description for the epilog.
 1999   return MachNode::pipeline_class();
 2000 }
 2001 
 2002 //=============================================================================
 2003 
      // Register class of a spill-copy source/destination location.
 2004 enum RC {
 2005   rc_bad,    // not a valid register/location
 2006   rc_int,    // general-purpose register
 2007   rc_kreg,   // opmask (k) register
 2008   rc_float,  // XMM/vector register
 2009   rc_stack   // stack slot
 2010 };
 2011 

 2573 #endif
 2574 
 2575 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2576 {
        // Materialize the address of this node's stack slot (the lock box):
        // dst = rsp + offset.
 2577   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2578   int reg = ra_->get_encode(this);
 2579 
 2580   __ lea(as_Register(reg), Address(rsp, offset));
 2581 }
 2582 
 2583 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2584 {
        // Byte size of the lea emitted above: short (disp8, offset < 0x80) or
        // long (disp32) displacement, with a one-byte REX prefix, or a
        // two-byte REX2 prefix when an extended GPR (encoding > 15) is used.
 2585   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2586   if (ra_->get_encode(this) > 15) {
 2587     return (offset < 0x80) ? 6 : 9; // REX2
 2588   } else {
 2589     return (offset < 0x80) ? 5 : 8; // REX
 2590   }
 2591 }
 2592 
 2593 //=============================================================================
 2594 #ifndef PRODUCT
 2595 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2596 {
        // Debug-only pretty printer; the actual code is produced by emit().
 2597   st->print_cr("MachVEPNode");
 2598 }
 2599 #endif
 2600 
 2601 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
        // Emits either an inline-cache check (unverified case) or the
        // unpacking of scalarized inline-type args followed by a jump to the
        // verified entry point.
 2603   CodeBuffer* cbuf = masm->code();
 2604   uint insts_size = cbuf->insts_size();
 2605   if (!_verified) {
 2606     __ ic_check(1);
 2607   } else {
 2608     if (ra_->C->stub_function() == nullptr) {
 2609       // Emit the entry barrier in a temporary frame before unpacking because
 2610       // it can deopt, which would require packing the scalarized args again.
 2611       __ verified_entry(ra_->C, 0);
 2612       __ entry_barrier();
 2613       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2614       __ remove_frame(initial_framesize, false);
 2615     }
 2616     // Unpack inline type args passed as oop and then jump to
 2617     // the verified entry point (skipping the unverified entry).
 2618     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2619     // Emit code for verified entry and save increment for stack repair on return
 2620     __ verified_entry(ra_->C, sp_inc);
        // During the scratch sizing pass the real label is unbound; jump to a
        // local dummy of identical encoded size instead.
 2621     if (Compile::current()->output()->in_scratch_emit_size()) {
 2622       Label dummy_verified_entry;
 2623       __ jmp(dummy_verified_entry);
 2624     } else {
 2625       __ jmp(*_verified_entry);
 2626     }
 2627   }
 2628   /* WARNING these NOPs are critical so that verified entry point is properly
 2629      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 2630   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2631   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2632   if (nops_cnt > 0) {
 2633     __ nop(nops_cnt);
 2634   }
 2635 }
 2636 
 2637 //=============================================================================
 2638 #ifndef PRODUCT
 2639 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2640 {
        // Debug-only pretty print of the inline-cache check emitted by emit().
 2641   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2642   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2643   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2644 }
 2645 #endif
 2646 
 2647 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2648 {
        // Unverified entry point: inline-cache check, aligned to
        // InteriorEntryAlignment.
 2649   __ ic_check(InteriorEntryAlignment);
 2650 }
 2651 






 2652 
 2653 //=============================================================================
 2654 
 2655 bool Matcher::supports_vector_calling_convention(void) {
        // Gated solely on the EnableVectorSupport flag.
 2656   return EnableVectorSupport;
 2657 }
 2658 
 2659 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
        // Tests the platform-dependent ndd_demotable_opr1 node flag
        // (first source operand may be NDD-demoted).
 2660   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2661 }
 2662 
 2663 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
        // Tests the platform-dependent ndd_demotable_opr2 node flag
        // (second source operand may be NDD-demoted).
 2664   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2665 }
 2666 
 2667 #ifdef ASSERT
 2668 static bool is_ndd_demotable(const MachNode* mdef) {
        // Debug-only: true if either source operand is flagged demotable.
 2669   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2670 }
 2671 #endif

 4605     }
 4606     __ post_call_nop();
 4607   %}
 4608 
 4609   enc_class Java_Dynamic_Call(method meth) %{
          // Inline-cache dispatched (dynamic) call, followed by the standard
          // post-call nop.
 4610     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4611     __ post_call_nop();
 4612   %}
 4613 
 4614   enc_class call_epilog %{
          // Emitted after every call: optional stack-depth verification, and
          // for calls returning an inline type as fields, materialization of
          // the null marker plus normalization of rax (see comments below).
 4615     if (VerifyStackAtCalls) {
 4616       // Check that stack depth is unchanged: find majik cookie on stack
 4617       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4618       Label L;
 4619       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4620       __ jccb(Assembler::equal, L);
 4621       // Die if stack mismatch
 4622       __ int3();
 4623       __ bind(L);
 4624     }
 4625     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4626       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4627       // Search for the corresponding projection, get the register and emit code that initialized it.
 4628       uint con = (tf()->range_cc()->cnt() - 1);
 4629       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4630         ProjNode* proj = fast_out(i)->as_Proj();
 4631         if (proj->_con == con) {
 4632           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4633           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4634           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
              // If the projection lives on the stack, compute the marker in
              // rscratch1 and spill it; otherwise write the register directly.
 4635           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4636           __ testq(rax, rax);
 4637           __ setb(Assembler::notZero, toReg);
 4638           __ movzbl(toReg, toReg);
 4639           if (reg->is_stack()) {
 4640             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4641             __ movq(Address(rsp, st_off), toReg);
 4642           }
 4643           break;
 4644         }
 4645       }
 4646       if (return_value_is_used()) {
 4647         // An inline type is returned as fields in multiple registers.
 4648         // Rax either contains an oop if the inline type is buffered or a pointer
 4649         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4650         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4651         // rax &= (rax & 1) - 1
 4652         __ movptr(rscratch1, rax);
 4653         __ andptr(rscratch1, 0x1);
 4654         __ subptr(rscratch1, 0x1);
 4655         __ andptr(rax, rscratch1);
 4656       }
 4657     }
 4658   %}
 4659 
 4660 %}
 4661 
 4662 //----------FRAME--------------------------------------------------------------
 4663 // Definition of frame structure and management information.
 4664 //
 4665 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4666 //                             |   (to get allocators register number
 4667 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4668 //  r   CALLER     |        |
 4669 //  o     |        +--------+      pad to even-align allocators stack-slot
 4670 //  w     V        |  pad0  |        numbers; owned by CALLER
 4671 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4672 //  h     ^        |   in   |  5
 4673 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4674 //  |     |        |        |  3
 4675 //  |     |        +--------+
 4676 //  V     |        | old out|      Empty on Intel, window on Sparc
 4677 //        |    old |preserve|      Must be even aligned.

 5816   %}
 5817 %}
 5818 
 5819 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5820 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5821 %{
 5822   constraint(ALLOC_IN_RC(ptr_reg));
        // The int index must be provably non-negative (type lo bound >= 0) so
        // the ConvI2L can be folded into the addressing mode safely.
 5823   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5824   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5825 
 5826   op_cost(10);
 5827   format %{"[$reg + $off + $idx << $scale]" %}
 5828   interface(MEMORY_INTER) %{
 5829     base($reg);
 5830     index($idx);
 5831     scale($scale);
 5832     disp($off);
 5833   %}
 5834 %}
 5835 
 5836 // Indirect Narrow Oop Operand
      // Addressing straight through a narrow oop: heap base (R12) plus the
      // narrow oop shifted left by 3. Only valid when the compressed-oops
      // shift is exactly 3 (times_8).
 5837 operand indCompressedOop(rRegN reg) %{
 5838   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5839   constraint(ALLOC_IN_RC(ptr_reg));
 5840   match(DecodeN reg);
 5841 
 5842   op_cost(10);
 5843   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5844   interface(MEMORY_INTER) %{
 5845     base(0xc); // R12
 5846     index($reg);
 5847     scale(0x3);
 5848     disp(0x0);
 5849   %}
 5850 %}
 5851 
 5852 // Indirect Narrow Oop Plus Offset Operand
 5853 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5854 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Same as indCompressedOop but with an additional 32-bit displacement.
 5855 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5856   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5857   constraint(ALLOC_IN_RC(ptr_reg));
 5858   match(AddP (DecodeN reg) off);
 5859 
 5860   op_cost(10);
 5861   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5862   interface(MEMORY_INTER) %{
 5863     base(0xc); // R12
 5864     index($reg);
 5865     scale(0x3);
 5866     disp($off);
 5867   %}
 5868 %}
 5869 
 5870 // Indirect Memory Operand
 5871 operand indirectNarrow(rRegN reg)

 6341 %}
 6342 
 6343 // Replaces legVec during post-selection cleanup. See above.
 6344 operand legVecZ() %{
        // ZMM-width operand restricted to the legacy vector register class.
 6345   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6346   match(VecZ);
 6347 
 6348   format %{ %}
 6349   interface(REG_INTER);
 6350 %}
 6351 
 6352 //----------OPERAND CLASSES----------------------------------------------------
 6353 // Operand Classes are groups of operands that are used to simplify
 6354 // instruction definitions by not requiring the AD writer to specify separate
 6355 // instructions for every form of operand when the instruction accepts
 6356 // multiple operand types with the same basic encoding and format.  The classic
 6357 // case of this is memory operands.
 6358 
      // All addressing-mode operand flavors accepted by instructions that
      // take a generic 'memory' operand, including the narrow-oop forms.
 6359 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6360                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6361                indCompressedOop, indCompressedOopOffset,
 6362                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6363                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6364                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6365 
 6366 //----------PIPELINE-----------------------------------------------------------
 6367 // Rules which define the behavior of the target architectures pipeline.
 6368 pipeline %{
 6369 
 6370 //----------ATTRIBUTES---------------------------------------------------------
 6371 attributes %{
 6372   variable_size_instructions;        // Instructions are variable size
 6373   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6374   instruction_unit_size = 1;         // An instruction is 1 byte long
 6375   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6376   instruction_fetch_units = 1;       // of 16 bytes
 6377 %}
 6378 
 6379 //----------RESOURCES----------------------------------------------------------
 6380 // Resources are the functional units available to the machine
 6381 

 8976   format %{ "MEMBAR-storestore (empty encoding)" %}
 8977   ins_encode( );
 8978   ins_pipe(empty);
 8979 %}
 8980 
 8981 //----------Move Instructions--------------------------------------------------
 8982 
 8983 instruct castX2P(rRegP dst, rRegL src)
 8984 %{
 8985   match(Set dst (CastX2P src));
 8986 
 8987   format %{ "movq    $dst, $src\t# long->ptr" %}
 8988   ins_encode %{
          // No-op when the allocator placed src and dst in the same register.
 8989     if ($dst$$reg != $src$$reg) {
 8990       __ movptr($dst$$Register, $src$$Register);
 8991     }
 8992   %}
 8993   ins_pipe(ialu_reg_reg); // XXX
 8994 %}
 8995 
 8996 instruct castI2N(rRegN dst, rRegI src)
 8997 %{
 8998   match(Set dst (CastI2N src));
 8999 
        // Fixed: the format previously claimed "movq", which did not match
        // the 32-bit movl actually emitted below.
 9000   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 9001   ins_encode %{
          // No-op when the allocator placed src and dst in the same register.
 9002     if ($dst$$reg != $src$$reg) {
 9003       __ movl($dst$$Register, $src$$Register);
 9004     }
 9005   %}
 9006   ins_pipe(ialu_reg_reg); // XXX
 9007 %}
 9008 
      // Narrow-oop (rRegN) input variant of castP2X below; copies the full
      // register via movptr.
 9009 instruct castN2X(rRegL dst, rRegN src)
 9010 %{
 9011   match(Set dst (CastP2X src));
 9012 
 9013   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9014   ins_encode %{
          // No-op when the allocator placed src and dst in the same register.
 9015     if ($dst$$reg != $src$$reg) {
 9016       __ movptr($dst$$Register, $src$$Register);
 9017     }
 9018   %}
 9019   ins_pipe(ialu_reg_reg); // XXX
 9020 %}
 9021 
 9022 instruct castP2X(rRegL dst, rRegP src)
 9023 %{
 9024   match(Set dst (CastP2X src));
 9025 
 9026   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9027   ins_encode %{
          // No-op when the allocator placed src and dst in the same register.
 9028     if ($dst$$reg != $src$$reg) {
 9029       __ movptr($dst$$Register, $src$$Register);
 9030     }
 9031   %}
 9032   ins_pipe(ialu_reg_reg); // XXX
 9033 %}
 9034 
 9035 // Convert oop into int for vectors alignment masking
 9036 instruct convP2I(rRegI dst, rRegP src)
 9037 %{
 9038   match(Set dst (ConvL2I (CastP2X src)));
 9039 
 9040   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9041   ins_encode %{

15293   effect(DEF dst, USE src);
15294   ins_cost(100);
15295   format %{ "movd    $dst,$src\t# MoveI2F" %}
15296   ins_encode %{
15297     __ movdl($dst$$XMMRegister, $src$$Register);
15298   %}
15299   ins_pipe( pipe_slow );
15300 %}
15301 
      // Bitwise move of a long GPR into an XMM register (no value conversion).
15302 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15303   match(Set dst (MoveL2D src));
15304   effect(DEF dst, USE src);
15305   ins_cost(100);
15306   format %{ "movd    $dst,$src\t# MoveL2D" %}
15307   ins_encode %{
15308      __ movdq($dst$$XMMRegister, $src$$Register);
15309   %}
15310   ins_pipe( pipe_slow );
15311 %}
15312 
15313 
15314 // Fast clearing of an array
15315 // Small non-constant length ClearArray for non-AVX512 targets.
15316 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15317                   Universe dummy, rFlagsReg cr)
15318 %{
        // Variant: small (!is_large) clear, arbitrary store widths allowed,
        // SSE/AVX2 (UseAVX <= 2) targets. cnt/base/val are consumed.
15319   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15320   match(Set dummy (ClearArray (Binary cnt base) val));
15321   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15322 
15323   format %{ $$template
15324     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15325     $$emit$$"jg      LARGE\n\t"
15326     $$emit$$"dec     rcx\n\t"
15327     $$emit$$"js      DONE\t# Zero length\n\t"
15328     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15329     $$emit$$"dec     rcx\n\t"
15330     $$emit$$"jge     LOOP\n\t"
15331     $$emit$$"jmp     DONE\n\t"
15332     $$emit$$"# LARGE:\n\t"
15333     if (UseFastStosb) {
15334        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15335        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15336     } else if (UseXMMForObjInit) {
15337        $$emit$$"movdq   $tmp, $val\n\t"
15338        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15339        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15340        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15341        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15342        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15343        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15344        $$emit$$"add     0x40,rax\n\t"
15345        $$emit$$"# L_zero_64_bytes:\n\t"
15346        $$emit$$"sub     0x8,rcx\n\t"
15347        $$emit$$"jge     L_loop\n\t"
15348        $$emit$$"add     0x4,rcx\n\t"
15349        $$emit$$"jl      L_tail\n\t"
15350        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15351        $$emit$$"add     0x20,rax\n\t"
15352        $$emit$$"sub     0x4,rcx\n\t"
15353        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15354        $$emit$$"add     0x4,rcx\n\t"
15355        $$emit$$"jle     L_end\n\t"
15356        $$emit$$"dec     rcx\n\t"
15357        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15358        $$emit$$"vmovq   xmm0,(rax)\n\t"
15359        $$emit$$"add     0x8,rax\n\t"
15360        $$emit$$"dec     rcx\n\t"
15361        $$emit$$"jge     L_sloop\n\t"
15362        $$emit$$"# L_end:\n\t"
15363     } else {
15364        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15365     }
15366     $$emit$$"# DONE"
15367   %}
15368   ins_encode %{
          // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=false)
15369     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15370                  $tmp$$XMMRegister, false, false);
15371   %}
15372   ins_pipe(pipe_slow);
15373 %}
15374 
15375 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15376                             Universe dummy, rFlagsReg cr)
15377 %{
        // Variant: small clear restricted to word-sized copies
        // (word_copy_only), SSE/AVX2 targets; note no rep-stosb path here.
15378   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15379   match(Set dummy (ClearArray (Binary cnt base) val));
15380   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15381 
15382   format %{ $$template
15383     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15384     $$emit$$"jg      LARGE\n\t"
15385     $$emit$$"dec     rcx\n\t"
15386     $$emit$$"js      DONE\t# Zero length\n\t"
15387     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15388     $$emit$$"dec     rcx\n\t"
15389     $$emit$$"jge     LOOP\n\t"
15390     $$emit$$"jmp     DONE\n\t"
15391     $$emit$$"# LARGE:\n\t"
15392     if (UseXMMForObjInit) {
15393        $$emit$$"movdq   $tmp, $val\n\t"
15394        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15395        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15396        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15397        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15398        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15399        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15400        $$emit$$"add     0x40,rax\n\t"
15401        $$emit$$"# L_zero_64_bytes:\n\t"
15402        $$emit$$"sub     0x8,rcx\n\t"
15403        $$emit$$"jge     L_loop\n\t"
15404        $$emit$$"add     0x4,rcx\n\t"
15405        $$emit$$"jl      L_tail\n\t"
15406        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15407        $$emit$$"add     0x20,rax\n\t"
15408        $$emit$$"sub     0x4,rcx\n\t"
15409        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15410        $$emit$$"add     0x4,rcx\n\t"
15411        $$emit$$"jle     L_end\n\t"
15412        $$emit$$"dec     rcx\n\t"
15413        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15414        $$emit$$"vmovq   xmm0,(rax)\n\t"
15415        $$emit$$"add     0x8,rax\n\t"
15416        $$emit$$"dec     rcx\n\t"
15417        $$emit$$"jge     L_sloop\n\t"
15418        $$emit$$"# L_end:\n\t"
15419     } else {
15420        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15421     }
15422     $$emit$$"# DONE"
15423   %}
15424   ins_encode %{
          // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=true)
15425     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15426                  $tmp$$XMMRegister, false, true);
15427   %}
15428   ins_pipe(pipe_slow);
15429 %}
15430 
15431 // Small non-constant length ClearArray for AVX512 targets.
15432 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15433                        Universe dummy, rFlagsReg cr)
15434 %{
15435   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15436   match(Set dummy (ClearArray (Binary cnt base) val));
15437   ins_cost(125);
15438   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15439 
15440   format %{ $$template
15441     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15442     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15443     $$emit$$"jg      LARGE\n\t"
15444     $$emit$$"dec     rcx\n\t"
15445     $$emit$$"js      DONE\t# Zero length\n\t"
15446     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15447     $$emit$$"dec     rcx\n\t"
15448     $$emit$$"jge     LOOP\n\t"
15449     $$emit$$"jmp     DONE\n\t"
15450     $$emit$$"# LARGE:\n\t"
15451     if (UseFastStosb) {
15452        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15453        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15454     } else if (UseXMMForObjInit) {
15455        $$emit$$"mov     rdi,rax\n\t"
15456        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15457        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15458        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15466        $$emit$$"jl      L_tail\n\t"
15467        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15468        $$emit$$"add     0x20,rax\n\t"
15469        $$emit$$"sub     0x4,rcx\n\t"
15470        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15471        $$emit$$"add     0x4,rcx\n\t"
15472        $$emit$$"jle     L_end\n\t"
15473        $$emit$$"dec     rcx\n\t"
15474        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15475        $$emit$$"vmovq   xmm0,(rax)\n\t"
15476        $$emit$$"add     0x8,rax\n\t"
15477        $$emit$$"dec     rcx\n\t"
15478        $$emit$$"jge     L_sloop\n\t"
15479        $$emit$$"# L_end:\n\t"
15480     } else {
15481        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15482     }
15483     $$emit$$"# DONE"
15484   %}
15485   ins_encode %{
15486     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15487                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15488   %}
15489   ins_pipe(pipe_slow);
15490 %}
15491 
15492 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15493                                  Universe dummy, rFlagsReg cr)

15494 %{
15495   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15496   match(Set dummy (ClearArray (Binary cnt base) val));
15497   ins_cost(125);
15498   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15499 
15500   format %{ $$template
15501     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15502     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15503     $$emit$$"jg      LARGE\n\t"
15504     $$emit$$"dec     rcx\n\t"
15505     $$emit$$"js      DONE\t# Zero length\n\t"
15506     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15507     $$emit$$"dec     rcx\n\t"
15508     $$emit$$"jge     LOOP\n\t"
15509     $$emit$$"jmp     DONE\n\t"
15510     $$emit$$"# LARGE:\n\t"
15511     if (UseFastStosb) {
15512        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15513        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15514     } else if (UseXMMForObjInit) {
15515        $$emit$$"mov     rdi,rax\n\t"
15516        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15517        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15518        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15526        $$emit$$"jl      L_tail\n\t"
15527        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15528        $$emit$$"add     0x20,rax\n\t"
15529        $$emit$$"sub     0x4,rcx\n\t"
15530        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15531        $$emit$$"add     0x4,rcx\n\t"
15532        $$emit$$"jle     L_end\n\t"
15533        $$emit$$"dec     rcx\n\t"
15534        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15535        $$emit$$"vmovq   xmm0,(rax)\n\t"
15536        $$emit$$"add     0x8,rax\n\t"
15537        $$emit$$"dec     rcx\n\t"
15538        $$emit$$"jge     L_sloop\n\t"
15539        $$emit$$"# L_end:\n\t"
15540     } else {
15541        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15542     }
15543     $$emit$$"# DONE"
15544   %}
15545   ins_encode %{
15546     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15547                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15548   %}
15549   ins_pipe(pipe_slow);
15550 %}
15551 
15552 // Large non-constant length ClearArray for non-AVX512 targets.
15553 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15554                         Universe dummy, rFlagsReg cr)
15555 %{
        // Variant: large (is_large) clear, arbitrary store widths, SSE/AVX2
        // targets — no short-length fast path.
15556   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15557   match(Set dummy (ClearArray (Binary cnt base) val));
15558   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15559 
15560   format %{ $$template
15561     if (UseFastStosb) {
15562        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15563        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15564     } else if (UseXMMForObjInit) {
15565        $$emit$$"movdq   $tmp, $val\n\t"
15566        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15567        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15568        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15569        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15570        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15571        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15572        $$emit$$"add     0x40,rax\n\t"
15573        $$emit$$"# L_zero_64_bytes:\n\t"
15574        $$emit$$"sub     0x8,rcx\n\t"
15575        $$emit$$"jge     L_loop\n\t"
15576        $$emit$$"add     0x4,rcx\n\t"
15577        $$emit$$"jl      L_tail\n\t"
15578        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15579        $$emit$$"add     0x20,rax\n\t"
15580        $$emit$$"sub     0x4,rcx\n\t"
15581        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15582        $$emit$$"add     0x4,rcx\n\t"
15583        $$emit$$"jle     L_end\n\t"
15584        $$emit$$"dec     rcx\n\t"
15585        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15586        $$emit$$"vmovq   xmm0,(rax)\n\t"
15587        $$emit$$"add     0x8,rax\n\t"
15588        $$emit$$"dec     rcx\n\t"
15589        $$emit$$"jge     L_sloop\n\t"
15590        $$emit$$"# L_end:\n\t"
15591     } else {
15592        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15593     }
15594   %}
15595   ins_encode %{
          // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=false)
15596     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15597                  $tmp$$XMMRegister, true, false);
15598   %}
15599   ins_pipe(pipe_slow);
15600 %}
15601 
15602 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15603                                   Universe dummy, rFlagsReg cr)
15604 %{
        // Variant: large clear restricted to word-sized copies, SSE/AVX2
        // targets; no rep-stosb path.
15605   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15606   match(Set dummy (ClearArray (Binary cnt base) val));
15607   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15608 
15609   format %{ $$template
15610     if (UseXMMForObjInit) {
15611        $$emit$$"movdq   $tmp, $val\n\t"
15612        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15613        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15614        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15615        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15616        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15617        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15618        $$emit$$"add     0x40,rax\n\t"
15619        $$emit$$"# L_zero_64_bytes:\n\t"
15620        $$emit$$"sub     0x8,rcx\n\t"
15621        $$emit$$"jge     L_loop\n\t"
15622        $$emit$$"add     0x4,rcx\n\t"
15623        $$emit$$"jl      L_tail\n\t"
15624        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15625        $$emit$$"add     0x20,rax\n\t"
15626        $$emit$$"sub     0x4,rcx\n\t"
15627        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15628        $$emit$$"add     0x4,rcx\n\t"
15629        $$emit$$"jle     L_end\n\t"
15630        $$emit$$"dec     rcx\n\t"
15631        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15632        $$emit$$"vmovq   xmm0,(rax)\n\t"
15633        $$emit$$"add     0x8,rax\n\t"
15634        $$emit$$"dec     rcx\n\t"
15635        $$emit$$"jge     L_sloop\n\t"
15636        $$emit$$"# L_end:\n\t"
15637     } else {
15638        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15639     }
15640   %}
15641   ins_encode %{
          // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=true)
15642     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15643                  $tmp$$XMMRegister, true, true);
15644   %}
15645   ins_pipe(pipe_slow);
15646 %}
15647 
15648 // Large non-constant length ClearArray for AVX512 targets.
15649 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15650                              Universe dummy, rFlagsReg cr)
15651 %{
        // Variant: large clear, arbitrary store widths, AVX-512 (UseAVX > 2)
        // targets; needs an extra opmask temp (ktmp).
15652   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15653   match(Set dummy (ClearArray (Binary cnt base) val));
15654   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15655 
15656   format %{ $$template
15657     if (UseFastStosb) {
15658        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15659        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15660        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15661     } else if (UseXMMForObjInit) {
15662        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15663        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15664        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15665        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15666        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15667        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15668        $$emit$$"add     0x40,rax\n\t"
15669        $$emit$$"# L_zero_64_bytes:\n\t"
15670        $$emit$$"sub     0x8,rcx\n\t"
15671        $$emit$$"jge     L_loop\n\t"
15672        $$emit$$"add     0x4,rcx\n\t"
15673        $$emit$$"jl      L_tail\n\t"
15674        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15675        $$emit$$"add     0x20,rax\n\t"
15676        $$emit$$"sub     0x4,rcx\n\t"
15677        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15678        $$emit$$"add     0x4,rcx\n\t"
15679        $$emit$$"jle     L_end\n\t"
15680        $$emit$$"dec     rcx\n\t"
15681        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15682        $$emit$$"vmovq   xmm0,(rax)\n\t"
15683        $$emit$$"add     0x8,rax\n\t"
15684        $$emit$$"dec     rcx\n\t"
15685        $$emit$$"jge     L_sloop\n\t"
15686        $$emit$$"# L_end:\n\t"
15687     } else {
15688        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15689        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15690     }
15691   %}
15692   ins_encode %{
          // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=false, ktmp)
15693     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15694                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15695   %}
15696   ins_pipe(pipe_slow);
15697 %}
15698 
15699 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15700                                        Universe dummy, rFlagsReg cr)

15701 %{
15702   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15703   match(Set dummy (ClearArray (Binary cnt base) val));
15704   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15705 
15706   format %{ $$template
15707     if (UseFastStosb) {
15708        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15709        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15710        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15711     } else if (UseXMMForObjInit) {
15712        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15713        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15714        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15715        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15716        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15717        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15718        $$emit$$"add     0x40,rax\n\t"
15719        $$emit$$"# L_zero_64_bytes:\n\t"
15720        $$emit$$"sub     0x8,rcx\n\t"
15721        $$emit$$"jge     L_loop\n\t"
15722        $$emit$$"add     0x4,rcx\n\t"
15723        $$emit$$"jl      L_tail\n\t"
15724        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15725        $$emit$$"add     0x20,rax\n\t"
15726        $$emit$$"sub     0x4,rcx\n\t"
15727        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15728        $$emit$$"add     0x4,rcx\n\t"
15729        $$emit$$"jle     L_end\n\t"
15730        $$emit$$"dec     rcx\n\t"
15731        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15732        $$emit$$"vmovq   xmm0,(rax)\n\t"
15733        $$emit$$"add     0x8,rax\n\t"
15734        $$emit$$"dec     rcx\n\t"
15735        $$emit$$"jge     L_sloop\n\t"
15736        $$emit$$"# L_end:\n\t"
15737     } else {
15738        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15739        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15740     }
15741   %}
15742   ins_encode %{
15743     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15744                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15745   %}
15746   ins_pipe(pipe_slow);
15747 %}
15748 
15749 // Small constant length ClearArray for AVX512 targets.
      // The length is a compile-time immediate ($cnt$$constant), so clear_mem
      // can emit a fixed, fully-unrolled sequence — hence the low ins_cost(100)
      // relative to the rep_stos variants.
15750 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15751 %{
      // Small (!is_large), byte-granular (!word_copy_only) clears, and only
      // when AVX512VL with at least 32-byte vectors is available.
15752   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15753             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15754   match(Set dummy (ClearArray (Binary cnt base) val));
15755   ins_cost(100);
      // base is any register here (rRegP), unlike the large variants which
      // pin rdi for rep stos; val is consumed, tmp/ktmp are scratch.
15756   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15757   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15758   ins_encode %{
15759     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15760   %}
15761   ins_pipe(pipe_slow);
15762 %}
15763 
      // StrComp intrinsic for two Latin-1 (byte[]) strings on targets WITHOUT
      // AVX512VL+BW; the AVX512 flavor is a separate instruct, so knoreg is
      // passed below (no opmask register is used on this path).
15764 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15765                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15766 %{
15767   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15768   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
      // Both string pointers and both lengths are consumed; tmp1 is XMM
      // scratch and flags are clobbered. Result lands in rax.
15769   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15770 
15771   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15772   ins_encode %{
15773     __ string_compare($str1$$Register, $str2$$Register,
15774                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15775                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15776   %}
15777   ins_pipe( pipe_slow );
15778 %}
15779 

17617   effect(USE meth);
17618 
17619   ins_cost(300);
17620   format %{ "call_leaf,runtime " %}
17621   ins_encode(clear_avx, Java_To_Runtime(meth));
17622   ins_pipe(pipe_slow);
17623 %}
17624 
17625 // Call runtime without safepoint and with vector arguments
      // Deliberately does NOT prepend clear_avx: vector arguments live in the
      // upper YMM/ZMM state, so vzeroupper must not be emitted. This pairs
      // with MachCallRuntimeNode::ret_addr_offset(), which skips
      // clear_avx_size() for Op_CallLeafVector.
17626 instruct CallLeafDirectVector(method meth)
17627 %{
17628   match(CallLeafVector);
17629   effect(USE meth);
17630 
17631   ins_cost(300);
17632   format %{ "call_leaf,vector " %}
17633   ins_encode(Java_To_Runtime(meth));
17634   ins_pipe(pipe_slow);
17635 %}
17636 
17637 // Call runtime without safepoint
17638 // entry point is null, target holds the address to call
      // Indirect form: chosen (via the predicate) only when the CallLeafNoFP
      // node carries no static entry point, so the destination comes from a
      // register at runtime. Note there is no clear_avx here, unlike the
      // direct form below — TODO(review): confirm that is intentional for
      // the indirect callers.
17639 instruct CallLeafNoFPInDirect(rRegP target)
17640 %{
17641   predicate(n->as_Call()->entry_point() == nullptr);
17642   match(CallLeafNoFP target);
17643 
17644   ins_cost(300);
17645   format %{ "call_leaf_nofp,runtime indirect " %}
17646   ins_encode %{
17647      __ call($target$$Register);
17648   %}
17649 
17650   ins_pipe(pipe_slow);
17651 %}
17652 
17653 // Call runtime without safepoint
      // Direct form: selected when the call node has a static entry point.
      // clear_avx emits vzeroupper (when needed) before the runtime call;
      // its size is accounted for in MachCallRuntimeNode::ret_addr_offset().
17654 instruct CallLeafNoFPDirect(method meth)
17655 %{
17656   predicate(n->as_Call()->entry_point() != nullptr);
17657   match(CallLeafNoFP);
17658   effect(USE meth);
17659 
17660   ins_cost(300);
17661   format %{ "call_leaf_nofp,runtime " %}
17662   ins_encode(clear_avx, Java_To_Runtime(meth));
17663   ins_pipe(pipe_slow);
17664 %}
17665 
17666 // Return Instruction
17667 // Remove the return address & jump to it.
17668 // Notice: We always emit a nop after a ret to make sure there is room
17669 // for safepoint patching
17670 instruct Ret()
17671 %{
17672   match(Return);
17673 
17674   format %{ "ret" %}
17675   ins_encode %{
17676     __ ret(0);
< prev index next >