src/hotspot/cpu/x86/x86.ad

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
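
As a reading aid, the offsets above follow from the usual x86-64 encodings; a hedged sketch, assuming the standard call sequences this file emits (the dynamic-call breakdown is inferred from the 15-byte total):

// Byte accounting behind the ret_addr_offset() constants:
//   call rel32                     -> 1 (0xE8) + 4 (disp32)           =  5 bytes  (static Java call)
//   movq rax, imm64 ; call rel32   -> 10 + 5                          = 15 bytes  (dynamic / inline-cache call, presumed)
//   movq r10, imm64 ; callq *r10   -> 10 + 3 (REX + 0xFF /2 + ModRM)  = 13 bytes  (runtime call, as the comment above says)
//   clear_avx_size() adds the 3-byte vzeroupper when wide vectors are live, 0 otherwise (assumption).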

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
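
A small worked example of the padding arithmetic (hypothetical offsets, assuming alignment_required() == 4 and a 3-byte vzeroupper):

// Hypothetical: the node would start at current_offset = 30.
//   30 + 3 (vzeroupper) + 1 (call opcode byte) = 34;  align_up(34, 4) - 34 = 2 bytes of padding.
// With 2 padding bytes the node starts at 32, the call opcode lands at 35, and the 4-byte
// displacement that gets patched starts at 36 -- 4-byte aligned, so it cannot span a cache line.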
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
 1901   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
 1906     // NOTE: We set the table base offset here because users might be
 1907     // emitted before MachConstantBaseNode.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1936   // Remove word for return adr already pushed
 1937   // and RBP

 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1967   // Remove word for return adr already pushed
 1968   // and RBP
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 

 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
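
The size constants correspond to the lea emitted above; a hedged breakdown, assuming the usual lea reg, [rsp + disp] form:

// lea reg, [rsp + disp8]  : REX.W (1) + 0x8D (1) + ModRM (1) + SIB (1, rsp base) + disp8 (1)  = 5 bytes
// lea reg, [rsp + disp32] : same, but with a 4-byte displacement                              = 8 bytes
// Registers with encoding > 15 (APX EGPRs) need the 2-byte REX2 prefix instead of REX        -> 6 / 9 bytes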
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
 2611     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2637   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2638 }
 2639 
 2640 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2641   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2642 }
 2643 
 2644 #ifdef ASSERT
 2645 static bool is_ndd_demotable(const MachNode* mdef) {
 2646   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2647 }
 2648 #endif

 4584     }
 4585     __ post_call_nop();
 4586   %}
 4587 
 4588   enc_class Java_Dynamic_Call(method meth) %{
 4589     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4590     __ post_call_nop();
 4591   %}
 4592 
 4593   enc_class call_epilog %{
 4594     if (VerifyStackAtCalls) {
 4595       // Check that stack depth is unchanged: find majik cookie on stack
 4596       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4597       Label L;
 4598       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4599       __ jccb(Assembler::equal, L);
 4600       // Die if stack mismatch
 4601       __ int3();
 4602       __ bind(L);
 4603     }
 4604   %}
 4605 
 4606 %}
 4607 
 4608 //----------FRAME--------------------------------------------------------------
 4609 // Definition of frame structure and management information.
 4610 //
 4611 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4612 //                             |   (to get allocators register number
 4613 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4614 //  r   CALLER     |        |
 4615 //  o     |        +--------+      pad to even-align allocators stack-slot
 4616 //  w     V        |  pad0  |        numbers; owned by CALLER
 4617 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4618 //  h     ^        |   in   |  5
 4619 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4620 //  |     |        |        |  3
 4621 //  |     |        +--------+
 4622 //  V     |        | old out|      Empty on Intel, window on Sparc
 4623 //        |    old |preserve|      Must be even aligned.

 5746   %}
 5747 %}
 5748 
 5749 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5750 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5751 %{
 5752   constraint(ALLOC_IN_RC(ptr_reg));
 5753   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5754   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5755 
 5756   op_cost(10);
 5757   format %{"[$reg + $off + $idx << $scale]" %}
 5758   interface(MEMORY_INTER) %{
 5759     base($reg);
 5760     index($idx);
 5761     scale($scale);
 5762     disp($off);
 5763   %}
 5764 %}
 5765 
 5766 // Indirect Narrow Oop Plus Offset Operand
 5767 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5768 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5769 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5770   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5771   constraint(ALLOC_IN_RC(ptr_reg));
 5772   match(AddP (DecodeN reg) off);
 5773 
 5774   op_cost(10);
 5775   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5776   interface(MEMORY_INTER) %{
 5777     base(0xc); // R12
 5778     index($reg);
 5779     scale(0x3);
 5780     disp($off);
 5781   %}
 5782 %}
 5783 
 5784 // Indirect Memory Operand
 5785 operand indirectNarrow(rRegN reg)

 6222 %}
 6223 
 6224 // Replaces legVec during post-selection cleanup. See above.
 6225 operand legVecZ() %{
 6226   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6227   match(VecZ);
 6228 
 6229   format %{ %}
 6230   interface(REG_INTER);
 6231 %}
 6232 
 6233 //----------OPERAND CLASSES----------------------------------------------------
 6234 // Operand Classes are groups of operands that are used to simplify
 6235 // instruction definitions by not requiring the AD writer to specify separate
 6236 // instructions for every form of operand when the instruction accepts
 6237 // multiple operand types with the same basic encoding and format.  The classic
 6238 // case of this is memory operands.
 6239 
 6240 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6241                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6242                indCompressedOopOffset,
 6243                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6244                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6245                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6246 
 6247 //----------PIPELINE-----------------------------------------------------------
 6248 // Rules which define the behavior of the target architecture's pipeline.
 6249 pipeline %{
 6250 
 6251 //----------ATTRIBUTES---------------------------------------------------------
 6252 attributes %{
 6253   variable_size_instructions;        // Variable-size instructions
 6254   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6255   instruction_unit_size = 1;         // An instruction is 1 byte long
 6256   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6257   instruction_fetch_units = 1;       // of 16 bytes
 6258 %}
 6259 
 6260 //----------RESOURCES----------------------------------------------------------
 6261 // Resources are the functional units available to the machine
 6262 

 8820   format %{ "MEMBAR-storestore (empty encoding)" %}
 8821   ins_encode( );
 8822   ins_pipe(empty);
 8823 %}
 8824 
 8825 //----------Move Instructions--------------------------------------------------
 8826 
 8827 instruct castX2P(rRegP dst, rRegL src)
 8828 %{
 8829   match(Set dst (CastX2P src));
 8830 
 8831   format %{ "movq    $dst, $src\t# long->ptr" %}
 8832   ins_encode %{
 8833     if ($dst$$reg != $src$$reg) {
 8834       __ movptr($dst$$Register, $src$$Register);
 8835     }
 8836   %}
 8837   ins_pipe(ialu_reg_reg); // XXX
 8838 %}
 8839 
 8840 instruct castP2X(rRegL dst, rRegP src)
 8841 %{
 8842   match(Set dst (CastP2X src));
 8843 
 8844   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8845   ins_encode %{
 8846     if ($dst$$reg != $src$$reg) {
 8847       __ movptr($dst$$Register, $src$$Register);
 8848     }
 8849   %}
 8850   ins_pipe(ialu_reg_reg); // XXX
 8851 %}
 8852 
 8853 // Convert oop into int for vector alignment masking
 8854 instruct convP2I(rRegI dst, rRegP src)
 8855 %{
 8856   match(Set dst (ConvL2I (CastP2X src)));
 8857 
 8858   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8859   ins_encode %{

15066   effect(DEF dst, USE src);
15067   ins_cost(100);
15068   format %{ "movd    $dst,$src\t# MoveI2F" %}
15069   ins_encode %{
15070     __ movdl($dst$$XMMRegister, $src$$Register);
15071   %}
15072   ins_pipe( pipe_slow );
15073 %}
15074 
15075 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15076   match(Set dst (MoveL2D src));
15077   effect(DEF dst, USE src);
15078   ins_cost(100);
15079   format %{ "movd    $dst,$src\t# MoveL2D" %}
15080   ins_encode %{
15081      __ movdq($dst$$XMMRegister, $src$$Register);
15082   %}
15083   ins_pipe( pipe_slow );
15084 %}
15085 

15086 // Fast clearing of an array
15087 // Small non-constant length ClearArray for non-AVX512 targets.
15088 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15089                   Universe dummy, rFlagsReg cr)
15090 %{
15091   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15092   match(Set dummy (ClearArray cnt base));
15093   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15094 
15095   format %{ $$template
15096     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15097     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15098     $$emit$$"jg      LARGE\n\t"
15099     $$emit$$"dec     rcx\n\t"
15100     $$emit$$"js      DONE\t# Zero length\n\t"
15101     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15102     $$emit$$"dec     rcx\n\t"
15103     $$emit$$"jge     LOOP\n\t"
15104     $$emit$$"jmp     DONE\n\t"
15105     $$emit$$"# LARGE:\n\t"
15106     if (UseFastStosb) {
15107        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15108        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15109     } else if (UseXMMForObjInit) {
15110        $$emit$$"mov     rdi,rax\n\t"
15111        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15112        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15113        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15121        $$emit$$"jl      L_tail\n\t"
15122        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15123        $$emit$$"add     0x20,rax\n\t"
15124        $$emit$$"sub     0x4,rcx\n\t"
15125        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15126        $$emit$$"add     0x4,rcx\n\t"
15127        $$emit$$"jle     L_end\n\t"
15128        $$emit$$"dec     rcx\n\t"
15129        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15130        $$emit$$"vmovq   xmm0,(rax)\n\t"
15131        $$emit$$"add     0x8,rax\n\t"
15132        $$emit$$"dec     rcx\n\t"
15133        $$emit$$"jge     L_sloop\n\t"
15134        $$emit$$"# L_end:\n\t"
15135     } else {
15136        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15137     }
15138     $$emit$$"# DONE"
15139   %}
15140   ins_encode %{
15141     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15142                  $tmp$$XMMRegister, false, knoreg);
15143   %}
15144   ins_pipe(pipe_slow);
15145 %}
15146 
15147 // Small non-constant length ClearArray for AVX512 targets.
15148 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15149                        Universe dummy, rFlagsReg cr)
15150 %{
15151   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15152   match(Set dummy (ClearArray cnt base));
15153   ins_cost(125);
15154   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15155 
15156   format %{ $$template
15157     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15158     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15159     $$emit$$"jg      LARGE\n\t"
15160     $$emit$$"dec     rcx\n\t"
15161     $$emit$$"js      DONE\t# Zero length\n\t"
15162     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15163     $$emit$$"dec     rcx\n\t"
15164     $$emit$$"jge     LOOP\n\t"
15165     $$emit$$"jmp     DONE\n\t"
15166     $$emit$$"# LARGE:\n\t"
15167     if (UseFastStosb) {
15168        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15169        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15170     } else if (UseXMMForObjInit) {
15171        $$emit$$"mov     rdi,rax\n\t"
15172        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15173        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15174        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15182        $$emit$$"jl      L_tail\n\t"
15183        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15184        $$emit$$"add     0x20,rax\n\t"
15185        $$emit$$"sub     0x4,rcx\n\t"
15186        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15187        $$emit$$"add     0x4,rcx\n\t"
15188        $$emit$$"jle     L_end\n\t"
15189        $$emit$$"dec     rcx\n\t"
15190        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15191        $$emit$$"vmovq   xmm0,(rax)\n\t"
15192        $$emit$$"add     0x8,rax\n\t"
15193        $$emit$$"dec     rcx\n\t"
15194        $$emit$$"jge     L_sloop\n\t"
15195        $$emit$$"# L_end:\n\t"
15196     } else {
15197        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15198     }
15199     $$emit$$"# DONE"
15200   %}
15201   ins_encode %{
15202     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15203                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15204   %}
15205   ins_pipe(pipe_slow);
15206 %}
15207 
15208 // Large non-constant length ClearArray for non-AVX512 targets.
15209 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15210                         Universe dummy, rFlagsReg cr)
15211 %{
15212   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15213   match(Set dummy (ClearArray cnt base));
15214   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15215 
15216   format %{ $$template
15217     if (UseFastStosb) {
15218        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15219        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15220        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15221     } else if (UseXMMForObjInit) {
15222        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15223        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15224        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15225        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15226        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15227        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15228        $$emit$$"add     0x40,rax\n\t"
15229        $$emit$$"# L_zero_64_bytes:\n\t"
15230        $$emit$$"sub     0x8,rcx\n\t"
15231        $$emit$$"jge     L_loop\n\t"
15232        $$emit$$"add     0x4,rcx\n\t"
15233        $$emit$$"jl      L_tail\n\t"
15234        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15235        $$emit$$"add     0x20,rax\n\t"
15236        $$emit$$"sub     0x4,rcx\n\t"
15237        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15238        $$emit$$"add     0x4,rcx\n\t"
15239        $$emit$$"jle     L_end\n\t"
15240        $$emit$$"dec     rcx\n\t"
15241        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15242        $$emit$$"vmovq   xmm0,(rax)\n\t"
15243        $$emit$$"add     0x8,rax\n\t"
15244        $$emit$$"dec     rcx\n\t"
15245        $$emit$$"jge     L_sloop\n\t"
15246        $$emit$$"# L_end:\n\t"
15247     } else {
15248        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15249        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15250     }
15251   %}
15252   ins_encode %{
15253     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15254                  $tmp$$XMMRegister, true, knoreg);
15255   %}
15256   ins_pipe(pipe_slow);
15257 %}
15258 
15259 // Large non-constant length ClearArray for AVX512 targets.
15260 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15261                              Universe dummy, rFlagsReg cr)
15262 %{
15263   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15264   match(Set dummy (ClearArray cnt base));
15265   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15266 
15267   format %{ $$template
15268     if (UseFastStosb) {
15269        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15270        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15271        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15272     } else if (UseXMMForObjInit) {
15273        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15274        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15275        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15276        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15277        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15278        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15279        $$emit$$"add     0x40,rax\n\t"
15280        $$emit$$"# L_zero_64_bytes:\n\t"
15281        $$emit$$"sub     0x8,rcx\n\t"
15282        $$emit$$"jge     L_loop\n\t"
15283        $$emit$$"add     0x4,rcx\n\t"
15284        $$emit$$"jl      L_tail\n\t"
15285        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15286        $$emit$$"add     0x20,rax\n\t"
15287        $$emit$$"sub     0x4,rcx\n\t"
15288        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15289        $$emit$$"add     0x4,rcx\n\t"
15290        $$emit$$"jle     L_end\n\t"
15291        $$emit$$"dec     rcx\n\t"
15292        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15293        $$emit$$"vmovq   xmm0,(rax)\n\t"
15294        $$emit$$"add     0x8,rax\n\t"
15295        $$emit$$"dec     rcx\n\t"
15296        $$emit$$"jge     L_sloop\n\t"
15297        $$emit$$"# L_end:\n\t"
15298     } else {
15299        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15300        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15301     }
15302   %}
15303   ins_encode %{
15304     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15305                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15306   %}
15307   ins_pipe(pipe_slow);
15308 %}
15309 
15310 // Small constant length ClearArray for AVX512 targets.
15311 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15312 %{
15313   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15314   match(Set dummy (ClearArray cnt base));

15315   ins_cost(100);
15316   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15317   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15318   ins_encode %{
15319    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15320   %}
15321   ins_pipe(pipe_slow);
15322 %}
15323 
15324 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15325                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15326 %{
15327   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15328   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15329   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15330 
15331   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15332   ins_encode %{
15333     __ string_compare($str1$$Register, $str2$$Register,
15334                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15335                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15336   %}
15337   ins_pipe( pipe_slow );
15338 %}
15339 

17146   effect(USE meth);
17147 
17148   ins_cost(300);
17149   format %{ "call_leaf,runtime " %}
17150   ins_encode(clear_avx, Java_To_Runtime(meth));
17151   ins_pipe(pipe_slow);
17152 %}
17153 
17154 // Call runtime without safepoint and with vector arguments
17155 instruct CallLeafDirectVector(method meth)
17156 %{
17157   match(CallLeafVector);
17158   effect(USE meth);
17159 
17160   ins_cost(300);
17161   format %{ "call_leaf,vector " %}
17162   ins_encode(Java_To_Runtime(meth));
17163   ins_pipe(pipe_slow);
17164 %}
17165 
17166 // Call runtime without safepoint
17167 instruct CallLeafNoFPDirect(method meth)
17168 %{

17169   match(CallLeafNoFP);
17170   effect(USE meth);
17171 
17172   ins_cost(300);
17173   format %{ "call_leaf_nofp,runtime " %}
17174   ins_encode(clear_avx, Java_To_Runtime(meth));
17175   ins_pipe(pipe_slow);
17176 %}
17177 
17178 // Return Instruction
17179 // Remove the return address & jump to it.
17180 // Notice: We always emit a nop after a ret to make sure there is room
17181 // for safepoint patching
17182 instruct Ret()
17183 %{
17184   match(Return);
17185 
17186   format %{ "ret" %}
17187   ins_encode %{
17188     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
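
A hedged note on the new branch: returning 3 for CallLeafNoFPInDirect is consistent with an indirect call through a general register, presumably encoded as shown; the 13-byte path below it is unchanged from the old version.

//   callq *reg                    -> REX (1) + 0xFF (1) + ModRM (1) = 3 bytes   (presumed encoding)
//   movq r10, imm64 ; callq *r10  -> 10 + 3                         = 13 bytes  (unchanged path)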
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1868     st->print("\n\t");
 1869     st->print("# stack alignment check");
 1870 #endif
 1871   }
 1872   if (C->stub_function() != nullptr) {
 1873     st->print("\n\t");
 1874     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1875     st->print("\n\t");
 1876     st->print("je      fast_entry\t");
 1877     st->print("\n\t");
 1878     st->print("call    #nmethod_entry_barrier_stub\t");
 1879     st->print("\n\tfast_entry:");
 1880   }
 1881   st->cr();
 1882 }
 1883 #endif
 1884 
 1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1886   Compile* C = ra_->C;
 1887 
 1888   __ verified_entry(C);





 1889 
 1890   if (ra_->C->stub_function() == nullptr) {
 1891     __ entry_barrier();







 1892   }
 1893 
 1894   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1895     __ bind(*_verified_entry);
 1896   }
 1897 
 1898   C->output()->set_frame_complete(__ offset());
 1899 
 1900   if (C->has_mach_constant_base_node()) {
 1901     // NOTE: We set the table base offset here because users might be
 1902     // emitted before MachConstantBaseNode.
 1903     ConstantTable& constant_table = C->output()->constant_table();
 1904     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1905   }
 1906 }
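
For comparison with the old version shown earlier, a rough sketch of what the single verified_entry(C) call replaces, based only on the removed lines in the left-hand pane (exactly how much of this verified_entry(C) performs internally is an assumption here):

//   int framesize = C->output()->frame_size_in_bytes();
//   int bangsize  = C->output()->bang_size_in_bytes();
//   ... optional clinit barrier via mov_metadata + clinit_barrier ...
//   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0,
//                     false, C->stub_function() != nullptr);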
 1907 





 1908 
 1909 int MachPrologNode::reloc() const
 1910 {
 1911   return 0; // a large enough number
 1912 }
 1913 
 1914 //=============================================================================
 1915 #ifndef PRODUCT
 1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1917 {
 1918   Compile* C = ra_->C;
 1919   if (generate_vzeroupper(C)) {
 1920     st->print("vzeroupper");
 1921     st->cr(); st->print("\t");
 1922   }
 1923 
 1924   int framesize = C->output()->frame_size_in_bytes();
 1925   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1926   // Remove word for return adr already pushed
 1927   // and RBP

 1935   st->print_cr("popq    rbp");
 1936   if (do_polling() && C->is_method_compilation()) {
 1937     st->print("\t");
 1938     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1939                  "ja      #safepoint_stub\t"
 1940                  "# Safepoint: poll for GC");
 1941   }
 1942 }
 1943 #endif
 1944 
 1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1946 {
 1947   Compile* C = ra_->C;
 1948 
 1949   if (generate_vzeroupper(C)) {
 1950     // Clear upper bits of YMM registers when current compiled code uses
 1951     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1952     __ vzeroupper();
 1953   }
 1954 
 1955   // Subtract two words to account for return address and rbp
 1956   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1957   __ remove_frame(initial_framesize, C->needs_stack_repair());










 1958 
 1959   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1960     __ reserved_stack_check();
 1961   }
 1962 
 1963   if (do_polling() && C->is_method_compilation()) {
 1964     Label dummy_label;
 1965     Label* code_stub = &dummy_label;
 1966     if (!C->output()->in_scratch_emit_size()) {
 1967       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1968       C->output()->add_stub(stub);
 1969       code_stub = &stub->entry();
 1970     }
 1971     __ relocate(relocInfo::poll_return_type);
 1972     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1973   }
 1974 }
 1975 






 1976 int MachEpilogNode::reloc() const
 1977 {
 1978   return 2; // a large enough number
 1979 }
 1980 
 1981 const Pipeline* MachEpilogNode::pipeline() const
 1982 {
 1983   return MachNode::pipeline_class();
 1984 }
 1985 
 1986 //=============================================================================
 1987 
 1988 enum RC {
 1989   rc_bad,
 1990   rc_int,
 1991   rc_kreg,
 1992   rc_float,
 1993   rc_stack
 1994 };
 1995 

 2557 #endif
 2558 
 2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2560 {
 2561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2562   int reg = ra_->get_encode(this);
 2563 
 2564   __ lea(as_Register(reg), Address(rsp, offset));
 2565 }
 2566 
 2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2568 {
 2569   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2570   if (ra_->get_encode(this) > 15) {
 2571     return (offset < 0x80) ? 6 : 9; // REX2
 2572   } else {
 2573     return (offset < 0x80) ? 5 : 8; // REX
 2574   }
 2575 }
 2576 
 2577 //=============================================================================
 2578 #ifndef PRODUCT
 2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2580 {
 2581   st->print_cr("MachVEPNode");
 2582 }
 2583 #endif
 2584 
 2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   CodeBuffer* cbuf = masm->code();
 2588   uint insts_size = cbuf->insts_size();
 2589   if (!_verified) {
 2590     __ ic_check(1);
 2591   } else {
 2592     // TODO 8284443 Avoid creation of temporary frame
 2593     if (ra_->C->stub_function() == nullptr) {
 2594       __ verified_entry(ra_->C, 0);
 2595       __ entry_barrier();
 2596       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2597       __ remove_frame(initial_framesize, false);
 2598     }
 2599     // Unpack inline type args passed as oop and then jump to
 2600     // the verified entry point (skipping the unverified entry).
 2601     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2602     // Emit code for verified entry and save increment for stack repair on return
 2603     __ verified_entry(ra_->C, sp_inc);
 2604     if (Compile::current()->output()->in_scratch_emit_size()) {
 2605       Label dummy_verified_entry;
 2606       __ jmp(dummy_verified_entry);
 2607     } else {
 2608       __ jmp(*_verified_entry);
 2609     }
 2610   }
 2611   /* WARNING: these NOPs are critical so that the verified entry point is properly
 2612      4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2613   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2614   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2615   if (nops_cnt > 0) {
 2616     __ nop(nops_cnt);
 2617   }
 2618 }
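
A quick worked example of the NOP padding arithmetic above (hypothetical sizes):

// If the code emitted since insts_size is 13 bytes: (13 & 0x3) == 1, so nops_cnt = 4 - 1 = 3
// and three NOPs round the next entry up to a 4-byte boundary (16).
// If it is already a multiple of 4: nops_cnt = 4, the "&= 0x3" masks it back to 0, and no NOPs are emitted.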
 2619 
 2620 //=============================================================================
 2621 #ifndef PRODUCT
 2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2623 {
 2624   if (UseCompressedClassPointers) {
 2625     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2626     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2627   } else {
 2628     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2629     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2630   }
 2631   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2632 }
 2633 #endif
 2634 
 2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2636 {
 2637   __ ic_check(InteriorEntryAlignment);
 2638 }
 2639 






 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2653 }
 2654 
 2655 #ifdef ASSERT
 2656 static bool is_ndd_demotable(const MachNode* mdef) {
 2657   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2658 }
 2659 #endif

 4595     }
 4596     __ post_call_nop();
 4597   %}
 4598 
 4599   enc_class Java_Dynamic_Call(method meth) %{
 4600     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4601     __ post_call_nop();
 4602   %}
 4603 
 4604   enc_class call_epilog %{
 4605     if (VerifyStackAtCalls) {
 4606       // Check that stack depth is unchanged: find majik cookie on stack
 4607       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4608       Label L;
 4609       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4610       __ jccb(Assembler::equal, L);
 4611       // Die if stack mismatch
 4612       __ int3();
 4613       __ bind(L);
 4614     }
 4615     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4616       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4617       // Search for the corresponding projection, get the register and emit code that initializes it.
 4618       uint con = (tf()->range_cc()->cnt() - 1);
 4619       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4620         ProjNode* proj = fast_out(i)->as_Proj();
 4621         if (proj->_con == con) {
 4622           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4623           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4624           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4625           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4626           __ testq(rax, rax);
 4627           __ setb(Assembler::notZero, toReg);
 4628           __ movzbl(toReg, toReg);
 4629           if (reg->is_stack()) {
 4630             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4631             __ movq(Address(rsp, st_off), toReg);
 4632           }
 4633           break;
 4634         }
 4635       }
 4636       if (return_value_is_used()) {
 4637         // An inline type is returned as fields in multiple registers.
 4638         // Rax either contains an oop if the inline type is buffered or a pointer
 4639         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4640         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4641         // rax &= (rax & 1) - 1
 4642         __ movptr(rscratch1, rax);
 4643         __ andptr(rscratch1, 0x1);
 4644         __ subptr(rscratch1, 0x1);
 4645         __ andptr(rax, rscratch1);
 4646       }
 4647     }
 4648   %}
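
A worked example of the rax masking in the block above (illustrative values only):

//   buffered oop returned:  bit 0 of rax clear -> (rax & 1) - 1 = -1 (all ones) -> rax is unchanged
//   scalarized return:      rax = InlineKlass* | 1, bit 0 set -> (rax & 1) - 1 = 0 -> rax becomes 0
// so the caller can null-check rax and, when non-null, use it directly as the buffered oop.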
 4649 
 4650 %}
 4651 
 4652 //----------FRAME--------------------------------------------------------------
 4653 // Definition of frame structure and management information.
 4654 //
 4655 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4656 //                             |   (to get allocators register number
 4657 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4658 //  r   CALLER     |        |
 4659 //  o     |        +--------+      pad to even-align allocators stack-slot
 4660 //  w     V        |  pad0  |        numbers; owned by CALLER
 4661 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4662 //  h     ^        |   in   |  5
 4663 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4664 //  |     |        |        |  3
 4665 //  |     |        +--------+
 4666 //  V     |        | old out|      Empty on Intel, window on Sparc
 4667 //        |    old |preserve|      Must be even aligned.

 5790   %}
 5791 %}
 5792 
 5793 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5794 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5795 %{
 5796   constraint(ALLOC_IN_RC(ptr_reg));
 5797   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5798   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5799 
 5800   op_cost(10);
 5801   format %{"[$reg + $off + $idx << $scale]" %}
 5802   interface(MEMORY_INTER) %{
 5803     base($reg);
 5804     index($idx);
 5805     scale($scale);
 5806     disp($off);
 5807   %}
 5808 %}
 5809 
 5810 // Indirect Narrow Oop Operand
 5811 operand indCompressedOop(rRegN reg) %{
 5812   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5813   constraint(ALLOC_IN_RC(ptr_reg));
 5814   match(DecodeN reg);
 5815 
 5816   op_cost(10);
 5817   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5818   interface(MEMORY_INTER) %{
 5819     base(0xc); // R12
 5820     index($reg);
 5821     scale(0x3);
 5822     disp(0x0);
 5823   %}
 5824 %}
 5825 
 5826 // Indirect Narrow Oop Plus Offset Operand
 5827 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5828 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5829 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5830   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5831   constraint(ALLOC_IN_RC(ptr_reg));
 5832   match(AddP (DecodeN reg) off);
 5833 
 5834   op_cost(10);
 5835   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5836   interface(MEMORY_INTER) %{
 5837     base(0xc); // R12
 5838     index($reg);
 5839     scale(0x3);
 5840     disp($off);
 5841   %}
 5842 %}
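
To make the compressed-oop addressing concrete, a hedged decode example (hypothetical values, assuming CompressedOops::shift() == 3 and the heap base kept in r12):

// With base 0x0000000800000000 in r12, narrow oop n = 0x00123456 and field offset off:
//   [R12 + n << 3 + off] = 0x0000000800000000 + (0x00123456 << 3) + off
// i.e. the decoded oop plus the field offset in a single addressing mode, which is why r12
// stays reserved even when CompressedOops::base() == nullptr.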
 5843 
 5844 // Indirect Memory Operand
 5845 operand indirectNarrow(rRegN reg)

 6282 %}
 6283 
 6284 // Replaces legVec during post-selection cleanup. See above.
 6285 operand legVecZ() %{
 6286   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6287   match(VecZ);
 6288 
 6289   format %{ %}
 6290   interface(REG_INTER);
 6291 %}
 6292 
 6293 //----------OPERAND CLASSES----------------------------------------------------
 6294 // Operand Classes are groups of operands that are used to simplify
 6295 // instruction definitions by not requiring the AD writer to specify separate
 6296 // instructions for every form of operand when the instruction accepts
 6297 // multiple operand types with the same basic encoding and format.  The classic
 6298 // case of this is memory operands.
 6299 
 6300 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6301                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6302                indCompressedOop, indCompressedOopOffset,
 6303                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6304                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6305                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6306 
 6307 //----------PIPELINE-----------------------------------------------------------
 6308 // Rules which define the behavior of the target architecture's pipeline.
 6309 pipeline %{
 6310 
 6311 //----------ATTRIBUTES---------------------------------------------------------
 6312 attributes %{
 6313   variable_size_instructions;        // Variable-size instructions
 6314   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6315   instruction_unit_size = 1;         // An instruction is 1 byte long
 6316   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6317   instruction_fetch_units = 1;       // of 16 bytes
 6318 %}
 6319 
 6320 //----------RESOURCES----------------------------------------------------------
 6321 // Resources are the functional units available to the machine
 6322 

 8880   format %{ "MEMBAR-storestore (empty encoding)" %}
 8881   ins_encode( );
 8882   ins_pipe(empty);
 8883 %}
 8884 
 8885 //----------Move Instructions--------------------------------------------------
 8886 
 8887 instruct castX2P(rRegP dst, rRegL src)
 8888 %{
 8889   match(Set dst (CastX2P src));
 8890 
 8891   format %{ "movq    $dst, $src\t# long->ptr" %}
 8892   ins_encode %{
 8893     if ($dst$$reg != $src$$reg) {
 8894       __ movptr($dst$$Register, $src$$Register);
 8895     }
 8896   %}
 8897   ins_pipe(ialu_reg_reg); // XXX
 8898 %}
 8899 
 8900 instruct castI2N(rRegN dst, rRegI src)
 8901 %{
 8902   match(Set dst (CastI2N src));
 8903 
 8904   format %{ "movq    $dst, $src\t# int -> narrow ptr" %}
 8905   ins_encode %{
 8906     if ($dst$$reg != $src$$reg) {
 8907       __ movl($dst$$Register, $src$$Register);
 8908     }
 8909   %}
 8910   ins_pipe(ialu_reg_reg); // XXX
 8911 %}
 8912 
 8913 instruct castN2X(rRegL dst, rRegN src)
 8914 %{
 8915   match(Set dst (CastP2X src));
 8916 
 8917   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8918   ins_encode %{
 8919     if ($dst$$reg != $src$$reg) {
 8920       __ movptr($dst$$Register, $src$$Register);
 8921     }
 8922   %}
 8923   ins_pipe(ialu_reg_reg); // XXX
 8924 %}
 8925 
 8926 instruct castP2X(rRegL dst, rRegP src)
 8927 %{
 8928   match(Set dst (CastP2X src));
 8929 
 8930   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8931   ins_encode %{
 8932     if ($dst$$reg != $src$$reg) {
 8933       __ movptr($dst$$Register, $src$$Register);
 8934     }
 8935   %}
 8936   ins_pipe(ialu_reg_reg); // XXX
 8937 %}
 8938 
 8939 // Convert oop into int for vector alignment masking
 8940 instruct convP2I(rRegI dst, rRegP src)
 8941 %{
 8942   match(Set dst (ConvL2I (CastP2X src)));
 8943 
 8944   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8945   ins_encode %{

15152   effect(DEF dst, USE src);
15153   ins_cost(100);
15154   format %{ "movd    $dst,$src\t# MoveI2F" %}
15155   ins_encode %{
15156     __ movdl($dst$$XMMRegister, $src$$Register);
15157   %}
15158   ins_pipe( pipe_slow );
15159 %}
15160 
15161 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15162   match(Set dst (MoveL2D src));
15163   effect(DEF dst, USE src);
15164   ins_cost(100);
15165   format %{ "movd    $dst,$src\t# MoveL2D" %}
15166   ins_encode %{
15167      __ movdq($dst$$XMMRegister, $src$$Register);
15168   %}
15169   ins_pipe( pipe_slow );
15170 %}
15171 
15172 
15173 // Fast clearing of an array
15174 // Small non-constant length ClearArray for non-AVX512 targets.
15175 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15176                   Universe dummy, rFlagsReg cr)
15177 %{
15178   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15179   match(Set dummy (ClearArray (Binary cnt base) val));
15180   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15181 
15182   format %{ $$template
15183     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15184     $$emit$$"jg      LARGE\n\t"
15185     $$emit$$"dec     rcx\n\t"
15186     $$emit$$"js      DONE\t# Zero length\n\t"
15187     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15188     $$emit$$"dec     rcx\n\t"
15189     $$emit$$"jge     LOOP\n\t"
15190     $$emit$$"jmp     DONE\n\t"
15191     $$emit$$"# LARGE:\n\t"
15192     if (UseFastStosb) {
15193        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15194        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15195     } else if (UseXMMForObjInit) {
15196        $$emit$$"movdq   $tmp, $val\n\t"
15197        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15198        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15199        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15200        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15201        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15202        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15203        $$emit$$"add     0x40,rax\n\t"
15204        $$emit$$"# L_zero_64_bytes:\n\t"
15205        $$emit$$"sub     0x8,rcx\n\t"
15206        $$emit$$"jge     L_loop\n\t"
15207        $$emit$$"add     0x4,rcx\n\t"
15208        $$emit$$"jl      L_tail\n\t"
15209        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15210        $$emit$$"add     0x20,rax\n\t"
15211        $$emit$$"sub     0x4,rcx\n\t"
15212        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15213        $$emit$$"add     0x4,rcx\n\t"
15214        $$emit$$"jle     L_end\n\t"
15215        $$emit$$"dec     rcx\n\t"
15216        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15217        $$emit$$"vmovq   xmm0,(rax)\n\t"
15218        $$emit$$"add     0x8,rax\n\t"
15219        $$emit$$"dec     rcx\n\t"
15220        $$emit$$"jge     L_sloop\n\t"
15221        $$emit$$"# L_end:\n\t"
15222     } else {
15223        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15224     }
15225     $$emit$$"# DONE"
15226   %}
15227   ins_encode %{
15228     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15229                  $tmp$$XMMRegister, false, false);
15230   %}
15231   ins_pipe(pipe_slow);
15232 %}
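
For reviewers, a hedged side-by-side of the changed clear_mem call, based only on the encode blocks visible in this diff (old forms in the left pane, new forms here):

//   old:  __ clear_mem(base, cnt, zero, tmp, is_large, knoreg_or_ktmp);
//   new:  __ clear_mem(base, cnt, val,  tmp, is_large, word_copy_only /*, ktmp in the EVEX variants */);
// i.e. the new ClearArray carries a fill value and a word_copy_only flag instead of always zeroing.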
15233 
15234 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15235                             Universe dummy, rFlagsReg cr)
15236 %{
15237   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15238   match(Set dummy (ClearArray (Binary cnt base) val));
15239   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15240 
15241   format %{ $$template
15242     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15243     $$emit$$"jg      LARGE\n\t"
15244     $$emit$$"dec     rcx\n\t"
15245     $$emit$$"js      DONE\t# Zero length\n\t"
15246     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15247     $$emit$$"dec     rcx\n\t"
15248     $$emit$$"jge     LOOP\n\t"
15249     $$emit$$"jmp     DONE\n\t"
15250     $$emit$$"# LARGE:\n\t"
15251     if (UseXMMForObjInit) {
15252        $$emit$$"movdq   $tmp, $val\n\t"
15253        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15254        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15255        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15256        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15257        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15258        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15259        $$emit$$"add     0x40,rax\n\t"
15260        $$emit$$"# L_zero_64_bytes:\n\t"
15261        $$emit$$"sub     0x8,rcx\n\t"
15262        $$emit$$"jge     L_loop\n\t"
15263        $$emit$$"add     0x4,rcx\n\t"
15264        $$emit$$"jl      L_tail\n\t"
15265        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15266        $$emit$$"add     0x20,rax\n\t"
15267        $$emit$$"sub     0x4,rcx\n\t"
15268        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15269        $$emit$$"add     0x4,rcx\n\t"
15270        $$emit$$"jle     L_end\n\t"
15271        $$emit$$"dec     rcx\n\t"
15272        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15273        $$emit$$"vmovq   xmm0,(rax)\n\t"
15274        $$emit$$"add     0x8,rax\n\t"
15275        $$emit$$"dec     rcx\n\t"
15276        $$emit$$"jge     L_sloop\n\t"
15277        $$emit$$"# L_end:\n\t"
15278     } else {
15279        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15280     }
15281     $$emit$$"# DONE"
15282   %}
15283   ins_encode %{
15284     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15285                  $tmp$$XMMRegister, false, true);
15286   %}
15287   ins_pipe(pipe_slow);
15288 %}
15289 
15290 // Small non-constant length ClearArray for AVX512 targets.
15291 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15292                        Universe dummy, rFlagsReg cr)
15293 %{
15294   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15295   match(Set dummy (ClearArray (Binary cnt base) val));
15296   ins_cost(125);
15297   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15298 
15299   format %{ $$template
15300     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15301     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15302     $$emit$$"jg      LARGE\n\t"
15303     $$emit$$"dec     rcx\n\t"
15304     $$emit$$"js      DONE\t# Zero length\n\t"
15305     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15306     $$emit$$"dec     rcx\n\t"
15307     $$emit$$"jge     LOOP\n\t"
15308     $$emit$$"jmp     DONE\n\t"
15309     $$emit$$"# LARGE:\n\t"
15310     if (UseFastStosb) {
15311        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15312        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15313     } else if (UseXMMForObjInit) {
15314        $$emit$$"mov     rdi,rax\n\t"
15315        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15316        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15317        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15325        $$emit$$"jl      L_tail\n\t"
15326        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15327        $$emit$$"add     0x20,rax\n\t"
15328        $$emit$$"sub     0x4,rcx\n\t"
15329        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15330        $$emit$$"add     0x4,rcx\n\t"
15331        $$emit$$"jle     L_end\n\t"
15332        $$emit$$"dec     rcx\n\t"
15333        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15334        $$emit$$"vmovq   xmm0,(rax)\n\t"
15335        $$emit$$"add     0x8,rax\n\t"
15336        $$emit$$"dec     rcx\n\t"
15337        $$emit$$"jge     L_sloop\n\t"
15338        $$emit$$"# L_end:\n\t"
15339     } else {
15340        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15341     }
15342     $$emit$$"# DONE"
15343   %}
15344   ins_encode %{
15345     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15346                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15347   %}
15348   ins_pipe(pipe_slow);
15349 %}
15350 
15351 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15352                                  Universe dummy, rFlagsReg cr)

15353 %{
15354   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15355   match(Set dummy (ClearArray (Binary cnt base) val));
15356   ins_cost(125);
15357   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15358 
15359   format %{ $$template
15360     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15361     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15362     $$emit$$"jg      LARGE\n\t"
15363     $$emit$$"dec     rcx\n\t"
15364     $$emit$$"js      DONE\t# Zero length\n\t"
15365     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15366     $$emit$$"dec     rcx\n\t"
15367     $$emit$$"jge     LOOP\n\t"
15368     $$emit$$"jmp     DONE\n\t"
15369     $$emit$$"# LARGE:\n\t"
15370     if (UseFastStosb) {
15371        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15372        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15373     } else if (UseXMMForObjInit) {
15374        $$emit$$"mov     rdi,rax\n\t"
15375        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15376        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15377        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15385        $$emit$$"jl      L_tail\n\t"
15386        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15387        $$emit$$"add     0x20,rax\n\t"
15388        $$emit$$"sub     0x4,rcx\n\t"
15389        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15390        $$emit$$"add     0x4,rcx\n\t"
15391        $$emit$$"jle     L_end\n\t"
15392        $$emit$$"dec     rcx\n\t"
15393        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15394        $$emit$$"vmovq   xmm0,(rax)\n\t"
15395        $$emit$$"add     0x8,rax\n\t"
15396        $$emit$$"dec     rcx\n\t"
15397        $$emit$$"jge     L_sloop\n\t"
15398        $$emit$$"# L_end:\n\t"
15399     } else {
15400        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15401     }
15402     $$emit$$"# DONE"
15403   %}
15404   ins_encode %{
15405     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15406                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15407   %}
15408   ins_pipe(pipe_slow);
15409 %}
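
// Unlike the UseAVX <= 2 flavors, the *_evex variants allocate a TEMP kReg (ktmp)
// and hand it to clear_mem(); presumably the AVX512 path uses it to finish the tail
// with a masked store rather than the scalar 8-byte loop shown in the debug format.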
15410 
15411 // Large non-constant length ClearArray for non-AVX512 targets.
15412 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15413                         Universe dummy, rFlagsReg cr)
15414 %{
15415   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15416   match(Set dummy (ClearArray (Binary cnt base) val));
15417   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15418 
15419   format %{ $$template
15420     if (UseFastStosb) {
15421        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15422        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15423     } else if (UseXMMForObjInit) {
15424        $$emit$$"movdq   $tmp, $val\n\t"
15425        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15426        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15427        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15428        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15429        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15430        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15431        $$emit$$"add     0x40,rax\n\t"
15432        $$emit$$"# L_zero_64_bytes:\n\t"
15433        $$emit$$"sub     0x8,rcx\n\t"
15434        $$emit$$"jge     L_loop\n\t"
15435        $$emit$$"add     0x4,rcx\n\t"
15436        $$emit$$"jl      L_tail\n\t"
15437        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15438        $$emit$$"add     0x20,rax\n\t"
15439        $$emit$$"sub     0x4,rcx\n\t"
15440        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15441        $$emit$$"add     0x4,rcx\n\t"
15442        $$emit$$"jle     L_end\n\t"
15443        $$emit$$"dec     rcx\n\t"
15444        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15445        $$emit$$"vmovq   xmm0,(rax)\n\t"
15446        $$emit$$"add     0x8,rax\n\t"
15447        $$emit$$"dec     rcx\n\t"
15448        $$emit$$"jge     L_sloop\n\t"
15449        $$emit$$"# L_end:\n\t"
15450     } else {
15451        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15452     }
15453   %}
15454   ins_encode %{
15455     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15456                  $tmp$$XMMRegister, true, false);
15457   %}
15458   ins_pipe(pipe_slow);
15459 %}
15460 
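// Large non-constant length ClearArray for non-AVX512 targets, word-copy-only variant.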
15461 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15462                                   Universe dummy, rFlagsReg cr)
15463 %{
15464   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15465   match(Set dummy (ClearArray (Binary cnt base) val));
15466   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15467 
15468   format %{ $$template
15469     if (UseXMMForObjInit) {
15470        $$emit$$"movdq   $tmp, $val\n\t"
15471        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15472        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15473        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15474        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15475        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15476        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15477        $$emit$$"add     0x40,rax\n\t"
15478        $$emit$$"# L_zero_64_bytes:\n\t"
15479        $$emit$$"sub     0x8,rcx\n\t"
15480        $$emit$$"jge     L_loop\n\t"
15481        $$emit$$"add     0x4,rcx\n\t"
15482        $$emit$$"jl      L_tail\n\t"
15483        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15484        $$emit$$"add     0x20,rax\n\t"
15485        $$emit$$"sub     0x4,rcx\n\t"
15486        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15487        $$emit$$"add     0x4,rcx\n\t"
15488        $$emit$$"jle     L_end\n\t"
15489        $$emit$$"dec     rcx\n\t"
15490        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15491        $$emit$$"vmovq   xmm0,(rax)\n\t"
15492        $$emit$$"add     0x8,rax\n\t"
15493        $$emit$$"dec     rcx\n\t"
15494        $$emit$$"jge     L_sloop\n\t"
15495        $$emit$$"# L_end:\n\t"
15496     } else {
15497        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15498     }
15499   %}
15500   ins_encode %{
15501     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15502                  $tmp$$XMMRegister, true, true);
15503   %}
15504   ins_pipe(pipe_slow);
15505 %}
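
// Note that the is_large() variants (this one, the one above, and the two _evex
// forms below) drop the "cmp InitArrayShortSize / jg LARGE" short path of the small
// variants and go straight to the bulk sequence: rep stosb/stosq or the 64-byte
// XMM loop.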
15506 
15507 // Large non-constant length ClearArray for AVX512 targets.
15508 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15509                              Universe dummy, rFlagsReg cr)
15510 %{
15511   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15512   match(Set dummy (ClearArray (Binary cnt base) val));
15513   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15514 
15515   format %{ $$template
15516     if (UseFastStosb) {
15517        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15518        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15519        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15520     } else if (UseXMMForObjInit) {
15521        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15522        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15523        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15524        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15525        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15526        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15527        $$emit$$"add     0x40,rax\n\t"
15528        $$emit$$"# L_zero_64_bytes:\n\t"
15529        $$emit$$"sub     0x8,rcx\n\t"
15530        $$emit$$"jge     L_loop\n\t"
15531        $$emit$$"add     0x4,rcx\n\t"
15532        $$emit$$"jl      L_tail\n\t"
15533        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15534        $$emit$$"add     0x20,rax\n\t"
15535        $$emit$$"sub     0x4,rcx\n\t"
15536        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15537        $$emit$$"add     0x4,rcx\n\t"
15538        $$emit$$"jle     L_end\n\t"
15539        $$emit$$"dec     rcx\n\t"
15540        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15541        $$emit$$"vmovq   xmm0,(rax)\n\t"
15542        $$emit$$"add     0x8,rax\n\t"
15543        $$emit$$"dec     rcx\n\t"
15544        $$emit$$"jge     L_sloop\n\t"
15545        $$emit$$"# L_end:\n\t"
15546     } else {
15547        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15548        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15549     }
15550   %}
15551   ins_encode %{
15552     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15553                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15554   %}
15555   ins_pipe(pipe_slow);
15556 %}
15557 
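// Large non-constant length ClearArray for AVX512 targets, word-copy-only variant.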
15558 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15559                                        Universe dummy, rFlagsReg cr)

15560 %{
15561   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15562   match(Set dummy (ClearArray (Binary cnt base) val));
15563   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15564 
15565   format %{ $$template
15566     if (UseFastStosb) {
15567        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15568        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15569        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15570     } else if (UseXMMForObjInit) {
15571        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15572        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15573        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15574        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15575        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15576        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15577        $$emit$$"add     0x40,rax\n\t"
15578        $$emit$$"# L_zero_64_bytes:\n\t"
15579        $$emit$$"sub     0x8,rcx\n\t"
15580        $$emit$$"jge     L_loop\n\t"
15581        $$emit$$"add     0x4,rcx\n\t"
15582        $$emit$$"jl      L_tail\n\t"
15583        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15584        $$emit$$"add     0x20,rax\n\t"
15585        $$emit$$"sub     0x4,rcx\n\t"
15586        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15587        $$emit$$"add     0x4,rcx\n\t"
15588        $$emit$$"jle     L_end\n\t"
15589        $$emit$$"dec     rcx\n\t"
15590        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15591        $$emit$$"vmovq   xmm0,(rax)\n\t"
15592        $$emit$$"add     0x8,rax\n\t"
15593        $$emit$$"dec     rcx\n\t"
15594        $$emit$$"jge     L_sloop\n\t"
15595        $$emit$$"# L_end:\n\t"
15596     } else {
15597        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15598        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15599     }
15600   %}
15601   ins_encode %{
15602     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15603                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15604   %}
15605   ins_pipe(pipe_slow);
15606 %}
15607 
15608 // Small constant length ClearArray for AVX512 targets.
15609 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15610 %{
15611   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15612             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15613   match(Set dummy (ClearArray (Binary cnt base) val));
15614   ins_cost(100);
15615   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15616   format %{ "clear_mem_imm $base, $cnt" %}
15617   ins_encode %{
15618     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15619   %}
15620   ins_pipe(pipe_slow);
15621 %}
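
// rep_stos_im hands clear_mem() an immediate count ($cnt$$constant) instead of rcx
// and passes no is_large/word_copy flags, so this is presumably a separate
// constant-length overload that can pick the store sequence (including any
// ktmp-masked tail) at compile time, which would explain the MaxVectorSize >= 32 /
// supports_avx512vl() predicate and the kReg temp.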
15622 
15623 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15624                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15625 %{
15626   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15627   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15628   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15629 
15630   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15631   ins_encode %{
15632     __ string_compare($str1$$Register, $str2$$Register,
15633                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15634                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15635   %}
15636   ins_pipe( pipe_slow );
15637 %}
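
// StrIntrinsicNode::LL tells string_compare() that both operands are byte[]
// (Latin-1) data, and knoreg stands in for "no opmask register": this flavor is
// only selected when supports_avx512vlbw() is false, so no kReg temp is available
// to pass.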
15638 

17445   effect(USE meth);
17446 
17447   ins_cost(300);
17448   format %{ "call_leaf,runtime " %}
17449   ins_encode(clear_avx, Java_To_Runtime(meth));
17450   ins_pipe(pipe_slow);
17451 %}
17452 
17453 // Call runtime without safepoint and with vector arguments
17454 instruct CallLeafDirectVector(method meth)
17455 %{
17456   match(CallLeafVector);
17457   effect(USE meth);
17458 
17459   ins_cost(300);
17460   format %{ "call_leaf,vector " %}
17461   ins_encode(Java_To_Runtime(meth));
17462   ins_pipe(pipe_slow);
17463 %}
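
// Note that CallLeafDirectVector encodes only Java_To_Runtime(meth): unlike the
// other leaf calls it omits the clear_avx prefix, presumably because its vector
// arguments have to stay live in the upper YMM/ZMM lanes across the vzeroupper
// that clear_avx would otherwise emit.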
17464 
17465 // Call runtime without safepoint
17466 // entry point is null, target holds the address to call
17467 instruct CallLeafNoFPInDirect(rRegP target)
17468 %{
17469   predicate(n->as_Call()->entry_point() == nullptr);
17470   match(CallLeafNoFP target);
17471 
17472   ins_cost(300);
17473   format %{ "call_leaf_nofp,runtime indirect " %}
17474   ins_encode %{
17475      __ call($target$$Register);
17476   %}
17477 
17478   ins_pipe(pipe_slow);
17479 %}
17480 
17481 // Call runtime without safepoint
17482 instruct CallLeafNoFPDirect(method meth)
17483 %{
17484   predicate(n->as_Call()->entry_point() != nullptr);
17485   match(CallLeafNoFP);
17486   effect(USE meth);
17487 
17488   ins_cost(300);
17489   format %{ "call_leaf_nofp,runtime " %}
17490   ins_encode(clear_avx, Java_To_Runtime(meth));
17491   ins_pipe(pipe_slow);
17492 %}
17493 
17494 // Return Instruction
17495 // Remove the return address & jump to it.
17496 // Notice: We always emit a nop after a ret to make sure there is room
17497 // for safepoint patching
17498 instruct Ret()
17499 %{
17500   match(Return);
17501 
17502   format %{ "ret" %}
17503   ins_encode %{
17504     __ ret(0);