src/hotspot/cpu/x86/x86.ad

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
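For reference, a minimal sketch of where these constants come from, assuming the usual encodings: a direct call is E8 + imm32 (5 bytes), vzeroupper is C5 F8 77 (3 bytes, the value clear_avx_size() adds when the instruction is emitted), and the dynamic call first loads the CompiledICData with a 10-byte movabs. The byte counts below are illustrative, not part of this change.

    constexpr int kVzeroupperBytes = 3;   // C5 F8 77, only when clear_avx_size() != 0
    constexpr int kCallRel32Bytes  = 5;   // E8 imm32; the return address points just past it
    constexpr int kMovImm64Bytes   = 10;  // REX.W B8+rd imm64, loads the IC data before the call
    static_assert(kCallRel32Bytes == 5, "static call: 5 bytes before clear_avx_size() is added");
    static_assert(kMovImm64Bytes + kCallRel32Bytes == 15, "dynamic call: 15 bytes before clear_avx_size()");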
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
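As a worked example of the padding computation (illustrative offsets; align_up_int is a hypothetical stand-in for align_up), skipping the vzeroupper and the call opcode byte means the 32-bit displacement that follows is what ends up 4-byte aligned and therefore patchable:

    constexpr int align_up_int(int x, int a) { return (x + a - 1) & -a; }
    // Node starts at offset 22, AVX state is cleared (3-byte vzeroupper), 1 call opcode byte:
    static_assert(align_up_int(22 + 3 + 1, 4) - (22 + 3 + 1) == 2, "2 bytes of padding");
    // Node starts at offset 24: 24 + 3 + 1 = 28 is already 4-byte aligned:
    static_assert(align_up_int(24 + 3 + 1, 4) - (24 + 3 + 1) == 0, "no padding needed");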
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
 1901   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
 1906     // NOTE: We set the table base offset here because users might be
 1907     // emitted before MachConstantBaseNode.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1936   // Remove word for return adr already pushed
 1937   // and RBP

 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1967   // Remove word for return adr already pushed
 1968   // and RBP
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 

 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
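A sketch of the lea encodings behind these size constants, based on my reading of the x86 instruction format rather than on this change:

    // lea r64, [rsp + disp]: REX.W + 8D + ModRM + SIB (an rsp base forces a SIB byte) + displacement.
    constexpr int kRexLeaDisp8  = 1 + 1 + 1 + 1 + 1;   // 5 bytes when the offset fits in a signed byte
    constexpr int kRexLeaDisp32 = kRexLeaDisp8 + 3;    // 8 bytes with a 4-byte displacement
    constexpr int kRex2Extra    = 1;                   // encodings above 15 need the 2-byte REX2 prefix
    static_assert(kRexLeaDisp8 == 5 && kRexLeaDisp32 == 8, "REX sizes");
    static_assert(kRexLeaDisp8 + kRex2Extra == 6 && kRexLeaDisp32 + kRex2Extra == 9, "REX2 sizes");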
 2602 

 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
 2611     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 static bool is_ndd_demotable(const MachNode* mdef) {
 2637   return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
 2638 }
 2639 
 2640 static bool is_ndd_demotable_commutative(const MachNode* mdef) {
 2641   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
 2642 }
 2643 
 2644 static bool is_demotion_candidate(const MachNode* mdef) {
 2645   return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
 2646 }
 2647 
 2648 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,

 4583     }
 4584     __ post_call_nop();
 4585   %}
 4586 
 4587   enc_class Java_Dynamic_Call(method meth) %{
 4588     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4589     __ post_call_nop();
 4590   %}
 4591 
 4592   enc_class call_epilog %{
 4593     if (VerifyStackAtCalls) {
 4594       // Check that stack depth is unchanged: find majik cookie on stack
 4595       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4596       Label L;
 4597       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4598       __ jccb(Assembler::equal, L);
 4599       // Die if stack mismatch
 4600       __ int3();
 4601       __ bind(L);
 4602     }
 4603   %}
 4604 
 4605 %}
 4606 
 4607 //----------FRAME--------------------------------------------------------------
 4608 // Definition of frame structure and management information.
 4609 //
 4610 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4611 //                             |   (to get allocators register number
 4612 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4613 //  r   CALLER     |        |
 4614 //  o     |        +--------+      pad to even-align allocators stack-slot
 4615 //  w     V        |  pad0  |        numbers; owned by CALLER
 4616 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4617 //  h     ^        |   in   |  5
 4618 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4619 //  |     |        |        |  3
 4620 //  |     |        +--------+
 4621 //  V     |        | old out|      Empty on Intel, window on Sparc
 4622 //        |    old |preserve|      Must be even aligned.

 5745   %}
 5746 %}
 5747 
 5748 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5749 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5750 %{
 5751   constraint(ALLOC_IN_RC(ptr_reg));
 5752   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5753   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5754 
 5755   op_cost(10);
 5756   format %{"[$reg + $off + $idx << $scale]" %}
 5757   interface(MEMORY_INTER) %{
 5758     base($reg);
 5759     index($idx);
 5760     scale($scale);
 5761     disp($off);
 5762   %}
 5763 %}
 5764 
 5765 // Indirect Narrow Oop Plus Offset Operand
 5766 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5767 // so we can't free r12 even when CompressedOops::base() == nullptr.
 5768 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5769   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5770   constraint(ALLOC_IN_RC(ptr_reg));
 5771   match(AddP (DecodeN reg) off);
 5772 
 5773   op_cost(10);
 5774   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5775   interface(MEMORY_INTER) %{
 5776     base(0xc); // R12
 5777     index($reg);
 5778     scale(0x3);
 5779     disp($off);
 5780   %}
 5781 %}
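A small sketch of the address arithmetic this operand describes, assuming heap-based compressed oops with shift 3 and CompressedOops::base() pinned in R12 (the values below are made up):

    constexpr unsigned long long decode_and_add(unsigned long long r12, unsigned int narrow, int off) {
      return r12 + (static_cast<unsigned long long>(narrow) << 3) + off;  // [R12 + $reg << 3 + $off]
    }
    static_assert(decode_and_add(0x800000000ULL, 0x10, 24) == 0x800000098ULL, "base + narrow*8 + disp");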
 5782 
 5783 // Indirect Memory Operand
 5784 operand indirectNarrow(rRegN reg)

 6221 %}
 6222 
 6223 // Replaces legVec during post-selection cleanup. See above.
 6224 operand legVecZ() %{
 6225   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6226   match(VecZ);
 6227 
 6228   format %{ %}
 6229   interface(REG_INTER);
 6230 %}
 6231 
 6232 //----------OPERAND CLASSES----------------------------------------------------
 6233 // Operand Classes are groups of operands that are used to simplify
 6234 // instruction definitions by not requiring the AD writer to specify separate
 6235 // instructions for every form of operand when the instruction accepts
 6236 // multiple operand types with the same basic encoding and format.  The classic
 6237 // case of this is memory operands.
 6238 
 6239 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6240                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6241                indCompressedOopOffset,
 6242                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6243                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6244                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6245 
 6246 //----------PIPELINE-----------------------------------------------------------
 6247 // Rules which define the behavior of the target architectures pipeline.
 6248 pipeline %{
 6249 
 6250 //----------ATTRIBUTES---------------------------------------------------------
 6251 attributes %{
 6252   variable_size_instructions;        // Variable size instructions
 6253   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6254   instruction_unit_size = 1;         // An instruction is 1 byte long
 6255   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6256   instruction_fetch_units = 1;       // of 16 bytes
 6257 %}
 6258 
 6259 //----------RESOURCES----------------------------------------------------------
 6260 // Resources are the functional units available to the machine
 6261 

 8819   format %{ "MEMBAR-storestore (empty encoding)" %}
 8820   ins_encode( );
 8821   ins_pipe(empty);
 8822 %}
 8823 
 8824 //----------Move Instructions--------------------------------------------------
 8825 
 8826 instruct castX2P(rRegP dst, rRegL src)
 8827 %{
 8828   match(Set dst (CastX2P src));
 8829 
 8830   format %{ "movq    $dst, $src\t# long->ptr" %}
 8831   ins_encode %{
 8832     if ($dst$$reg != $src$$reg) {
 8833       __ movptr($dst$$Register, $src$$Register);
 8834     }
 8835   %}
 8836   ins_pipe(ialu_reg_reg); // XXX
 8837 %}
 8838 
 8839 instruct castP2X(rRegL dst, rRegP src)
 8840 %{
 8841   match(Set dst (CastP2X src));
 8842 
 8843   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8844   ins_encode %{
 8845     if ($dst$$reg != $src$$reg) {
 8846       __ movptr($dst$$Register, $src$$Register);
 8847     }
 8848   %}
 8849   ins_pipe(ialu_reg_reg); // XXX
 8850 %}
 8851 
 8852 // Convert oop into int for vectors alignment masking
 8853 instruct convP2I(rRegI dst, rRegP src)
 8854 %{
 8855   match(Set dst (ConvL2I (CastP2X src)));
 8856 
 8857   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8858   ins_encode %{

15065   effect(DEF dst, USE src);
15066   ins_cost(100);
15067   format %{ "movd    $dst,$src\t# MoveI2F" %}
15068   ins_encode %{
15069     __ movdl($dst$$XMMRegister, $src$$Register);
15070   %}
15071   ins_pipe( pipe_slow );
15072 %}
15073 
15074 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15075   match(Set dst (MoveL2D src));
15076   effect(DEF dst, USE src);
15077   ins_cost(100);
15078   format %{ "movd    $dst,$src\t# MoveL2D" %}
15079   ins_encode %{
15080      __ movdq($dst$$XMMRegister, $src$$Register);
15081   %}
15082   ins_pipe( pipe_slow );
15083 %}
15084 

15085 // Fast clearing of an array
15086 // Small non-constant length ClearArray for non-AVX512 targets.
15087 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15088                   Universe dummy, rFlagsReg cr)
15089 %{
15090   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15091   match(Set dummy (ClearArray cnt base));
15092   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15093 
15094   format %{ $$template
15095     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15096     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15097     $$emit$$"jg      LARGE\n\t"
15098     $$emit$$"dec     rcx\n\t"
15099     $$emit$$"js      DONE\t# Zero length\n\t"
15100     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15101     $$emit$$"dec     rcx\n\t"
15102     $$emit$$"jge     LOOP\n\t"
15103     $$emit$$"jmp     DONE\n\t"
15104     $$emit$$"# LARGE:\n\t"
15105     if (UseFastStosb) {
15106        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15107        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15108     } else if (UseXMMForObjInit) {
15109        $$emit$$"mov     rdi,rax\n\t"
15110        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15111        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15112        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15120        $$emit$$"jl      L_tail\n\t"
15121        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15122        $$emit$$"add     0x20,rax\n\t"
15123        $$emit$$"sub     0x4,rcx\n\t"
15124        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15125        $$emit$$"add     0x4,rcx\n\t"
15126        $$emit$$"jle     L_end\n\t"
15127        $$emit$$"dec     rcx\n\t"
15128        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15129        $$emit$$"vmovq   xmm0,(rax)\n\t"
15130        $$emit$$"add     0x8,rax\n\t"
15131        $$emit$$"dec     rcx\n\t"
15132        $$emit$$"jge     L_sloop\n\t"
15133        $$emit$$"# L_end:\n\t"
15134     } else {
15135        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15136     }
15137     $$emit$$"# DONE"
15138   %}
15139   ins_encode %{
15140     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15141                  $tmp$$XMMRegister, false, knoreg);
15142   %}
15143   ins_pipe(pipe_slow);
15144 %}
15145 
15146 // Small non-constant length ClearArray for AVX512 targets.
15147 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15148                        Universe dummy, rFlagsReg cr)
15149 %{
15150   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15151   match(Set dummy (ClearArray cnt base));
15152   ins_cost(125);
15153   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15154 
15155   format %{ $$template
15156     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15157     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15158     $$emit$$"jg      LARGE\n\t"
15159     $$emit$$"dec     rcx\n\t"
15160     $$emit$$"js      DONE\t# Zero length\n\t"
15161     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15162     $$emit$$"dec     rcx\n\t"
15163     $$emit$$"jge     LOOP\n\t"
15164     $$emit$$"jmp     DONE\n\t"
15165     $$emit$$"# LARGE:\n\t"
15166     if (UseFastStosb) {
15167        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15168        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15169     } else if (UseXMMForObjInit) {
15170        $$emit$$"mov     rdi,rax\n\t"
15171        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15172        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15173        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15181        $$emit$$"jl      L_tail\n\t"
15182        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15183        $$emit$$"add     0x20,rax\n\t"
15184        $$emit$$"sub     0x4,rcx\n\t"
15185        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15186        $$emit$$"add     0x4,rcx\n\t"
15187        $$emit$$"jle     L_end\n\t"
15188        $$emit$$"dec     rcx\n\t"
15189        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15190        $$emit$$"vmovq   xmm0,(rax)\n\t"
15191        $$emit$$"add     0x8,rax\n\t"
15192        $$emit$$"dec     rcx\n\t"
15193        $$emit$$"jge     L_sloop\n\t"
15194        $$emit$$"# L_end:\n\t"
15195     } else {
15196        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15197     }
15198     $$emit$$"# DONE"
15199   %}
15200   ins_encode %{
15201     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15202                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15203   %}
15204   ins_pipe(pipe_slow);
15205 %}
15206 
15207 // Large non-constant length ClearArray for non-AVX512 targets.
15208 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15209                         Universe dummy, rFlagsReg cr)
15210 %{
15211   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15212   match(Set dummy (ClearArray cnt base));
15213   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15214 
15215   format %{ $$template
15216     if (UseFastStosb) {
15217        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15218        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15219        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15220     } else if (UseXMMForObjInit) {
15221        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15222        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15223        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15224        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15225        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15226        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15227        $$emit$$"add     0x40,rax\n\t"
15228        $$emit$$"# L_zero_64_bytes:\n\t"
15229        $$emit$$"sub     0x8,rcx\n\t"
15230        $$emit$$"jge     L_loop\n\t"
15231        $$emit$$"add     0x4,rcx\n\t"
15232        $$emit$$"jl      L_tail\n\t"
15233        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15234        $$emit$$"add     0x20,rax\n\t"
15235        $$emit$$"sub     0x4,rcx\n\t"
15236        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15237        $$emit$$"add     0x4,rcx\n\t"
15238        $$emit$$"jle     L_end\n\t"
15239        $$emit$$"dec     rcx\n\t"
15240        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15241        $$emit$$"vmovq   xmm0,(rax)\n\t"
15242        $$emit$$"add     0x8,rax\n\t"
15243        $$emit$$"dec     rcx\n\t"
15244        $$emit$$"jge     L_sloop\n\t"
15245        $$emit$$"# L_end:\n\t"
15246     } else {
15247        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15248        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15249     }
15250   %}
15251   ins_encode %{
15252     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15253                  $tmp$$XMMRegister, true, knoreg);
15254   %}
15255   ins_pipe(pipe_slow);
15256 %}
15257 
15258 // Large non-constant length ClearArray for AVX512 targets.
15259 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15260                              Universe dummy, rFlagsReg cr)
15261 %{
15262   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15263   match(Set dummy (ClearArray cnt base));
15264   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15265 
15266   format %{ $$template
15267     if (UseFastStosb) {
15268        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15269        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15270        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15271     } else if (UseXMMForObjInit) {
15272        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15273        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15274        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15275        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15276        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15277        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15278        $$emit$$"add     0x40,rax\n\t"
15279        $$emit$$"# L_zero_64_bytes:\n\t"
15280        $$emit$$"sub     0x8,rcx\n\t"
15281        $$emit$$"jge     L_loop\n\t"
15282        $$emit$$"add     0x4,rcx\n\t"
15283        $$emit$$"jl      L_tail\n\t"
15284        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15285        $$emit$$"add     0x20,rax\n\t"
15286        $$emit$$"sub     0x4,rcx\n\t"
15287        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15288        $$emit$$"add     0x4,rcx\n\t"
15289        $$emit$$"jle     L_end\n\t"
15290        $$emit$$"dec     rcx\n\t"
15291        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15292        $$emit$$"vmovq   xmm0,(rax)\n\t"
15293        $$emit$$"add     0x8,rax\n\t"
15294        $$emit$$"dec     rcx\n\t"
15295        $$emit$$"jge     L_sloop\n\t"
15296        $$emit$$"# L_end:\n\t"
15297     } else {
15298        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15299        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15300     }
15301   %}
15302   ins_encode %{
15303     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15304                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15305   %}
15306   ins_pipe(pipe_slow);
15307 %}
15308 
15309 // Small constant length ClearArray for AVX512 targets.
15310 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15311 %{
15312   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15313   match(Set dummy (ClearArray cnt base));

15314   ins_cost(100);
15315   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15316   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15317   ins_encode %{
15318    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15319   %}
15320   ins_pipe(pipe_slow);
15321 %}
15322 
15323 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15324                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15325 %{
15326   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15327   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15328   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15329 
15330   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15331   ins_encode %{
15332     __ string_compare($str1$$Register, $str2$$Register,
15333                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15334                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15335   %}
15336   ins_pipe( pipe_slow );
15337 %}
15338 

17145   effect(USE meth);
17146 
17147   ins_cost(300);
17148   format %{ "call_leaf,runtime " %}
17149   ins_encode(clear_avx, Java_To_Runtime(meth));
17150   ins_pipe(pipe_slow);
17151 %}
17152 
17153 // Call runtime without safepoint and with vector arguments
17154 instruct CallLeafDirectVector(method meth)
17155 %{
17156   match(CallLeafVector);
17157   effect(USE meth);
17158 
17159   ins_cost(300);
17160   format %{ "call_leaf,vector " %}
17161   ins_encode(Java_To_Runtime(meth));
17162   ins_pipe(pipe_slow);
17163 %}
17164 
17165 // Call runtime without safepoint
17166 instruct CallLeafNoFPDirect(method meth)
17167 %{

17168   match(CallLeafNoFP);
17169   effect(USE meth);
17170 
17171   ins_cost(300);
17172   format %{ "call_leaf_nofp,runtime " %}
17173   ins_encode(clear_avx, Java_To_Runtime(meth));
17174   ins_pipe(pipe_slow);
17175 %}
17176 
17177 // Return Instruction
17178 // Remove the return address & jump to it.
17179 // Notice: We always emit a nop after a ret to make sure there is room
17180 // for safepoint patching
17181 instruct Ret()
17182 %{
17183   match(Return);
17184 
17185   format %{ "ret" %}
17186   ins_encode %{
17187     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
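For context, a rough breakdown of these return-address offsets, assuming the usual encodings (not taken from this change):

    constexpr int kMovR10Imm64Bytes = 10;  // REX.W B8+rd imm64 (movq r10, #addr)
    constexpr int kCallR10Bytes     = 3;   // 41 FF D2 (REX.B + FF /2, callq (r10))
    static_assert(kMovR10Imm64Bytes + kCallR10Bytes == 13, "runtime call, before clear_avx_size()");
    static_assert(kCallR10Bytes == 3, "indirect leaf call when _entry_point == nullptr");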
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1868     st->print("\n\t");
 1869     st->print("# stack alignment check");
 1870 #endif
 1871   }
 1872   if (C->stub_function() != nullptr) {
 1873     st->print("\n\t");
 1874     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1875     st->print("\n\t");
 1876     st->print("je      fast_entry\t");
 1877     st->print("\n\t");
 1878     st->print("call    #nmethod_entry_barrier_stub\t");
 1879     st->print("\n\tfast_entry:");
 1880   }
 1881   st->cr();
 1882 }
 1883 #endif
 1884 
 1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1886   Compile* C = ra_->C;
 1887 
 1888   __ verified_entry(C);





 1889 
 1890   if (ra_->C->stub_function() == nullptr) {
 1891     __ entry_barrier();







 1892   }
 1893 
 1894   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1895     __ bind(*_verified_entry);
 1896   }
 1897 
 1898   C->output()->set_frame_complete(__ offset());
 1899 
 1900   if (C->has_mach_constant_base_node()) {
 1901     // NOTE: We set the table base offset here because users might be
 1902     // emitted before MachConstantBaseNode.
 1903     ConstantTable& constant_table = C->output()->constant_table();
 1904     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1905   }
 1906 }
 1907 





 1908 
 1909 int MachPrologNode::reloc() const
 1910 {
 1911   return 0; // a large enough number
 1912 }
 1913 
 1914 //=============================================================================
 1915 #ifndef PRODUCT
 1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1917 {
 1918   Compile* C = ra_->C;
 1919   if (generate_vzeroupper(C)) {
 1920     st->print("vzeroupper");
 1921     st->cr(); st->print("\t");
 1922   }
 1923 
 1924   int framesize = C->output()->frame_size_in_bytes();
 1925   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1926   // Remove word for return adr already pushed
 1927   // and RBP

 1935   st->print_cr("popq    rbp");
 1936   if (do_polling() && C->is_method_compilation()) {
 1937     st->print("\t");
 1938     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1939                  "ja      #safepoint_stub\t"
 1940                  "# Safepoint: poll for GC");
 1941   }
 1942 }
 1943 #endif
 1944 
 1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1946 {
 1947   Compile* C = ra_->C;
 1948 
 1949   if (generate_vzeroupper(C)) {
 1950     // Clear upper bits of YMM registers when current compiled code uses
 1951     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1952     __ vzeroupper();
 1953   }
 1954 
 1955   // Subtract two words to account for return address and rbp
 1956   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1957   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1958 
 1959   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1960     __ reserved_stack_check();
 1961   }
 1962 
 1963   if (do_polling() && C->is_method_compilation()) {
 1964     Label dummy_label;
 1965     Label* code_stub = &dummy_label;
 1966     if (!C->output()->in_scratch_emit_size()) {
 1967       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1968       C->output()->add_stub(stub);
 1969       code_stub = &stub->entry();
 1970     }
 1971     __ relocate(relocInfo::poll_return_type);
 1972     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1973   }
 1974 }
 1975 






 1976 int MachEpilogNode::reloc() const
 1977 {
 1978   return 2; // a large enough number
 1979 }
 1980 
 1981 const Pipeline* MachEpilogNode::pipeline() const
 1982 {
 1983   return MachNode::pipeline_class();
 1984 }
 1985 
 1986 //=============================================================================
 1987 
 1988 enum RC {
 1989   rc_bad,
 1990   rc_int,
 1991   rc_kreg,
 1992   rc_float,
 1993   rc_stack
 1994 };
 1995 

 2557 #endif
 2558 
 2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2560 {
 2561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2562   int reg = ra_->get_encode(this);
 2563 
 2564   __ lea(as_Register(reg), Address(rsp, offset));
 2565 }
 2566 
 2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2568 {
 2569   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2570   if (ra_->get_encode(this) > 15) {
 2571     return (offset < 0x80) ? 6 : 9; // REX2
 2572   } else {
 2573     return (offset < 0x80) ? 5 : 8; // REX
 2574   }
 2575 }
 2576 
 2577 //=============================================================================
 2578 #ifndef PRODUCT
 2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2580 {
 2581   st->print_cr("MachVEPNode");
 2582 }
 2583 #endif
 2584 
 2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   CodeBuffer* cbuf = masm->code();
 2588   uint insts_size = cbuf->insts_size();
 2589   if (!_verified) {
 2590     __ ic_check(1);
 2591   } else {
 2592     // TODO 8284443 Avoid creation of temporary frame
 2593     if (ra_->C->stub_function() == nullptr) {
 2594       __ verified_entry(ra_->C, 0);
 2595       __ entry_barrier();
 2596       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2597       __ remove_frame(initial_framesize, false);
 2598     }
 2599     // Unpack inline type args passed as oop and then jump to
 2600     // the verified entry point (skipping the unverified entry).
 2601     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2602     // Emit code for verified entry and save increment for stack repair on return
 2603     __ verified_entry(ra_->C, sp_inc);
 2604     if (Compile::current()->output()->in_scratch_emit_size()) {
 2605       Label dummy_verified_entry;
 2606       __ jmp(dummy_verified_entry);
 2607     } else {
 2608       __ jmp(*_verified_entry);
 2609     }
 2610   }
 2611   /* WARNING these NOPs are critical so that verified entry point is properly
 2612      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 2613   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2614   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2615   if (nops_cnt > 0) {
 2616     __ nop(nops_cnt);
 2617   }
 2618 }
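A worked example of the nop-count math above, with illustrative sizes for the unverified entry:

    constexpr int nops_for(int emitted_bytes) { return (4 - (emitted_bytes & 0x3)) & 0x3; }
    static_assert(nops_for(6) == 2, "6-byte unverified entry -> pad with 2 nops");
    static_assert(nops_for(8) == 0, "already 4-byte aligned -> no nops emitted");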
 2619 
 2620 //=============================================================================
 2621 #ifndef PRODUCT
 2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2623 {
 2624   if (UseCompressedClassPointers) {
 2625     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2626     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2627   } else {
 2628     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2629     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2630   }
 2631   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2632 }
 2633 #endif
 2634 
 2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2636 {
 2637   __ ic_check(InteriorEntryAlignment);
 2638 }
 2639 






 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable(const MachNode* mdef) {
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_commutative(const MachNode* mdef) {
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
 2653 }
 2654 
 2655 static bool is_demotion_candidate(const MachNode* mdef) {
 2656   return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
 2657 }
 2658 
 2659 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,

 4594     }
 4595     __ post_call_nop();
 4596   %}
 4597 
 4598   enc_class Java_Dynamic_Call(method meth) %{
 4599     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4600     __ post_call_nop();
 4601   %}
 4602 
 4603   enc_class call_epilog %{
 4604     if (VerifyStackAtCalls) {
 4605       // Check that stack depth is unchanged: find majik cookie on stack
 4606       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4607       Label L;
 4608       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4609       __ jccb(Assembler::equal, L);
 4610       // Die if stack mismatch
 4611       __ int3();
 4612       __ bind(L);
 4613     }
 4614     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4615       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4616       // Search for the corresponding projection, get the register and emit code that initializes it.
 4617       uint con = (tf()->range_cc()->cnt() - 1);
 4618       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4619         ProjNode* proj = fast_out(i)->as_Proj();
 4620         if (proj->_con == con) {
 4621           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4622           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4623           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4624           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4625           __ testq(rax, rax);
 4626           __ setb(Assembler::notZero, toReg);
 4627           __ movzbl(toReg, toReg);
 4628           if (reg->is_stack()) {
 4629             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4630             __ movq(Address(rsp, st_off), toReg);
 4631           }
 4632           break;
 4633         }
 4634       }
 4635       if (return_value_is_used()) {
 4636         // An inline type is returned as fields in multiple registers.
 4637         // Rax either contains an oop if the inline type is buffered or a pointer
 4638         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4639         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4640         // rax &= (rax & 1) - 1
 4641         __ movptr(rscratch1, rax);
 4642         __ andptr(rscratch1, 0x1);
 4643         __ subptr(rscratch1, 0x1);
 4644         __ andptr(rax, rscratch1);
 4645       }
 4646     }
 4647   %}
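A hand check of the masking trick described in the comment above (rax &= (rax & 1) - 1):

    constexpr unsigned long long mask_inline_type_ret(unsigned long long rax) {
      return rax & ((rax & 1) - 1);
    }
    static_assert(mask_inline_type_ret(0x7f0000001001ULL) == 0, "tagged InlineKlass pointer (low bit set) -> null");
    static_assert(mask_inline_type_ret(0x7f0000001000ULL) == 0x7f0000001000ULL, "buffered oop -> unchanged");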
 4648 
 4649 %}
 4650 
 4651 //----------FRAME--------------------------------------------------------------
 4652 // Definition of frame structure and management information.
 4653 //
 4654 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4655 //                             |   (to get allocators register number
 4656 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4657 //  r   CALLER     |        |
 4658 //  o     |        +--------+      pad to even-align allocators stack-slot
 4659 //  w     V        |  pad0  |        numbers; owned by CALLER
 4660 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4661 //  h     ^        |   in   |  5
 4662 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4663 //  |     |        |        |  3
 4664 //  |     |        +--------+
 4665 //  V     |        | old out|      Empty on Intel, window on Sparc
 4666 //        |    old |preserve|      Must be even aligned.

 5789   %}
 5790 %}
 5791 
 5792 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5793 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5794 %{
 5795   constraint(ALLOC_IN_RC(ptr_reg));
 5796   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5797   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5798 
 5799   op_cost(10);
 5800   format %{"[$reg + $off + $idx << $scale]" %}
 5801   interface(MEMORY_INTER) %{
 5802     base($reg);
 5803     index($idx);
 5804     scale($scale);
 5805     disp($off);
 5806   %}
 5807 %}
 5808 
 5809 // Indirect Narrow Oop Operand
 5810 operand indCompressedOop(rRegN reg) %{
 5811   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5812   constraint(ALLOC_IN_RC(ptr_reg));
 5813   match(DecodeN reg);
 5814 
 5815   op_cost(10);
 5816   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5817   interface(MEMORY_INTER) %{
 5818     base(0xc); // R12
 5819     index($reg);
 5820     scale(0x3);
 5821     disp(0x0);
 5822   %}
 5823 %}
 5824 
 5825 // Indirect Narrow Oop Plus Offset Operand
 5826 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5827 // so we can't free r12 even when CompressedOops::base() == nullptr.
 5828 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5829   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5830   constraint(ALLOC_IN_RC(ptr_reg));
 5831   match(AddP (DecodeN reg) off);
 5832 
 5833   op_cost(10);
 5834   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5835   interface(MEMORY_INTER) %{
 5836     base(0xc); // R12
 5837     index($reg);
 5838     scale(0x3);
 5839     disp($off);
 5840   %}
 5841 %}
 5842 
 5843 // Indirect Memory Operand
 5844 operand indirectNarrow(rRegN reg)

 6281 %}
 6282 
 6283 // Replaces legVec during post-selection cleanup. See above.
 6284 operand legVecZ() %{
 6285   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6286   match(VecZ);
 6287 
 6288   format %{ %}
 6289   interface(REG_INTER);
 6290 %}
 6291 
 6292 //----------OPERAND CLASSES----------------------------------------------------
 6293 // Operand Classes are groups of operands that are used to simplify
 6294 // instruction definitions by not requiring the AD writer to specify separate
 6295 // instructions for every form of operand when the instruction accepts
 6296 // multiple operand types with the same basic encoding and format.  The classic
 6297 // case of this is memory operands.
 6298 
 6299 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6300                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6301                indCompressedOop, indCompressedOopOffset,
 6302                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6303                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6304                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6305 
 6306 //----------PIPELINE-----------------------------------------------------------
 6307 // Rules which define the behavior of the target architectures pipeline.
 6308 pipeline %{
 6309 
 6310 //----------ATTRIBUTES---------------------------------------------------------
 6311 attributes %{
 6312   variable_size_instructions;        // Variable size instructions
 6313   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6314   instruction_unit_size = 1;         // An instruction is 1 byte long
 6315   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6316   instruction_fetch_units = 1;       // of 16 bytes
 6317 %}
 6318 
 6319 //----------RESOURCES----------------------------------------------------------
 6320 // Resources are the functional units available to the machine
 6321 

 8879   format %{ "MEMBAR-storestore (empty encoding)" %}
 8880   ins_encode( );
 8881   ins_pipe(empty);
 8882 %}
 8883 
 8884 //----------Move Instructions--------------------------------------------------
 8885 
 8886 instruct castX2P(rRegP dst, rRegL src)
 8887 %{
 8888   match(Set dst (CastX2P src));
 8889 
 8890   format %{ "movq    $dst, $src\t# long->ptr" %}
 8891   ins_encode %{
 8892     if ($dst$$reg != $src$$reg) {
 8893       __ movptr($dst$$Register, $src$$Register);
 8894     }
 8895   %}
 8896   ins_pipe(ialu_reg_reg); // XXX
 8897 %}
 8898 
 8899 instruct castI2N(rRegN dst, rRegI src)
 8900 %{
 8901   match(Set dst (CastI2N src));
 8902 
 8903   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8904   ins_encode %{
 8905     if ($dst$$reg != $src$$reg) {
 8906       __ movl($dst$$Register, $src$$Register);
 8907     }
 8908   %}
 8909   ins_pipe(ialu_reg_reg); // XXX
 8910 %}
 8911 
 8912 instruct castN2X(rRegL dst, rRegN src)
 8913 %{
 8914   match(Set dst (CastP2X src));
 8915 
 8916   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8917   ins_encode %{
 8918     if ($dst$$reg != $src$$reg) {
 8919       __ movptr($dst$$Register, $src$$Register);
 8920     }
 8921   %}
 8922   ins_pipe(ialu_reg_reg); // XXX
 8923 %}
 8924 
 8925 instruct castP2X(rRegL dst, rRegP src)
 8926 %{
 8927   match(Set dst (CastP2X src));
 8928 
 8929   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8930   ins_encode %{
 8931     if ($dst$$reg != $src$$reg) {
 8932       __ movptr($dst$$Register, $src$$Register);
 8933     }
 8934   %}
 8935   ins_pipe(ialu_reg_reg); // XXX
 8936 %}
 8937 
 8938 // Convert oop into int for vectors alignment masking
 8939 instruct convP2I(rRegI dst, rRegP src)
 8940 %{
 8941   match(Set dst (ConvL2I (CastP2X src)));
 8942 
 8943   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8944   ins_encode %{

15151   effect(DEF dst, USE src);
15152   ins_cost(100);
15153   format %{ "movd    $dst,$src\t# MoveI2F" %}
15154   ins_encode %{
15155     __ movdl($dst$$XMMRegister, $src$$Register);
15156   %}
15157   ins_pipe( pipe_slow );
15158 %}
15159 
15160 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15161   match(Set dst (MoveL2D src));
15162   effect(DEF dst, USE src);
15163   ins_cost(100);
15164   format %{ "movd    $dst,$src\t# MoveL2D" %}
15165   ins_encode %{
15166      __ movdq($dst$$XMMRegister, $src$$Register);
15167   %}
15168   ins_pipe( pipe_slow );
15169 %}
15170 
15171 
15172 // Fast clearing of an array
15173 // Small non-constant length ClearArray for non-AVX512 targets.
15174 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15175                   Universe dummy, rFlagsReg cr)
15176 %{
15177   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15178   match(Set dummy (ClearArray (Binary cnt base) val));
15179   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15180 
15181   format %{ $$template
15182     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15183     $$emit$$"jg      LARGE\n\t"
15184     $$emit$$"dec     rcx\n\t"
15185     $$emit$$"js      DONE\t# Zero length\n\t"
15186     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15187     $$emit$$"dec     rcx\n\t"
15188     $$emit$$"jge     LOOP\n\t"
15189     $$emit$$"jmp     DONE\n\t"
15190     $$emit$$"# LARGE:\n\t"
15191     if (UseFastStosb) {
15192        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15193        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15194     } else if (UseXMMForObjInit) {
15195        $$emit$$"movdq   $tmp, $val\n\t"
15196        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15197        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15198        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15199        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15200        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15201        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15202        $$emit$$"add     0x40,rax\n\t"
15203        $$emit$$"# L_zero_64_bytes:\n\t"
15204        $$emit$$"sub     0x8,rcx\n\t"
15205        $$emit$$"jge     L_loop\n\t"
15206        $$emit$$"add     0x4,rcx\n\t"
15207        $$emit$$"jl      L_tail\n\t"
15208        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15209        $$emit$$"add     0x20,rax\n\t"
15210        $$emit$$"sub     0x4,rcx\n\t"
15211        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15212        $$emit$$"add     0x4,rcx\n\t"
15213        $$emit$$"jle     L_end\n\t"
15214        $$emit$$"dec     rcx\n\t"
15215        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15216        $$emit$$"vmovq   xmm0,(rax)\n\t"
15217        $$emit$$"add     0x8,rax\n\t"
15218        $$emit$$"dec     rcx\n\t"
15219        $$emit$$"jge     L_sloop\n\t"
15220        $$emit$$"# L_end:\n\t"
15221     } else {
15222        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15223     }
15224     $$emit$$"# DONE"
15225   %}
15226   ins_encode %{
15227     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15228                  $tmp$$XMMRegister, false, false);
15229   %}
15230   ins_pipe(pipe_slow);
15231 %}
15232 
15233 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15234                             Universe dummy, rFlagsReg cr)
15235 %{
15236   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15237   match(Set dummy (ClearArray (Binary cnt base) val));
15238   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15239 
15240   format %{ $$template
15241     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15242     $$emit$$"jg      LARGE\n\t"
15243     $$emit$$"dec     rcx\n\t"
15244     $$emit$$"js      DONE\t# Zero length\n\t"
15245     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15246     $$emit$$"dec     rcx\n\t"
15247     $$emit$$"jge     LOOP\n\t"
15248     $$emit$$"jmp     DONE\n\t"
15249     $$emit$$"# LARGE:\n\t"
15250     if (UseXMMForObjInit) {
15251        $$emit$$"movdq   $tmp, $val\n\t"
15252        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15253        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15254        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15255        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15256        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15257        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15258        $$emit$$"add     0x40,rax\n\t"
15259        $$emit$$"# L_zero_64_bytes:\n\t"
15260        $$emit$$"sub     0x8,rcx\n\t"
15261        $$emit$$"jge     L_loop\n\t"
15262        $$emit$$"add     0x4,rcx\n\t"
15263        $$emit$$"jl      L_tail\n\t"
15264        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15265        $$emit$$"add     0x20,rax\n\t"
15266        $$emit$$"sub     0x4,rcx\n\t"
15267        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15268        $$emit$$"add     0x4,rcx\n\t"
15269        $$emit$$"jle     L_end\n\t"
15270        $$emit$$"dec     rcx\n\t"
15271        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15272        $$emit$$"vmovq   xmm0,(rax)\n\t"
15273        $$emit$$"add     0x8,rax\n\t"
15274        $$emit$$"dec     rcx\n\t"
15275        $$emit$$"jge     L_sloop\n\t"
15276        $$emit$$"# L_end:\n\t"
15277     } else {
15278        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15279     }
15280     $$emit$$"# DONE"
15281   %}
15282   ins_encode %{
15283     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15284                  $tmp$$XMMRegister, false, true);
15285   %}
15286   ins_pipe(pipe_slow);
15287 %}
15288 
15289 // Small non-constant length ClearArray for AVX512 targets.
15290 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15291                        Universe dummy, rFlagsReg cr)
15292 %{
15293   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15294   match(Set dummy (ClearArray (Binary cnt base) val));
15295   ins_cost(125);
15296   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15297 
15298   format %{ $$template
15299     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15300     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15301     $$emit$$"jg      LARGE\n\t"
15302     $$emit$$"dec     rcx\n\t"
15303     $$emit$$"js      DONE\t# Zero length\n\t"
15304     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15305     $$emit$$"dec     rcx\n\t"
15306     $$emit$$"jge     LOOP\n\t"
15307     $$emit$$"jmp     DONE\n\t"
15308     $$emit$$"# LARGE:\n\t"
15309     if (UseFastStosb) {
15310        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15311        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15312     } else if (UseXMMForObjInit) {
15313        $$emit$$"mov     rdi,rax\n\t"
15314        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15315        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15316        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15324        $$emit$$"jl      L_tail\n\t"
15325        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15326        $$emit$$"add     0x20,rax\n\t"
15327        $$emit$$"sub     0x4,rcx\n\t"
15328        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15329        $$emit$$"add     0x4,rcx\n\t"
15330        $$emit$$"jle     L_end\n\t"
15331        $$emit$$"dec     rcx\n\t"
15332        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15333        $$emit$$"vmovq   xmm0,(rax)\n\t"
15334        $$emit$$"add     0x8,rax\n\t"
15335        $$emit$$"dec     rcx\n\t"
15336        $$emit$$"jge     L_sloop\n\t"
15337        $$emit$$"# L_end:\n\t"
15338     } else {
15339        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15340     }
15341     $$emit$$"# DONE"
15342   %}
15343   ins_encode %{
15344     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15345                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15346   %}
15347   ins_pipe(pipe_slow);
15348 %}
15349 
15350 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15351                                  Universe dummy, rFlagsReg cr)

15352 %{
15353   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15354   match(Set dummy (ClearArray (Binary cnt base) val));
15355   ins_cost(125);
15356   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15357 
15358   format %{ $$template
15359     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15360     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15361     $$emit$$"jg      LARGE\n\t"
15362     $$emit$$"dec     rcx\n\t"
15363     $$emit$$"js      DONE\t# Zero length\n\t"
15364     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15365     $$emit$$"dec     rcx\n\t"
15366     $$emit$$"jge     LOOP\n\t"
15367     $$emit$$"jmp     DONE\n\t"
15368     $$emit$$"# LARGE:\n\t"
15369     if (UseFastStosb) {
15370        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15371        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15372     } else if (UseXMMForObjInit) {
15373        $$emit$$"mov     rdi,rax\n\t"
15374        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15375        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15376        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15384        $$emit$$"jl      L_tail\n\t"
15385        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15386        $$emit$$"add     0x20,rax\n\t"
15387        $$emit$$"sub     0x4,rcx\n\t"
15388        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15389        $$emit$$"add     0x4,rcx\n\t"
15390        $$emit$$"jle     L_end\n\t"
15391        $$emit$$"dec     rcx\n\t"
15392        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15393        $$emit$$"vmovq   xmm0,(rax)\n\t"
15394        $$emit$$"add     0x8,rax\n\t"
15395        $$emit$$"dec     rcx\n\t"
15396        $$emit$$"jge     L_sloop\n\t"
15397        $$emit$$"# L_end:\n\t"
15398     } else {
15399        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15400     }
15401     $$emit$$"# DONE"
15402   %}
15403   ins_encode %{
15404     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15405                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15406   %}
15407   ins_pipe(pipe_slow);
15408 %}
15409 
15410 // Large non-constant length ClearArray for non-AVX512 targets.
15411 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15412                         Universe dummy, rFlagsReg cr)
15413 %{
15414   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15415   match(Set dummy (ClearArray (Binary cnt base) val));
15416   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15417 
15418   format %{ $$template
15419     if (UseFastStosb) {
15420        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15421        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15422     } else if (UseXMMForObjInit) {
15423        $$emit$$"movdq   $tmp, $val\n\t"
15424        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15425        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15426        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15427        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15428        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15429        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15430        $$emit$$"add     0x40,rax\n\t"
15431        $$emit$$"# L_zero_64_bytes:\n\t"
15432        $$emit$$"sub     0x8,rcx\n\t"
15433        $$emit$$"jge     L_loop\n\t"
15434        $$emit$$"add     0x4,rcx\n\t"
15435        $$emit$$"jl      L_tail\n\t"
15436        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15437        $$emit$$"add     0x20,rax\n\t"
15438        $$emit$$"sub     0x4,rcx\n\t"
15439        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15440        $$emit$$"add     0x4,rcx\n\t"
15441        $$emit$$"jle     L_end\n\t"
15442        $$emit$$"dec     rcx\n\t"
15443        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15444        $$emit$$"vmovq   xmm0,(rax)\n\t"
15445        $$emit$$"add     0x8,rax\n\t"
15446        $$emit$$"dec     rcx\n\t"
15447        $$emit$$"jge     L_sloop\n\t"
15448        $$emit$$"# L_end:\n\t"
15449     } else {
15450        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15451     }
15452   %}
15453   ins_encode %{
15454     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15455                  $tmp$$XMMRegister, true, false);
15456   %}
15457   ins_pipe(pipe_slow);
15458 %}
15459 
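// Large non-constant length ClearArray for non-AVX512 targets that must use word-sized stores (word_copy_only()).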
15460 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15461                                   Universe dummy, rFlagsReg cr)
15462 %{
15463   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15464   match(Set dummy (ClearArray (Binary cnt base) val));
15465   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15466 
15467   format %{ $$template
15468     if (UseXMMForObjInit) {
15469        $$emit$$"movdq   $tmp, $val\n\t"
15470        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15471        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15472        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15473        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15474        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15475        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15476        $$emit$$"add     0x40,rax\n\t"
15477        $$emit$$"# L_zero_64_bytes:\n\t"
15478        $$emit$$"sub     0x8,rcx\n\t"
15479        $$emit$$"jge     L_loop\n\t"
15480        $$emit$$"add     0x4,rcx\n\t"
15481        $$emit$$"jl      L_tail\n\t"
15482        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15483        $$emit$$"add     0x20,rax\n\t"
15484        $$emit$$"sub     0x4,rcx\n\t"
15485        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15486        $$emit$$"add     0x4,rcx\n\t"
15487        $$emit$$"jle     L_end\n\t"
15488        $$emit$$"dec     rcx\n\t"
15489        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15490        $$emit$$"vmovq   xmm0,(rax)\n\t"
15491        $$emit$$"add     0x8,rax\n\t"
15492        $$emit$$"dec     rcx\n\t"
15493        $$emit$$"jge     L_sloop\n\t"
15494        $$emit$$"# L_end:\n\t"
15495     } else {
15496        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15497     }
15498   %}
15499   ins_encode %{
15500     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15501                  $tmp$$XMMRegister, true, true);
15502   %}
15503   ins_pipe(pipe_slow);
15504 %}
15505 
15506 // Large non-constant length ClearArray for AVX512 targets.
15507 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15508                              Universe dummy, rFlagsReg cr)
15509 %{
15510   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15511   match(Set dummy (ClearArray (Binary cnt base) val));
15512   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15513 
15514   format %{ $$template
15515     if (UseFastStosb) {
15516        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15517        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15518        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15519     } else if (UseXMMForObjInit) {
15520        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15521        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15522        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15523        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15524        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15525        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15526        $$emit$$"add     0x40,rax\n\t"
15527        $$emit$$"# L_zero_64_bytes:\n\t"
15528        $$emit$$"sub     0x8,rcx\n\t"
15529        $$emit$$"jge     L_loop\n\t"
15530        $$emit$$"add     0x4,rcx\n\t"
15531        $$emit$$"jl      L_tail\n\t"
15532        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15533        $$emit$$"add     0x20,rax\n\t"
15534        $$emit$$"sub     0x4,rcx\n\t"
15535        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15536        $$emit$$"add     0x4,rcx\n\t"
15537        $$emit$$"jle     L_end\n\t"
15538        $$emit$$"dec     rcx\n\t"
15539        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15540        $$emit$$"vmovq   xmm0,(rax)\n\t"
15541        $$emit$$"add     0x8,rax\n\t"
15542        $$emit$$"dec     rcx\n\t"
15543        $$emit$$"jge     L_sloop\n\t"
15544        $$emit$$"# L_end:\n\t"
15545     } else {
15546        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15547        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15548     }
15549   %}
15550   ins_encode %{
15551     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15552                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15553   %}
15554   ins_pipe(pipe_slow);
15555 %}
15556 
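// Large non-constant length ClearArray for AVX512 targets that must use word-sized stores (word_copy_only()).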
15557 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15558                                        Universe dummy, rFlagsReg cr)

15559 %{
15560   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15561   match(Set dummy (ClearArray (Binary cnt base) val));
15562   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15563 
15564   format %{ $$template
15565     if (UseFastStosb) {
15566        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15567        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15568        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15569     } else if (UseXMMForObjInit) {
15570        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15571        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15572        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15573        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15574        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15575        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15576        $$emit$$"add     0x40,rax\n\t"
15577        $$emit$$"# L_zero_64_bytes:\n\t"
15578        $$emit$$"sub     0x8,rcx\n\t"
15579        $$emit$$"jge     L_loop\n\t"
15580        $$emit$$"add     0x4,rcx\n\t"
15581        $$emit$$"jl      L_tail\n\t"
15582        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15583        $$emit$$"add     0x20,rax\n\t"
15584        $$emit$$"sub     0x4,rcx\n\t"
15585        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15586        $$emit$$"add     0x4,rcx\n\t"
15587        $$emit$$"jle     L_end\n\t"
15588        $$emit$$"dec     rcx\n\t"
15589        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15590        $$emit$$"vmovq   xmm0,(rax)\n\t"
15591        $$emit$$"add     0x8,rax\n\t"
15592        $$emit$$"dec     rcx\n\t"
15593        $$emit$$"jge     L_sloop\n\t"
15594        $$emit$$"# L_end:\n\t"
15595     } else {
15596        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15597        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15598     }
15599   %}
15600   ins_encode %{
15601     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15602                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15603   %}
15604   ins_pipe(pipe_slow);
15605 %}
15606 
15607 // Small constant length ClearArray for AVX512 targets.
15608 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15609 %{
15610   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15611             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15612   match(Set dummy (ClearArray (Binary cnt base) val));
15613   ins_cost(100);
15614   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15615   format %{ "clear_mem_imm $base, $cnt" %}
15616   ins_encode %{
15617     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15618   %}
15619   ins_pipe(pipe_slow);
15620 %}
15621 
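// Compare two Latin-1 (LL-encoded) byte[] strings; variant for targets without AVX-512 VL/BW.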
15622 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15623                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15624 %{
15625   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15626   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15627   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15628 
15629   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15630   ins_encode %{
15631     __ string_compare($str1$$Register, $str2$$Register,
15632                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15633                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15634   %}
15635   ins_pipe( pipe_slow );
15636 %}
15637 

17444   effect(USE meth);
17445 
17446   ins_cost(300);
17447   format %{ "call_leaf,runtime " %}
17448   ins_encode(clear_avx, Java_To_Runtime(meth));
17449   ins_pipe(pipe_slow);
17450 %}
17451 
17452 // Call runtime without safepoint and with vector arguments
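// Note: clear_avx (vzeroupper) is deliberately not emitted here, since it would
// destroy the vector registers that carry the call's arguments.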
17453 instruct CallLeafDirectVector(method meth)
17454 %{
17455   match(CallLeafVector);
17456   effect(USE meth);
17457 
17458   ins_cost(300);
17459   format %{ "call_leaf,vector " %}
17460   ins_encode(Java_To_Runtime(meth));
17461   ins_pipe(pipe_slow);
17462 %}
17463 
17464 // Call runtime without safepoint
17465 // Entry point is null; the target register holds the address to call.
17466 instruct CallLeafNoFPInDirect(rRegP target)
17467 %{
17468   predicate(n->as_Call()->entry_point() == nullptr);
17469   match(CallLeafNoFP target);
17470 
17471   ins_cost(300);
17472   format %{ "call_leaf_nofp,runtime indirect " %}
17473   ins_encode %{
17474      __ call($target$$Register);
17475   %}
17476 
17477   ins_pipe(pipe_slow);
17478 %}
17479 
17480 // Call runtime without safepoint
17481 instruct CallLeafNoFPDirect(method meth)
17482 %{
17483   predicate(n->as_Call()->entry_point() != nullptr);
17484   match(CallLeafNoFP);
17485   effect(USE meth);
17486 
17487   ins_cost(300);
17488   format %{ "call_leaf_nofp,runtime " %}
17489   ins_encode(clear_avx, Java_To_Runtime(meth));
17490   ins_pipe(pipe_slow);
17491 %}
17492 
17493 // Return Instruction
17494 // Remove the return address & jump to it.
17495 // Notice: We always emit a nop after a ret to make sure there is room
17496 // for safepoint patching
17497 instruct Ret()
17498 %{
17499   match(Return);
17500 
17501   format %{ "ret" %}
17502   ins_encode %{
17503     __ ret(0);