< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1864     st->print("\n\t");
 1865     st->print("# stack alignment check");
 1866 #endif
 1867   }
 1868   if (C->stub_function() != nullptr) {
 1869     st->print("\n\t");
 1870     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1871     st->print("\n\t");
 1872     st->print("je      fast_entry\t");
 1873     st->print("\n\t");
 1874     st->print("call    #nmethod_entry_barrier_stub\t");
 1875     st->print("\n\tfast_entry:");
 1876   }
 1877   st->cr();
 1878 }
 1879 #endif
 1880 
// Emit the method prologue: optional class-initialization barrier,
// verified entry (stack bang + frame setup), and constant-table setup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Until the holder class is fully initialized, divert callers through
    // the wrong-method stub (slow path) instead of entering this method.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Build the frame; the stack bang is only requested when the frame is
  // large enough to need it, and stub compiles are flagged separately.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1913 
// Size of the emitted prologue in bytes. Too configuration-dependent to
// compute analytically, so fall back to the generic (emit-and-measure) path.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 1919 
// Conservative estimate of relocation entries the prologue may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
 1924 
 1925 //=============================================================================
 1926 #ifndef PRODUCT
 1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1928 {
 1929   Compile* C = ra_->C;
 1930   if (generate_vzeroupper(C)) {
 1931     st->print("vzeroupper");
 1932     st->cr(); st->print("\t");
 1933   }
 1934 
 1935   int framesize = C->output()->frame_size_in_bytes();
 1936   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1937   // Remove word for return adr already pushed
 1938   // and RBP

 1946   st->print_cr("popq    rbp");
 1947   if (do_polling() && C->is_method_compilation()) {
 1948     st->print("\t");
 1949     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1950                  "ja      #safepoint_stub\t"
 1951                  "# Safepoint: poll for GC");
 1952   }
 1953 }
 1954 #endif
 1955 
// Emit the method epilogue: optional vzeroupper, frame teardown, reserved
// stack check, and the return-point safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-point safepoint poll. During scratch emission (used only to
    // measure size) no stub is created; the dummy label keeps the
    // encoding shape identical.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
 1996 
// Size of the emitted epilogue in bytes; computed generically
// (emit-and-measure) because it depends on too many settings.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 2002 
// Conservative estimate of relocation entries the epilogue may emit
// (e.g. the poll-return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
 2007 
// Use the generic pipeline description for the epilogue.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
 2012 
 2013 //=============================================================================
 2014 
// Coarse register classes used when classifying spill-copy source and
// destination locations.
enum RC {
  rc_bad,    // not a register / unassigned
  rc_int,    // general-purpose register
  rc_kreg,   // opmask ('k') register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
 2022 

 2584 #endif
 2585 
// Materialize the address of this node's stack slot (the lock box) into
// its allocated register: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
 2593 
 2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2595 {
 2596   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2597   if (ra_->get_encode(this) > 15) {
 2598     return (offset < 0x80) ? 6 : 9; // REX2
 2599   } else {
 2600     return (offset < 0x80) ? 5 : 8; // REX
 2601   }
 2602 }
 2603 











































 2604 //=============================================================================
 2605 #ifndef PRODUCT
 2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2607 {
 2608   if (UseCompressedClassPointers) {
 2609     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2610     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2611   } else {
 2612     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2613     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2614   }
 2615   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2616 }
 2617 #endif
 2618 
// Emit the unverified entry point: the inline-cache klass check, with the
// interior entry alignment applied.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
 2623 
// Size of the unverified entry point; alignment makes it variable, so use
// the generic (emit-and-measure) computation.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 2629 
 2630 
 2631 //=============================================================================
 2632 
// Vector calling convention is available whenever the Vector API support
// flag is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
 2636 
// True if the node's flags mark operand 1 as NDD-demotable (the matcher
// set Flag_ndd_demotable_opr1; presumably demotion to a legacy two-operand
// encoding — confirm against the flag's definition).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
 2640 
// True if the node's flags mark operand 2 as NDD-demotable
// (Flag_ndd_demotable_opr2 set by the matcher).
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
 2644 
#ifdef ASSERT
// Debug-only: true if either operand of the node is NDD-demotable.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
#endif

 4590     }
 4591     __ post_call_nop();
 4592   %}
 4593 
  // Dynamic (inline-cache) Java call: emits the IC call to the method,
  // followed by a post-call nop.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 4598 
  // Code emitted after every call. With VerifyStackAtCalls, checks that the
  // magic cookie placed below the outgoing area is still at its expected
  // offset, i.e. the call did not change stack depth; traps (int3) if not.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
 4611 
 4612 %}
 4613 
 4614 //----------FRAME--------------------------------------------------------------
 4615 // Definition of frame structure and management information.
 4616 //
 4617 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4618 //                             |   (to get allocators register number
 4619 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4620 //  r   CALLER     |        |
 4621 //  o     |        +--------+      pad to even-align allocators stack-slot
 4622 //  w     V        |  pad0  |        numbers; owned by CALLER
 4623 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4624 //  h     ^        |   in   |  5
 4625 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4626 //  |     |        |        |  3
 4627 //  |     |        +--------+
 4628 //  V     |        | old out|      Empty on Intel, window on Sparc
 4629 //        |    old |preserve|      Must be even aligned.

 5768   %}
 5769 %}
 5770 
 5771 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5772 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5773 %{
 5774   constraint(ALLOC_IN_RC(ptr_reg));
 5775   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5776   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5777 
 5778   op_cost(10);
 5779   format %{"[$reg + $off + $idx << $scale]" %}
 5780   interface(MEMORY_INTER) %{
 5781     base($reg);
 5782     index($idx);
 5783     scale($scale);
 5784     disp($off);
 5785   %}
 5786 %}
 5787 
















 5788 // Indirect Narrow Oop Plus Offset Operand
 5789 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5790 // we can't free r12 even with CompressedOops::base() == nullptr.
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when decoding is exactly r12 + (narrow_oop << 3).
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12 — the compressed-oop heap base register
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 5805 
 5806 // Indirect Memory Operand
 5807 operand indirectNarrow(rRegN reg)

 6277 %}
 6278 
 6279 // Replaces legVec during post-selection cleanup. See above.
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector operand restricted to the legacy-encodable register set.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6287 
 6288 //----------OPERAND CLASSES----------------------------------------------------
 6289 // Operand Classes are groups of operands that are used as to simplify
 6290 // instruction definitions by not requiring the AD writer to specify separate
 6291 // instructions for every form of operand when the instruction accepts
 6292 // multiple operand types with the same basic encoding and format.  The classic
 6293 // case of this is memory operands.
 6294 
// All supported memory addressing modes, including the narrow-oop
// (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6301 
 6302 //----------PIPELINE-----------------------------------------------------------
 6303 // Rules which define the behavior of the target architectures pipeline.
 6304 pipeline %{
 6305 
 6306 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
 6314 
 6315 //----------RESOURCES----------------------------------------------------------
 6316 // Resources are the functional units available to the machine
 6317 

 8888   format %{ "MEMBAR-storestore (empty encoding)" %}
 8889   ins_encode( );
 8890   ins_pipe(empty);
 8891 %}
 8892 
 8893 //----------Move Instructions--------------------------------------------------
 8894 
// Reinterpret a long as a pointer (no conversion, just a register move).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned src and dst the same
    // register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8907 


























// Reinterpret a pointer as a long (no conversion, just a register move).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned src and dst the same
    // register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8920 
 8921 // Convert oop into int for vectors alignment masking
 8922 instruct convP2I(rRegI dst, rRegP src)
 8923 %{
 8924   match(Set dst (ConvL2I (CastP2X src)));
 8925 
 8926   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8927   ins_encode %{

15175   effect(DEF dst, USE src);
15176   ins_cost(100);
15177   format %{ "movd    $dst,$src\t# MoveI2F" %}
15178   ins_encode %{
15179     __ movdl($dst$$XMMRegister, $src$$Register);
15180   %}
15181   ins_pipe( pipe_slow );
15182 %}
15183 
// Bitwise move of a long GPR into a double XMM register (MoveL2D).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15194 

15195 // Fast clearing of an array
15196 // Small non-constant length ClearArray for non-AVX512 targets.
15197 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15198                   Universe dummy, rFlagsReg cr)
15199 %{
15200   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15201   match(Set dummy (ClearArray cnt base));
15202   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































15203 
15204   format %{ $$template
15205     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15206     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15207     $$emit$$"jg      LARGE\n\t"
15208     $$emit$$"dec     rcx\n\t"
15209     $$emit$$"js      DONE\t# Zero length\n\t"
15210     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15211     $$emit$$"dec     rcx\n\t"
15212     $$emit$$"jge     LOOP\n\t"
15213     $$emit$$"jmp     DONE\n\t"
15214     $$emit$$"# LARGE:\n\t"
15215     if (UseFastStosb) {
15216        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15217        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15218     } else if (UseXMMForObjInit) {
15219        $$emit$$"mov     rdi,rax\n\t"
15220        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15221        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15222        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15230        $$emit$$"jl      L_tail\n\t"
15231        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15232        $$emit$$"add     0x20,rax\n\t"
15233        $$emit$$"sub     0x4,rcx\n\t"
15234        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15235        $$emit$$"add     0x4,rcx\n\t"
15236        $$emit$$"jle     L_end\n\t"
15237        $$emit$$"dec     rcx\n\t"
15238        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15239        $$emit$$"vmovq   xmm0,(rax)\n\t"
15240        $$emit$$"add     0x8,rax\n\t"
15241        $$emit$$"dec     rcx\n\t"
15242        $$emit$$"jge     L_sloop\n\t"
15243        $$emit$$"# L_end:\n\t"
15244     } else {
15245        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15246     }
15247     $$emit$$"# DONE"
15248   %}
15249   ins_encode %{
15250     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15251                  $tmp$$XMMRegister, false, knoreg);
15252   %}
15253   ins_pipe(pipe_slow);
15254 %}
15255 
15256 // Small non-constant length ClearArray for AVX512 targets.
15257 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15258                        Universe dummy, rFlagsReg cr)
15259 %{
15260   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15261   match(Set dummy (ClearArray cnt base));
15262   ins_cost(125);
15263   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15264 
15265   format %{ $$template
15266     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15267     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15268     $$emit$$"jg      LARGE\n\t"
15269     $$emit$$"dec     rcx\n\t"
15270     $$emit$$"js      DONE\t# Zero length\n\t"
15271     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15272     $$emit$$"dec     rcx\n\t"
15273     $$emit$$"jge     LOOP\n\t"
15274     $$emit$$"jmp     DONE\n\t"
15275     $$emit$$"# LARGE:\n\t"
15276     if (UseFastStosb) {
15277        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15278        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15279     } else if (UseXMMForObjInit) {
15280        $$emit$$"mov     rdi,rax\n\t"
15281        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15282        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15283        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15291        $$emit$$"jl      L_tail\n\t"
15292        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15293        $$emit$$"add     0x20,rax\n\t"
15294        $$emit$$"sub     0x4,rcx\n\t"
15295        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15296        $$emit$$"add     0x4,rcx\n\t"
15297        $$emit$$"jle     L_end\n\t"
15298        $$emit$$"dec     rcx\n\t"
15299        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15300        $$emit$$"vmovq   xmm0,(rax)\n\t"
15301        $$emit$$"add     0x8,rax\n\t"
15302        $$emit$$"dec     rcx\n\t"
15303        $$emit$$"jge     L_sloop\n\t"
15304        $$emit$$"# L_end:\n\t"
15305     } else {
15306        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15307     }
15308     $$emit$$"# DONE"
15309   %}
15310   ins_encode %{
15311     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15312                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15313   %}
15314   ins_pipe(pipe_slow);
15315 %}
15316 
15317 // Large non-constant length ClearArray for non-AVX512 targets.
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  // is_large(): the short-length inline path is skipped entirely.
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true; no opmask temp on non-AVX512 (knoreg).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15367 
15368 // Large non-constant length ClearArray for AVX512 targets.
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  // is_large(): the short-length inline path is skipped entirely.
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true; AVX512 variant supplies an opmask temp register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15418 
15419 // Small constant length ClearArray for AVX512 targets.
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Constant-length variant: the length is an immediate ($cnt$$constant);
  // requires AVX512VL and at least 32-byte vectors.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15432 
// String compare, both operands byte[] (StrIntrinsicNode::LL), for targets
// without AVX512VLBW (no opmask temp available/needed).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15448 

17286   effect(USE meth);
17287 
17288   ins_cost(300);
17289   format %{ "call_leaf,runtime " %}
17290   ins_encode(clear_avx, Java_To_Runtime(meth));
17291   ins_pipe(pipe_slow);
17292 %}
17293 
17294 // Call runtime without safepoint and with vector arguments
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Deliberately no clear_avx: vector arguments must stay live across the
  // call (matches MachCallRuntimeNode::ret_addr_offset, which skips
  // clear_avx_size() for Op_CallLeafVector).
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17305 
















17306 // Call runtime without safepoint
// Call runtime without safepoint (CallLeafNoFP variant).
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17317 
17318 // Return Instruction
17319 // Remove the return address & jump to it.
17320 // Notice: We always emit a nop after a ret to make sure there is room
17321 // for safepoint patching
17322 instruct Ret()
17323 %{
17324   match(Return);
17325 
17326   format %{ "ret" %}
17327   ins_encode %{
17328     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // Vector calls pass vector arguments, so no vzeroupper precedes them.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1869     st->print("\n\t");
 1870     st->print("# stack alignment check");
 1871 #endif
 1872   }
 1873   if (C->stub_function() != nullptr) {
 1874     st->print("\n\t");
 1875     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1876     st->print("\n\t");
 1877     st->print("je      fast_entry\t");
 1878     st->print("\n\t");
 1879     st->print("call    #nmethod_entry_barrier_stub\t");
 1880     st->print("\n\tfast_entry:");
 1881   }
 1882   st->cr();
 1883 }
 1884 #endif
 1885 
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Build the frame for this compilation unit.
  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Only regular nmethods get an nmethod entry barrier; stubs do not.
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the label that MachVEPNode jumps to after unpacking inline-type
    // args. Must not be bound during the scratch (size-measuring) pass.
    __ bind(*_verified_entry);
  }

  // Frame (and barrier, if any) is fully set up at this code offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1908 





 1909 
// Upper bound on the number of relocation entries the prolog emits;
// used when sizing the relocation buffer.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
 1914 
 1915 //=============================================================================
 1916 #ifndef PRODUCT
 1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1918 {
 1919   Compile* C = ra_->C;
 1920   if (generate_vzeroupper(C)) {
 1921     st->print("vzeroupper");
 1922     st->cr(); st->print("\t");
 1923   }
 1924 
 1925   int framesize = C->output()->frame_size_in_bytes();
 1926   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1927   // Remove word for return adr already pushed
 1928   // and RBP

 1936   st->print_cr("popq    rbp");
 1937   if (do_polling() && C->is_method_compilation()) {
 1938     st->print("\t");
 1939     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1940                  "ja      #safepoint_stub\t"
 1941                  "# Safepoint: poll for GC");
 1942   }
 1943 }
 1944 #endif
 1945 
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    // Method accesses the reserved stack zone: emit the reserved-stack check.
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-point safepoint poll. During the scratch (size-measuring)
    // pass no stub is allocated, so a local dummy label stands in.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
 1976 






// Upper bound on relocation entries emitted by the epilog
// (covers the poll_return relocation emitted above).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
 1981 
// Scheduling information: the epilog uses the generic MachNode pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
 1986 
 1987 //=============================================================================
 1988 
// Register classes for operand locations.
// NOTE(review): presumably consumed by the spill/copy generation code that
// follows this excerpt — confirm against its uses.
enum RC {
  rc_bad,    // not a valid register/stack location
  rc_int,    // general purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // float/vector (XMM) register
  rc_stack   // stack slot
};
 1996 

 2558 #endif
 2559 
 2560 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2561 {
 2562   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2563   int reg = ra_->get_encode(this);
 2564 
 2565   __ lea(as_Register(reg), Address(rsp, offset));
 2566 }
 2567 
 2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2569 {
 2570   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2571   if (ra_->get_encode(this) > 15) {
 2572     return (offset < 0x80) ? 6 : 9; // REX2
 2573   } else {
 2574     return (offset < 0x80) ? 5 : 8; // REX
 2575   }
 2576 }
 2577 
 2578 //=============================================================================
 2579 #ifndef PRODUCT
// Debug-only listing: just prints the node name (see emit() for the code shape).
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
 2584 #endif
 2585 
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size(); // remember start for the alignment fix-up below
  if (!_verified) {
    // Unverified entry point: emit the inline cache check.
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build a temporary frame (including the entry barrier) so that the
      // inline-type arguments can be unpacked, then tear it down again.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Size-measuring pass: the real label is unavailable, use a dummy.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
 2620 
 2621 //=============================================================================
 2622 #ifndef PRODUCT
 2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2624 {
 2625   if (UseCompressedClassPointers) {
 2626     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2627     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2628   } else {
 2629     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2630     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2631   }
 2632   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2633 }
 2634 #endif
 2635 
// Unverified entry point: emit the inline cache check, passing
// InteriorEntryAlignment as the required alignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
 2640 






 2641 
 2642 //=============================================================================
 2643 
// Vector calling convention is supported whenever Vector API support
// (EnableVectorSupport) is switched on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
 2647 
 2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2649   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2650 }
 2651 
 2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2653   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2654 }
 2655 
 2656 #ifdef ASSERT
 2657 static bool is_ndd_demotable(const MachNode* mdef) {
 2658   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2659 }
 2660 #endif

 4601     }
 4602     __ post_call_nop();
 4603   %}
 4604 
  enc_class Java_Dynamic_Call(method meth) %{
    // Inline-cache dispatched call to $meth; like the other Java call
    // encodings, it is followed by a post-call nop.
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 4609 
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          // Normalize the setb byte to a clean 0/1 in the full register.
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            // The null marker was allocated to a stack slot: spill it there.
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
 4655 
 4656 %}
 4657 
 4658 //----------FRAME--------------------------------------------------------------
 4659 // Definition of frame structure and management information.
 4660 //
 4661 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4662 //                             |   (to get allocators register number
 4663 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4664 //  r   CALLER     |        |
 4665 //  o     |        +--------+      pad to even-align allocators stack-slot
 4666 //  w     V        |  pad0  |        numbers; owned by CALLER
 4667 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4668 //  h     ^        |   in   |  5
 4669 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4670 //  |     |        |        |  3
 4671 //  |     |        +--------+
 4672 //  V     |        | old out|      Empty on Intel, window on Sparc
 4673 //        |    old |preserve|      Must be even aligned.

 5812   %}
 5813 %}
 5814 
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is provably non-negative (its long type's
  // low bound is >= 0), as the operand name requires.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 5831 
// Indirect Narrow Oop Operand
// Addresses a DecodeN directly as [R12 + narrow_oop << 3]; only valid when
// compressed oops use a shift of 3 (times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
 5847 
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Same addressing shape as indCompressedOop above, plus a 32-bit displacement.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 5865 
 5866 // Indirect Memory Operand
 5867 operand indirectNarrow(rRegN reg)

 6337 %}
 6338 
// Replaces legVec during post-selection cleanup. See above.
// VecZ restricted to the legacy vector register class.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6347 
 6348 //----------OPERAND CLASSES----------------------------------------------------
 6349 // Operand Classes are groups of operands that are used to simplify
 6350 // instruction definitions by not requiring the AD writer to specify separate
 6351 // instructions for every form of operand when the instruction accepts
 6352 // multiple operand types with the same basic encoding and format.  The classic
 6353 // case of this is memory operands.
 6354 
// All addressing forms accepted by instructions taking a generic "memory"
// operand, including the narrow-oop and compressed-oop variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6361 
 6362 //----------PIPELINE-----------------------------------------------------------
 6363 // Rules which define the behavior of the target architectures pipeline.
 6364 pipeline %{
 6365 
 6366 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
 6374 
 6375 //----------RESOURCES----------------------------------------------------------
 6376 // Resources are the functional units available to the machine
 6377 

 8948   format %{ "MEMBAR-storestore (empty encoding)" %}
 8949   ins_encode( );
 8950   ins_pipe(empty);
 8951 %}
 8952 
 8953 //----------Move Instructions--------------------------------------------------
 8954 
// Raw long -> pointer reinterpretation (no conversion of the bits).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8967 
 8968 instruct castI2N(rRegN dst, rRegI src)
 8969 %{
 8970   match(Set dst (CastI2N src));
 8971 
 8972   format %{ "movq    $dst, $src\t# int -> narrow ptr" %}
 8973   ins_encode %{
 8974     if ($dst$$reg != $src$$reg) {
 8975       __ movl($dst$$Register, $src$$Register);
 8976     }
 8977   %}
 8978   ins_pipe(ialu_reg_reg); // XXX
 8979 %}
 8980 
// Raw narrow pointer -> long reinterpretation (CastP2X on an rRegN input).
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8993 
// Raw pointer -> long reinterpretation (no conversion of the bits).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 9006 
 9007 // Convert oop into int for vectors alignment masking
 9008 instruct convP2I(rRegI dst, rRegP src)
 9009 %{
 9010   match(Set dst (ConvL2I (CastP2X src)));
 9011 
 9012   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9013   ins_encode %{

15261   effect(DEF dst, USE src);
15262   ins_cost(100);
15263   format %{ "movd    $dst,$src\t# MoveI2F" %}
15264   ins_encode %{
15265     __ movdl($dst$$XMMRegister, $src$$Register);
15266   %}
15267   ins_pipe( pipe_slow );
15268 %}
15269 
// Bit-copy a long GPR into an XMM register (MoveL2D: no value conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15280 
15281 
15282 // Fast clearing of an array
 15283 // Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small (not is_large) non-constant-length ClearArray storing the 64-bit
  // pattern in val; pre-AVX512 targets, byte-granular paths allowed
  // (the word_copy_only() variant is below).
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=false, word_copy=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15342 
// Word-copy-only variant of rep_stos: no byte-granular rep-stosb path.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=false, word_copy=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15398 
15399 // Small non-constant length ClearArray for AVX512 targets.
15400 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15401                        Universe dummy, rFlagsReg cr)
15402 %{
15403   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15404   match(Set dummy (ClearArray (Binary cnt base) val));
15405   ins_cost(125);
15406   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15407 
15408   format %{ $$template
15409     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15410     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15411     $$emit$$"jg      LARGE\n\t"
15412     $$emit$$"dec     rcx\n\t"
15413     $$emit$$"js      DONE\t# Zero length\n\t"
15414     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15415     $$emit$$"dec     rcx\n\t"
15416     $$emit$$"jge     LOOP\n\t"
15417     $$emit$$"jmp     DONE\n\t"
15418     $$emit$$"# LARGE:\n\t"
15419     if (UseFastStosb) {
15420        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15421        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15422     } else if (UseXMMForObjInit) {
15423        $$emit$$"mov     rdi,rax\n\t"
15424        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15425        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15426        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15434        $$emit$$"jl      L_tail\n\t"
15435        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15436        $$emit$$"add     0x20,rax\n\t"
15437        $$emit$$"sub     0x4,rcx\n\t"
15438        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15439        $$emit$$"add     0x4,rcx\n\t"
15440        $$emit$$"jle     L_end\n\t"
15441        $$emit$$"dec     rcx\n\t"
15442        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15443        $$emit$$"vmovq   xmm0,(rax)\n\t"
15444        $$emit$$"add     0x8,rax\n\t"
15445        $$emit$$"dec     rcx\n\t"
15446        $$emit$$"jge     L_sloop\n\t"
15447        $$emit$$"# L_end:\n\t"
15448     } else {
15449        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15450     }
15451     $$emit$$"# DONE"
15452   %}
15453   ins_encode %{
15454     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15455                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15456   %}
15457   ins_pipe(pipe_slow);
15458 %}
15459 
15460 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15461                                  Universe dummy, rFlagsReg cr)

15462 %{
15463   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15464   match(Set dummy (ClearArray (Binary cnt base) val));
15465   ins_cost(125);
15466   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15467 
15468   format %{ $$template
15469     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15470     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15471     $$emit$$"jg      LARGE\n\t"
15472     $$emit$$"dec     rcx\n\t"
15473     $$emit$$"js      DONE\t# Zero length\n\t"
15474     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15475     $$emit$$"dec     rcx\n\t"
15476     $$emit$$"jge     LOOP\n\t"
15477     $$emit$$"jmp     DONE\n\t"
15478     $$emit$$"# LARGE:\n\t"
15479     if (UseFastStosb) {
15480        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15481        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15482     } else if (UseXMMForObjInit) {
15483        $$emit$$"mov     rdi,rax\n\t"
15484        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15485        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15486        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15494        $$emit$$"jl      L_tail\n\t"
15495        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15496        $$emit$$"add     0x20,rax\n\t"
15497        $$emit$$"sub     0x4,rcx\n\t"
15498        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15499        $$emit$$"add     0x4,rcx\n\t"
15500        $$emit$$"jle     L_end\n\t"
15501        $$emit$$"dec     rcx\n\t"
15502        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15503        $$emit$$"vmovq   xmm0,(rax)\n\t"
15504        $$emit$$"add     0x8,rax\n\t"
15505        $$emit$$"dec     rcx\n\t"
15506        $$emit$$"jge     L_sloop\n\t"
15507        $$emit$$"# L_end:\n\t"
15508     } else {
15509        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15510     }
15511     $$emit$$"# DONE"
15512   %}
15513   ins_encode %{
15514     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15515                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15516   %}
15517   ins_pipe(pipe_slow);
15518 %}
15519 
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  // is_large: no short-length fast path is emitted.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=true, word_copy=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15569 
// Word-copy-only variant of rep_stos_large: no byte-granular rep-stosb path.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=true, word_copy=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15615 
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2))
;
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // NOTE(review): the debug template below still shows the upstream zeroing
  // ("xorq rax, rax") although this variant stores $val — confirm against
  // C2_MacroAssembler::clear_mem and update the template if stale.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=true, word_copy=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15666 
// Large non-constant length ClearArray for AVX512 targets when the node demands
// word-sized (8-byte) stores only — word_copy_only() is true, e.g. so stores
// cannot tear below word granularity. Otherwise parallel to rep_stos_large_evex.
15667 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15668                                        Universe dummy, rFlagsReg cr)

15669 %{
15670   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15671   match(Set dummy (ClearArray (Binary cnt base) val));
// cnt/base/val pinned to rcx/rdi/rax and consumed; XMM and opmask scratch
// registers; flags clobbered.
15672   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15673 
// Debug-only listing for -XX:+PrintOptoAssembly.
15674   format %{ $$template
15675     if (UseFastStosb) {
15676        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15677        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15678        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15679     } else if (UseXMMForObjInit) {
15680        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15681        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15682        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15683        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15684        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15685        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15686        $$emit$$"add     0x40,rax\n\t"
15687        $$emit$$"# L_zero_64_bytes:\n\t"
15688        $$emit$$"sub     0x8,rcx\n\t"
15689        $$emit$$"jge     L_loop\n\t"
15690        $$emit$$"add     0x4,rcx\n\t"
15691        $$emit$$"jl      L_tail\n\t"
15692        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15693        $$emit$$"add     0x20,rax\n\t"
15694        $$emit$$"sub     0x4,rcx\n\t"
15695        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15696        $$emit$$"add     0x4,rcx\n\t"
15697        $$emit$$"jle     L_end\n\t"
15698        $$emit$$"dec     rcx\n\t"
15699        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15700        $$emit$$"vmovq   xmm0,(rax)\n\t"
15701        $$emit$$"add     0x8,rax\n\t"
15702        $$emit$$"dec     rcx\n\t"
15703        $$emit$$"jge     L_sloop\n\t"
15704        $$emit$$"# L_end:\n\t"
15705     } else {
15706        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15707        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15708     }
15709   %}
15710   ins_encode %{
// Same stub as rep_stos_large_evex but with the second boolean true —
// presumably (is_large = true, word_copy = true), matching word_copy_only()
// in the predicate; confirm against clear_mem's declaration.
15711     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15712                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15713   %}
15714   ins_pipe(pipe_slow);
15715 %}
15716 
15717 // Small constant length ClearArray for AVX512 targets.
// Matches only when the length is a compile-time immediate (immL cnt), the node
// is not large, word-sized stores are not required, and AVX-512VL with at least
// 32-byte vectors is available. Low ins_cost(100) makes it win over the
// rep_stos variants whenever its predicate holds.
15718 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15719 %{
15720   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15721             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()))
15722   match(Set dummy (ClearArray (Binary cnt base) val));
15723   ins_cost(100);
// val (rax) is consumed; tmp/ktmp are scratch; flags clobbered. Unlike the
// rep_stos variants, base may be any pointer register (rRegP).
15724   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15725   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15726   ins_encode %{
// Immediate-count overload of clear_mem: $cnt$$constant is the literal length.
15727     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15728   %}
15729   ins_pipe(pipe_slow);
15730 %}
15731 
// String compare for two byte[] (Latin-1 x Latin-1) arguments of a StrComp
// node. This is the non-AVX512 variant: the !supports_avx512vlbw() predicate
// keeps it from clashing with an opmask-using sibling, so knoreg is passed as
// the mask register.
15732 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15733                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15734 %{
15735   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15736   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
// All four inputs are pinned registers and consumed; tmp1 is an XMM scratch;
// result is produced in rax; flags clobbered.
15737   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15738 
15739   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15740   ins_encode %{
15741     __ string_compare($str1$$Register, $str2$$Register,
15742                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15743                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15744   %}
15745   ins_pipe( pipe_slow );
15746 %}
15747 

17585   effect(USE meth);
17586 
17587   ins_cost(300);
17588   format %{ "call_leaf,runtime " %}
17589   ins_encode(clear_avx, Java_To_Runtime(meth));
17590   ins_pipe(pipe_slow);
17591 %}
17592 
17593 // Call runtime without safepoint and with vector arguments
// Note: deliberately does NOT prepend clear_avx (no vzeroupper) — vector
// arguments must stay live across the call. This pairs with
// MachCallRuntimeNode::ret_addr_offset(), which skips clear_avx_size() for
// Op_CallLeafVector.
17594 instruct CallLeafDirectVector(method meth)
17595 %{
17596   match(CallLeafVector);
17597   effect(USE meth);
17598 
17599   ins_cost(300);
17600   format %{ "call_leaf,vector " %}
17601   ins_encode(Java_To_Runtime(meth));
17602   ins_pipe(pipe_slow);
17603 %}
17604 
17605 // Call runtime without safepoint
17606 // entry point is null, target holds the address to call
// Indirect form: when the CallLeafNoFP node carries no static entry point,
// the callee address arrives in a general-purpose register and is called
// through it. The direct form below handles the entry_point != nullptr case.
17607 instruct CallLeafNoFPInDirect(rRegP target)
17608 %{
17609   predicate(n->as_Call()->entry_point() == nullptr);
17610   match(CallLeafNoFP target);
17611 
17612   ins_cost(300);
17613   format %{ "call_leaf_nofp,runtime indirect " %}
17614   ins_encode %{
17615      __ call($target$$Register);
17616   %}
17617 
17618   ins_pipe(pipe_slow);
17619 %}
17620 
17621 // Call runtime without safepoint
// Direct form: entry point is known statically. Prepends clear_avx (vzeroupper
// when needed) before the runtime call, unlike CallLeafDirectVector which must
// preserve vector registers.
17622 instruct CallLeafNoFPDirect(method meth)
17623 %{
17624   predicate(n->as_Call()->entry_point() != nullptr);
17625   match(CallLeafNoFP);
17626   effect(USE meth);
17627 
17628   ins_cost(300);
17629   format %{ "call_leaf_nofp,runtime " %}
17630   ins_encode(clear_avx, Java_To_Runtime(meth));
17631   ins_pipe(pipe_slow);
17632 %}
17633 
17634 // Return Instruction
17635 // Remove the return address & jump to it.
17636 // Notice: We always emit a nop after a ret to make sure there is room
17637 // for safepoint patching
17638 instruct Ret()
17639 %{
17640   match(Return);
17641 
17642   format %{ "ret" %}
17643   ins_encode %{
17644     __ ret(0);
< prev index next >