< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1864     st->print("\n\t");
 1865     st->print("# stack alignment check");
 1866 #endif
 1867   }
 1868   if (C->stub_function() != nullptr) {
 1869     st->print("\n\t");
 1870     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1871     st->print("\n\t");
 1872     st->print("je      fast_entry\t");
 1873     st->print("\n\t");
 1874     st->print("call    #nmethod_entry_barrier_stub\t");
 1875     st->print("\n\tfast_entry:");
 1876   }
 1877   st->cr();
 1878 }
 1879 #endif
 1880 
// Emit the method prolog: optional class-initialization barrier, the
// verified entry (stack bang + frame setup), and constant-table setup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Fast class-initialization check: bail to the wrong-method stub if
    // the holder class is not initialized for the current thread.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Stack bang only when needed; the last flag tells verified_entry
  // whether this compile is a stub.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1913 
 1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1915 {
 1916   return MachNode::size(ra_); // too many variables; just compute it
 1917                               // the hard way
 1918 }
 1919 
// Conservative estimate of relocation entries needed by the prolog.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
 1924 
 1925 //=============================================================================
 1926 #ifndef PRODUCT
 1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1928 {
 1929   Compile* C = ra_->C;
 1930   if (generate_vzeroupper(C)) {
 1931     st->print("vzeroupper");
 1932     st->cr(); st->print("\t");
 1933   }
 1934 
 1935   int framesize = C->output()->frame_size_in_bytes();
 1936   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1937   // Remove word for return adr already pushed
 1938   // and RBP

 1946   st->print_cr("popq    rbp");
 1947   if (do_polling() && C->is_method_compilation()) {
 1948     st->print("\t");
 1949     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1950                  "ja      #safepoint_stub\t"
 1951                  "# Safepoint: poll for GC");
 1952   }
 1953 }
 1954 #endif
 1955 
// Emit the method epilog: optional vzeroupper, frame teardown, reserved
// stack check, and the return-address safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Poll-return safepoint check; the slow path jumps to a
    // C2SafepointPollStub.  While sizing in the scratch buffer we bind
    // against a dummy label instead of allocating a real stub.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
 1996 
 1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1998 {
 1999   return MachNode::size(ra_); // too many variables; just compute it
 2000                               // the hard way
 2001 }
 2002 
// Conservative estimate of relocation entries needed by the epilog.
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
 2007 
// The epilog uses the default pipeline class for scheduling purposes.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
 2012 
 2013 //=============================================================================
 2014 
// Coarse register classes used by the spill/copy code that follows.
enum RC {
  rc_bad,    // not a register / unallocated
  rc_int,    // general-purpose register
  rc_kreg,   // k (opmask) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
 2022 

 2584 #endif
 2585 
// Materialize the address of this node's on-stack lock box into the
// register assigned to the node.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); // rsp-relative offset of the box
  int reg = ra_->get_encode(this);                               // encoding of the destination register

  __ lea(as_Register(reg), Address(rsp, offset));
}
 2593 
 2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2595 {
 2596   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2597   if (ra_->get_encode(this) > 15) {
 2598     return (offset < 0x80) ? 6 : 9; // REX2
 2599   } else {
 2600     return (offset < 0x80) ? 5 : 8; // REX
 2601   }
 2602 }
 2603 











































 2604 //=============================================================================
 2605 #ifndef PRODUCT
 2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2607 {
 2608   if (UseCompressedClassPointers) {
 2609     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2610     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2611   } else {
 2612     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2613     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2614   }
 2615   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2616 }
 2617 #endif
 2618 
// Emit the unverified entry point: ic_check performs the inline cache
// klass comparison (aligned to InteriorEntryAlignment).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
 2623 
// Size of the unverified entry point; measured rather than computed.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 2629 
 2630 
 2631 //=============================================================================
 2632 
// The vector calling convention is available exactly when the Vector API
// support flag is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
 2636 
// Whether operand 1 of 'mdef' is marked NDD-demotable (platform flag
// Flag_ndd_demotable_opr1 set by the matcher).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
 2640 
// Whether operand 2 of 'mdef' is marked NDD-demotable (platform flag
// Flag_ndd_demotable_opr2 set by the matcher).
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
 2644 
 2645 #ifdef ASSERT
// Debug-only helper: true if either operand of 'mdef' is NDD-demotable.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
 2649 #endif

 4585     }
 4586     __ post_call_nop();
 4587   %}
 4588 
  // Dynamic (inline-cache) Java call: ic_call emits the IC call sequence
  // for the resolved method, followed by a post-call nop.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 4593 
  // Code run after a call returns.  With VerifyStackAtCalls, checks that
  // the stack depth is unchanged by looking for the magic cookie placed
  // below the saved frame; traps with int3 on mismatch.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
 4606 
 4607 %}
 4608 
 4609 //----------FRAME--------------------------------------------------------------
 4610 // Definition of frame structure and management information.
 4611 //
 4612 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4613 //                             |   (to get allocators register number
 4614 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4615 //  r   CALLER     |        |
 4616 //  o     |        +--------+      pad to even-align allocators stack-slot
 4617 //  w     V        |  pad0  |        numbers; owned by CALLER
 4618 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4619 //  h     ^        |   in   |  5
 4620 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4621 //  |     |        |        |  3
 4622 //  |     |        +--------+
 4623 //  V     |        | old out|      Empty on Intel, window on Sparc
 4624 //        |    old |preserve|      Must be even aligned.

 5756   %}
 5757 %}
 5758 
 5759 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is known to be non-negative
  // (lower bound of its long-converted type is >= 0).
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 5775 
















 5776 // Indirect Narrow Oop Plus Offset Operand
 5777 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5778 // we can't free r12 even with CompressedOops::base() == nullptr.
// Decode a narrow oop and add an offset in a single addressing mode:
// base = R12 (heap base), index = the narrow oop shifted left by 3.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when oops are compressed with a times-8 shift.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 5793 
 5794 // Indirect Memory Operand
 5795 operand indirectNarrow(rRegN reg)

 6265 %}
 6266 
 6267 // Replaces legVec during post-selection cleanup. See above.
// 512-bit vector operand restricted to the legacy register set
// (vectorz_reg_legacy register class).
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6275 
 6276 //----------OPERAND CLASSES----------------------------------------------------
 6277 // Operand Classes are groups of operands that are used as to simplify
 6278 // instruction definitions by not requiring the AD writer to specify separate
 6279 // instructions for every form of operand when the instruction accepts
 6280 // multiple operand types with the same basic encoding and format.  The classic
 6281 // case of this is memory operands.
 6282 
// All addressing-mode operands accepted wherever an instruction takes a
// generic 'memory' operand (plain, offset, index, scaled, and the
// narrow-oop variants).
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6289 
 6290 //----------PIPELINE-----------------------------------------------------------
 6291 // Rules which define the behavior of the target architectures pipeline.
 6292 pipeline %{
 6293 
 6294 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
 6302 
 6303 //----------RESOURCES----------------------------------------------------------
 6304 // Resources are the functional units available to the machine
 6305 

 8863   format %{ "MEMBAR-storestore (empty encoding)" %}
 8864   ins_encode( );
 8865   ins_pipe(empty);
 8866 %}
 8867 
 8868 //----------Move Instructions--------------------------------------------------
 8869 
// Reinterpret a long value as a pointer (no conversion, just a move).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when src and dst were allocated the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8882 


























// Reinterpret a pointer as a long value (no conversion, just a move).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when src and dst were allocated the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8895 
 8896 // Convert oop into int for vectors alignment masking
 8897 instruct convP2I(rRegI dst, rRegP src)
 8898 %{
 8899   match(Set dst (ConvL2I (CastP2X src)));
 8900 
 8901   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8902   ins_encode %{

15150   effect(DEF dst, USE src);
15151   ins_cost(100);
15152   format %{ "movd    $dst,$src\t# MoveI2F" %}
15153   ins_encode %{
15154     __ movdl($dst$$XMMRegister, $src$$Register);
15155   %}
15156   ins_pipe( pipe_slow );
15157 %}
15158 
// Bitwise move of a long GPR into an XMM register (raw bits, no
// numeric conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15169 

15170 // Fast clearing of an array
15171 // Small non-constant length ClearArray for non-AVX512 targets.
15172 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15173                   Universe dummy, rFlagsReg cr)
15174 %{
15175   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15176   match(Set dummy (ClearArray cnt base));
15177   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































15178 
15179   format %{ $$template
15180     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15181     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15182     $$emit$$"jg      LARGE\n\t"
15183     $$emit$$"dec     rcx\n\t"
15184     $$emit$$"js      DONE\t# Zero length\n\t"
15185     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15186     $$emit$$"dec     rcx\n\t"
15187     $$emit$$"jge     LOOP\n\t"
15188     $$emit$$"jmp     DONE\n\t"
15189     $$emit$$"# LARGE:\n\t"
15190     if (UseFastStosb) {
15191        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15192        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15193     } else if (UseXMMForObjInit) {
15194        $$emit$$"mov     rdi,rax\n\t"
15195        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15196        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15197        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15205        $$emit$$"jl      L_tail\n\t"
15206        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15207        $$emit$$"add     0x20,rax\n\t"
15208        $$emit$$"sub     0x4,rcx\n\t"
15209        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15210        $$emit$$"add     0x4,rcx\n\t"
15211        $$emit$$"jle     L_end\n\t"
15212        $$emit$$"dec     rcx\n\t"
15213        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15214        $$emit$$"vmovq   xmm0,(rax)\n\t"
15215        $$emit$$"add     0x8,rax\n\t"
15216        $$emit$$"dec     rcx\n\t"
15217        $$emit$$"jge     L_sloop\n\t"
15218        $$emit$$"# L_end:\n\t"
15219     } else {
15220        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15221     }
15222     $$emit$$"# DONE"
15223   %}
15224   ins_encode %{
15225     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15226                  $tmp$$XMMRegister, false, knoreg);
15227   %}
15228   ins_pipe(pipe_slow);
15229 %}
15230 
15231 // Small non-constant length ClearArray for AVX512 targets.
15232 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15233                        Universe dummy, rFlagsReg cr)
15234 %{
15235   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15236   match(Set dummy (ClearArray cnt base));
15237   ins_cost(125);
15238   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15239 
15240   format %{ $$template
15241     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15242     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15243     $$emit$$"jg      LARGE\n\t"
15244     $$emit$$"dec     rcx\n\t"
15245     $$emit$$"js      DONE\t# Zero length\n\t"
15246     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15247     $$emit$$"dec     rcx\n\t"
15248     $$emit$$"jge     LOOP\n\t"
15249     $$emit$$"jmp     DONE\n\t"
15250     $$emit$$"# LARGE:\n\t"
15251     if (UseFastStosb) {
15252        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15253        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15254     } else if (UseXMMForObjInit) {
15255        $$emit$$"mov     rdi,rax\n\t"
15256        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15257        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15258        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15266        $$emit$$"jl      L_tail\n\t"
15267        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15268        $$emit$$"add     0x20,rax\n\t"
15269        $$emit$$"sub     0x4,rcx\n\t"
15270        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15271        $$emit$$"add     0x4,rcx\n\t"
15272        $$emit$$"jle     L_end\n\t"
15273        $$emit$$"dec     rcx\n\t"
15274        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15275        $$emit$$"vmovq   xmm0,(rax)\n\t"
15276        $$emit$$"add     0x8,rax\n\t"
15277        $$emit$$"dec     rcx\n\t"
15278        $$emit$$"jge     L_sloop\n\t"
15279        $$emit$$"# L_end:\n\t"
15280     } else {
15281        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15282     }
15283     $$emit$$"# DONE"
15284   %}
15285   ins_encode %{
15286     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15287                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15288   %}
15289   ins_pipe(pipe_slow);
15290 %}
15291 
15292 // Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  // Matches only large clears on non-AVX512 targets (see is_large()).
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  // cnt/base are consumed and clobbered; rax supplies the zero value.
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the large-array path in clear_mem (matches the
    // is_large() predicate); knoreg: no opmask register on this path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15342 
15343 // Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  // Matches only large clears on AVX512 targets (UseAVX > 2).
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  // cnt/base are consumed and clobbered; ktmp is a scratch opmask register.
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the large-array path; the opmask register enables
    // the AVX512 masked-store variant inside clear_mem.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15393 
15394 // Small constant length ClearArray for AVX512 targets.
// Clear an array whose length is a compile-time constant.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Small constant clears only; requires 32-byte vectors and AVX512VL.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
   // cnt is an immediate here ($$constant), unlike the register variants.
   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15407 
// Latin1 x Latin1 string comparison (non-AVX512VLBW targets).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // knoreg: the masked (opmask) path is not used on this match.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15423 

17261   effect(USE meth);
17262 
17263   ins_cost(300);
17264   format %{ "call_leaf,runtime " %}
17265   ins_encode(clear_avx, Java_To_Runtime(meth));
17266   ins_pipe(pipe_slow);
17267 %}
17268 
17269 // Call runtime without safepoint and with vector arguments
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Note: unlike the other leaf calls, no clear_avx before the call —
  // consistent with ret_addr_offset() skipping clear_avx_size() for
  // Op_CallLeafVector.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17280 
















17281 // Call runtime without safepoint
// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // clear_avx (vzeroupper) is emitted before the runtime call.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17292 
17293 // Return Instruction
17294 // Remove the return address & jump to it.
17295 // Notice: We always emit a nop after a ret to make sure there is room
17296 // for safepoint patching
17297 instruct Ret()
17298 %{
17299   match(Return);
17300 
17301   format %{ "ret" %}
17302   ins_encode %{
17303     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1869     st->print("\n\t");
 1870     st->print("# stack alignment check");
 1871 #endif
 1872   }
 1873   if (C->stub_function() != nullptr) {
 1874     st->print("\n\t");
 1875     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1876     st->print("\n\t");
 1877     st->print("je      fast_entry\t");
 1878     st->print("\n\t");
 1879     st->print("call    #nmethod_entry_barrier_stub\t");
 1880     st->print("\n\tfast_entry:");
 1881   }
 1882   st->cr();
 1883 }
 1884 #endif
 1885 
// Emit the method prolog: verified entry, optional nmethod entry
// barrier, binding of the verified-entry label, and constant-table setup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Stubs do not get an nmethod entry barrier.
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the label only during real emission, not while sizing code
    // in the scratch buffer.
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1908 





 1909 
 1910 int MachPrologNode::reloc() const
 1911 {
 1912   return 0; // a large enough number
 1913 }
 1914 
 1915 //=============================================================================
 1916 #ifndef PRODUCT
 1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1918 {
 1919   Compile* C = ra_->C;
 1920   if (generate_vzeroupper(C)) {
 1921     st->print("vzeroupper");
 1922     st->cr(); st->print("\t");
 1923   }
 1924 
 1925   int framesize = C->output()->frame_size_in_bytes();
 1926   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1927   // Remove word for return adr already pushed
 1928   // and RBP

 1936   st->print_cr("popq    rbp");
 1937   if (do_polling() && C->is_method_compilation()) {
 1938     st->print("\t");
 1939     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1940                  "ja      #safepoint_stub\t"
 1941                  "# Safepoint: poll for GC");
 1942   }
 1943 }
 1944 #endif
 1945 
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-point safepoint poll. In scratch-emit (size measuring) mode no
    // stub is allocated; a local dummy label stands in as the poll target.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
 1976 






 1977 int MachEpilogNode::reloc() const
 1978 {
 1979   return 2; // a large enough number
 1980 }
 1981 
 1982 const Pipeline* MachEpilogNode::pipeline() const
 1983 {
 1984   return MachNode::pipeline_class();
 1985 }
 1986 
 1987 //=============================================================================
 1988 
// Coarse register-class classification used by the spill/copy code below.
enum RC {
  rc_bad,    // no valid register class
  rc_int,    // general purpose (integer) register
  rc_kreg,   // k (opmask) register
  rc_float,  // floating point / vector register
  rc_stack   // stack slot
};
 1996 

 2558 #endif
 2559 
 2560 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2561 {
 2562   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2563   int reg = ra_->get_encode(this);
 2564 
 2565   __ lea(as_Register(reg), Address(rsp, offset));
 2566 }
 2567 
 2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2569 {
 2570   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2571   if (ra_->get_encode(this) > 15) {
 2572     return (offset < 0x80) ? 6 : 9; // REX2
 2573   } else {
 2574     return (offset < 0x80) ? 5 : 8; // REX
 2575   }
 2576 }
 2577 
 2578 //=============================================================================
 2579 #ifndef PRODUCT
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  // Debug-only placeholder; the actual code is produced by MachVEPNode::emit.
  st->print_cr("MachVEPNode");
}
 2584 #endif
 2585 
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    // Unverified entry: just the inline cache check.
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Scratch-emit (size measuring) mode: target a local dummy label.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
 2620 
 2621 //=============================================================================
 2622 #ifndef PRODUCT
 2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2624 {
 2625   if (UseCompressedClassPointers) {
 2626     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2627     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2628   } else {
 2629     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2630     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2631   }
 2632   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2633 }
 2634 #endif
 2635 
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  // Unverified entry point: emit the inline cache (receiver klass) check,
  // passing InteriorEntryAlignment as the alignment argument.
  __ ic_check(InteriorEntryAlignment);
}
 2640 






 2641 
 2642 //=============================================================================
 2643 
bool Matcher::supports_vector_calling_convention(void) {
  // Vector calling convention is available whenever EnableVectorSupport is on.
  return EnableVectorSupport;
}
 2647 
 2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2649   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2650 }
 2651 
 2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2653   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2654 }
 2655 
 2656 #ifdef ASSERT
 2657 static bool is_ndd_demotable(const MachNode* mdef) {
 2658   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2659 }
 2660 #endif

 4596     }
 4597     __ post_call_nop();
 4598   %}
 4599 
  enc_class Java_Dynamic_Call(method meth) %{
    // Inline-cache (dynamic) call, followed by the required post-call nop.
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 4604 
  enc_class call_epilog %{
    // Emitted after a call: optional stack-depth verification, plus null-marker
    // and rax fixup when an inline type is returned as fields.
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          // Use rscratch1 as a staging register when the marker lives on the stack.
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
 4650 
 4651 %}
 4652 
 4653 //----------FRAME--------------------------------------------------------------
 4654 // Definition of frame structure and management information.
 4655 //
 4656 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4657 //                             |   (to get allocators register number
 4658 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4659 //  r   CALLER     |        |
 4660 //  o     |        +--------+      pad to even-align allocators stack-slot
 4661 //  w     V        |  pad0  |        numbers; owned by CALLER
 4662 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4663 //  h     ^        |   in   |  5
 4664 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4665 //  |     |        |        |  3
 4666 //  |     |        +--------+
 4667 //  V     |        | old out|      Empty on Intel, window on Sparc
 4668 //        |    old |preserve|      Must be even aligned.

 5800   %}
 5801 %}
 5802 
 5803 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // The ConvI2L is dropped from the matched tree, which is only safe when the
  // int index is known non-negative — hence the predicate on the type's low bound.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 5819 
 5820 // Indirect Narrow Oop Operand
// Addresses the decoded oop directly as [R12 + reg << 3] (R12 = heap base).
operand indCompressedOop(rRegN reg) %{
  // Only valid when compressed oops use an 8-byte shift.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
 5835 
 5836 // Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// register, so we can't free r12 even with CompressedOops::base() == nullptr.
// As indCompressedOop, but with an additional 32-bit displacement.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 5853 
 5854 // Indirect Memory Operand
 5855 operand indirectNarrow(rRegN reg)

 6325 %}
 6326 
 6327 // Replaces legVec during post-selection cleanup. See above.
// 512-bit vector constrained to the legacy vector register class.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6335 
 6336 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6338 // instruction definitions by not requiring the AD writer to specify separate
 6339 // instructions for every form of operand when the instruction accepts
 6340 // multiple operand types with the same basic encoding and format.  The classic
 6341 // case of this is memory operands.
 6342 
// All supported memory addressing operands, plain and narrow-oop based.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6349 
 6350 //----------PIPELINE-----------------------------------------------------------
 6351 // Rules which define the behavior of the target architectures pipeline.
 6352 pipeline %{
 6353 
 6354 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
 6362 
 6363 //----------RESOURCES----------------------------------------------------------
 6364 // Resources are the functional units available to the machine
 6365 

 8923   format %{ "MEMBAR-storestore (empty encoding)" %}
 8924   ins_encode( );
 8925   ins_pipe(empty);
 8926 %}
 8927 
 8928 //----------Move Instructions--------------------------------------------------
 8929 
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Reinterpret the long bits as a pointer; elide the move when the
    // allocator assigned the same register to both operands.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8942 
 8943 instruct castI2N(rRegN dst, rRegI src)
 8944 %{
 8945   match(Set dst (CastI2N src));
 8946 
 8947   format %{ "movq    $dst, $src\t# int -> narrow ptr" %}
 8948   ins_encode %{
 8949     if ($dst$$reg != $src$$reg) {
 8950       __ movl($dst$$Register, $src$$Register);
 8951     }
 8952   %}
 8953   ins_pipe(ialu_reg_reg); // XXX
 8954 %}
 8955 
// Narrow-pointer bits to long; matches CastP2X with a narrow (rRegN) source.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when both operands share a register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8968 
// Pointer bits to long (full-width rRegP source).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when both operands share a register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8981 
 8982 // Convert oop into int for vectors alignment masking
 8983 instruct convP2I(rRegI dst, rRegP src)
 8984 %{
 8985   match(Set dst (ConvL2I (CastP2X src)));
 8986 
 8987   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8988   ins_encode %{

15236   effect(DEF dst, USE src);
15237   ins_cost(100);
15238   format %{ "movd    $dst,$src\t# MoveI2F" %}
15239   ins_encode %{
15240     __ movdl($dst$$XMMRegister, $src$$Register);
15241   %}
15242   ins_pipe( pipe_slow );
15243 %}
15244 
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     // Raw 64-bit bit move from a GPR into an XMM register.
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15255 
15256 
15257 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small (!is_large), non-word-copy-only ClearArray on UseAVX <= 2 targets.
  // rax ('val') carries the fill value; rcx/rdi are consumed.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = false, word_copy_only = false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15317 
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  // Small (!is_large), word-copy-only ClearArray on UseAVX <= 2 targets.
  // Like rep_stos above, but never uses the byte-granular fast-stosb path.
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = false, word_copy_only = true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15373 
15374 // Small non-constant length ClearArray for AVX512 targets.
15375 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15376                        Universe dummy, rFlagsReg cr)
15377 %{
15378   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15379   match(Set dummy (ClearArray (Binary cnt base) val));
15380   ins_cost(125);
15381   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15382 
15383   format %{ $$template
15384     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15385     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15386     $$emit$$"jg      LARGE\n\t"
15387     $$emit$$"dec     rcx\n\t"
15388     $$emit$$"js      DONE\t# Zero length\n\t"
15389     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15390     $$emit$$"dec     rcx\n\t"
15391     $$emit$$"jge     LOOP\n\t"
15392     $$emit$$"jmp     DONE\n\t"
15393     $$emit$$"# LARGE:\n\t"
15394     if (UseFastStosb) {
15395        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15396        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15397     } else if (UseXMMForObjInit) {
15398        $$emit$$"mov     rdi,rax\n\t"
15399        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15400        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15401        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15409        $$emit$$"jl      L_tail\n\t"
15410        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15411        $$emit$$"add     0x20,rax\n\t"
15412        $$emit$$"sub     0x4,rcx\n\t"
15413        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15414        $$emit$$"add     0x4,rcx\n\t"
15415        $$emit$$"jle     L_end\n\t"
15416        $$emit$$"dec     rcx\n\t"
15417        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15418        $$emit$$"vmovq   xmm0,(rax)\n\t"
15419        $$emit$$"add     0x8,rax\n\t"
15420        $$emit$$"dec     rcx\n\t"
15421        $$emit$$"jge     L_sloop\n\t"
15422        $$emit$$"# L_end:\n\t"
15423     } else {
15424        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15425     }
15426     $$emit$$"# DONE"
15427   %}
15428   ins_encode %{
15429     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15430                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15431   %}
15432   ins_pipe(pipe_slow);
15433 %}
15434 
15435 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15436                                  Universe dummy, rFlagsReg cr)

15437 %{
15438   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15439   match(Set dummy (ClearArray (Binary cnt base) val));
15440   ins_cost(125);
15441   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15442 
15443   format %{ $$template
15444     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15445     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15446     $$emit$$"jg      LARGE\n\t"
15447     $$emit$$"dec     rcx\n\t"
15448     $$emit$$"js      DONE\t# Zero length\n\t"
15449     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15450     $$emit$$"dec     rcx\n\t"
15451     $$emit$$"jge     LOOP\n\t"
15452     $$emit$$"jmp     DONE\n\t"
15453     $$emit$$"# LARGE:\n\t"
15454     if (UseFastStosb) {
15455        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15456        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15457     } else if (UseXMMForObjInit) {
15458        $$emit$$"mov     rdi,rax\n\t"
15459        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15460        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15461        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15469        $$emit$$"jl      L_tail\n\t"
15470        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15471        $$emit$$"add     0x20,rax\n\t"
15472        $$emit$$"sub     0x4,rcx\n\t"
15473        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15474        $$emit$$"add     0x4,rcx\n\t"
15475        $$emit$$"jle     L_end\n\t"
15476        $$emit$$"dec     rcx\n\t"
15477        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15478        $$emit$$"vmovq   xmm0,(rax)\n\t"
15479        $$emit$$"add     0x8,rax\n\t"
15480        $$emit$$"dec     rcx\n\t"
15481        $$emit$$"jge     L_sloop\n\t"
15482        $$emit$$"# L_end:\n\t"
15483     } else {
15484        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15485     }
15486     $$emit$$"# DONE"
15487   %}
15488   ins_encode %{
15489     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15490                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15491   %}
15492   ins_pipe(pipe_slow);
15493 %}
15494 
15495 // Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  // Large (is_large), non-word-copy-only ClearArray on UseAVX <= 2 targets;
  // skips the short-length inline path entirely.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = true, word_copy_only = false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15544 
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  // Large (is_large), word-copy-only ClearArray on UseAVX <= 2 targets;
  // like rep_stos_large but never uses the byte-granular fast-stosb path.
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = true, word_copy_only = true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15590 
15591 // Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  // Large (is_large), non-word-copy-only ClearArray on UseAVX > 2 targets;
  // uses a legacy XMM temp plus an opmask temp for the AVX512 path.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // NOTE(review): the "xorq rax, rax" lines below look stale — rax now
  // carries the fill value ('val', USE_KILL above) and the non-evex sibling
  // rep_stos_large prints no xorq; confirm against C2_MacroAssembler::clear_mem.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = true, word_copy_only = false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15641 
// Large non-constant length ClearArray for AVX512 targets, restricted to
// word-sized stores: selected when the ClearArrayNode requires
// word_copy_only(), so clear_mem is called with its word-copy flag set
// (unlike rep_stos_large_evex, which passes false).
15642 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15643                                        Universe dummy, rFlagsReg cr)

15644 %{
15645   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15646   match(Set dummy (ClearArray (Binary cnt base) val));
// cnt (rcx), base (rdi) and val (rax) are consumed and clobbered; tmp and
// ktmp are scratch XMM/opmask registers; condition flags are killed.
15647   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15648 
// Pseudo-disassembly for -XX:+PrintOptoAssembly only; actual code comes
// from MacroAssembler::clear_mem in ins_encode below.
15649   format %{ $$template
15650     if (UseFastStosb) {
15651        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15652        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15653        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15654     } else if (UseXMMForObjInit) {
15655        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15656        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15657        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15658        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15659        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15660        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15661        $$emit$$"add     0x40,rax\n\t"
15662        $$emit$$"# L_zero_64_bytes:\n\t"
15663        $$emit$$"sub     0x8,rcx\n\t"
15664        $$emit$$"jge     L_loop\n\t"
15665        $$emit$$"add     0x4,rcx\n\t"
15666        $$emit$$"jl      L_tail\n\t"
15667        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15668        $$emit$$"add     0x20,rax\n\t"
15669        $$emit$$"sub     0x4,rcx\n\t"
15670        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15671        $$emit$$"add     0x4,rcx\n\t"
15672        $$emit$$"jle     L_end\n\t"
15673        $$emit$$"dec     rcx\n\t"
15674        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15675        $$emit$$"vmovq   xmm0,(rax)\n\t"
15676        $$emit$$"add     0x8,rax\n\t"
15677        $$emit$$"dec     rcx\n\t"
15678        $$emit$$"jge     L_sloop\n\t"
15679        $$emit$$"# L_end:\n\t"
15680     } else {
15681        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15682        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15683     }
15684   %}
15685   ins_encode %{
// Booleans mirror the predicate: is_large() = true, word_copy_only() = true
// — confirm against the clear_mem signature in macroAssembler_x86.
15686     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15687                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15688   %}
15689   ins_pipe(pipe_slow);
15690 %}
15691 
15692 // Small constant length ClearArray for AVX512 targets.
// cnt is an immediate (immL), so the length is a compile-time constant and
// is passed to clear_mem as $cnt$$constant rather than in a register; base
// may be any pointer register (rRegP), unlike the rdi-pinned variants above.
// Requires 32-byte vectors and AVX512VL; low ins_cost(100) makes the matcher
// prefer this form over the general rep-stos patterns when it applies.
15693 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15694 %{
15695   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15696             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()))&#59;
15697   match(Set dummy (ClearArray (Binary cnt base) val));
15698   ins_cost(100);
// val (rax) is consumed; tmp and ktmp are scratch XMM/opmask registers;
// condition flags are killed. base is only read, so no USE_KILL.
15699   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15700   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15701   ins_encode %{
15702     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15703   %}
15704   ins_pipe(pipe_slow);
15705 %}
15706 
// StrComp intrinsic for Latin-1 (byte[] vs byte[], StrIntrinsicNode::LL)
// on targets WITHOUT AVX512VL+BW; passes knoreg since no opmask register
// is available — presumably a masked _evex variant elsewhere in this file
// covers the supports_avx512vlbw() case.
15707 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15708                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15709 %{
15710   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15711   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
// Both string pointers and both counts are consumed and clobbered; tmp1 is
// a scratch XMM register; condition flags are killed; result lands in rax.
15712   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15713 
15714   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15715   ins_encode %{
15716     __ string_compare($str1$$Register, $str2$$Register,
15717                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15718                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15719   %}
15720   ins_pipe( pipe_slow );
15721 %}
15722 

17560   effect(USE meth);
17561 
17562   ins_cost(300);
17563   format %{ "call_leaf,runtime " %}
17564   ins_encode(clear_avx, Java_To_Runtime(meth));
17565   ins_pipe(pipe_slow);
17566 %}
17567 
17568 // Call runtime without safepoint and with vector arguments
// Note: unlike the other leaf-call patterns, this one does NOT prepend the
// clear_avx (vzeroupper) encoding — vector arguments live in the upper YMM/
// ZMM lanes and must survive into the callee. This matches
// MachCallRuntimeNode::ret_addr_offset, which skips clear_avx_size() for
// Op_CallLeafVector.
17569 instruct CallLeafDirectVector(method meth)
17570 %{
17571   match(CallLeafVector);
17572   effect(USE meth);
17573 
17574   ins_cost(300);
17575   format %{ "call_leaf,vector " %}
17576   ins_encode(Java_To_Runtime(meth));
17577   ins_pipe(pipe_slow);
17578 %}
17579 
17580 // Call runtime without safepoint
17581 // entry point is null, target holds the address to call
// Indirect form of CallLeafNoFP: selected only when the call node has no
// static entry point (entry_point() == nullptr), so the destination address
// is taken from the target register at runtime. Complements
// CallLeafNoFPDirect below, whose predicate is the exact negation.
17582 instruct CallLeafNoFPInDirect(rRegP target)
17583 %{
17584   predicate(n->as_Call()->entry_point() == nullptr);
17585   match(CallLeafNoFP target);
17586 
17587   ins_cost(300);
17588   format %{ "call_leaf_nofp,runtime indirect " %}
17589   ins_encode %{
// Register-indirect call; note no clear_avx here, unlike the direct form.
17590      __ call($target$$Register);
17591   %}
17592 
17593   ins_pipe(pipe_slow);
17594 %}
17595 
17596 // Call runtime without safepoint
// Direct form of CallLeafNoFP: selected when the call node carries a static
// entry point (entry_point() != nullptr). Emits clear_avx (vzeroupper) before
// the call, then the standard Java-to-runtime transition to the fixed target.
// Pairs with CallLeafNoFPInDirect above, which handles the nullptr case.
17597 instruct CallLeafNoFPDirect(method meth)
17598 %{
17599   predicate(n->as_Call()->entry_point() != nullptr);
17600   match(CallLeafNoFP);
17601   effect(USE meth);
17602 
17603   ins_cost(300);
17604   format %{ "call_leaf_nofp,runtime " %}
17605   ins_encode(clear_avx, Java_To_Runtime(meth));
17606   ins_pipe(pipe_slow);
17607 %}
17608 
17609 // Return Instruction
17610 // Remove the return address & jump to it.
17611 // Notice: We always emit a nop after a ret to make sure there is room
17612 // for safepoint patching
17613 instruct Ret()
17614 %{
17615   match(Return);
17616 
17617   format %{ "ret" %}
17618   ins_encode %{
17619     __ ret(0);
< prev index next >