< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
      // Returns the byte distance from the start of this node's code to the
      // return address: the fixed call size plus clear_avx_size() (treated as
      // the vzeroupper size by compute_padding in this file).
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
      // Same scheme as the static call variant, but with a 15-byte fixed
      // sequence before the return address; clear_avx_size() is added on top.
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




      // Byte distance from the start of the node's code to the return address.
 1652   int offset = 13; // movq r10,#addr; callq (r10)
      // CallLeafVector is excluded: its instruct (CallLeafDirectVector) has no
      // clear_avx encoding, so no extra bytes precede the call.
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
      // Returns the number of padding bytes to insert before this node so that
      // the position just past the vzeroupper and the 1-byte call opcode lands
      // on a multiple of alignment_required().
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to

 1879     st->print("\n\t");
 1880     st->print("# stack alignment check");
 1881 #endif
 1882   }
 1883   if (C->stub_function() != nullptr) {
 1884     st->print("\n\t");
 1885     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1886     st->print("\n\t");
 1887     st->print("je      fast_entry\t");
 1888     st->print("\n\t");
 1889     st->print("call    #nmethod_entry_barrier_stub\t");
 1890     st->print("\n\tfast_entry:");
 1891   }
 1892   st->cr();
 1893 }
 1894 #endif
 1895 
 1896 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
      // Emits the method prolog: an optional class-initialization barrier, the
      // verified entry sequence, and then records the frame-complete offset and
      // the constant table base offset.
 1897   Compile* C = ra_->C;
 1898 
 1899   int framesize = C->output()->frame_size_in_bytes();
 1900   int bangsize = C->output()->bang_size_in_bytes();
 1901 
 1902   if (C->clinit_barrier_on_entry()) {
 1903     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1904     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1905 
 1906     Label L_skip_barrier;
 1907     Register klass = rscratch1;
 1908 
      // Load the holder class metadata and run the barrier; the fast path
      // continues at L_skip_barrier, otherwise control jumps to the
      // handle_wrong_method stub.
 1909     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1910     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1911 
 1912     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1913 
 1914     __ bind(L_skip_barrier);
 1915   }
 1916 
      // The bang size is passed as 0 when need_stack_bang() reports that no
      // stack bang is required; the last argument tells verified_entry whether
      // this compilation is a stub (stub_function() != nullptr).
 1917   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


 1918 
      // Record the code offset reached after the entry sequence.
 1919   C->output()->set_frame_complete(__ offset());
 1920 
 1921   if (C->has_mach_constant_base_node()) {
 1922     // NOTE: We set the table base offset here because users might be
 1923     // emitted before MachConstantBaseNode.
 1924     ConstantTable& constant_table = C->output()->constant_table();
 1925     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1926   }
 1927 }
 1928 
 1929 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1930 {
      // No closed-form size for the prolog: defer to the generic
      // MachNode::size() computation.
 1931   return MachNode::size(ra_); // too many variables; just compute it
 1932                               // the hard way
 1933 }
 1934 
 1935 int MachPrologNode::reloc() const
 1936 {
      // NOTE(review): presumably an upper bound on relocation entries for this
      // node; the prolog reports 0 here -- confirm against MachNode::reloc().
 1937   return 0; // a large enough number
 1938 }
 1939 
 1940 //=============================================================================
 1941 #ifndef PRODUCT
 1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1943 {
 1944   Compile* C = ra_->C;
 1945   if (generate_vzeroupper(C)) {
 1946     st->print("vzeroupper");
 1947     st->cr(); st->print("\t");
 1948   }
 1949 
 1950   int framesize = C->output()->frame_size_in_bytes();
 1951   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1952   // Remove word for return adr already pushed
 1953   // and RBP

 1961   st->print_cr("popq    rbp");
 1962   if (do_polling() && C->is_method_compilation()) {
 1963     st->print("\t");
 1964     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1965                  "ja      #safepoint_stub\t"
 1966                  "# Safepoint: poll for GC");
 1967   }
 1968 }
 1969 #endif
 1970 
 1971 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1972 {
      // Emits the method epilog: optional vzeroupper, frame teardown (stack
      // adjust + pop rbp), optional reserved-stack check, and the return
      // safepoint poll.
 1973   Compile* C = ra_->C;
 1974 
 1975   if (generate_vzeroupper(C)) {
 1976     // Clear upper bits of YMM registers when current compiled code uses
 1977     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1978     __ vzeroupper();
 1979   }
 1980 
 1981   int framesize = C->output()->frame_size_in_bytes();
 1982   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1983   // Remove word for return adr already pushed
 1984   // and RBP
 1985   framesize -= 2*wordSize;
 1986 
 1987   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1988 
      // Skip the stack adjustment entirely when nothing remains to pop.
 1989   if (framesize) {
 1990     __ addq(rsp, framesize);
 1991   }
 1992 
 1993   __ popq(rbp);
 1994 
 1995   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1996     __ reserved_stack_check();
 1997   }
 1998 
 1999   if (do_polling() && C->is_method_compilation()) {
      // During scratch size emission no stub is registered; a local dummy
      // label stands in as the poll's branch target.
 2000     Label dummy_label;
 2001     Label* code_stub = &dummy_label;
 2002     if (!C->output()->in_scratch_emit_size()) {
 2003       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 2004       C->output()->add_stub(stub);
 2005       code_stub = &stub->entry();
 2006     }
 2007     __ relocate(relocInfo::poll_return_type);
 2008     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2009   }
 2010 }
 2011 
 2012 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2013 {
      // No closed-form size for the epilog: defer to the generic
      // MachNode::size() computation.
 2014   return MachNode::size(ra_); // too many variables; just compute it
 2015                               // the hard way
 2016 }
 2017 
 2018 int MachEpilogNode::reloc() const
 2019 {
      // NOTE(review): presumably an upper bound on relocation entries for this
      // node; emit() issues one relocate(poll_return_type) -- confirm that 2 is
      // meant as a conservative bound.
 2020   return 2; // a large enough number
 2021 }
 2022 
 2023 const Pipeline* MachEpilogNode::pipeline() const
 2024 {
      // The epilog has no dedicated pipeline description; it reports the
      // generic MachNode pipeline class.
 2025   return MachNode::pipeline_class();
 2026 }
 2027 
 2028 //=============================================================================
 2029 
 2030 enum RC {
 2031   rc_bad,
 2032   rc_int,
 2033   rc_kreg,
 2034   rc_float,
 2035   rc_stack
 2036 };
 2037 

 2599 #endif
 2600 
 2601 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
      // Materializes the address rsp + offset into this node's assigned
      // register. The offset is derived from the first element of input 0's
      // register mask via reg2offset().
 2603   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2604   int reg = ra_->get_encode(this);
 2605 
 2606   __ lea(as_Register(reg), Address(rsp, offset));
 2607 }
 2608 
 2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2610 {
      // Returns the byte size of the lea produced by emit(). Register encodings
      // above 15 cost one extra byte (REX2 vs. REX prefix, per the comments
      // below), and offsets of 0x80 or more cost three extra bytes.
      // NOTE(review): presumably disp8 vs. disp32 addressing -- confirm.
 2611   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2612   if (ra_->get_encode(this) > 15) {
 2613     return (offset < 0x80) ? 6 : 9; // REX2
 2614   } else {
 2615     return (offset < 0x80) ? 5 : 8; // REX
 2616   }
 2617 }
 2618 












































 2619 //=============================================================================
 2620 #ifndef PRODUCT
 2621 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2622 {
      // Debug-only (non-PRODUCT) listing of the unverified entry point: prints
      // a three-line description of the inline cache check to st.
 2623   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2624   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2625   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2626 }
 2627 #endif
 2628 
 2629 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2630 {
      // The whole unverified entry point is produced by the assembler's
      // ic_check helper, which is handed InteriorEntryAlignment.
 2631   __ ic_check(InteriorEntryAlignment);
 2632 }
 2633 
 2634 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2635 {
      // No closed-form size for the unverified entry point: defer to the
      // generic MachNode::size() computation.
 2636   return MachNode::size(ra_); // too many variables; just compute it
 2637                               // the hard way
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
      // On this platform the answer is simply the EnableVectorSupport flag.
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
      // True when the node's flags have Flag_ndd_demotable_opr1 set.
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
      // True when the node's flags have Flag_ndd_demotable_opr2 set.
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2653 }
 2654 
 2655 #ifdef ASSERT
 2656 static bool is_ndd_demotable(const MachNode* mdef) {
      // ASSERT-only helper: true when either demotable-operand flag is set.
 2657   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2658 }
 2659 #endif

 4593     }
 4594     __ post_call_nop();
 4595   %}
 4596 
 4597   enc_class Java_Dynamic_Call(method meth) %{
 4598     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4599     __ post_call_nop();
 4600   %}
 4601 
 4602   enc_class call_epilog %{
 4603     if (VerifyStackAtCalls) {
 4604       // Check that stack depth is unchanged: find majik cookie on stack
 4605       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4606       Label L;
 4607       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4608       __ jccb(Assembler::equal, L);
 4609       // Die if stack mismatch
 4610       __ int3();
 4611       __ bind(L);
 4612     }

































 4613   %}
 4614 
 4615 %}
 4616 
 4617 //----------FRAME--------------------------------------------------------------
 4618 // Definition of frame structure and management information.
 4619 //
 4620 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4621 //                             |   (to get allocators register number
 4622 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4623 //  r   CALLER     |        |
 4624 //  o     |        +--------+      pad to even-align allocators stack-slot
 4625 //  w     V        |  pad0  |        numbers; owned by CALLER
 4626 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4627 //  h     ^        |   in   |  5
 4628 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4629 //  |     |        |        |  3
 4630 //  |     |        +--------+
 4631 //  V     |        | old out|      Empty on Intel, window on Sparc
 4632 //        |    old |preserve|      Must be even aligned.

 5771   %}
 5772 %}
 5773 
 5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5775 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5776 %{
 5777   constraint(ALLOC_IN_RC(ptr_reg));
      // Following the match rule below, n->in(2)->in(3)->in(1) is the ConvI2L
      // node; the operand applies only when the low bound of its long type is
      // non-negative (the "Positive Index" in the operand's name).
 5778   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5779   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5780 
 5781   op_cost(10);
 5782   format %{"[$reg + $off + $idx << $scale]" %}
 5783   interface(MEMORY_INTER) %{
 5784     base($reg);
 5785     index($idx);
 5786     scale($scale);
 5787     disp($off);
 5788   %}
 5789 %}
 5790 
















 5791 // Indirect Narrow Oop Plus Offset Operand
 5792 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
 5793 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5794 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
      // Only usable when compressed oops are on and the shift is times_8,
      // which is what the hard-coded scale(0x3) below encodes.
 5795   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5796   constraint(ALLOC_IN_RC(ptr_reg));
 5797   match(AddP (DecodeN reg) off);
 5798 
 5799   op_cost(10);
 5800   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
      // The narrow oop register is used as the index; the base is fixed to R12.
 5801   interface(MEMORY_INTER) %{
 5802     base(0xc); // R12
 5803     index($reg);
 5804     scale(0x3);
 5805     disp($off);
 5806   %}
 5807 %}
 5808 
 5809 // Indirect Memory Operand
 5810 operand indirectNarrow(rRegN reg)

 6280 %}
 6281 
 6282 // Replaces legVec during post-selection cleanup. See above.
 6283 operand legVecZ() %{
      // VecZ operand allocated from the vectorz_reg_legacy register class.
 6284   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6285   match(VecZ);
 6286 
 6287   format %{ %}
 6288   interface(REG_INTER);
 6289 %}
 6290 
 6291 //----------OPERAND CLASSES----------------------------------------------------
 6292 // Operand Classes are groups of operands that are used to simplify
 6293 // instruction definitions by not requiring the AD writer to specify separate
 6294 // instructions for every form of operand when the instruction accepts
 6295 // multiple operand types with the same basic encoding and format.  The classic
 6296 // case of this is memory operands.
 6297 
      // Every memory addressing operand accepted wherever an instruction takes
      // a "memory" operand: plain forms, the compressed-oop form, and the
      // *Narrow forms.
 6298 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6299                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6300                indCompressedOopOffset,
 6301                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6302                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6303                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6304 
 6305 //----------PIPELINE-----------------------------------------------------------
 6306 // Rules which define the behavior of the target architectures pipeline.
 6307 pipeline %{
 6308 
 6309 //----------ATTRIBUTES---------------------------------------------------------
 6310 attributes %{
 6311   variable_size_instructions;        // Instructions are variable-sized
 6312   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6313   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 6314   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6315   instruction_fetch_units = 1;       // of 16 bytes
 6316 %}
 6317 
 6318 //----------RESOURCES----------------------------------------------------------
 6319 // Resources are the functional units available to the machine
 6320 

 8915   format %{ "MEMBAR-storestore (empty encoding)" %}
 8916   ins_encode( );
 8917   ins_pipe(empty);
 8918 %}
 8919 
 8920 //----------Move Instructions--------------------------------------------------
 8921 
 8922 instruct castX2P(rRegP dst, rRegL src)
 8923 %{
      // Reinterprets a long as a pointer: a plain register-to-register move.
 8924   match(Set dst (CastX2P src));
 8925 
 8926   format %{ "movq    $dst, $src\t# long->ptr" %}
 8927   ins_encode %{
      // Nothing is emitted when source and destination were allocated to the
      // same register.
 8928     if ($dst$$reg != $src$$reg) {
 8929       __ movptr($dst$$Register, $src$$Register);
 8930     }
 8931   %}
 8932   ins_pipe(ialu_reg_reg); // XXX
 8933 %}
 8934 


























 8935 instruct castP2X(rRegL dst, rRegP src)
 8936 %{
      // Reinterprets a pointer as a long: a plain register-to-register move.
 8937   match(Set dst (CastP2X src));
 8938 
 8939   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8940   ins_encode %{
      // Nothing is emitted when source and destination were allocated to the
      // same register.
 8941     if ($dst$$reg != $src$$reg) {
 8942       __ movptr($dst$$Register, $src$$Register);
 8943     }
 8944   %}
 8945   ins_pipe(ialu_reg_reg); // XXX
 8946 %}
 8947 
 8948 // Convert oop into int for vectors alignment masking
 8949 instruct convP2I(rRegI dst, rRegP src)
 8950 %{
 8951   match(Set dst (ConvL2I (CastP2X src)));
 8952 
 8953   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8954   ins_encode %{

15206   effect(DEF dst, USE src);
15207   ins_cost(100);
15208   format %{ "movd    $dst,$src\t# MoveI2F" %}
15209   ins_encode %{
15210     __ movdl($dst$$XMMRegister, $src$$Register);
15211   %}
15212   ins_pipe( pipe_slow );
15213 %}
15214 
15215 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
      // Moves a long from a general-purpose register into an XMM register
      // (MoveL2D).
15216   match(Set dst (MoveL2D src));
15217   effect(DEF dst, USE src);
15218   ins_cost(100);
      // NOTE(review): the listing text says "movd" while the encoder below
      // calls movdq -- confirm whether the format string should be updated.
15219   format %{ "movd    $dst,$src\t# MoveL2D" %}
15220   ins_encode %{
15221      __ movdq($dst$$XMMRegister, $src$$Register);
15222   %}
15223   ins_pipe( pipe_slow );
15224 %}
15225 

15226 // Fast clearing of an array
15227 // Small non-constant length ClearArray for non-AVX512 targets.
15228 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15229                   Universe dummy, rFlagsReg cr)
15230 %{
15231   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15232   match(Set dummy (ClearArray cnt base));
15233   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































15234 
15235   format %{ $$template
15236     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15237     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15238     $$emit$$"jg      LARGE\n\t"
15239     $$emit$$"dec     rcx\n\t"
15240     $$emit$$"js      DONE\t# Zero length\n\t"
15241     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15242     $$emit$$"dec     rcx\n\t"
15243     $$emit$$"jge     LOOP\n\t"
15244     $$emit$$"jmp     DONE\n\t"
15245     $$emit$$"# LARGE:\n\t"
15246     if (UseFastStosb) {
15247        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15248        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15249     } else if (UseXMMForObjInit) {
15250        $$emit$$"mov     rdi,rax\n\t"
15251        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15252        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15253        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15261        $$emit$$"jl      L_tail\n\t"
15262        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15263        $$emit$$"add     0x20,rax\n\t"
15264        $$emit$$"sub     0x4,rcx\n\t"
15265        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15266        $$emit$$"add     0x4,rcx\n\t"
15267        $$emit$$"jle     L_end\n\t"
15268        $$emit$$"dec     rcx\n\t"
15269        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15270        $$emit$$"vmovq   xmm0,(rax)\n\t"
15271        $$emit$$"add     0x8,rax\n\t"
15272        $$emit$$"dec     rcx\n\t"
15273        $$emit$$"jge     L_sloop\n\t"
15274        $$emit$$"# L_end:\n\t"
15275     } else {
15276        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15277     }
15278     $$emit$$"# DONE"
15279   %}
15280   ins_encode %{
15281     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15282                  $tmp$$XMMRegister, false, knoreg);
15283   %}
15284   ins_pipe(pipe_slow);
15285 %}
15286 
15287 // Small non-constant length ClearArray for AVX512 targets.
15288 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15289                        Universe dummy, rFlagsReg cr)
15290 %{
15291   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15292   match(Set dummy (ClearArray cnt base));
15293   ins_cost(125);
15294   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15295 
15296   format %{ $$template
15297     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15298     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15299     $$emit$$"jg      LARGE\n\t"
15300     $$emit$$"dec     rcx\n\t"
15301     $$emit$$"js      DONE\t# Zero length\n\t"
15302     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15303     $$emit$$"dec     rcx\n\t"
15304     $$emit$$"jge     LOOP\n\t"
15305     $$emit$$"jmp     DONE\n\t"
15306     $$emit$$"# LARGE:\n\t"
15307     if (UseFastStosb) {
15308        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15309        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15310     } else if (UseXMMForObjInit) {
15311        $$emit$$"mov     rdi,rax\n\t"
15312        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15313        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15314        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15322        $$emit$$"jl      L_tail\n\t"
15323        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15324        $$emit$$"add     0x20,rax\n\t"
15325        $$emit$$"sub     0x4,rcx\n\t"
15326        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15327        $$emit$$"add     0x4,rcx\n\t"
15328        $$emit$$"jle     L_end\n\t"
15329        $$emit$$"dec     rcx\n\t"
15330        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15331        $$emit$$"vmovq   xmm0,(rax)\n\t"
15332        $$emit$$"add     0x8,rax\n\t"
15333        $$emit$$"dec     rcx\n\t"
15334        $$emit$$"jge     L_sloop\n\t"
15335        $$emit$$"# L_end:\n\t"
15336     } else {
15337        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15338     }
15339     $$emit$$"# DONE"
15340   %}
15341   ins_encode %{
15342     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15343                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15344   %}
15345   ins_pipe(pipe_slow);
15346 %}
15347 
15348 // Large non-constant length ClearArray for non-AVX512 targets.
15349 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15350                         Universe dummy, rFlagsReg cr)
15351 %{
15352   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15353   match(Set dummy (ClearArray cnt base));
15354   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































15355 
15356   format %{ $$template
15357     if (UseFastStosb) {
15358        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15359        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15360        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15361     } else if (UseXMMForObjInit) {
15362        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15363        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15364        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15365        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15366        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15367        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15368        $$emit$$"add     0x40,rax\n\t"
15369        $$emit$$"# L_zero_64_bytes:\n\t"
15370        $$emit$$"sub     0x8,rcx\n\t"
15371        $$emit$$"jge     L_loop\n\t"
15372        $$emit$$"add     0x4,rcx\n\t"
15373        $$emit$$"jl      L_tail\n\t"
15374        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15375        $$emit$$"add     0x20,rax\n\t"
15376        $$emit$$"sub     0x4,rcx\n\t"
15377        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15378        $$emit$$"add     0x4,rcx\n\t"
15379        $$emit$$"jle     L_end\n\t"
15380        $$emit$$"dec     rcx\n\t"
15381        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15382        $$emit$$"vmovq   xmm0,(rax)\n\t"
15383        $$emit$$"add     0x8,rax\n\t"
15384        $$emit$$"dec     rcx\n\t"
15385        $$emit$$"jge     L_sloop\n\t"
15386        $$emit$$"# L_end:\n\t"
15387     } else {
15388        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15389        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15390     }
15391   %}
15392   ins_encode %{
15393     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15394                  $tmp$$XMMRegister, true, knoreg);
15395   %}
15396   ins_pipe(pipe_slow);
15397 %}
15398 
15399 // Large non-constant length ClearArray for AVX512 targets.
15400 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15401                              Universe dummy, rFlagsReg cr)
15402 %{
15403   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15404   match(Set dummy (ClearArray cnt base));
15405   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15406 
15407   format %{ $$template
15408     if (UseFastStosb) {
15409        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15410        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15411        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15412     } else if (UseXMMForObjInit) {
15413        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15414        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15415        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15416        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15417        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15418        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15419        $$emit$$"add     0x40,rax\n\t"
15420        $$emit$$"# L_zero_64_bytes:\n\t"
15421        $$emit$$"sub     0x8,rcx\n\t"
15422        $$emit$$"jge     L_loop\n\t"
15423        $$emit$$"add     0x4,rcx\n\t"
15424        $$emit$$"jl      L_tail\n\t"
15425        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15426        $$emit$$"add     0x20,rax\n\t"
15427        $$emit$$"sub     0x4,rcx\n\t"
15428        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15429        $$emit$$"add     0x4,rcx\n\t"
15430        $$emit$$"jle     L_end\n\t"
15431        $$emit$$"dec     rcx\n\t"
15432        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15433        $$emit$$"vmovq   xmm0,(rax)\n\t"
15434        $$emit$$"add     0x8,rax\n\t"
15435        $$emit$$"dec     rcx\n\t"
15436        $$emit$$"jge     L_sloop\n\t"
15437        $$emit$$"# L_end:\n\t"
15438     } else {
15439        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15440        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15441     }
15442   %}
15443   ins_encode %{
15444     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15445                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15446   %}
15447   ins_pipe(pipe_slow);
15448 %}
15449 
15450 // Small constant length ClearArray for AVX512 targets.
15451 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15452 %{
      // Selected for non-large ClearArray nodes whose count is an immediate,
      // when MaxVectorSize >= 32 and AVX512VL is supported.
15453   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15454   match(Set dummy (ClearArray cnt base));

15455   ins_cost(100);
      // tmp, zero and ktmp are scratch registers; the flags are clobbered.
15456   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15457   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15458   ins_encode %{
      // Calls the clear_mem overload that takes the count as a constant.
15459    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15460   %}
15461   ins_pipe(pipe_slow);
15462 %}
15463 
15464 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15465                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15466 %{
      // StrComp for the LL encoding on targets without AVX512VL+BW; operands
      // are pinned to fixed registers by their operand classes.
15467   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15468   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
      // All four inputs are consumed and destroyed; tmp1 is scratch and the
      // flags are clobbered.
15469   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15470 
15471   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15472   ins_encode %{
      // knoreg: no mask register is supplied on this non-AVX512 path.
15473     __ string_compare($str1$$Register, $str2$$Register,
15474                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15475                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15476   %}
15477   ins_pipe( pipe_slow );
15478 %}
15479 

17317   effect(USE meth);
17318 
17319   ins_cost(300);
17320   format %{ "call_leaf,runtime " %}
17321   ins_encode(clear_avx, Java_To_Runtime(meth));
17322   ins_pipe(pipe_slow);
17323 %}
17324 
17325 // Call runtime without safepoint and with vector arguments
17326 instruct CallLeafDirectVector(method meth)
17327 %{
17328   match(CallLeafVector);
17329   effect(USE meth);
17330 
17331   ins_cost(300);
17332   format %{ "call_leaf,vector " %}
      // Unlike the neighbouring leaf-call instructs, no clear_avx encoding is
      // emitted before the call here; MachCallRuntimeNode::ret_addr_offset()
      // accounts for that by skipping clear_avx_size() for CallLeafVector.
17333   ins_encode(Java_To_Runtime(meth));
17334   ins_pipe(pipe_slow);
17335 %}
17336 
















17337 // Call runtime without safepoint
17338 instruct CallLeafNoFPDirect(method meth)
17339 %{

17340   match(CallLeafNoFP);
17341   effect(USE meth);
17342 
17343   ins_cost(300);
17344   format %{ "call_leaf_nofp,runtime " %}
      // The clear_avx encoding is emitted first, followed by the runtime call
      // sequence.
17345   ins_encode(clear_avx, Java_To_Runtime(meth));
17346   ins_pipe(pipe_slow);
17347 %}
17348 
17349 // Return Instruction
17350 // Remove the return address & jump to it.
17351 // Notice: We always emit a nop after a ret to make sure there is room
17352 // for safepoint patching
17353 instruct Ret()
17354 %{
17355   match(Return);
17356 
17357   format %{ "ret" %}
17358   ins_encode %{
17359     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
      // Returns the byte distance from the start of this node's code to the
      // return address: the fixed call size plus clear_avx_size() (treated as
      // the vzeroupper size by compute_padding in this file).
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
      // Same scheme as the static call variant, but with a 15-byte fixed
      // sequence before the return address; clear_avx_size() is added on top.
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
      // Byte distance from the start of the node's code to the return address.
      // A null _entry_point identifies the indirect (call-through-register)
      // form, which is a fixed 3 bytes with nothing added.
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
      // CallLeafVector is excluded: its instruct (CallLeafDirectVector) has no
      // clear_avx encoding, so no extra bytes precede the call.
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 //
 1663 // Compute padding required for nodes which need alignment
 1664 //
 1665 
 1666 // The address of the call instruction needs to be 4-byte aligned to
 1667 // ensure that it does not span a cache line so that it can be patched.
 1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1669 {
      // Returns the number of padding bytes to insert before this node so that
      // the position just past the vzeroupper and the 1-byte call opcode lands
      // on a multiple of alignment_required().
 1670   current_offset += clear_avx_size(); // skip vzeroupper
 1671   current_offset += 1; // skip call opcode byte
 1672   return align_up(current_offset, alignment_required()) - current_offset;
 1673 }
 1674 
 1675 // The address of the call instruction needs to be 4-byte aligned to

 1883     st->print("\n\t");
 1884     st->print("# stack alignment check");
 1885 #endif
 1886   }
 1887   if (C->stub_function() != nullptr) {
 1888     st->print("\n\t");
 1889     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1890     st->print("\n\t");
 1891     st->print("je      fast_entry\t");
 1892     st->print("\n\t");
 1893     st->print("call    #nmethod_entry_barrier_stub\t");
 1894     st->print("\n\tfast_entry:");
 1895   }
 1896   st->cr();
 1897 }
 1898 #endif
 1899 
 1900 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
      // Emits the method prolog: the verified entry sequence, an entry barrier
      // for non-stub compilations, binding of the verified-entry label, and
      // then records the frame-complete offset and the constant table base.
 1901   Compile* C = ra_->C;
 1902 
 1903   __ verified_entry(C);








 1904 
      // Only non-stub compilations (stub_function() == nullptr) get the
      // entry barrier.
 1905   if (ra_->C->stub_function() == nullptr) {
 1906     __ entry_barrier();




 1907   }
 1908 
      // The label is not bound while the node is only being emitted for
      // scratch size measurement.
 1909   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1910     __ bind(*_verified_entry);
 1911   }
 1912 
      // Record the code offset reached after the entry sequence.
 1913   C->output()->set_frame_complete(__ offset());
 1914 
 1915   if (C->has_mach_constant_base_node()) {
 1916     // NOTE: We set the table base offset here because users might be
 1917     // emitted before MachConstantBaseNode.
 1918     ConstantTable& constant_table = C->output()->constant_table();
 1919     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1920   }
 1921 }
 1922 





 1923 
 1924 int MachPrologNode::reloc() const
 1925 {
      // NOTE(review): presumably an upper bound on relocation entries for this
      // node; the prolog reports 0 here -- confirm against MachNode::reloc().
 1926   return 0; // a large enough number
 1927 }
 1928 
 1929 //=============================================================================
 1930 #ifndef PRODUCT
 1931 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1932 {
 1933   Compile* C = ra_->C;
 1934   if (generate_vzeroupper(C)) {
 1935     st->print("vzeroupper");
 1936     st->cr(); st->print("\t");
 1937   }
 1938 
 1939   int framesize = C->output()->frame_size_in_bytes();
 1940   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1941   // Remove word for return adr already pushed
 1942   // and RBP

 1950   st->print_cr("popq    rbp");
 1951   if (do_polling() && C->is_method_compilation()) {
 1952     st->print("\t");
 1953     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1954                  "ja      #safepoint_stub\t"
 1955                  "# Safepoint: poll for GC");
 1956   }
 1957 }
 1958 #endif
 1959 
 1960 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1961 {
        // Emits the method epilog: optional vzeroupper, frame removal, the
        // reserved-stack check when enabled, and the return safepoint poll for
        // method compilations that poll.
 1962   Compile* C = ra_->C;
 1963 
 1964   if (generate_vzeroupper(C)) {
 1965     // Clear upper bits of YMM registers when current compiled code uses
 1966     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1967     __ vzeroupper();
 1968   }
 1969 
 1970   // Subtract two words to account for return address and rbp
 1971   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
        // The second argument forwards whether this compilation needs stack repair.
 1972   __ remove_frame(initial_framesize, C->needs_stack_repair());










 1973 
 1974   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1975     __ reserved_stack_check();
 1976   }
 1977 
 1978   if (do_polling() && C->is_method_compilation()) {
          // While only measuring code size (scratch emit) no stub is registered
          // and the poll branches to a throw-away local label instead.
 1979     Label dummy_label;
 1980     Label* code_stub = &dummy_label;
 1981     if (!C->output()->in_scratch_emit_size()) {
            // The stub is arena-allocated and records the current code offset.
 1982       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1983       C->output()->add_stub(stub);
 1984       code_stub = &stub->entry();
 1985     }
 1986     __ relocate(relocInfo::poll_return_type);
 1987     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1988   }
 1989 }
 1990 






 1991 int MachEpilogNode::reloc() const
 1992 {
        // Always answers 2 for the epilog (presumably an upper bound on the
        // relocation entries it may need -- confirm against callers of reloc()).
 1993   return 2; // a large enough number
 1994 }
 1995 
 1996 const Pipeline* MachEpilogNode::pipeline() const
 1997 {
        // The epilog has no pipeline description of its own; it reports the
        // class-level one from MachNode.
 1998   return MachNode::pipeline_class();
 1999 }
 2000 
 2001 //=============================================================================
 2002 
 2003 enum RC {
        // Coarse categories for where a value lives.
        // NOTE(review): the meanings below are read off the enumerator names
        // only; confirm against the code that consumes RC.
 2004   rc_bad,    // presumably: no valid category
 2005   rc_int,    // presumably: general purpose register
 2006   rc_kreg,   // presumably: opmask (k) register
 2007   rc_float,  // presumably: floating point / vector register
 2008   rc_stack   // presumably: stack slot
 2009 };
 2010 

 2572 #endif
 2573 
 2574 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2575 {
        // Loads into this node's assigned register the address rsp + offset,
        // where offset is the stack offset of the first element of the node's
        // input register mask.
 2576   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2577   int reg = ra_->get_encode(this);
 2578 
 2579   __ lea(as_Register(reg), Address(rsp, offset));
 2580 }
 2581 
 2582 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2583 {
        // Returns the byte size assumed for the lea emitted by
        // BoxLockNode::emit(). Offsets below 0x80 use the shorter size
        // (presumably an 8-bit displacement, otherwise 32-bit), and register
        // encodings above 15 add one byte (REX2 instead of REX prefix).
 2584   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2585   if (ra_->get_encode(this) > 15) {
 2586     return (offset < 0x80) ? 6 : 9; // REX2
 2587   } else {
 2588     return (offset < 0x80) ? 5 : 8; // REX
 2589   }
 2590 }
 2591 
 2592 //=============================================================================
 2593 #ifndef PRODUCT
 2594 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2595 {
        // Debug listing only: prints the node name, not the emitted instructions.
 2596   st->print_cr("MachVEPNode");
 2597 }
 2598 #endif
 2599 
 2600 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2601 {
        // Emits an entry point that handles inline type arguments passed as oops.
        // Unverified variant: only the inline cache check. Verified variant:
        // optional entry barrier in a temporary frame, argument unpacking, the
        // real frame setup, then a jump to the verified entry label. The code is
        // finally padded with nops to a multiple of 4 bytes.
 2602   CodeBuffer* cbuf = masm->code();
        // Remember the starting size so the padding below is computed from the
        // number of bytes this node emitted.
 2603   uint insts_size = cbuf->insts_size();
 2604   if (!_verified) {
 2605     __ ic_check(1);
 2606   } else {
 2607     if (ra_->C->stub_function() == nullptr) {
 2608       // Emit the entry barrier in a temporary frame before unpacking because
 2609       // it can deopt, which would require packing the scalarized args again.
 2610       __ verified_entry(ra_->C, 0);
 2611       __ entry_barrier();
 2612       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2613       __ remove_frame(initial_framesize, false);
 2614     }
 2615     // Unpack inline type args passed as oop and then jump to
 2616     // the verified entry point (skipping the unverified entry).
 2617     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2618     // Emit code for verified entry and save increment for stack repair on return
 2619     __ verified_entry(ra_->C, sp_inc);
          // During scratch emission (size measurement) the jump targets a local
          // dummy label instead of the real verified entry label.
 2620     if (Compile::current()->output()->in_scratch_emit_size()) {
 2621       Label dummy_verified_entry;
 2622       __ jmp(dummy_verified_entry);
 2623     } else {
 2624       __ jmp(*_verified_entry);
 2625     }
 2626   }
 2627   /* WARNING these NOPs are critical so that verified entry point is properly
 2628      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 2629   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2630   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2631   if (nops_cnt > 0) {
 2632     __ nop(nops_cnt);
 2633   }
 2634 }
 2635 
 2636 //=============================================================================
 2637 #ifndef PRODUCT
 2638 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2639 {
        // Debug listing only: prints a fixed three-line description of the
        // inline cache check (klass load, compare, jump to the miss stub).
 2640   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2641   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2642   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2643 }
 2644 #endif
 2645 
 2646 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2647 {
        // Emits the inline cache check for the unverified entry point.
        // InteriorEntryAlignment is handed to ic_check() (presumably as the
        // alignment required at the end of the check -- confirm in ic_check()).
 2648   __ ic_check(InteriorEntryAlignment);
 2649 }
 2650 






 2651 
 2652 //=============================================================================
 2653 
 2654 bool Matcher::supports_vector_calling_convention(void) {
        // The answer simply mirrors the EnableVectorSupport flag.
 2655   return EnableVectorSupport;
 2656 }
 2657 
 2658 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
        // True when the node's flags contain the platform-dependent
        // Flag_ndd_demotable_opr1 bit.
 2659   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2660 }
 2661 
 2662 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
        // True when the node's flags contain the platform-dependent
        // Flag_ndd_demotable_opr2 bit.
 2663   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2664 }
 2665 
 2666 #ifdef ASSERT
 2667 static bool is_ndd_demotable(const MachNode* mdef) {
        // True when either of the two demotable-operand flags is set on the node.
        // Only compiled in ASSERT builds (see the surrounding #ifdef).
 2668   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2669 }
 2670 #endif

 4604     }
 4605     __ post_call_nop();
 4606   %}
 4607 
 4608   enc_class Java_Dynamic_Call(method meth) %{
          // Emits an inline-cache call to the target method, passing the
          // resolved method index, followed by the post-call nop.
 4609     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4610     __ post_call_nop();
 4611   %}
 4612 
 4613   enc_class call_epilog %{
          // Code emitted right after a call returns. Two independent parts:
          //  1. With VerifyStackAtCalls, check that the cookie 0xbadb100d is still
          //     at its expected stack slot and trap (int3) otherwise.
          //  2. For calls returning an inline type as fields, materialize the null
          //     marker projection and, if the result is used, clear rax when it
          //     holds a tagged InlineKlass pointer instead of an oop.
 4614     if (VerifyStackAtCalls) {
 4615       // Check that stack depth is unchanged: find majik cookie on stack
 4616       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4617       Label L;
 4618       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4619       __ jccb(Assembler::equal, L);
 4620       // Die if stack mismatch
 4621       __ int3();
 4622       __ bind(L);
 4623     }
 4624     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4625       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4626       // Search for the corresponding projection, get the register and emit code that initialized it.
 4627       uint con = (tf()->range_cc()->cnt() - 1);
 4628       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4629         ProjNode* proj = fast_out(i)->as_Proj();
 4630         if (proj->_con == con) {
 4631           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4632           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4633           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
                  // A marker assigned to a stack slot is computed in rscratch1
                  // first and stored to the slot below.
 4634           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4635           __ testq(rax, rax);
 4636           __ setb(Assembler::notZero, toReg);
 4637           __ movzbl(toReg, toReg);
 4638           if (reg->is_stack()) {
 4639             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4640             __ movq(Address(rsp, st_off), toReg);
 4641           }
                  // Only the first matching projection is handled.
 4642           break;
 4643         }
 4644       }
 4645       if (return_value_is_used()) {
 4646         // An inline type is returned as fields in multiple registers.
 4647         // Rax either contains an oop if the inline type is buffered or a pointer
 4648         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4649         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4650         // rax &= (rax & 1) - 1
 4651         __ movptr(rscratch1, rax);
 4652         __ andptr(rscratch1, 0x1);
 4653         __ subptr(rscratch1, 0x1);
 4654         __ andptr(rax, rscratch1);
 4655       }
 4656     }
 4657   %}
 4658 
 4659 %}
 4660 
 4661 //----------FRAME--------------------------------------------------------------
 4662 // Definition of frame structure and management information.
 4663 //
 4664 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4665 //                             |   (to get allocators register number
 4666 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4667 //  r   CALLER     |        |
 4668 //  o     |        +--------+      pad to even-align allocators stack-slot
 4669 //  w     V        |  pad0  |        numbers; owned by CALLER
 4670 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4671 //  h     ^        |   in   |  5
 4672 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4673 //  |     |        |        |  3
 4674 //  |     |        +--------+
 4675 //  V     |        | old out|      Empty on Intel, window on Sparc
 4676 //        |    old |preserve|      Must be even aligned.

 5815   %}
 5816 %}
 5817 
 5818 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5819 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5820 %{
 5821   constraint(ALLOC_IN_RC(ptr_reg));
        // Applies only when the long type reached through in(2)->in(3)->in(1) has
        // a non-negative lower bound (presumably the ConvI2L of the index in the
        // match rule below -- confirm the input numbering).
 5822   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5823   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5824 
 5825   op_cost(10);
 5826   format %{"[$reg + $off + $idx << $scale]" %}
        // Address components are taken directly from the operands.
 5827   interface(MEMORY_INTER) %{
 5828     base($reg);
 5829     index($idx);
 5830     scale($scale);
 5831     disp($off);
 5832   %}
 5833 %}
 5834 
 5835 // Indirect Narrow Oop Operand
 5836 operand indCompressedOop(rRegN reg) %{
        // Folds a DecodeN into the addressing mode. Only valid with compressed
        // oops and a shift equal to Address::times_8, matching the fixed scale
        // used in the interface below.
 5837   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5838   constraint(ALLOC_IN_RC(ptr_reg));
 5839   match(DecodeN reg);
 5840 
 5841   op_cost(10);
 5842   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5843   interface(MEMORY_INTER) %{
 5844     base(0xc); // R12
 5845     index($reg);
 5846     scale(0x3);
 5847     disp(0x0);
 5848   %}
 5849 %}
 5850 
 5851 // Indirect Narrow Oop Plus Offset Operand
 5852 // Note: the x86 architecture doesn't support "scale * index + offset" without
 5853 // a base, so we can't free r12 even with CompressedOops::base() == nullptr.
 5854 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Same as indCompressedOop but with an additional immediate displacement.
 5855   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5856   constraint(ALLOC_IN_RC(ptr_reg));
 5857   match(AddP (DecodeN reg) off);
 5858 
 5859   op_cost(10);
 5860   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5861   interface(MEMORY_INTER) %{
 5862     base(0xc); // R12
 5863     index($reg);
 5864     scale(0x3);
 5865     disp($off);
 5866   %}
 5867 %}
 5868 
 5869 // Indirect Memory Operand
 5870 operand indirectNarrow(rRegN reg)

 6340 %}
 6341 
 6342 // Replaces legVec during post-selection cleanup. See above.
 6343 operand legVecZ() %{
        // Matches VecZ values and allocates them in the vectorz_reg_legacy class.
 6344   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6345   match(VecZ);
 6346 
        // Intentionally empty format string.
 6347   format %{ %}
 6348   interface(REG_INTER);
 6349 %}
 6350 
 6351 //----------OPERAND CLASSES----------------------------------------------------
 6352 // Operand Classes are groups of operands that are used to simplify
 6353 // instruction definitions by not requiring the AD writer to specify separate
 6354 // instructions for every form of operand when the instruction accepts
 6355 // multiple operand types with the same basic encoding and format.  The classic
 6356 // case of this is memory operands.
 6357 
 6358 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6359                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6360                indCompressedOop, indCompressedOopOffset,
 6361                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6362                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6363                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6364 
 6365 //----------PIPELINE-----------------------------------------------------------
 6366 // Rules which define the behavior of the target architectures pipeline.
 6367 pipeline %{
 6368 
 6369 //----------ATTRIBUTES---------------------------------------------------------
 6370 attributes %{
 6371   variable_size_instructions;        // Variable size instructions
 6372   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6373   instruction_unit_size = 1;         // The instruction size unit is 1 byte
 6374   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6375   instruction_fetch_units = 1;       // of 16 bytes
 6376 %}
 6377 
 6378 //----------RESOURCES----------------------------------------------------------
 6379 // Resources are the functional units available to the machine
 6380 

 8975   format %{ "MEMBAR-storestore (empty encoding)" %}
 8976   ins_encode( );
 8977   ins_pipe(empty);
 8978 %}
 8979 
 8980 //----------Move Instructions--------------------------------------------------
 8981 
 8982 instruct castX2P(rRegP dst, rRegL src)
 8983 %{
        // Reinterprets a long as a pointer. This is a plain register move that is
        // omitted entirely when source and destination are the same register.
 8984   match(Set dst (CastX2P src));
 8985 
 8986   format %{ "movq    $dst, $src\t# long->ptr" %}
 8987   ins_encode %{
 8988     if ($dst$$reg != $src$$reg) {
 8989       __ movptr($dst$$Register, $src$$Register);
 8990     }
 8991   %}
 8992   ins_pipe(ialu_reg_reg); // XXX
 8993 %}
 8994 
 8995 instruct castI2N(rRegN dst, rRegI src)
 8996 %{
        // Reinterprets an int as a narrow pointer. This is a 32-bit register move
        // that is omitted when source and destination are the same register.
        // The format string names movl so the debug listing agrees with the
        // instruction that the encoding below emits.
 8997   match(Set dst (CastI2N src));
 8998 
 8999   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 9000   ins_encode %{
 9001     if ($dst$$reg != $src$$reg) {
 9002       __ movl($dst$$Register, $src$$Register);
 9003     }
 9004   %}
 9005   ins_pipe(ialu_reg_reg); // XXX
 9006 %}
 9007 
 9008 instruct castN2X(rRegL dst, rRegN src)
 9009 %{
        // CastP2X variant whose source lives in a narrow-pointer register. This
        // is a register move that is omitted when source and destination are the
        // same register. The format comment says "narrow ptr" so that this rule
        // can be told apart from castP2X in the debug listing.
 9010   match(Set dst (CastP2X src));
 9011 
 9012   format %{ "movq    $dst, $src\t# narrow ptr -> long" %}
 9013   ins_encode %{
 9014     if ($dst$$reg != $src$$reg) {
 9015       __ movptr($dst$$Register, $src$$Register);
 9016     }
 9017   %}
 9018   ins_pipe(ialu_reg_reg); // XXX
 9019 %}
 9020 
 9021 instruct castP2X(rRegL dst, rRegP src)
 9022 %{
        // Reinterprets a pointer as a long. This is a plain register move that is
        // omitted entirely when source and destination are the same register.
 9023   match(Set dst (CastP2X src));
 9024 
 9025   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9026   ins_encode %{
 9027     if ($dst$$reg != $src$$reg) {
 9028       __ movptr($dst$$Register, $src$$Register);
 9029     }
 9030   %}
 9031   ins_pipe(ialu_reg_reg); // XXX
 9032 %}
 9033 
 9034 // Convert oop into int for vectors alignment masking
 9035 instruct convP2I(rRegI dst, rRegP src)
 9036 %{
 9037   match(Set dst (ConvL2I (CastP2X src)));
 9038 
 9039   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9040   ins_encode %{

15292   effect(DEF dst, USE src);
15293   ins_cost(100);
15294   format %{ "movd    $dst,$src\t# MoveI2F" %}
15295   ins_encode %{
15296     __ movdl($dst$$XMMRegister, $src$$Register);
15297   %}
15298   ins_pipe( pipe_slow );
15299 %}
15300 
15301 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves the contents of a long general purpose register into a double
        // (XMM) register via movdq.
        // NOTE(review): the format string prints "movd" while the encoding calls
        // movdq -- confirm whether the listing text is intentional.
15302   match(Set dst (MoveL2D src));
15303   effect(DEF dst, USE src);
15304   ins_cost(100);
15305   format %{ "movd    $dst,$src\t# MoveL2D" %}
15306   ins_encode %{
15307      __ movdq($dst$$XMMRegister, $src$$Register);
15308   %}
15309   ins_pipe( pipe_slow );
15310 %}
15311 
15312 
15313 // Fast clearing of an array
15314 // Small non-constant length ClearArray for non-AVX512 targets.
15315 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15316                   Universe dummy, rFlagsReg cr)
15317 %{
        // Selected for ClearArray nodes that are neither large nor word-copy-only
        // when UseAVX <= 2. The count is pinned to rcx, the base to rdi and the
        // fill value to rax; all three and the flags are killed, tmp is scratch.
        // NOTE(review): the XMM part of the format template below addresses
        // memory through rax and names xmm0 although the value is in rax and the
        // scratch register is $tmp -- the listing looks stale; the emitted code
        // comes from clear_mem() alone.
15318   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15319   match(Set dummy (ClearArray (Binary cnt base) val));
15320   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15321 
15322   format %{ $$template
15323     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15324     $$emit$$"jg      LARGE\n\t"
15325     $$emit$$"dec     rcx\n\t"
15326     $$emit$$"js      DONE\t# Zero length\n\t"
15327     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15328     $$emit$$"dec     rcx\n\t"
15329     $$emit$$"jge     LOOP\n\t"
15330     $$emit$$"jmp     DONE\n\t"
15331     $$emit$$"# LARGE:\n\t"
15332     if (UseFastStosb) {
15333        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15334        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15335     } else if (UseXMMForObjInit) {
15336        $$emit$$"movdq   $tmp, $val\n\t"
15337        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15338        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15339        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15340        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15341        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15342        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15343        $$emit$$"add     0x40,rax\n\t"
15344        $$emit$$"# L_zero_64_bytes:\n\t"
15345        $$emit$$"sub     0x8,rcx\n\t"
15346        $$emit$$"jge     L_loop\n\t"
15347        $$emit$$"add     0x4,rcx\n\t"
15348        $$emit$$"jl      L_tail\n\t"
15349        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15350        $$emit$$"add     0x20,rax\n\t"
15351        $$emit$$"sub     0x4,rcx\n\t"
15352        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15353        $$emit$$"add     0x4,rcx\n\t"
15354        $$emit$$"jle     L_end\n\t"
15355        $$emit$$"dec     rcx\n\t"
15356        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15357        $$emit$$"vmovq   xmm0,(rax)\n\t"
15358        $$emit$$"add     0x8,rax\n\t"
15359        $$emit$$"dec     rcx\n\t"
15360        $$emit$$"jge     L_sloop\n\t"
15361        $$emit$$"# L_end:\n\t"
15362     } else {
15363        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15364     }
15365     $$emit$$"# DONE"
15366   %}
15367   ins_encode %{
          // The two boolean arguments presumably mean (is_large, word_copy_only),
          // mirroring the predicate -- confirm against clear_mem().
15368     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15369                  $tmp$$XMMRegister, false, false);
15370   %}
15371   ins_pipe(pipe_slow);
15372 %}
15373 
15374 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15375                             Universe dummy, rFlagsReg cr)
15376 %{
        // Selected for ClearArray nodes that are not large but are
        // word-copy-only, when UseAVX <= 2. Unlike rep_stos, the format template
        // has no UseFastStosb (byte store) branch.
        // Register pinning and kills are the same as in rep_stos.
15377   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15378   match(Set dummy (ClearArray (Binary cnt base) val));
15379   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15380 
15381   format %{ $$template
15382     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15383     $$emit$$"jg      LARGE\n\t"
15384     $$emit$$"dec     rcx\n\t"
15385     $$emit$$"js      DONE\t# Zero length\n\t"
15386     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15387     $$emit$$"dec     rcx\n\t"
15388     $$emit$$"jge     LOOP\n\t"
15389     $$emit$$"jmp     DONE\n\t"
15390     $$emit$$"# LARGE:\n\t"
15391     if (UseXMMForObjInit) {
15392        $$emit$$"movdq   $tmp, $val\n\t"
15393        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15394        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15395        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15396        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15397        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15398        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15399        $$emit$$"add     0x40,rax\n\t"
15400        $$emit$$"# L_zero_64_bytes:\n\t"
15401        $$emit$$"sub     0x8,rcx\n\t"
15402        $$emit$$"jge     L_loop\n\t"
15403        $$emit$$"add     0x4,rcx\n\t"
15404        $$emit$$"jl      L_tail\n\t"
15405        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15406        $$emit$$"add     0x20,rax\n\t"
15407        $$emit$$"sub     0x4,rcx\n\t"
15408        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15409        $$emit$$"add     0x4,rcx\n\t"
15410        $$emit$$"jle     L_end\n\t"
15411        $$emit$$"dec     rcx\n\t"
15412        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15413        $$emit$$"vmovq   xmm0,(rax)\n\t"
15414        $$emit$$"add     0x8,rax\n\t"
15415        $$emit$$"dec     rcx\n\t"
15416        $$emit$$"jge     L_sloop\n\t"
15417        $$emit$$"# L_end:\n\t"
15418     } else {
15419        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15420     }
15421     $$emit$$"# DONE"
15422   %}
15423   ins_encode %{
          // The two boolean arguments presumably mean (is_large, word_copy_only),
          // mirroring the predicate -- confirm against clear_mem().
15424     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15425                  $tmp$$XMMRegister, false, true);
15426   %}
15427   ins_pipe(pipe_slow);
15428 %}
15429 
15430 // Small non-constant length ClearArray for AVX512 targets.
15431 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15432                        Universe dummy, rFlagsReg cr)
15433 %{
15434   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15435   match(Set dummy (ClearArray (Binary cnt base) val));
15436   ins_cost(125);
15437   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15438 
15439   format %{ $$template
15440     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15441     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15442     $$emit$$"jg      LARGE\n\t"
15443     $$emit$$"dec     rcx\n\t"
15444     $$emit$$"js      DONE\t# Zero length\n\t"
15445     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15446     $$emit$$"dec     rcx\n\t"
15447     $$emit$$"jge     LOOP\n\t"
15448     $$emit$$"jmp     DONE\n\t"
15449     $$emit$$"# LARGE:\n\t"
15450     if (UseFastStosb) {
15451        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15452        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15453     } else if (UseXMMForObjInit) {
15454        $$emit$$"mov     rdi,rax\n\t"
15455        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15456        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15457        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15465        $$emit$$"jl      L_tail\n\t"
15466        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15467        $$emit$$"add     0x20,rax\n\t"
15468        $$emit$$"sub     0x4,rcx\n\t"
15469        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15470        $$emit$$"add     0x4,rcx\n\t"
15471        $$emit$$"jle     L_end\n\t"
15472        $$emit$$"dec     rcx\n\t"
15473        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15474        $$emit$$"vmovq   xmm0,(rax)\n\t"
15475        $$emit$$"add     0x8,rax\n\t"
15476        $$emit$$"dec     rcx\n\t"
15477        $$emit$$"jge     L_sloop\n\t"
15478        $$emit$$"# L_end:\n\t"
15479     } else {
15480        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15481     }
15482     $$emit$$"# DONE"
15483   %}
15484   ins_encode %{
15485     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15486                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15487   %}
15488   ins_pipe(pipe_slow);
15489 %}
15490 
15491 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15492                                  Universe dummy, rFlagsReg cr)

15493 %{
15494   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15495   match(Set dummy (ClearArray (Binary cnt base) val));
15496   ins_cost(125);
15497   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15498 
15499   format %{ $$template
15500     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15501     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15502     $$emit$$"jg      LARGE\n\t"
15503     $$emit$$"dec     rcx\n\t"
15504     $$emit$$"js      DONE\t# Zero length\n\t"
15505     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15506     $$emit$$"dec     rcx\n\t"
15507     $$emit$$"jge     LOOP\n\t"
15508     $$emit$$"jmp     DONE\n\t"
15509     $$emit$$"# LARGE:\n\t"
15510     if (UseFastStosb) {
15511        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15512        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15513     } else if (UseXMMForObjInit) {
15514        $$emit$$"mov     rdi,rax\n\t"
15515        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15516        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15517        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15525        $$emit$$"jl      L_tail\n\t"
15526        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15527        $$emit$$"add     0x20,rax\n\t"
15528        $$emit$$"sub     0x4,rcx\n\t"
15529        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15530        $$emit$$"add     0x4,rcx\n\t"
15531        $$emit$$"jle     L_end\n\t"
15532        $$emit$$"dec     rcx\n\t"
15533        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15534        $$emit$$"vmovq   xmm0,(rax)\n\t"
15535        $$emit$$"add     0x8,rax\n\t"
15536        $$emit$$"dec     rcx\n\t"
15537        $$emit$$"jge     L_sloop\n\t"
15538        $$emit$$"# L_end:\n\t"
15539     } else {
15540        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15541     }
15542     $$emit$$"# DONE"
15543   %}
15544   ins_encode %{
15545     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15546                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15547   %}
15548   ins_pipe(pipe_slow);
15549 %}
15550 
15551 // Large non-constant length ClearArray for non-AVX512 targets.
15552 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15553                         Universe dummy, rFlagsReg cr)
15554 %{
        // Selected for ClearArray nodes that are large and not word-copy-only
        // when UseAVX <= 2. The format template has no short-length loop; it
        // starts directly with the bulk strategies.
15555   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15556   match(Set dummy (ClearArray (Binary cnt base) val));
15557   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15558 
15559   format %{ $$template
15560     if (UseFastStosb) {
15561        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15562        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15563     } else if (UseXMMForObjInit) {
15564        $$emit$$"movdq   $tmp, $val\n\t"
15565        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15566        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15567        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15568        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15569        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15570        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15571        $$emit$$"add     0x40,rax\n\t"
15572        $$emit$$"# L_zero_64_bytes:\n\t"
15573        $$emit$$"sub     0x8,rcx\n\t"
15574        $$emit$$"jge     L_loop\n\t"
15575        $$emit$$"add     0x4,rcx\n\t"
15576        $$emit$$"jl      L_tail\n\t"
15577        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15578        $$emit$$"add     0x20,rax\n\t"
15579        $$emit$$"sub     0x4,rcx\n\t"
15580        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15581        $$emit$$"add     0x4,rcx\n\t"
15582        $$emit$$"jle     L_end\n\t"
15583        $$emit$$"dec     rcx\n\t"
15584        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15585        $$emit$$"vmovq   xmm0,(rax)\n\t"
15586        $$emit$$"add     0x8,rax\n\t"
15587        $$emit$$"dec     rcx\n\t"
15588        $$emit$$"jge     L_sloop\n\t"
15589        $$emit$$"# L_end:\n\t"
15590     } else {
15591        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15592     }
15593   %}
15594   ins_encode %{
          // The two boolean arguments presumably mean (is_large, word_copy_only),
          // mirroring the predicate -- confirm against clear_mem().
15595     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15596                  $tmp$$XMMRegister, true, false);
15597   %}
15598   ins_pipe(pipe_slow);
15599 %}
15600 
15601 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15602                                   Universe dummy, rFlagsReg cr)
15603 %{
        // Selected for ClearArray nodes that are both large and word-copy-only
        // when UseAVX <= 2. The format template has neither a short-length loop
        // nor a UseFastStosb (byte store) branch.
15604   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15605   match(Set dummy (ClearArray (Binary cnt base) val));
15606   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15607 
15608   format %{ $$template
15609     if (UseXMMForObjInit) {
15610        $$emit$$"movdq   $tmp, $val\n\t"
15611        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15612        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15613        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15614        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15615        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15616        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15617        $$emit$$"add     0x40,rax\n\t"
15618        $$emit$$"# L_zero_64_bytes:\n\t"
15619        $$emit$$"sub     0x8,rcx\n\t"
15620        $$emit$$"jge     L_loop\n\t"
15621        $$emit$$"add     0x4,rcx\n\t"
15622        $$emit$$"jl      L_tail\n\t"
15623        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15624        $$emit$$"add     0x20,rax\n\t"
15625        $$emit$$"sub     0x4,rcx\n\t"
15626        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15627        $$emit$$"add     0x4,rcx\n\t"
15628        $$emit$$"jle     L_end\n\t"
15629        $$emit$$"dec     rcx\n\t"
15630        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15631        $$emit$$"vmovq   xmm0,(rax)\n\t"
15632        $$emit$$"add     0x8,rax\n\t"
15633        $$emit$$"dec     rcx\n\t"
15634        $$emit$$"jge     L_sloop\n\t"
15635        $$emit$$"# L_end:\n\t"
15636     } else {
15637        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15638     }
15639   %}
15640   ins_encode %{
          // The two boolean arguments presumably mean (is_large, word_copy_only),
          // mirroring the predicate -- confirm against clear_mem().
15641     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15642                  $tmp$$XMMRegister, true, true);
15643   %}
15644   ins_pipe(pipe_slow);
15645 %}
15646 
15647 // Large non-constant length ClearArray for AVX512 targets.
15648 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15649                              Universe dummy, rFlagsReg cr)
15650 %{
        // Selected for ClearArray nodes that are large and not word-copy-only
        // when UseAVX > 2. Compared with rep_stos_large it takes an additional
        // opmask temporary (ktmp) that is passed on to clear_mem().
        // NOTE(review): the format template prints "xorq rax, rax" and a ymm0
        // zeroing sequence although the fill value arrives in rax as an input --
        // the listing looks stale; the emitted code comes from clear_mem() alone.
15651   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15652   match(Set dummy (ClearArray (Binary cnt base) val));
15653   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15654 
15655   format %{ $$template
15656     if (UseFastStosb) {
15657        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15658        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15659        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15660     } else if (UseXMMForObjInit) {
15661        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15662        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15663        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15664        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15665        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15666        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15667        $$emit$$"add     0x40,rax\n\t"
15668        $$emit$$"# L_zero_64_bytes:\n\t"
15669        $$emit$$"sub     0x8,rcx\n\t"
15670        $$emit$$"jge     L_loop\n\t"
15671        $$emit$$"add     0x4,rcx\n\t"
15672        $$emit$$"jl      L_tail\n\t"
15673        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15674        $$emit$$"add     0x20,rax\n\t"
15675        $$emit$$"sub     0x4,rcx\n\t"
15676        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15677        $$emit$$"add     0x4,rcx\n\t"
15678        $$emit$$"jle     L_end\n\t"
15679        $$emit$$"dec     rcx\n\t"
15680        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15681        $$emit$$"vmovq   xmm0,(rax)\n\t"
15682        $$emit$$"add     0x8,rax\n\t"
15683        $$emit$$"dec     rcx\n\t"
15684        $$emit$$"jge     L_sloop\n\t"
15685        $$emit$$"# L_end:\n\t"
15686     } else {
15687        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15688        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15689     }
15690   %}
15691   ins_encode %{
          // The two boolean arguments presumably mean (is_large, word_copy_only),
          // mirroring the predicate -- confirm against clear_mem().
15692     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15693                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15694   %}
15695   ins_pipe(pipe_slow);
15696 %}
15697 
15698 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15699                                        Universe dummy, rFlagsReg cr)

15700 %{
        // Large non-constant length ClearArray for AVX512 targets, word-copy
        // variant: selected when the ClearArray node reports both is_large()
        // and word_copy_only(), and UseAVX > 2.
15701   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15702   match(Set dummy (ClearArray (Binary cnt base) val));
        // cnt, base and val are consumed and clobbered; tmp (XMM) and ktmp
        // (opmask) are scratch registers; the condition flags are killed.
15703   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15704 
        // Printed form of the instruction: one of three textual sequences is
        // chosen from UseFastStosb / UseXMMForObjInit. The machine code itself
        // is produced by the clear_mem() call in ins_encode below.
        // NOTE(review): the listing is the same text as the non-word-copy
        // variant even though clear_mem() is called with a different flag --
        // confirm it still describes the emitted code.
15705   format %{ $$template
15706     if (UseFastStosb) {
15707        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15708        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15709        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15710     } else if (UseXMMForObjInit) {
15711        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15712        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15713        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15714        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15715        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15716        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15717        $$emit$$"add     0x40,rax\n\t"
15718        $$emit$$"# L_zero_64_bytes:\n\t"
15719        $$emit$$"sub     0x8,rcx\n\t"
15720        $$emit$$"jge     L_loop\n\t"
15721        $$emit$$"add     0x4,rcx\n\t"
15722        $$emit$$"jl      L_tail\n\t"
15723        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15724        $$emit$$"add     0x20,rax\n\t"
15725        $$emit$$"sub     0x4,rcx\n\t"
15726        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15727        $$emit$$"add     0x4,rcx\n\t"
15728        $$emit$$"jle     L_end\n\t"
15729        $$emit$$"dec     rcx\n\t"
15730        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15731        $$emit$$"vmovq   xmm0,(rax)\n\t"
15732        $$emit$$"add     0x8,rax\n\t"
15733        $$emit$$"dec     rcx\n\t"
15734        $$emit$$"jge     L_sloop\n\t"
15735        $$emit$$"# L_end:\n\t"
15736     } else {
15737        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15738        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15739     }
15740   %}
15741   ins_encode %{
          // The two boolean arguments appear to mirror the predicate
          // (is_large = true, word_copy_only = true) -- TODO confirm against
          // the clear_mem() declaration.
15742     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15743                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15744   %}
15745   ins_pipe(pipe_slow);
15746 %}
15747 
15748 // Small constant length ClearArray for AVX512 targets.
      // cnt is an immediate operand (immL), so the count reaches clear_mem() as
      // a constant rather than in a register.
15749 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15750 %{
        // Selected when the ClearArray node is neither is_large() nor
        // word_copy_only(), MaxVectorSize >= 32, and the CPU supports AVX512VL.
15751   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15752             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15753   match(Set dummy (ClearArray (Binary cnt base) val));
15754   ins_cost(100);
        // val is consumed and clobbered; tmp (XMM) and ktmp (opmask) are
        // scratch; flags are killed. base has no effect() entry here, unlike
        // the register-count variants where it is USE_KILL.
15755   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15756   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15757   ins_encode %{
          // Constant-count form of clear_mem(): takes $cnt$$constant and has no
          // boolean flag arguments.
15758     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15759   %}
15760   ins_pipe(pipe_slow);
15761 %}
15762 
15763 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15764                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15765 %{
        // StrComp for the LL encoding (both operands printed as byte[] in the
        // format text). Only selected when AVX512VL+BW is not available.
15766   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15767   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
        // Both string pointers and both counts are consumed and clobbered;
        // tmp1 is a scratch XMM register; flags are killed.
15768   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15769 
15770   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15771   ins_encode %{
          // knoreg is passed as the last argument -- presumably "no opmask
          // register", consistent with the non-AVX512VL+BW predicate; confirm
          // against the string_compare() declaration.
15772     __ string_compare($str1$$Register, $str2$$Register,
15773                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15774                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15775   %}
15776   ins_pipe( pipe_slow );
15777 %}
15778 

17616   effect(USE meth);
17617 
17618   ins_cost(300);
17619   format %{ "call_leaf,runtime " %}
17620   ins_encode(clear_avx, Java_To_Runtime(meth));
17621   ins_pipe(pipe_slow);
17622 %}
17623 
17624 // Call runtime without safepoint and with vector arguments
      // Matches CallLeafVector nodes.
17625 instruct CallLeafDirectVector(method meth)
17626 %{
17627   match(CallLeafVector);
17628   effect(USE meth);
17629 
17630   ins_cost(300);
17631   format %{ "call_leaf,vector " %}
        // Only Java_To_Runtime is encoded: there is no clear_avx step here,
        // unlike CallLeafNoFPDirect. MachCallRuntimeNode::ret_addr_offset()
        // likewise leaves out clear_avx_size() for Op_CallLeafVector, so the
        // two must be kept in agreement.
        // Presumably the AVX state is left alone because the vector arguments
        // are live across the call -- confirm.
17632   ins_encode(Java_To_Runtime(meth));
17633   ins_pipe(pipe_slow);
17634 %}
17635 
17636 // Call runtime without safepoint
17637 // entry point is null, target holds the address to call
      // Register-indirect counterpart of CallLeafNoFPDirect: the two are told
      // apart by whether the call node's entry_point() is null.
17638 instruct CallLeafNoFPInDirect(rRegP target)
17639 %{
17640   predicate(n->as_Call()->entry_point() == nullptr);
17641   match(CallLeafNoFP target);
17642 
17643   ins_cost(300);
17644   format %{ "call_leaf_nofp,runtime indirect " %}
17645   ins_encode %{
           // Emits a call through the target register; no clear_avx step is
           // encoded for this form.
           // NOTE(review): MachCallRuntimeNode::ret_addr_offset() is written for
           // a 13-byte "movq r10,#addr; callq (r10)" sequence -- confirm the
           // return-address offset is correct for this register-indirect call.
17646      __ call($target$$Register);
17647   %}
17648 
17649   ins_pipe(pipe_slow);
17650 %}
17651 
17652 // Call runtime without safepoint
      // Direct form of CallLeafNoFP: used when the call node has a non-null
      // entry_point(); the null case is handled by CallLeafNoFPInDirect.
17653 instruct CallLeafNoFPDirect(method meth)
17654 %{
17655   predicate(n->as_Call()->entry_point() != nullptr);
17656   match(CallLeafNoFP);
17657   effect(USE meth);
17658 
17659   ins_cost(300);
17660   format %{ "call_leaf_nofp,runtime " %}
        // clear_avx is encoded ahead of the call; this is the extra size that
        // MachCallRuntimeNode::ret_addr_offset() adds via clear_avx_size().
17661   ins_encode(clear_avx, Java_To_Runtime(meth));
17662   ins_pipe(pipe_slow);
17663 %}
17664 
17665 // Return Instruction
17666 // Remove the return address & jump to it.
17667 // Notice: We always emit a nop after a ret to make sure there is room
17668 // for safepoint patching
17669 instruct Ret()
17670 %{
17671   match(Return);
17672 
17673   format %{ "ret" %}
17674   ins_encode %{
17675     __ ret(0);
< prev index next >