< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
        // Returns the byte offset from the start of this node's code to the
        // return address: the 5-byte call plus clear_avx_size() bytes
        // (presumably the optional vzeroupper emitted ahead of the call;
        // compute_padding() labels the same amount "skip vzeroupper").
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
        // Same contract as the static-call variant, but the dynamic call
        // sequence is 15 bytes long.
        // NOTE(review): presumably a 10-byte inline-cache data load followed
        // by the 5-byte call -- confirm against what ic_call() emits.
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




        // Returns the byte offset from the start of this node's code to the
        // return address of the runtime call.
 1652   int offset = 13; // movq r10,#addr; callq (r10)
        // CallLeafVector does not add clear_avx_size(): its instruct
        // (CallLeafDirectVector) encodes only Java_To_Runtime, with no
        // clear_avx step in front of the call.
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The call's displacement (the bytes following the opcode byte) needs to be
 1663 // 4-byte aligned to ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
        // Returns how many padding bytes must precede this node so that the
        // position right after the vzeroupper and the call opcode byte is a
        // multiple of alignment_required().
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to

 1870     st->print("\n\t");
 1871     st->print("# stack alignment check");
 1872 #endif
 1873   }
 1874   if (C->stub_function() != nullptr) {
 1875     st->print("\n\t");
 1876     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1877     st->print("\n\t");
 1878     st->print("je      fast_entry\t");
 1879     st->print("\n\t");
 1880     st->print("call    #nmethod_entry_barrier_stub\t");
 1881     st->print("\n\tfast_entry:");
 1882   }
 1883   st->cr();
 1884 }
 1885 #endif
 1886 
 1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: an optional class-initialization barrier,
        // the verified entry sequence, and then records the frame-complete
        // offset and the constant table base offset.
 1888   Compile* C = ra_->C;
 1889 
 1890   int framesize = C->output()->frame_size_in_bytes();
 1891   int bangsize = C->output()->bang_size_in_bytes();
 1892 
 1893   if (C->clinit_barrier_on_entry()) {
 1894     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1895     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1896 
 1897     Label L_skip_barrier;
 1898     Register klass = rscratch1;
 1899 
          // Load the holder klass and branch to L_skip_barrier on the fast
          // path; otherwise fall through to the slow-path jump below.
 1900     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1901     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1902 
 1903     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1904 
 1905     __ bind(L_skip_barrier);
 1906   }
 1907 
        // The second argument is the bang size, or 0 when no stack bang is
        // needed; the last argument is true when a stub is being compiled.
        // NOTE(review): the meaning of the literal 'false' third argument is
        // not visible here -- check the verified_entry() declaration.
 1908   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


 1909 
 1910   C->output()->set_frame_complete(__ offset());
 1911 
 1912   if (C->has_mach_constant_base_node()) {
 1913     // NOTE: We set the table base offset here because users might be
 1914     // emitted before MachConstantBaseNode.
 1915     ConstantTable& constant_table = C->output()->constant_table();
 1916     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1917   }
 1918 }
 1919 
 1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1921 {
        // No closed-form size for the prolog: defer to the generic
        // MachNode::size() computation.
 1922   return MachNode::size(ra_); // too many variables; just compute it
 1923                               // the hard way
 1924 }
 1925 
 1926 int MachPrologNode::reloc() const
 1927 {
        // NOTE(review): the remark below says "a large enough number" but the
        // value returned is 0 (MachEpilogNode::reloc() returns 2 with the
        // same remark) -- confirm that 0 is intended for the prolog.
 1928   return 0; // a large enough number
 1929 }
 1930 
 1931 //=============================================================================
 1932 #ifndef PRODUCT
 1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1934 {
 1935   Compile* C = ra_->C;
 1936   if (generate_vzeroupper(C)) {
 1937     st->print("vzeroupper");
 1938     st->cr(); st->print("\t");
 1939   }
 1940 
 1941   int framesize = C->output()->frame_size_in_bytes();
 1942   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1943   // Remove word for return adr already pushed
 1944   // and RBP

 1952   st->print_cr("popq    rbp");
 1953   if (do_polling() && C->is_method_compilation()) {
 1954     st->print("\t");
 1955     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1956                  "ja      #safepoint_stub\t"
 1957                  "# Safepoint: poll for GC");
 1958   }
 1959 }
 1960 #endif
 1961 
 1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1963 {
        // Emits the method epilog: optional vzeroupper, frame teardown
        // (addq rsp / popq rbp), optional reserved-stack check, and the
        // return safepoint poll for method compilations.
 1964   Compile* C = ra_->C;
 1965 
 1966   if (generate_vzeroupper(C)) {
 1967     // Clear upper bits of YMM registers when current compiled code uses
 1968     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1969     __ vzeroupper();
 1970   }
 1971 
 1972   int framesize = C->output()->frame_size_in_bytes();
 1973   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1974   // Remove word for return adr already pushed
 1975   // and RBP
 1976   framesize -= 2*wordSize;
 1977 
 1978   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1979 
        // Skip the stack adjustment entirely when nothing remains to pop.
 1980   if (framesize) {
 1981     __ addq(rsp, framesize);
 1982   }
 1983 
 1984   __ popq(rbp);
 1985 
 1986   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1987     __ reserved_stack_check();
 1988   }
 1989 
 1990   if (do_polling() && C->is_method_compilation()) {
          // While only measuring size (scratch emit) the poll targets a local
          // dummy label; otherwise a C2SafepointPollStub is allocated in the
          // compilation arena, registered with the output phase, and its
          // entry label is used as the poll target.
 1991     Label dummy_label;
 1992     Label* code_stub = &dummy_label;
 1993     if (!C->output()->in_scratch_emit_size()) {
 1994       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1995       C->output()->add_stub(stub);
 1996       code_stub = &stub->entry();
 1997     }
 1998     __ relocate(relocInfo::poll_return_type);
 1999     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2000   }
 2001 }
 2002 
 2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2004 {
        // No closed-form size for the epilog: defer to the generic
        // MachNode::size() computation.
 2005   return MachNode::size(ra_); // too many variables; just compute it
 2006                               // the hard way
 2007 }
 2008 
 2009 int MachEpilogNode::reloc() const
 2010 {
        // Relocation count reported for the epilog.
        // NOTE(review): emit() issues one explicit relocate() for the return
        // poll; presumably 2 is a conservative upper bound -- confirm.
 2011   return 2; // a large enough number
 2012 }
 2013 
 2014 const Pipeline* MachEpilogNode::pipeline() const
 2015 {
        // The epilog has no pipeline description of its own; it reports the
        // one returned by MachNode::pipeline_class().
 2016   return MachNode::pipeline_class();
 2017 }
 2018 
 2019 //=============================================================================
 2020 
 2021 enum RC {
        // Coarse register-class tags. NOTE(review): the consumers are not
        // visible in this excerpt; the per-value remarks below are inferred
        // from the enumerator names only.
 2022   rc_bad,    // first enumerator (value 0); presumably "no valid class"
 2023   rc_int,    // presumably general-purpose registers
 2024   rc_kreg,   // presumably opmask (k) registers
 2025   rc_float,  // presumably XMM/vector registers
 2026   rc_stack   // presumably stack slots
 2027 };
 2028 

 2590 #endif
 2591 
 2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2593 {
        // Materializes the address rsp + offset into this node's allocated
        // register. 'offset' is the stack offset of the first element of the
        // node's input register mask 0.
 2594   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2595   int reg = ra_->get_encode(this);
 2596 
 2597   __ lea(as_Register(reg), Address(rsp, offset));
 2598 }
 2599 
 2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2601 {
        // Byte size reported for the lea produced by emit(). Offsets below
        // 0x80 use the shorter form (presumably an 8-bit displacement, the
        // longer form a 32-bit one: the two sizes differ by 3 bytes), and
        // register encodings above 15 cost one extra byte (REX2 instead of
        // REX prefix).
        // NOTE(review): assumes offset is never 0 or negative, where the
        // emitted encoding could differ in length -- confirm.
 2602   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2603   if (ra_->get_encode(this) > 15) {
 2604     return (offset < 0x80) ? 6 : 9; // REX2
 2605   } else {
 2606     return (offset < 0x80) ? 5 : 8; // REX
 2607   }
 2608 }
 2609 













































 2610 //=============================================================================
 2611 #ifndef PRODUCT
 2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2613 {
        // Debug-only listing of the unverified entry point: load the
        // receiver's compressed klass, compare it with the inline cache's
        // speculated klass, and branch to the IC miss stub on mismatch.
 2614   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2615   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2616   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2617 }
 2618 #endif
 2619 
 2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2621 {
        // Delegates the whole unverified-entry sequence to ic_check(),
        // passing InteriorEntryAlignment (presumably the alignment applied to
        // the code following the check -- confirm in ic_check()).
 2622   __ ic_check(InteriorEntryAlignment);
 2623 }
 2624 
 2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2626 {
        // No closed-form size for the unverified entry point: defer to the
        // generic MachNode::size() computation.
 2627   return MachNode::size(ra_); // too many variables; just compute it
 2628                               // the hard way
 2629 }
 2630 
 2631 
 2632 //=============================================================================
 2633 
 2634 bool Matcher::supports_vector_calling_convention(void) {
        // The vector calling convention is reported as available exactly
        // when the EnableVectorSupport flag is set.
 2635   return EnableVectorSupport;
 2636 }
 2637 
 2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
        // True when mdef's flags carry Node::PD::Flag_ndd_demotable_opr1.
 2639   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2640 }
 2641 
 2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
        // True when mdef's flags carry Node::PD::Flag_ndd_demotable_opr2.
 2643   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2644 }
 2645 
 2646 #ifdef ASSERT
 2647 static bool is_ndd_demotable(const MachNode* mdef) {
        // True when either of the two demotable-operand flags is set on mdef.
        // Compiled only under ASSERT (see the enclosing #ifdef).
 2648   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2649 }
 2650 #endif

 4584     }
 4585     __ post_call_nop();
 4586   %}
 4587 
 4588   enc_class Java_Dynamic_Call(method meth) %{
          // Encoding for a dynamic Java call: an inline-cache call to the
          // target method, followed by the post-call nop.
 4589     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4590     __ post_call_nop();
 4591   %}
 4592 
 4593   enc_class call_epilog %{
          // Emits code only when VerifyStackAtCalls is set: compares the stack
          // word at rsp + framesize with the cookie 0xbadb100d and executes
          // int3 if they differ.
 4594     if (VerifyStackAtCalls) {
 4595       // Check that stack depth is unchanged: find majik cookie on stack
 4596       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4597       Label L;
 4598       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4599       __ jccb(Assembler::equal, L);
 4600       // Die if stack mismatch
 4601       __ int3();
 4602       __ bind(L);
 4603     }

































 4604   %}
 4605 
 4606 %}
 4607 
 4608 //----------FRAME--------------------------------------------------------------
 4609 // Definition of frame structure and management information.
 4610 //
 4611 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4612 //                             |   (to get allocators register number
 4613 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4614 //  r   CALLER     |        |
 4615 //  o     |        +--------+      pad to even-align allocators stack-slot
 4616 //  w     V        |  pad0  |        numbers; owned by CALLER
 4617 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4618 //  h     ^        |   in   |  5
 4619 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4620 //  |     |        |        |  3
 4621 //  |     |        +--------+
 4622 //  V     |        | old out|      Empty on Intel, window on Sparc
 4623 //        |    old |preserve|      Must be even aligned.

 5762   %}
 5763 %}
 5764 
 5765 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5766 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5767 %{
 5768   constraint(ALLOC_IN_RC(ptr_reg));
        // Only matches when the long type found at n->in(2)->in(3)->in(1) has
        // a non-negative lower bound (presumably that node is the ConvI2L of
        // the index in the match rule below -- confirm the input numbering).
 5769   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5770   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5771 
 5772   op_cost(10);
 5773   format %{"[$reg + $off + $idx << $scale]" %}
 5774   interface(MEMORY_INTER) %{
 5775     base($reg);
 5776     index($idx);
 5777     scale($scale);
 5778     disp($off);
 5779   %}
 5780 %}
 5781 
















 5782 // Indirect Narrow Oop Plus Offset Operand
 5783 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5784 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5785 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Folds the DecodeN into the address: only applicable when compressed
        // oops are in use with a shift equal to Address::times_8, matching
        // the fixed scale(0x3) in the interface below.
 5786   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5787   constraint(ALLOC_IN_RC(ptr_reg));
 5788   match(AddP (DecodeN reg) off);
 5789 
 5790   op_cost(10);
 5791   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5792   interface(MEMORY_INTER) %{
 5793     base(0xc); // R12
 5794     index($reg);
 5795     scale(0x3);
 5796     disp($off);
 5797   %}
 5798 %}
 5799 
 5800 // Indirect Memory Operand
 5801 operand indirectNarrow(rRegN reg)

 6271 %}
 6272 
 6273 // Replaces legVec during post-selection cleanup. See above.
 6274 operand legVecZ() %{
        // Register operand for VecZ values, allocated from the
        // vectorz_reg_legacy register class.
 6275   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6276   match(VecZ);
 6277 
 6278   format %{ %}
 6279   interface(REG_INTER);
 6280 %}
 6281 
 6282 //----------OPERAND CLASSES----------------------------------------------------
 6283 // Operand Classes are groups of operands that are used to simplify
 6284 // instruction definitions by not requiring the AD writer to specify separate
 6285 // instructions for every form of operand when the instruction accepts
 6286 // multiple operand types with the same basic encoding and format.  The classic
 6287 // case of this is memory operands.
 6288 
 6289 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6290                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6291                indCompressedOopOffset,
                     // The *Narrow operands below mirror, by name, the plain forms listed above.
 6292                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6293                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6294                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6295 
 6296 //----------PIPELINE-----------------------------------------------------------
 6297 // Rules which define the behavior of the target architectures pipeline.
 6298 pipeline %{
 6299 
 6300 //----------ATTRIBUTES---------------------------------------------------------
 6301 attributes %{
 6302   variable_size_instructions;        // Instructions vary in size
 6303   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6304   instruction_unit_size = 1;         // Instruction sizes are counted in 1-byte units
 6305   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6306   instruction_fetch_units = 1;       // of 16 bytes
 6307 %}
 6308 
 6309 //----------RESOURCES----------------------------------------------------------
 6310 // Resources are the functional units available to the machine
 6311 

 8906   format %{ "MEMBAR-storestore (empty encoding)" %}
 8907   ins_encode( );
 8908   ins_pipe(empty);
 8909 %}
 8910 
 8911 //----------Move Instructions--------------------------------------------------
 8912 
 8913 instruct castX2P(rRegP dst, rRegL src)
 8914 %{
        // Reinterprets a long as a pointer: a plain register move.
 8915   match(Set dst (CastX2P src));
 8916 
 8917   format %{ "movq    $dst, $src\t# long->ptr" %}
 8918   ins_encode %{
          // Nothing is emitted when both operands share a register.
 8919     if ($dst$$reg != $src$$reg) {
 8920       __ movptr($dst$$Register, $src$$Register);
 8921     }
 8922   %}
 8923   ins_pipe(ialu_reg_reg); // XXX
 8924 %}
 8925 


























 8926 instruct castP2X(rRegL dst, rRegP src)
 8927 %{
        // Reinterprets a pointer as a long: a plain register move.
 8928   match(Set dst (CastP2X src));
 8929 
 8930   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8931   ins_encode %{
          // Nothing is emitted when both operands share a register.
 8932     if ($dst$$reg != $src$$reg) {
 8933       __ movptr($dst$$Register, $src$$Register);
 8934     }
 8935   %}
 8936   ins_pipe(ialu_reg_reg); // XXX
 8937 %}
 8938 
 8939 // Convert oop into int for vectors alignment masking
 8940 instruct convP2I(rRegI dst, rRegP src)
 8941 %{
 8942   match(Set dst (ConvL2I (CastP2X src)));
 8943 
 8944   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8945   ins_encode %{

15199   effect(DEF dst, USE src);
15200   ins_cost(100);
15201   format %{ "movd    $dst,$src\t# MoveI2F" %}
15202   ins_encode %{
15203     __ movdl($dst$$XMMRegister, $src$$Register);
15204   %}
15205   ins_pipe( pipe_slow );
15206 %}
15207 
15208 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves the raw bits of a long general-purpose register into an XMM
        // register (MoveL2D).
15209   match(Set dst (MoveL2D src));
15210   effect(DEF dst, USE src);
15211   ins_cost(100);
        // NOTE(review): the listing below prints "movd" while the encoding
        // calls movdq() -- confirm which mnemonic the format should show.
15212   format %{ "movd    $dst,$src\t# MoveL2D" %}
15213   ins_encode %{
15214      __ movdq($dst$$XMMRegister, $src$$Register);
15215   %}
15216   ins_pipe( pipe_slow );
15217 %}
15218 

15219 // Fast clearing of an array
15220 // Small non-constant length ClearArray for non-AVX512 targets.
15221 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15222                   Universe dummy, rFlagsReg cr)
15223 %{
15224   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15225   match(Set dummy (ClearArray cnt base));
15226   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































15227 
15228   format %{ $$template
15229     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15230     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15231     $$emit$$"jg      LARGE\n\t"
15232     $$emit$$"dec     rcx\n\t"
15233     $$emit$$"js      DONE\t# Zero length\n\t"
15234     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15235     $$emit$$"dec     rcx\n\t"
15236     $$emit$$"jge     LOOP\n\t"
15237     $$emit$$"jmp     DONE\n\t"
15238     $$emit$$"# LARGE:\n\t"
15239     if (UseFastStosb) {
15240        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15241        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15242     } else if (UseXMMForObjInit) {
15243        $$emit$$"mov     rdi,rax\n\t"
15244        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15245        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15246        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15254        $$emit$$"jl      L_tail\n\t"
15255        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15256        $$emit$$"add     0x20,rax\n\t"
15257        $$emit$$"sub     0x4,rcx\n\t"
15258        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15259        $$emit$$"add     0x4,rcx\n\t"
15260        $$emit$$"jle     L_end\n\t"
15261        $$emit$$"dec     rcx\n\t"
15262        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15263        $$emit$$"vmovq   xmm0,(rax)\n\t"
15264        $$emit$$"add     0x8,rax\n\t"
15265        $$emit$$"dec     rcx\n\t"
15266        $$emit$$"jge     L_sloop\n\t"
15267        $$emit$$"# L_end:\n\t"
15268     } else {
15269        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15270     }
15271     $$emit$$"# DONE"
15272   %}
15273   ins_encode %{
15274     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15275                  $tmp$$XMMRegister, false, knoreg);
15276   %}
15277   ins_pipe(pipe_slow);
15278 %}
15279 
15280 // Small non-constant length ClearArray for AVX512 targets.
15281 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15282                        Universe dummy, rFlagsReg cr)
15283 %{
15284   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15285   match(Set dummy (ClearArray cnt base));
15286   ins_cost(125);
15287   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15288 
15289   format %{ $$template
15290     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15291     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15292     $$emit$$"jg      LARGE\n\t"
15293     $$emit$$"dec     rcx\n\t"
15294     $$emit$$"js      DONE\t# Zero length\n\t"
15295     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15296     $$emit$$"dec     rcx\n\t"
15297     $$emit$$"jge     LOOP\n\t"
15298     $$emit$$"jmp     DONE\n\t"
15299     $$emit$$"# LARGE:\n\t"
15300     if (UseFastStosb) {
15301        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15302        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15303     } else if (UseXMMForObjInit) {
15304        $$emit$$"mov     rdi,rax\n\t"
15305        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15306        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15307        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15315        $$emit$$"jl      L_tail\n\t"
15316        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15317        $$emit$$"add     0x20,rax\n\t"
15318        $$emit$$"sub     0x4,rcx\n\t"
15319        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15320        $$emit$$"add     0x4,rcx\n\t"
15321        $$emit$$"jle     L_end\n\t"
15322        $$emit$$"dec     rcx\n\t"
15323        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15324        $$emit$$"vmovq   xmm0,(rax)\n\t"
15325        $$emit$$"add     0x8,rax\n\t"
15326        $$emit$$"dec     rcx\n\t"
15327        $$emit$$"jge     L_sloop\n\t"
15328        $$emit$$"# L_end:\n\t"
15329     } else {
15330        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15331     }
15332     $$emit$$"# DONE"
15333   %}
15334   ins_encode %{
15335     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15336                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15337   %}
15338   ins_pipe(pipe_slow);
15339 %}
15340 
15341 // Large non-constant length ClearArray for non-AVX512 targets.
15342 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15343                         Universe dummy, rFlagsReg cr)
15344 %{
        // Selected for ClearArray nodes flagged is_large() when UseAVX <= 2.
        // cnt (rcx) and base (rdi) are consumed and clobbered; rax and the
        // flags are killed; tmp is a scratch XMM register.
15345   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15346   match(Set dummy (ClearArray cnt base));
15347   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































15348 
15349   format %{ $$template
15350     if (UseFastStosb) {
15351        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15352        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15353        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15354     } else if (UseXMMForObjInit) {
15355        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15356        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15357        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15358        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15359        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15360        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15361        $$emit$$"add     0x40,rax\n\t"
15362        $$emit$$"# L_zero_64_bytes:\n\t"
15363        $$emit$$"sub     0x8,rcx\n\t"
15364        $$emit$$"jge     L_loop\n\t"
15365        $$emit$$"add     0x4,rcx\n\t"
15366        $$emit$$"jl      L_tail\n\t"
15367        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15368        $$emit$$"add     0x20,rax\n\t"
15369        $$emit$$"sub     0x4,rcx\n\t"
15370        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15371        $$emit$$"add     0x4,rcx\n\t"
15372        $$emit$$"jle     L_end\n\t"
15373        $$emit$$"dec     rcx\n\t"
15374        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15375        $$emit$$"vmovq   xmm0,(rax)\n\t"
15376        $$emit$$"add     0x8,rax\n\t"
15377        $$emit$$"dec     rcx\n\t"
15378        $$emit$$"jge     L_sloop\n\t"
15379        $$emit$$"# L_end:\n\t"
15380     } else {
15381        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15382        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15383     }
15384   %}
15385   ins_encode %{
          // The listing above is descriptive only; the code itself comes from
          // clear_mem(). The 'true' argument is presumably its is_large flag
          // (the small variants pass false), and knoreg means no opmask
          // register is supplied.
15386     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15387                  $tmp$$XMMRegister, true, knoreg);
15388   %}
15389   ins_pipe(pipe_slow);
15390 %}
15391 
15392 // Large non-constant length ClearArray for AVX512 targets.
15393 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15394                              Universe dummy, rFlagsReg cr)
15395 %{
        // Selected for ClearArray nodes flagged is_large() when UseAVX > 2.
        // Differs from rep_stos_large by taking a legRegD scratch register
        // and an extra opmask scratch register (ktmp).
15396   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15397   match(Set dummy (ClearArray cnt base));
15398   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15399 
15400   format %{ $$template
15401     if (UseFastStosb) {
15402        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15403        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15404        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15405     } else if (UseXMMForObjInit) {
15406        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15407        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15408        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15409        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15410        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15411        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15412        $$emit$$"add     0x40,rax\n\t"
15413        $$emit$$"# L_zero_64_bytes:\n\t"
15414        $$emit$$"sub     0x8,rcx\n\t"
15415        $$emit$$"jge     L_loop\n\t"
15416        $$emit$$"add     0x4,rcx\n\t"
15417        $$emit$$"jl      L_tail\n\t"
15418        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15419        $$emit$$"add     0x20,rax\n\t"
15420        $$emit$$"sub     0x4,rcx\n\t"
15421        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15422        $$emit$$"add     0x4,rcx\n\t"
15423        $$emit$$"jle     L_end\n\t"
15424        $$emit$$"dec     rcx\n\t"
15425        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15426        $$emit$$"vmovq   xmm0,(rax)\n\t"
15427        $$emit$$"add     0x8,rax\n\t"
15428        $$emit$$"dec     rcx\n\t"
15429        $$emit$$"jge     L_sloop\n\t"
15430        $$emit$$"# L_end:\n\t"
15431     } else {
15432        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15433        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15434     }
15435   %}
15436   ins_encode %{
          // The listing above is descriptive only; the code itself comes from
          // clear_mem(). The 'true' argument is presumably its is_large flag
          // (the small variants pass false); ktmp is handed over as the
          // opmask scratch register.
15437     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15438                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15439   %}
15440   ins_pipe(pipe_slow);
15441 %}
15442 
15443 // Small constant length ClearArray for AVX512 targets.
15444 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15445 %{
        // Constant-length variant: applies to ClearArray nodes that are not
        // is_large() when MaxVectorSize >= 32 and AVX512VL is available.
        // Unlike the rep_stos* forms, base and zero are not pinned to
        // rdi/rax and the count is an immediate.
15446   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15447   match(Set dummy (ClearArray cnt base));

15448   ins_cost(100);
15449   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15450   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15451   ins_encode %{
          // Calls the clear_mem() overload that takes the count as a constant.
15452    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15453   %}
15454   ins_pipe(pipe_slow);
15455 %}
15456 
15457 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15458                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15459 %{
        // StrComp for the LL encoding on targets without AVX512VL+BW. Both
        // string pointers and both counts are consumed and clobbered; the
        // result is produced in rax.
15460   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15461   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15462   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15463 
15464   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15465   ins_encode %{
          // knoreg: no opmask register is passed on this path.
15466     __ string_compare($str1$$Register, $str2$$Register,
15467                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15468                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15469   %}
15470   ins_pipe( pipe_slow );
15471 %}
15472 

17310   effect(USE meth);
17311 
17312   ins_cost(300);
17313   format %{ "call_leaf,runtime " %}
17314   ins_encode(clear_avx, Java_To_Runtime(meth));
17315   ins_pipe(pipe_slow);
17316 %}
17317 
17318 // Call runtime without safepoint and with vector arguments
17319 instruct CallLeafDirectVector(method meth)
17320 %{
        // Leaf runtime call with vector arguments. The encoding below has no
        // clear_avx step (CallLeafNoFPDirect has one), which is why
        // MachCallRuntimeNode::ret_addr_offset() skips clear_avx_size() for
        // Op_CallLeafVector.
17321   match(CallLeafVector);
17322   effect(USE meth);
17323 
17324   ins_cost(300);
17325   format %{ "call_leaf,vector " %}
17326   ins_encode(Java_To_Runtime(meth));
17327   ins_pipe(pipe_slow);
17328 %}
17329 
















17330 // Call runtime without safepoint
17331 instruct CallLeafNoFPDirect(method meth)
17332 %{

        // Leaf runtime call matching CallLeafNoFP: the clear_avx encoding
        // followed by the Java_To_Runtime call sequence.
17333   match(CallLeafNoFP);
17334   effect(USE meth);
17335 
17336   ins_cost(300);
17337   format %{ "call_leaf_nofp,runtime " %}
17338   ins_encode(clear_avx, Java_To_Runtime(meth));
17339   ins_pipe(pipe_slow);
17340 %}
17341 
17342 // Return Instruction
17343 // Remove the return address & jump to it.
17344 // Notice: We always emit a nop after a ret to make sure there is room
17345 // for safepoint patching
17346 instruct Ret()
17347 %{
17348   match(Return);
17349 
17350   format %{ "ret" %}
17351   ins_encode %{
17352     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
        // Returns the byte offset from the start of this node's code to the
        // return address: the 5-byte call plus clear_avx_size() bytes
        // (presumably the optional vzeroupper emitted ahead of the call;
        // compute_padding() labels the same amount "skip vzeroupper").
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
        // Same contract as the static-call variant, but the dynamic call
        // sequence is 15 bytes long.
        // NOTE(review): presumably a 10-byte inline-cache data load followed
        // by the 5-byte call -- confirm against what ic_call() emits.
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
        // Returns the byte offset from the start of this node's code to the
        // return address of the runtime call.
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
          // NOTE(review): a fixed 3 bytes assumes the register-indirect call
          // always carries a prefix byte, and no clear_avx_size() is added on
          // this path -- confirm both against the CallLeafNoFPInDirect
          // instruct and its register class.
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
        // CallLeafVector does not add clear_avx_size(): presumably its
        // instruct encodes no clear_avx step in front of the call.
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 //
 1663 // Compute padding required for nodes which need alignment
 1664 //
 1665 
 1666 // The call's displacement (the bytes following the opcode byte) needs to be
 1667 // 4-byte aligned to ensure that it does not span a cache line so that it can be patched.
 1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1669 {
        // Returns how many padding bytes must precede this node so that the
        // position right after the vzeroupper and the call opcode byte is a
        // multiple of alignment_required().
 1670   current_offset += clear_avx_size(); // skip vzeroupper
 1671   current_offset += 1; // skip call opcode byte
 1672   return align_up(current_offset, alignment_required()) - current_offset;
 1673 }
 1674 
 1675 // The address of the call instruction needs to be 4-byte aligned to

 1874     st->print("\n\t");
 1875     st->print("# stack alignment check");
 1876 #endif
 1877   }
 1878   if (C->stub_function() != nullptr) {
 1879     st->print("\n\t");
 1880     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1881     st->print("\n\t");
 1882     st->print("je      fast_entry\t");
 1883     st->print("\n\t");
 1884     st->print("call    #nmethod_entry_barrier_stub\t");
 1885     st->print("\n\tfast_entry:");
 1886   }
 1887   st->cr();
 1888 }
 1889 #endif
 1890 
 1891 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: the verified entry sequence, an entry
        // barrier for non-stub compilations, the binding of the
        // _verified_entry label, and then records the frame-complete offset
        // and the constant table base offset.
 1892   Compile* C = ra_->C;
 1893 
 1894   __ verified_entry(C);








 1895 
        // NOTE(review): ra_->C is the same pointer already held in local C.
 1896   if (ra_->C->stub_function() == nullptr) {
 1897     __ entry_barrier();




 1898   }
 1899 
        // The label is bound only during the real emission pass, not while
        // merely measuring size in a scratch buffer.
        // NOTE(review): Compile::current() is presumably the same object as
        // C; MachEpilogNode::emit uses C->output() for the same check.
 1900   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1901     __ bind(*_verified_entry);
 1902   }
 1903 
 1904   C->output()->set_frame_complete(__ offset());
 1905 
 1906   if (C->has_mach_constant_base_node()) {
 1907     // NOTE: We set the table base offset here because users might be
 1908     // emitted before MachConstantBaseNode.
 1909     ConstantTable& constant_table = C->output()->constant_table();
 1910     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1911   }
 1912 }
 1913 





 1914 
 1915 int MachPrologNode::reloc() const
 1916 {
        // NOTE(review): the remark below says "a large enough number" but the
        // value returned is 0 (MachEpilogNode::reloc() returns 2 with the
        // same remark) -- confirm that 0 is intended for the prolog.
 1917   return 0; // a large enough number
 1918 }
 1919 
 1920 //=============================================================================
 1921 #ifndef PRODUCT
 1922 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1923 {
 1924   Compile* C = ra_->C;
 1925   if (generate_vzeroupper(C)) {
 1926     st->print("vzeroupper");
 1927     st->cr(); st->print("\t");
 1928   }
 1929 
 1930   int framesize = C->output()->frame_size_in_bytes();
 1931   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1932   // Remove word for return adr already pushed
 1933   // and RBP

 1941   st->print_cr("popq    rbp");
 1942   if (do_polling() && C->is_method_compilation()) {
 1943     st->print("\t");
 1944     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1945                  "ja      #safepoint_stub\t"
 1946                  "# Safepoint: poll for GC");
 1947   }
 1948 }
 1949 #endif
 1950 
 1951 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1952 {
        // Emits the method epilog: optional vzeroupper, frame removal, optional
        // reserved-stack check, and (for method compilations that poll) the
        // return safepoint poll.
 1953   Compile* C = ra_->C;
 1954 
 1955   if (generate_vzeroupper(C)) {
 1956     // Clear upper bits of YMM registers when current compiled code uses
 1957     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1958     __ vzeroupper();
 1959   }
 1960 
 1961   // Subtract two words to account for return address and rbp
 1962   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1963   __ remove_frame(initial_framesize, C->needs_stack_repair());










 1964 
 1965   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1966     __ reserved_stack_check();
 1967   }
 1968 
 1969   if (do_polling() && C->is_method_compilation()) {
          // While in_scratch_emit_size() is true no stub is allocated or registered;
          // a local dummy label stands in as the poll's branch target.
 1970     Label dummy_label;
 1971     Label* code_stub = &dummy_label;
 1972     if (!C->output()->in_scratch_emit_size()) {
 1973       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1974       C->output()->add_stub(stub);
 1975       code_stub = &stub->entry();
 1976     }
 1977     __ relocate(relocInfo::poll_return_type);
 1978     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1979   }
 1980 }
 1981 






 1982 int MachEpilogNode::reloc() const
 1983 {
        // Always reports 2 for the epilog node.
        // NOTE(review): presumably an upper bound on relocation entries (emit()
        // issues one relocate() call for the return poll) -- confirm.
 1984   return 2; // a large enough number
 1985 }
 1986 
 1987 const Pipeline* MachEpilogNode::pipeline() const
 1988 {
        // The epilog has no pipeline description of its own; it returns whatever
        // MachNode::pipeline_class() provides.
 1989   return MachNode::pipeline_class();
 1990 }
 1991 
 1992 //=============================================================================
 1993 
 1994 enum RC {
        // Register-class tags.
        // NOTE(review): meanings are inferred from the names only (invalid,
        // integer register, k-register, float register, stack slot) -- confirm
        // against the code that uses this enum.
 1995   rc_bad,
 1996   rc_int,
 1997   rc_kreg,
 1998   rc_float,
 1999   rc_stack
 2000 };
 2001 

 2563 #endif
 2564 
 2565 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2566 {
        // Loads the address rsp + offset into this node's allocated register,
        // where offset is the stack offset of the first element of input mask 0.
 2567   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2568   int reg = ra_->get_encode(this);
 2569 
 2570   __ lea(as_Register(reg), Address(rsp, offset));
 2571 }
 2572 
 2573 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2574 {
        // Returns the byte size of the lea emitted by BoxLockNode::emit().
        // Register encodings above 15 cost one extra byte (REX2 instead of REX);
        // offsets of 0x80 and above cost three extra bytes (presumably a 32-bit
        // instead of an 8-bit displacement -- confirm).
 2575   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2576   if (ra_->get_encode(this) > 15) {
 2577     return (offset < 0x80) ? 6 : 9; // REX2
 2578   } else {
 2579     return (offset < 0x80) ? 5 : 8; // REX
 2580   }
 2581 }
 2582 
 2583 //=============================================================================
 2584 #ifndef PRODUCT
 2585 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2586 {
        // Prints only the node name; the emitted instructions are not listed.
 2587   st->print_cr("MachVEPNode");
 2588 }
 2589 #endif
 2590 
 2591 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2592 {
        // Emits an entry point that either performs the inline cache check
        // (!_verified) or runs the entry barrier, unpacks inline type arguments
        // and jumps to the verified entry (_verified).
        // NOTE(review): cbuf is not referenced again in this function.
 2593   CodeBuffer* cbuf = masm->code();
 2594   if (!_verified) {
 2595     __ ic_check(1);
 2596   } else {
 2597     if (ra_->C->stub_function() == nullptr) {
 2598       // Emit the entry barrier in a temporary frame before unpacking because
 2599       // it can deopt, which would require packing the scalarized args again.
 2600       __ verified_entry(ra_->C, 0);
 2601       __ entry_barrier();
 2602       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2603       __ remove_frame(initial_framesize, false);
 2604     }
 2605     // Unpack inline type args passed as oop and then jump to
 2606     // the verified entry point (skipping the unverified entry).
 2607     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2608     // Emit code for verified entry and save increment for stack repair on return
 2609     __ verified_entry(ra_->C, sp_inc);
          // During scratch size measurement the jump targets a local, unbound label
          // instead of the real _verified_entry label.
 2610     if (Compile::current()->output()->in_scratch_emit_size()) {
 2611       Label dummy_verified_entry;
 2612       __ jmp(dummy_verified_entry);
 2613     } else {
 2614       __ jmp(*_verified_entry);
 2615     }
 2616   }
 2617   if (ra_->C->stub_function() == nullptr) {
 2618     // Pad so that the next call to MachVEPNode::emit() starts out with the
 2619     // correct alignment.  This is needed by entry_barrier() to align the
 2620     // compare.  But unfortunately we need to align all 4 MachVEPNodes because
 2621     // entry point offsets are computed using scratch_emit_size(), so starting
 2622     // alignment must match the alignment of the scratch buffer, otherwise the sizes
 2623     // will be off.
 2624     __ align(4);
 2625   }
 2626 }
 2627 
 2628 //=============================================================================
 2629 #ifndef PRODUCT
 2630 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2631 {
        // Prints a three-line listing of the inline cache check: load the
        // compressed klass, compare it with the speculated klass, branch to the
        // IC miss stub on mismatch.
 2632   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2633   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2634   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2635 }
 2636 #endif
 2637 
 2638 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2639 {
        // Emits the inline cache check, passing InteriorEntryAlignment to ic_check().
 2640   __ ic_check(InteriorEntryAlignment);
 2641 }
 2642 






 2643 
 2644 //=============================================================================
 2645 
 2646 bool Matcher::supports_vector_calling_convention(void) {
        // Reported as supported exactly when EnableVectorSupport is set.
 2647   return EnableVectorSupport;
 2648 }
 2649 
 2650 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
        // True when mdef carries the Flag_ndd_demotable_opr1 flag bit.
 2651   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2652 }
 2653 
 2654 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
        // True when mdef carries the Flag_ndd_demotable_opr2 flag bit.
 2655   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2656 }
 2657 
 2658 #ifdef ASSERT
 2659 static bool is_ndd_demotable(const MachNode* mdef) {
        // True when either of the two ndd-demotable flag bits is set on mdef.
        // Compiled only in ASSERT builds (see the enclosing #ifdef).
 2660   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2661 }
 2662 #endif

 4596     }
 4597     __ post_call_nop();
 4598   %}
 4599 
 4600   enc_class Java_Dynamic_Call(method meth) %{
            // Emits an inline-cache call to the target method followed by the
            // post-call nop.
 4601     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4602     __ post_call_nop();
 4603   %}
 4604 
 4605   enc_class call_epilog %{
            // Code emitted after a call returns: an optional stack-depth check
            // (VerifyStackAtCalls) and, for calls returning an inline type as
            // fields, initialization of the null marker and normalization of rax.
 4606     if (VerifyStackAtCalls) {
 4607       // Check that stack depth is unchanged: find majik cookie on stack
 4608       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4609       Label L;
 4610       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4611       __ jccb(Assembler::equal, L);
 4612       // Die if stack mismatch
 4613       __ int3();
 4614       __ bind(L);
 4615     }
 4616     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4617       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4618       // Search for the corresponding projection, get the register and emit code that initializes it.
 4619       uint con = (tf()->range_cc()->cnt() - 1);
 4620       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4621         ProjNode* proj = fast_out(i)->as_Proj();
 4622         if (proj->_con == con) {
 4623           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4624           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4625           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
                  // A stack-allocated marker is computed in rscratch1 and stored below.
 4626           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4627           __ testq(rax, rax);
 4628           __ setb(Assembler::notZero, toReg);
 4629           __ movzbl(toReg, toReg);
 4630           if (reg->is_stack()) {
 4631             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4632             __ movq(Address(rsp, st_off), toReg);
 4633           }
 4634           break;
 4635         }
 4636       }
 4637       if (return_value_is_used()) {
 4638         // An inline type is returned as fields in multiple registers.
 4639         // Rax either contains an oop if the inline type is buffered or a pointer
 4640         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4641         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4642         // rax &= (rax & 1) - 1
 4643         __ movptr(rscratch1, rax);
 4644         __ andptr(rscratch1, 0x1);
 4645         __ subptr(rscratch1, 0x1);
 4646         __ andptr(rax, rscratch1);
 4647       }
 4648     }
 4649   %}
 4650 
 4651 %}
 4652 
 4653 //----------FRAME--------------------------------------------------------------
 4654 // Definition of frame structure and management information.
 4655 //
 4656 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4657 //                             |   (to get allocators register number
 4658 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4659 //  r   CALLER     |        |
 4660 //  o     |        +--------+      pad to even-align allocators stack-slot
 4661 //  w     V        |  pad0  |        numbers; owned by CALLER
 4662 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4663 //  h     ^        |   in   |  5
 4664 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4665 //  |     |        |        |  3
 4666 //  |     |        +--------+
 4667 //  V     |        | old out|      Empty on Intel, window on Sparc
 4668 //        |    old |preserve|      Must be even aligned.

 5807   %}
 5808 %}
 5809 
 5810 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Matches reg + (ConvI2L(idx) << scale) + off. The predicate only admits the
      // match when the long type reached via n->in(2)->in(3)->in(1) has a
      // non-negative lower bound (per the match rule this appears to be the
      // ConvI2L node -- confirm).
 5811 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5812 %{
 5813   constraint(ALLOC_IN_RC(ptr_reg));
 5814   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5815   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5816 
 5817   op_cost(10);
 5818   format %{"[$reg + $off + $idx << $scale]" %}
 5819   interface(MEMORY_INTER) %{
 5820     base($reg);
 5821     index($idx);
 5822     scale($scale);
 5823     disp($off);
 5824   %}
 5825 %}
 5826 
 5827 // Indirect Narrow Oop Operand
      // Folds a DecodeN into the addressing mode [R12 + reg << 3]. Only available
      // with UseCompressedOops and a compressed-oop shift equal to Address::times_8.
 5828 operand indCompressedOop(rRegN reg) %{
 5829   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5830   constraint(ALLOC_IN_RC(ptr_reg));
 5831   match(DecodeN reg);
 5832 
 5833   op_cost(10);
 5834   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5835   interface(MEMORY_INTER) %{
 5836     base(0xc); // R12
 5837     index($reg);
 5838     scale(0x3);
 5839     disp(0x0);
 5840   %}
 5841 %}
 5842 
 5843 // Indirect Narrow Oop Plus Offset Operand
 5844 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5845 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Folds AddP(DecodeN reg, off) into [R12 + reg << 3 + off]; same predicate as
      // indCompressedOop (UseCompressedOops and shift == Address::times_8).
 5846 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5847   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5848   constraint(ALLOC_IN_RC(ptr_reg));
 5849   match(AddP (DecodeN reg) off);
 5850 
 5851   op_cost(10);
 5852   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5853   interface(MEMORY_INTER) %{
 5854     base(0xc); // R12
 5855     index($reg);
 5856     scale(0x3);
 5857     disp($off);
 5858   %}
 5859 %}
 5860 
 5861 // Indirect Memory Operand
 5862 operand indirectNarrow(rRegN reg)

 6332 %}
 6333 
 6334 // Replaces legVec during post-selection cleanup. See above.
      // Register operand matching VecZ, allocated from the vectorz_reg_legacy
      // register class; its format string is intentionally empty.
 6335 operand legVecZ() %{
 6336   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6337   match(VecZ);
 6338 
 6339   format %{ %}
 6340   interface(REG_INTER);
 6341 %}
 6342 
 6343 //----------OPERAND CLASSES----------------------------------------------------
 6344 // Operand Classes are groups of operands that are used to simplify
 6345 // instruction definitions by not requiring the AD writer to specify separate
 6346 // instructions for every form of operand when the instruction accepts
 6347 // multiple operand types with the same basic encoding and format.  The classic
 6348 // case of this is memory operands.
 6349 
 6350 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6351                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6352                indCompressedOop, indCompressedOopOffset,
 6353                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6354                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6355                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6356 
 6357 //----------PIPELINE-----------------------------------------------------------
 6358 // Rules which define the behavior of the target architectures pipeline.
 6359 pipeline %{
 6360 
 6361 //----------ATTRIBUTES---------------------------------------------------------
 6362 attributes %{
 6363   variable_size_instructions;        // Variable size instructions
 6364   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6365   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 6366   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6367   instruction_fetch_units = 1;       // of 16 bytes
 6368 %}
 6369 
 6370 //----------RESOURCES----------------------------------------------------------
 6371 // Resources are the functional units available to the machine
 6372 

 8967   format %{ "MEMBAR-storestore (empty encoding)" %}
 8968   ins_encode( );
 8969   ins_pipe(empty);
 8970 %}
 8971 
 8972 //----------Move Instructions--------------------------------------------------
 8973 
 8974 instruct castX2P(rRegP dst, rRegL src)
 8975 %{
 8976   match(Set dst (CastX2P src));
 8977 
 8978   format %{ "movq    $dst, $src\t# long->ptr" %}
 8979   ins_encode %{
          // CastX2P (long -> pointer): a register move is emitted only when dst
          // and src were allocated to different registers; otherwise nothing is
          // emitted.
 8980     if ($dst$$reg != $src$$reg) {
 8981       __ movptr($dst$$Register, $src$$Register);
 8982     }
 8983   %}
 8984   ins_pipe(ialu_reg_reg); // XXX
 8985 %}
 8986 
 8987 instruct castI2N(rRegN dst, rRegI src)
 8988 %{
 8989   match(Set dst (CastI2N src));
 8990 
 8991   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8992   ins_encode %{
          // CastI2N (int -> narrow pointer): a 32-bit register move (movl) is
          // emitted only when dst and src were allocated to different registers.
          // The format string above names movl so the printed listing matches the
          // instruction that is actually emitted.
 8993     if ($dst$$reg != $src$$reg) {
 8994       __ movl($dst$$Register, $src$$Register);
 8995     }
 8996   %}
 8997   ins_pipe(ialu_reg_reg); // XXX
 8998 %}
 8999 
 9000 instruct castN2X(rRegL dst, rRegN src)
 9001 %{
 9002   match(Set dst (CastP2X src));
 9003 
 9004   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9005   ins_encode %{
          // CastP2X whose source operand is a narrow-pointer register (rRegN).
          // A register move is emitted only when dst and src differ.
          // NOTE(review): the format text says "ptr -> long" although src is
          // rRegN, and the match rule is the same as castP2X's -- presumably the
          // two are told apart by operand type; confirm.
 9006     if ($dst$$reg != $src$$reg) {
 9007       __ movptr($dst$$Register, $src$$Register);
 9008     }
 9009   %}
 9010   ins_pipe(ialu_reg_reg); // XXX
 9011 %}
 9012 
 9013 instruct castP2X(rRegL dst, rRegP src)
 9014 %{
 9015   match(Set dst (CastP2X src));
 9016 
 9017   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9018   ins_encode %{
          // CastP2X (pointer -> long): a register move is emitted only when dst
          // and src were allocated to different registers.
 9019     if ($dst$$reg != $src$$reg) {
 9020       __ movptr($dst$$Register, $src$$Register);
 9021     }
 9022   %}
 9023   ins_pipe(ialu_reg_reg); // XXX
 9024 %}
 9025 
 9026 // Convert oop into int for vectors alignment masking
 9027 instruct convP2I(rRegI dst, rRegP src)
 9028 %{
 9029   match(Set dst (ConvL2I (CastP2X src)));
 9030 
 9031   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9032   ins_encode %{

15286   effect(DEF dst, USE src);
15287   ins_cost(100);
15288   format %{ "movd    $dst,$src\t# MoveI2F" %}
15289   ins_encode %{
15290     __ movdl($dst$$XMMRegister, $src$$Register);
15291   %}
15292   ins_pipe( pipe_slow );
15293 %}
15294 
15295 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15296   match(Set dst (MoveL2D src));
15297   effect(DEF dst, USE src);
15298   ins_cost(100);
15299   format %{ "movd    $dst,$src\t# MoveL2D" %}
15300   ins_encode %{
           // MoveL2D: moves the long register src into the XMM register dst via
           // movdq. NOTE(review): the format text prints "movd" while the
           // encoding calls movdq.
15301      __ movdq($dst$$XMMRegister, $src$$Register);
15302   %}
15303   ins_pipe( pipe_slow );
15304 %}
15305 
15306 
15307 // Fast clearing of an array
15308 // Small non-constant length ClearArray for non-AVX512 targets.
      // Selected when the ClearArray node is neither is_large() nor
      // word_copy_only() and UseAVX <= 2. cnt, base and val are consumed and
      // killed; tmp is an XMM temporary and the flags are killed.
15309 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15310                   Universe dummy, rFlagsReg cr)
15311 %{
15312   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15313   match(Set dummy (ClearArray (Binary cnt base) val));
15314   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15315 
15316   format %{ $$template
15317     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15318     $$emit$$"jg      LARGE\n\t"
15319     $$emit$$"dec     rcx\n\t"
15320     $$emit$$"js      DONE\t# Zero length\n\t"
15321     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15322     $$emit$$"dec     rcx\n\t"
15323     $$emit$$"jge     LOOP\n\t"
15324     $$emit$$"jmp     DONE\n\t"
15325     $$emit$$"# LARGE:\n\t"
15326     if (UseFastStosb) {
15327        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15328        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15329     } else if (UseXMMForObjInit) {
15330        $$emit$$"movdq   $tmp, $val\n\t"
15331        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15332        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15333        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15334        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15335        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15336        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15337        $$emit$$"add     0x40,rax\n\t"
15338        $$emit$$"# L_zero_64_bytes:\n\t"
15339        $$emit$$"sub     0x8,rcx\n\t"
15340        $$emit$$"jge     L_loop\n\t"
15341        $$emit$$"add     0x4,rcx\n\t"
15342        $$emit$$"jl      L_tail\n\t"
15343        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15344        $$emit$$"add     0x20,rax\n\t"
15345        $$emit$$"sub     0x4,rcx\n\t"
15346        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15347        $$emit$$"add     0x4,rcx\n\t"
15348        $$emit$$"jle     L_end\n\t"
15349        $$emit$$"dec     rcx\n\t"
15350        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15351        $$emit$$"vmovq   xmm0,(rax)\n\t"
15352        $$emit$$"add     0x8,rax\n\t"
15353        $$emit$$"dec     rcx\n\t"
15354        $$emit$$"jge     L_sloop\n\t"
15355        $$emit$$"# L_end:\n\t"
15356     } else {
15357        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15358     }
15359     $$emit$$"# DONE"
15360   %}
15361   ins_encode %{
          // The two trailing booleans are false/false here; across the sibling
          // variants they track the predicate's is_large() and word_copy_only()
          // conditions (presumably that is what clear_mem expects -- confirm).
15362     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15363                  $tmp$$XMMRegister, false, false);
15364   %}
15365   ins_pipe(pipe_slow);
15366 %}
15367 
15368 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15369                             Universe dummy, rFlagsReg cr)
15370 %{
15371   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15372   match(Set dummy (ClearArray (Binary cnt base) val));
15373   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15374 
15375   format %{ $$template
15376     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15377     $$emit$$"jg      LARGE\n\t"
15378     $$emit$$"dec     rcx\n\t"
15379     $$emit$$"js      DONE\t# Zero length\n\t"
15380     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15381     $$emit$$"dec     rcx\n\t"
15382     $$emit$$"jge     LOOP\n\t"
15383     $$emit$$"jmp     DONE\n\t"
15384     $$emit$$"# LARGE:\n\t"
15385     if (UseXMMForObjInit) {
15386        $$emit$$"movdq   $tmp, $val\n\t"
15387        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15388        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15389        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15390        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15391        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15392        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15393        $$emit$$"add     0x40,rax\n\t"
15394        $$emit$$"# L_zero_64_bytes:\n\t"
15395        $$emit$$"sub     0x8,rcx\n\t"
15396        $$emit$$"jge     L_loop\n\t"
15397        $$emit$$"add     0x4,rcx\n\t"
15398        $$emit$$"jl      L_tail\n\t"
15399        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15400        $$emit$$"add     0x20,rax\n\t"
15401        $$emit$$"sub     0x4,rcx\n\t"
15402        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15403        $$emit$$"add     0x4,rcx\n\t"
15404        $$emit$$"jle     L_end\n\t"
15405        $$emit$$"dec     rcx\n\t"
15406        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15407        $$emit$$"vmovq   xmm0,(rax)\n\t"
15408        $$emit$$"add     0x8,rax\n\t"
15409        $$emit$$"dec     rcx\n\t"
15410        $$emit$$"jge     L_sloop\n\t"
15411        $$emit$$"# L_end:\n\t"
15412     } else {
15413        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15414     }
15415     $$emit$$"# DONE"
15416   %}
15417   ins_encode %{
          // Small (not is_large()) ClearArray for UseAVX <= 2 when the node is
          // word_copy_only(). The format listing has no stosb branch, and the last
          // clear_mem argument is true here (presumably the word-copy-only flag --
          // confirm against clear_mem).
15418     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15419                  $tmp$$XMMRegister, false, true);
15420   %}
15421   ins_pipe(pipe_slow);
15422 %}
15423 
15424 // Small non-constant length ClearArray for AVX512 targets.
15425 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15426                        Universe dummy, rFlagsReg cr)
15427 %{
15428   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15429   match(Set dummy (ClearArray (Binary cnt base) val));
15430   ins_cost(125);
15431   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15432 
15433   format %{ $$template
15434     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15435     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15436     $$emit$$"jg      LARGE\n\t"
15437     $$emit$$"dec     rcx\n\t"
15438     $$emit$$"js      DONE\t# Zero length\n\t"
15439     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15440     $$emit$$"dec     rcx\n\t"
15441     $$emit$$"jge     LOOP\n\t"
15442     $$emit$$"jmp     DONE\n\t"
15443     $$emit$$"# LARGE:\n\t"
15444     if (UseFastStosb) {
15445        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15446        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15447     } else if (UseXMMForObjInit) {
15448        $$emit$$"mov     rdi,rax\n\t"
15449        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15450        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15451        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15459        $$emit$$"jl      L_tail\n\t"
15460        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15461        $$emit$$"add     0x20,rax\n\t"
15462        $$emit$$"sub     0x4,rcx\n\t"
15463        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15464        $$emit$$"add     0x4,rcx\n\t"
15465        $$emit$$"jle     L_end\n\t"
15466        $$emit$$"dec     rcx\n\t"
15467        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15468        $$emit$$"vmovq   xmm0,(rax)\n\t"
15469        $$emit$$"add     0x8,rax\n\t"
15470        $$emit$$"dec     rcx\n\t"
15471        $$emit$$"jge     L_sloop\n\t"
15472        $$emit$$"# L_end:\n\t"
15473     } else {
15474        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15475     }
15476     $$emit$$"# DONE"
15477   %}
15478   ins_encode %{
15479     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15480                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15481   %}
15482   ins_pipe(pipe_slow);
15483 %}
15484 
15485 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15486                                  Universe dummy, rFlagsReg cr)

15487 %{
15488   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15489   match(Set dummy (ClearArray (Binary cnt base) val));
15490   ins_cost(125);
15491   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15492 
15493   format %{ $$template
15494     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15495     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15496     $$emit$$"jg      LARGE\n\t"
15497     $$emit$$"dec     rcx\n\t"
15498     $$emit$$"js      DONE\t# Zero length\n\t"
15499     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15500     $$emit$$"dec     rcx\n\t"
15501     $$emit$$"jge     LOOP\n\t"
15502     $$emit$$"jmp     DONE\n\t"
15503     $$emit$$"# LARGE:\n\t"
15504     if (UseFastStosb) {
15505        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15506        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15507     } else if (UseXMMForObjInit) {
15508        $$emit$$"mov     rdi,rax\n\t"
15509        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15510        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15511        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15519        $$emit$$"jl      L_tail\n\t"
15520        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15521        $$emit$$"add     0x20,rax\n\t"
15522        $$emit$$"sub     0x4,rcx\n\t"
15523        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15524        $$emit$$"add     0x4,rcx\n\t"
15525        $$emit$$"jle     L_end\n\t"
15526        $$emit$$"dec     rcx\n\t"
15527        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15528        $$emit$$"vmovq   xmm0,(rax)\n\t"
15529        $$emit$$"add     0x8,rax\n\t"
15530        $$emit$$"dec     rcx\n\t"
15531        $$emit$$"jge     L_sloop\n\t"
15532        $$emit$$"# L_end:\n\t"
15533     } else {
15534        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15535     }
15536     $$emit$$"# DONE"
15537   %}
15538   ins_encode %{
15539     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15540                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15541   %}
15542   ins_pipe(pipe_slow);
15543 %}
15544 
15545 // Large non-constant length ClearArray for non-AVX512 targets.
      // Selected when the ClearArray node is is_large(), not word_copy_only(), and
      // UseAVX <= 2. The format listing has no short-length loop, unlike rep_stos.
15546 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15547                         Universe dummy, rFlagsReg cr)
15548 %{
15549   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15550   match(Set dummy (ClearArray (Binary cnt base) val));
15551   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15552 
15553   format %{ $$template
15554     if (UseFastStosb) {
15555        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15556        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15557     } else if (UseXMMForObjInit) {
15558        $$emit$$"movdq   $tmp, $val\n\t"
15559        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15560        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15561        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15562        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15563        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15564        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15565        $$emit$$"add     0x40,rax\n\t"
15566        $$emit$$"# L_zero_64_bytes:\n\t"
15567        $$emit$$"sub     0x8,rcx\n\t"
15568        $$emit$$"jge     L_loop\n\t"
15569        $$emit$$"add     0x4,rcx\n\t"
15570        $$emit$$"jl      L_tail\n\t"
15571        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15572        $$emit$$"add     0x20,rax\n\t"
15573        $$emit$$"sub     0x4,rcx\n\t"
15574        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15575        $$emit$$"add     0x4,rcx\n\t"
15576        $$emit$$"jle     L_end\n\t"
15577        $$emit$$"dec     rcx\n\t"
15578        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15579        $$emit$$"vmovq   xmm0,(rax)\n\t"
15580        $$emit$$"add     0x8,rax\n\t"
15581        $$emit$$"dec     rcx\n\t"
15582        $$emit$$"jge     L_sloop\n\t"
15583        $$emit$$"# L_end:\n\t"
15584     } else {
15585        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15586     }
15587   %}
15588   ins_encode %{
          // Trailing booleans are true/false here (presumably is_large and
          // word_copy_only, matching the predicate -- confirm against clear_mem).
15589     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15590                  $tmp$$XMMRegister, true, false);
15591   %}
15592   ins_pipe(pipe_slow);
15593 %}
15594 
15595 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15596                                   Universe dummy, rFlagsReg cr)
15597 %{
15598   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15599   match(Set dummy (ClearArray (Binary cnt base) val));
15600   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15601 
15602   format %{ $$template
15603     if (UseXMMForObjInit) {
15604        $$emit$$"movdq   $tmp, $val\n\t"
15605        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15606        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15607        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15608        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15609        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15610        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15611        $$emit$$"add     0x40,rax\n\t"
15612        $$emit$$"# L_zero_64_bytes:\n\t"
15613        $$emit$$"sub     0x8,rcx\n\t"
15614        $$emit$$"jge     L_loop\n\t"
15615        $$emit$$"add     0x4,rcx\n\t"
15616        $$emit$$"jl      L_tail\n\t"
15617        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15618        $$emit$$"add     0x20,rax\n\t"
15619        $$emit$$"sub     0x4,rcx\n\t"
15620        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15621        $$emit$$"add     0x4,rcx\n\t"
15622        $$emit$$"jle     L_end\n\t"
15623        $$emit$$"dec     rcx\n\t"
15624        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15625        $$emit$$"vmovq   xmm0,(rax)\n\t"
15626        $$emit$$"add     0x8,rax\n\t"
15627        $$emit$$"dec     rcx\n\t"
15628        $$emit$$"jge     L_sloop\n\t"
15629        $$emit$$"# L_end:\n\t"
15630     } else {
15631        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15632     }
15633   %}
15634   ins_encode %{
          // Large (is_large()) ClearArray for UseAVX <= 2 when the node is
          // word_copy_only(). The format listing has no stosb branch; both
          // trailing clear_mem booleans are true here (presumably is_large and
          // word_copy_only -- confirm against clear_mem).
15635     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15636                  $tmp$$XMMRegister, true, true);
15637   %}
15638   ins_pipe(pipe_slow);
15639 %}
15640 
15641 // Large non-constant length ClearArray for AVX512 targets.
      // Selected when the ClearArray node is is_large(), not word_copy_only(), and
      // UseAVX > 2. Compared with rep_stos_large it takes an additional kReg
      // temporary (ktmp) and a legRegD tmp, and passes ktmp to clear_mem.
      // NOTE(review): the format listing still prints "xorq rax, rax" and ymm0
      // zeroing although val is an input operand handed to clear_mem -- the
      // listing may be stale; confirm against clear_mem.
15642 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15643                              Universe dummy, rFlagsReg cr)
15644 %{
15645   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15646   match(Set dummy (ClearArray (Binary cnt base) val));
15647   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15648 
15649   format %{ $$template
15650     if (UseFastStosb) {
15651        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15652        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15653        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15654     } else if (UseXMMForObjInit) {
15655        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15656        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15657        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15658        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15659        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15660        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15661        $$emit$$"add     0x40,rax\n\t"
15662        $$emit$$"# L_zero_64_bytes:\n\t"
15663        $$emit$$"sub     0x8,rcx\n\t"
15664        $$emit$$"jge     L_loop\n\t"
15665        $$emit$$"add     0x4,rcx\n\t"
15666        $$emit$$"jl      L_tail\n\t"
15667        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15668        $$emit$$"add     0x20,rax\n\t"
15669        $$emit$$"sub     0x4,rcx\n\t"
15670        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15671        $$emit$$"add     0x4,rcx\n\t"
15672        $$emit$$"jle     L_end\n\t"
15673        $$emit$$"dec     rcx\n\t"
15674        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15675        $$emit$$"vmovq   xmm0,(rax)\n\t"
15676        $$emit$$"add     0x8,rax\n\t"
15677        $$emit$$"dec     rcx\n\t"
15678        $$emit$$"jge     L_sloop\n\t"
15679        $$emit$$"# L_end:\n\t"
15680     } else {
15681        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15682        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15683     }
15684   %}
15685   ins_encode %{
15686     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15687                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15688   %}
15689   ins_pipe(pipe_slow);
15690 %}
15691 
// Large non-constant length ClearArray for AVX512 targets, word-copy-only
// variant. Selected for ClearArray nodes that report both is_large() and
// word_copy_only(), and only when UseAVX > 2. Operands, effects and format
// listing are the same as in rep_stos_large_evex; only the predicate and the
// last bool passed to clear_mem differ.
15692 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15693                                        Universe dummy, rFlagsReg cr)

15694 %{
15695   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15696   match(Set dummy (ClearArray (Binary cnt base) val));
  // cnt, base and val are inputs that are also clobbered; tmp and ktmp are
  // temporaries; the flags register is killed.
15697   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15698 
  // The listing below has three variants, chosen by UseFastStosb and
  // UseXMMForObjInit.
  // NOTE(review): the listings print "xorq rax, rax" although val (rax) is a
  // USE_KILL input of this instruct -- confirm the printed text still matches
  // what clear_mem emits for the word-copy-only case.
15699   format %{ $$template
15700     if (UseFastStosb) {
15701        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15702        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15703        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15704     } else if (UseXMMForObjInit) {
15705        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15706        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15707        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15708        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15709        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15710        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15711        $$emit$$"add     0x40,rax\n\t"
15712        $$emit$$"# L_zero_64_bytes:\n\t"
15713        $$emit$$"sub     0x8,rcx\n\t"
15714        $$emit$$"jge     L_loop\n\t"
15715        $$emit$$"add     0x4,rcx\n\t"
15716        $$emit$$"jl      L_tail\n\t"
15717        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15718        $$emit$$"add     0x20,rax\n\t"
15719        $$emit$$"sub     0x4,rcx\n\t"
15720        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15721        $$emit$$"add     0x4,rcx\n\t"
15722        $$emit$$"jle     L_end\n\t"
15723        $$emit$$"dec     rcx\n\t"
15724        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15725        $$emit$$"vmovq   xmm0,(rax)\n\t"
15726        $$emit$$"add     0x8,rax\n\t"
15727        $$emit$$"dec     rcx\n\t"
15728        $$emit$$"jge     L_sloop\n\t"
15729        $$emit$$"# L_end:\n\t"
15730     } else {
15731        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15732        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15733     }
15734   %}
15735   ins_encode %{
    // Both bools are true here; presumably they are (is_large,
    // word_copy_only) -- confirm against the clear_mem declaration.
15736     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15737                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15738   %}
15739   ins_pipe(pipe_slow);
15740 %}
15741 
15742 // Small constant length ClearArray for AVX512 targets.
// Selected for ClearArray nodes that are neither is_large() nor
// word_copy_only(), when MaxVectorSize >= 32 and
// VM_Version::supports_avx512vl() holds. cnt is an immediate operand (immL)
// and base may be any pointer register (rRegP); val uses the rax_RegL class.
15743 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15744 %{
15745   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15746             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15747   match(Set dummy (ClearArray (Binary cnt base) val));
15748   ins_cost(100);
  // val is an input that is also clobbered; tmp and ktmp are temporaries; the
  // flags register is killed. base is not listed, so no effect is declared
  // on it here.
15749   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15750   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15751   ins_encode %{
    // The count is passed as a compile-time constant ($cnt$$constant), and
    // this call takes no bool flags, unlike the calls in the large variants.
15752     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15753   %}
15754   ins_pipe(pipe_slow);
15755 %}
15756 
// StrComp for the LL encoding when VM_Version::supports_avx512vlbw() is
// false. The format text describes the operands as byte[]. As the operand
// class names indicate, str1/cnt1 use rdi/rcx, str2/cnt2 use rsi/rdx, and
// the result is produced in rax.
15757 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15758                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15759 %{
15760   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15761   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are inputs that are also clobbered;
  // tmp1 is a temporary; the flags register is killed.
15762   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15763 
15764   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15765   ins_encode %{
    // knoreg is passed as the last argument; this instruct declares no kReg
    // operand of its own.
15766     __ string_compare($str1$$Register, $str2$$Register,
15767                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15768                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15769   %}
15770   ins_pipe( pipe_slow );
15771 %}
15772 

17610   effect(USE meth);
17611 
17612   ins_cost(300);
17613   format %{ "call_leaf,runtime " %}
17614   ins_encode(clear_avx, Java_To_Runtime(meth));
17615   ins_pipe(pipe_slow);
17616 %}
17617 
17618 // Call runtime without safepoint and with vector arguments
// Matches CallLeafVector and encodes only Java_To_Runtime(meth).
17619 instruct CallLeafDirectVector(method meth)
17620 %{
17621   match(CallLeafVector);
17622   effect(USE meth);
17623 
17624   ins_cost(300);
17625   format %{ "call_leaf,vector " %}
  // Unlike CallLeafNoFPDirect, the encoding has no clear_avx step;
  // MachCallRuntimeNode::ret_addr_offset() correspondingly does not add
  // clear_avx_size() when the ideal opcode is Op_CallLeafVector.
17626   ins_encode(Java_To_Runtime(meth));
17627   ins_pipe(pipe_slow);
17628 %}
17629 
17630 // Call runtime without safepoint
17631 // entry point is null, target holds the address to call
// Selected for CallLeafNoFP nodes whose entry_point() is nullptr; the
// complementary case (non-null entry point) is handled by
// CallLeafNoFPDirect.
17632 instruct CallLeafNoFPInDirect(rRegP target)
17633 %{
17634   predicate(n->as_Call()->entry_point() == nullptr);
17635   match(CallLeafNoFP target);
17636 
17637   ins_cost(300);
17638   format %{ "call_leaf_nofp,runtime indirect " %}
17639   ins_encode %{
     // Register-indirect call through target; no clear_avx step is encoded
     // here.
     // NOTE(review): the visible MachCallRuntimeNode::ret_addr_offset() is
     // written for a 13-byte "movq r10,#addr; callq (r10)" sequence plus
     // optional vzeroupper -- confirm the return-address offset is computed
     // correctly for this single register call.
17640      __ call($target$$Register);
17641   %}
17642 
17643   ins_pipe(pipe_slow);
17644 %}
17645 
17646 // Call runtime without safepoint
// Selected for CallLeafNoFP nodes whose entry_point() is not nullptr; the
// null case is handled by CallLeafNoFPInDirect.
17647 instruct CallLeafNoFPDirect(method meth)
17648 %{
17649   predicate(n->as_Call()->entry_point() != nullptr);
17650   match(CallLeafNoFP);
17651   effect(USE meth);
17652 
17653   ins_cost(300);
17654   format %{ "call_leaf_nofp,runtime " %}
  // Two encoding steps in order: clear_avx, then the runtime call itself.
17655   ins_encode(clear_avx, Java_To_Runtime(meth));
17656   ins_pipe(pipe_slow);
17657 %}
17658 
17659 // Return Instruction
17660 // Remove the return address & jump to it.
17661 // Notice: We always emit a nop after a ret to make sure there is room
17662 // for safepoint patching
17663 instruct Ret()
17664 %{
17665   match(Return);
17666 
17667   format %{ "ret" %}
17668   ins_encode %{
17669     __ ret(0);
< prev index next >