< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
        // Byte distance from the start of this node's code to the return
        // address: the fixed 5-byte call plus clear_avx_size(), which
        // compute_padding() in this file treats as the vzeroupper bytes that
        // are skipped before the call opcode.
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
        // Same computation as for the static call, but with a 15-byte fixed part.
        // NOTE(review): presumably the extra 10 bytes are the inline-cache data
        // load emitted ahead of the call by ic_call() - confirm.
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
        // Byte distance from the start of this node's code to the return
        // address. The fixed part is the 13-byte "load address into r10, call
        // through r10" sequence. clear_avx_size() is added for every ideal
        // opcode except CallLeafVector; CallLeafDirectVector's ins_encode in
        // this file has no clear_avx step, which matches that exclusion.




 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
        // Returns how many bytes must be added so that the position just past
        // the vzeroupper bytes (clear_avx_size()) and the one-byte call opcode
        // is a multiple of alignment_required().
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to

 1870     st->print("\n\t");
 1871     st->print("# stack alignment check");
 1872 #endif
 1873   }
 1874   if (C->stub_function() != nullptr) {
 1875     st->print("\n\t");
 1876     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1877     st->print("\n\t");
 1878     st->print("je      fast_entry\t");
 1879     st->print("\n\t");
 1880     st->print("call    #nmethod_entry_barrier_stub\t");
 1881     st->print("\n\tfast_entry:");
 1882   }
 1883   st->cr();
 1884 }
 1885 #endif
 1886 
 1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: an optional class-initialization barrier,
        // the verified_entry() sequence (given the frame and stack-bang sizes),
        // and then records the frame-complete offset and, if needed, the
        // constant table base offset on the compilation output.
 1888   Compile* C = ra_->C;
 1889 
 1890   int framesize = C->output()->frame_size_in_bytes();
 1891   int bangsize = C->output()->bang_size_in_bytes();
 1892 
 1893   if (C->clinit_barrier_on_entry()) {
 1894     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1895     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1896 
 1897     Label L_skip_barrier;
 1898     Register klass = rscratch1;
 1899 
          // Load the holder class into the scratch register and test it; the
          // barrier branches to L_skip_barrier on its fast path, otherwise
          // control reaches the jump to the handle-wrong-method stub.
 1900     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1901     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1902 
 1903     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1904 
 1905     __ bind(L_skip_barrier);
 1906   }
 1907 
        // The bang size is passed only when a stack bang is needed (0
        // otherwise); the last argument reports whether C->stub_function()
        // is set.
 1908   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


 1909 
 1910   C->output()->set_frame_complete(__ offset());
 1911 
 1912   if (C->has_mach_constant_base_node()) {
 1913     // NOTE: We set the table base offset here because users might be
 1914     // emitted before MachConstantBaseNode.
 1915     ConstantTable& constant_table = C->output()->constant_table();
 1916     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1917   }
 1918 }
 1919 
 1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1921 {
        // No closed-form size is maintained for the prolog; defer to the
        // generic MachNode::size() computation.
 1922   return MachNode::size(ra_); // too many variables; just compute it
 1923                               // the hard way
 1924 }
 1925 
 1926 int MachPrologNode::reloc() const
 1927 {
        // NOTE(review): the trailing comment says "large enough" although the
        // value is 0 - presumably the prolog reports no relocation entries;
        // confirm against the callers of reloc().
 1928   return 0; // a large enough number
 1929 }
 1930 
 1931 //=============================================================================
 1932 #ifndef PRODUCT
 1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1934 {
 1935   Compile* C = ra_->C;
 1936   if (generate_vzeroupper(C)) {
 1937     st->print("vzeroupper");
 1938     st->cr(); st->print("\t");
 1939   }
 1940 
 1941   int framesize = C->output()->frame_size_in_bytes();
 1942   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1943   // Remove word for return adr already pushed
 1944   // and RBP

 1952   st->print_cr("popq    rbp");
 1953   if (do_polling() && C->is_method_compilation()) {
 1954     st->print("\t");
 1955     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1956                  "ja      #safepoint_stub\t"
 1957                  "# Safepoint: poll for GC");
 1958   }
 1959 }
 1960 #endif
 1961 
 1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1963 {
        // Emits the method epilog: optional vzeroupper, frame teardown (add to
        // rsp, pop rbp), optional reserved-stack check, and - for method
        // compilations that poll - the return safepoint poll.
 1964   Compile* C = ra_->C;
 1965 
 1966   if (generate_vzeroupper(C)) {
 1967     // Clear upper bits of YMM registers when current compiled code uses
 1968     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1969     __ vzeroupper();
 1970   }
 1971 
 1972   int framesize = C->output()->frame_size_in_bytes();
 1973   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1974   // Remove word for return adr already pushed
 1975   // and RBP
 1976   framesize -= 2*wordSize;
 1977 
 1978   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1979 
 1980   if (framesize) {
 1981     __ addq(rsp, framesize);
 1982   }
 1983 
 1984   __ popq(rbp);
 1985 
 1986   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1987     __ reserved_stack_check();
 1988   }
 1989 
 1990   if (do_polling() && C->is_method_compilation()) {
          // While only measuring size (in_scratch_emit_size) no stub is
          // registered; a local throwaway label stands in for the stub entry.
 1991     Label dummy_label;
 1992     Label* code_stub = &dummy_label;
 1993     if (!C->output()->in_scratch_emit_size()) {
 1994       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1995       C->output()->add_stub(stub);
 1996       code_stub = &stub->entry();
 1997     }
 1998     __ relocate(relocInfo::poll_return_type);
 1999     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2000   }
 2001 }
 2002 
 2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2004 {
        // No closed-form size is maintained for the epilog; defer to the
        // generic MachNode::size() computation.
 2005   return MachNode::size(ra_); // too many variables; just compute it
 2006                               // the hard way
 2007 }
 2008 
 2009 int MachEpilogNode::reloc() const
 2010 {
        // NOTE(review): presumably an upper bound on relocation entries for the
        // epilog; emit() issues one explicit relocate(poll_return_type) -
        // confirm what the second entry accounts for.
 2011   return 2; // a large enough number
 2012 }
 2013 
 2014 const Pipeline* MachEpilogNode::pipeline() const
 2015 {
        // The epilog has no dedicated pipeline description; it reports the
        // generic MachNode pipeline class.
 2016   return MachNode::pipeline_class();
 2017 }
 2018 
 2019 //=============================================================================
 2020 
 2021 enum RC {
        // NOTE(review): the enumerators look like register-class tags (general
        // purpose, k/mask, float/vector, stack slot) with rc_bad as the
        // invalid value - their users are outside this excerpt; confirm.
 2022   rc_bad,
 2023   rc_int,
 2024   rc_kreg,
 2025   rc_float,
 2026   rc_stack
 2027 };
 2028 

 2590 #endif
 2591 
 2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2593 {
        // Materializes the address rsp + offset into the register allocated to
        // this node. The offset is derived from the first element of the
        // node's input register mask.
 2594   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2595   int reg = ra_->get_encode(this);
 2596 
 2597   __ lea(as_Register(reg), Address(rsp, offset));
 2598 }
 2599 
 2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2601 {
        // Byte length reported for the lea produced by emit(): one byte more
        // when the destination encoding is above 15 (REX2 instead of REX), and
        // three bytes more when the offset is 0x80 or larger.
        // NOTE(review): the 3-byte step presumably reflects disp32 vs disp8 -
        // confirm.
 2602   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2603   if (ra_->get_encode(this) > 15) {
 2604     return (offset < 0x80) ? 6 : 9; // REX2
 2605   } else {
 2606     return (offset < 0x80) ? 5 : 8; // REX
 2607   }
 2608 }
 2609 













































 2610 //=============================================================================
 2611 #ifndef PRODUCT
 2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2613 {
        // Prints a three-line listing of the inline cache check: load the
        // klass, compare it with the speculated klass, and jump to the IC miss
        // stub on mismatch. The real code comes from emit() via ic_check().
 2614   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2615   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2616   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2617 }
 2618 #endif
 2619 
 2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2621 {
        // The whole inline cache check is delegated to the macro assembler.
        // NOTE(review): InteriorEntryAlignment is presumably the alignment
        // requested for the code that follows the check - confirm in ic_check().
 2622   __ ic_check(InteriorEntryAlignment);
 2623 }
 2624 
 2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2626 {
        // No closed-form size is maintained for the inline cache check; defer
        // to the generic MachNode::size() computation.
 2627   return MachNode::size(ra_); // too many variables; just compute it
 2628                               // the hard way
 2629 }
 2630 
 2631 
 2632 //=============================================================================
 2633 
 2634 bool Matcher::supports_vector_calling_convention(void) {
        // The vector calling convention is reported as supported exactly when
        // the EnableVectorSupport flag is set.
 2635   return EnableVectorSupport;
 2636 }
 2637 
 2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
        // True when the node's flags include Flag_ndd_demotable_opr1.
 2639   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2640 }
 2641 
 2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
        // True when the node's flags include Flag_ndd_demotable_opr2.
 2643   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2644 }
 2645 
 2646 #ifdef ASSERT
 2647 static bool is_ndd_demotable(const MachNode* mdef) {
        // True when either of the two demotable-operand flags is set on the
        // node. Only compiled under ASSERT.
 2648   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2649 }
 2650 #endif

 4584     }
 4585     __ post_call_nop();
 4586   %}
 4587 
 4588   enc_class Java_Dynamic_Call(method meth) %{
          // Inline-cache call to the target method, followed by the post-call
          // nop.
 4589     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4590     __ post_call_nop();
 4591   %}
 4592 
 4593   enc_class call_epilog %{
          // Emits code only under VerifyStackAtCalls: compares the word at the
          // computed rsp-relative slot with the 0xbadb100d cookie and executes
          // int3 when it does not match.
 4594     if (VerifyStackAtCalls) {
 4595       // Check that stack depth is unchanged: find majik cookie on stack
 4596       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4597       Label L;
 4598       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4599       __ jccb(Assembler::equal, L);
 4600       // Die if stack mismatch
 4601       __ int3();
 4602       __ bind(L);
 4603     }

































 4604   %}
 4605 
 4606 %}
 4607 
 4608 //----------FRAME--------------------------------------------------------------
 4609 // Definition of frame structure and management information.
 4610 //
 4611 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4612 //                             |   (to get allocators register number
 4613 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4614 //  r   CALLER     |        |
 4615 //  o     |        +--------+      pad to even-align allocators stack-slot
 4616 //  w     V        |  pad0  |        numbers; owned by CALLER
 4617 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4618 //  h     ^        |   in   |  5
 4619 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4620 //  |     |        |        |  3
 4621 //  |     |        +--------+
 4622 //  V     |        | old out|      Empty on Intel, window on Sparc
 4623 //        |    old |preserve|      Must be even aligned.

 5762   %}
 5763 %}
 5764 
 5765 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5766 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5767 %{
        // Addressing mode for reg + (ConvI2L(idx) << scale) + off. The
        // predicate walks from the outer AddP to the ConvI2L node and accepts
        // the match only when that node's long type has a lower bound >= 0,
        // i.e. the index is known to be non-negative.
 5768   constraint(ALLOC_IN_RC(ptr_reg));
 5769   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5770   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5771 
 5772   op_cost(10);
 5773   format %{"[$reg + $off + $idx << $scale]" %}
 5774   interface(MEMORY_INTER) %{
 5775     base($reg);
 5776     index($idx);
 5777     scale($scale);
 5778     disp($off);
 5779   %}
 5780 %}
 5781 
















 5782 // Indirect Narrow Oop Plus Offset Operand
 5783 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5784 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5785 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Folds the DecodeN into the address computation. It applies only with
        // compressed oops and a shift equal to times_8, so the narrow oop is
        // used directly as an index scaled by 8 on top of R12 (encoding 0xc)
        // plus the constant offset.
 5786   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5787   constraint(ALLOC_IN_RC(ptr_reg));
 5788   match(AddP (DecodeN reg) off);
 5789 
 5790   op_cost(10);
 5791   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5792   interface(MEMORY_INTER) %{
 5793     base(0xc); // R12
 5794     index($reg);
 5795     scale(0x3);
 5796     disp($off);
 5797   %}
 5798 %}
 5799 
 5800 // Indirect Memory Operand
 5801 operand indirectNarrow(rRegN reg)

 6271 %}
 6272 
 6273 // Replaces legVec during post-selection cleanup. See above.
 6274 operand legVecZ() %{
        // Register operand for VecZ values, allocated in the
        // vectorz_reg_legacy register class; it has an empty format.
 6275   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6276   match(VecZ);
 6277 
 6278   format %{ %}
 6279   interface(REG_INTER);
 6280 %}
 6281 
 6282 //----------OPERAND CLASSES----------------------------------------------------
 6283 // Operand Classes are groups of operands that are used to simplify
 6284 // instruction definitions by not requiring the AD writer to specify separate
 6285 // instructions for every form of operand when the instruction accepts
 6286 // multiple operand types with the same basic encoding and format.  The classic
 6287 // case of this is memory operands.
 6288 
 6289 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6290                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6291                indCompressedOopOffset,
 6292                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6293                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6294                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6295 
 6296 //----------PIPELINE-----------------------------------------------------------
 6297 // Rules which define the behavior of the target architectures pipeline.
 6298 pipeline %{
 6299 
 6300 //----------ATTRIBUTES---------------------------------------------------------
 6301 attributes %{
 6302   variable_size_instructions;        // Variable size instructions
 6303   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6304   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 6305   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6306   instruction_fetch_units = 1;       // of 16 bytes
 6307 %}
 6308 
 6309 //----------RESOURCES----------------------------------------------------------
 6310 // Resources are the functional units available to the machine
 6311 

 8901   format %{ "MEMBAR-storestore (empty encoding)" %}
 8902   ins_encode( );
 8903   ins_pipe(empty);
 8904 %}
 8905 
 8906 //----------Move Instructions--------------------------------------------------
 8907 
 8908 instruct castX2P(rRegP dst, rRegL src)
 8909 %{
        // Reinterprets a long as a pointer with a plain register move; nothing
        // is emitted when dst and src were allocated to the same register.
 8910   match(Set dst (CastX2P src));
 8911 
 8912   format %{ "movq    $dst, $src\t# long->ptr" %}
 8913   ins_encode %{
 8914     if ($dst$$reg != $src$$reg) {
 8915       __ movptr($dst$$Register, $src$$Register);
 8916     }
 8917   %}
 8918   ins_pipe(ialu_reg_reg); // XXX
 8919 %}
 8920 


























 8921 instruct castP2X(rRegL dst, rRegP src)
 8922 %{
        // Reinterprets a pointer as a long with a plain register move; nothing
        // is emitted when dst and src were allocated to the same register.
 8923   match(Set dst (CastP2X src));
 8924 
 8925   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8926   ins_encode %{
 8927     if ($dst$$reg != $src$$reg) {
 8928       __ movptr($dst$$Register, $src$$Register);
 8929     }
 8930   %}
 8931   ins_pipe(ialu_reg_reg); // XXX
 8932 %}
 8933 
 8934 // Convert oop into int for vectors alignment masking
 8935 instruct convP2I(rRegI dst, rRegP src)
 8936 %{
 8937   match(Set dst (ConvL2I (CastP2X src)));
 8938 
 8939   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8940   ins_encode %{

14639   effect(DEF dst, USE src);
14640   ins_cost(100);
14641   format %{ "movd    $dst,$src\t# MoveI2F" %}
14642   ins_encode %{
14643     __ movdl($dst$$XMMRegister, $src$$Register);
14644   %}
14645   ins_pipe( pipe_slow );
14646 %}
14647 
14648 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves the long held in a general-purpose register into an XMM
        // register via movdq.
        // NOTE(review): the format string prints "movd" while the encoding
        // calls movdq - confirm whether the listing text should be updated.
14649   match(Set dst (MoveL2D src));
14650   effect(DEF dst, USE src);
14651   ins_cost(100);
14652   format %{ "movd    $dst,$src\t# MoveL2D" %}
14653   ins_encode %{
14654      __ movdq($dst$$XMMRegister, $src$$Register);
14655   %}
14656   ins_pipe( pipe_slow );
14657 %}
14658 

14659 // Fast clearing of an array
14660 // Small non-constant length ClearArray for non-AVX512 targets.
14661 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14662                   Universe dummy, rFlagsReg cr)
14663 %{
14664   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14665   match(Set dummy (ClearArray cnt base));
14666   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































14667 
14668   format %{ $$template
14669     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14670     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14671     $$emit$$"jg      LARGE\n\t"
14672     $$emit$$"dec     rcx\n\t"
14673     $$emit$$"js      DONE\t# Zero length\n\t"
14674     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14675     $$emit$$"dec     rcx\n\t"
14676     $$emit$$"jge     LOOP\n\t"
14677     $$emit$$"jmp     DONE\n\t"
14678     $$emit$$"# LARGE:\n\t"
14679     if (UseFastStosb) {
14680        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14681        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14682     } else if (UseXMMForObjInit) {
14683        $$emit$$"mov     rdi,rax\n\t"
14684        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14685        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14686        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

14694        $$emit$$"jl      L_tail\n\t"
14695        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14696        $$emit$$"add     0x20,rax\n\t"
14697        $$emit$$"sub     0x4,rcx\n\t"
14698        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14699        $$emit$$"add     0x4,rcx\n\t"
14700        $$emit$$"jle     L_end\n\t"
14701        $$emit$$"dec     rcx\n\t"
14702        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14703        $$emit$$"vmovq   xmm0,(rax)\n\t"
14704        $$emit$$"add     0x8,rax\n\t"
14705        $$emit$$"dec     rcx\n\t"
14706        $$emit$$"jge     L_sloop\n\t"
14707        $$emit$$"# L_end:\n\t"
14708     } else {
14709        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14710     }
14711     $$emit$$"# DONE"
14712   %}
14713   ins_encode %{
14714     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14715                  $tmp$$XMMRegister, false, knoreg);
14716   %}
14717   ins_pipe(pipe_slow);
14718 %}
14719 
14720 // Small non-constant length ClearArray for AVX512 targets.
14721 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
14722                        Universe dummy, rFlagsReg cr)
14723 %{
14724   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
14725   match(Set dummy (ClearArray cnt base));
14726   ins_cost(125);
14727   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
14728 
14729   format %{ $$template
14730     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14731     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14732     $$emit$$"jg      LARGE\n\t"
14733     $$emit$$"dec     rcx\n\t"
14734     $$emit$$"js      DONE\t# Zero length\n\t"
14735     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14736     $$emit$$"dec     rcx\n\t"
14737     $$emit$$"jge     LOOP\n\t"
14738     $$emit$$"jmp     DONE\n\t"
14739     $$emit$$"# LARGE:\n\t"
14740     if (UseFastStosb) {
14741        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14742        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14743     } else if (UseXMMForObjInit) {
14744        $$emit$$"mov     rdi,rax\n\t"
14745        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14746        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14747        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

14755        $$emit$$"jl      L_tail\n\t"
14756        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14757        $$emit$$"add     0x20,rax\n\t"
14758        $$emit$$"sub     0x4,rcx\n\t"
14759        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14760        $$emit$$"add     0x4,rcx\n\t"
14761        $$emit$$"jle     L_end\n\t"
14762        $$emit$$"dec     rcx\n\t"
14763        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14764        $$emit$$"vmovq   xmm0,(rax)\n\t"
14765        $$emit$$"add     0x8,rax\n\t"
14766        $$emit$$"dec     rcx\n\t"
14767        $$emit$$"jge     L_sloop\n\t"
14768        $$emit$$"# L_end:\n\t"
14769     } else {
14770        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14771     }
14772     $$emit$$"# DONE"
14773   %}
14774   ins_encode %{
14775     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14776                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
14777   %}
14778   ins_pipe(pipe_slow);
14779 %}
14780 
14781 // Large non-constant length ClearArray for non-AVX512 targets.
14782 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14783                         Universe dummy, rFlagsReg cr)
14784 %{
        // Selected for ClearArray nodes flagged is_large() when UseAVX <= 2.
        // cnt (rcx) and base (rdi) are consumed and clobbered, rax and the
        // flags are killed, and tmp is an XMM temporary. The format template
        // only describes the three strategies (rep stosb, the YMM store loop,
        // rep stosq); the real code comes from clear_mem(), called here with
        // `true` and no opmask register (knoreg).
        // NOTE(review): the boolean is presumably clear_mem's is_large
        // argument - confirm.
14785   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
14786   match(Set dummy (ClearArray cnt base));
14787   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































14788 
14789   format %{ $$template
14790     if (UseFastStosb) {
14791        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14792        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14793        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
14794     } else if (UseXMMForObjInit) {
14795        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
14796        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14797        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14798        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14799        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14800        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14801        $$emit$$"add     0x40,rax\n\t"
14802        $$emit$$"# L_zero_64_bytes:\n\t"
14803        $$emit$$"sub     0x8,rcx\n\t"
14804        $$emit$$"jge     L_loop\n\t"
14805        $$emit$$"add     0x4,rcx\n\t"
14806        $$emit$$"jl      L_tail\n\t"
14807        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14808        $$emit$$"add     0x20,rax\n\t"
14809        $$emit$$"sub     0x4,rcx\n\t"
14810        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14811        $$emit$$"add     0x4,rcx\n\t"
14812        $$emit$$"jle     L_end\n\t"
14813        $$emit$$"dec     rcx\n\t"
14814        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14815        $$emit$$"vmovq   xmm0,(rax)\n\t"
14816        $$emit$$"add     0x8,rax\n\t"
14817        $$emit$$"dec     rcx\n\t"
14818        $$emit$$"jge     L_sloop\n\t"
14819        $$emit$$"# L_end:\n\t"
14820     } else {
14821        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14822        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
14823     }
14824   %}
14825   ins_encode %{
14826     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14827                  $tmp$$XMMRegister, true, knoreg);
14828   %}
14829   ins_pipe(pipe_slow);
14830 %}
14831 
14832 // Large non-constant length ClearArray for AVX512 targets.
14833 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
14834                              Universe dummy, rFlagsReg cr)
14835 %{
        // Selected for ClearArray nodes flagged is_large() when UseAVX > 2.
        // Same shape as rep_stos_large, except that tmp is a legRegD and an
        // extra opmask temporary (ktmp) is handed to clear_mem() in place of
        // knoreg. The format template only describes the strategies; the real
        // code comes from clear_mem().
14836   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
14837   match(Set dummy (ClearArray cnt base));
14838   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
14839 
14840   format %{ $$template
14841     if (UseFastStosb) {
14842        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14843        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14844        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
14845     } else if (UseXMMForObjInit) {
14846        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
14847        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14848        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14849        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14850        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14851        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14852        $$emit$$"add     0x40,rax\n\t"
14853        $$emit$$"# L_zero_64_bytes:\n\t"
14854        $$emit$$"sub     0x8,rcx\n\t"
14855        $$emit$$"jge     L_loop\n\t"
14856        $$emit$$"add     0x4,rcx\n\t"
14857        $$emit$$"jl      L_tail\n\t"
14858        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14859        $$emit$$"add     0x20,rax\n\t"
14860        $$emit$$"sub     0x4,rcx\n\t"
14861        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14862        $$emit$$"add     0x4,rcx\n\t"
14863        $$emit$$"jle     L_end\n\t"
14864        $$emit$$"dec     rcx\n\t"
14865        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14866        $$emit$$"vmovq   xmm0,(rax)\n\t"
14867        $$emit$$"add     0x8,rax\n\t"
14868        $$emit$$"dec     rcx\n\t"
14869        $$emit$$"jge     L_sloop\n\t"
14870        $$emit$$"# L_end:\n\t"
14871     } else {
14872        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14873        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
14874     }
14875   %}
14876   ins_encode %{
14877     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14878                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
14879   %}
14880   ins_pipe(pipe_slow);
14881 %}
14882 
14883 // Small constant length ClearArray for AVX512 targets.
14884 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
14885 %{
        // Constant-count variant: cnt is an immediate and base may be any
        // pointer register. It requires a non-large ClearArray,
        // MaxVectorSize >= 32 and AVX512VL. Only temporaries and the flags are
        // affected; base is not listed in the effect clause.
14886   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
14887   match(Set dummy (ClearArray cnt base));

14888   ins_cost(100);
14889   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
14890   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
14891   ins_encode %{
14892    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
14893   %}
14894   ins_pipe(pipe_slow);
14895 %}
14896 
14897 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14898                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
14899 %{
        // StrComp for the StrIntrinsicNode::LL encoding on CPUs without
        // AVX512VL+BW. All four inputs are pinned to fixed registers and are
        // clobbered (USE_KILL); the result is produced in rax. No opmask
        // register is passed to string_compare() (knoreg).
14900   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
14901   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14902   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14903 
14904   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
14905   ins_encode %{
14906     __ string_compare($str1$$Register, $str2$$Register,
14907                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14908                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
14909   %}
14910   ins_pipe( pipe_slow );
14911 %}
14912 

16750   effect(USE meth);
16751 
16752   ins_cost(300);
16753   format %{ "call_leaf,runtime " %}
16754   ins_encode(clear_avx, Java_To_Runtime(meth));
16755   ins_pipe(pipe_slow);
16756 %}
16757 
16758 // Call runtime without safepoint and with vector arguments
16759 instruct CallLeafDirectVector(method meth)
16760 %{
        // Unlike the neighbouring leaf-call instructs, the encoding has no
        // clear_avx step.
        // NOTE(review): presumably so that vector arguments keep their upper
        // register bits across the call - confirm.
16761   match(CallLeafVector);
16762   effect(USE meth);
16763 
16764   ins_cost(300);
16765   format %{ "call_leaf,vector " %}
16766   ins_encode(Java_To_Runtime(meth));
16767   ins_pipe(pipe_slow);
16768 %}
16769 
















16770 // Call runtime without safepoint
16771 instruct CallLeafNoFPDirect(method meth)
16772 %{
        // Matches CallLeafNoFP and encodes it as clear_avx followed by the
        // Java_To_Runtime call sequence.

16773   match(CallLeafNoFP);
16774   effect(USE meth);
16775 
16776   ins_cost(300);
16777   format %{ "call_leaf_nofp,runtime " %}
16778   ins_encode(clear_avx, Java_To_Runtime(meth));
16779   ins_pipe(pipe_slow);
16780 %}
16781 
16782 // Return Instruction
16783 // Remove the return address & jump to it.
16784 // Notice: We always emit a nop after a ret to make sure there is room
16785 // for safepoint patching
16786 instruct Ret()
16787 %{
16788   match(Return);
16789 
16790   format %{ "ret" %}
16791   ins_encode %{
16792     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
        // Byte distance from the start of this node's code to the return
        // address: the fixed 5-byte call plus clear_avx_size(), which
        // compute_padding() in this file treats as the vzeroupper bytes that
        // are skipped before the call opcode.
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
        // Same computation as for the static call, but with a 15-byte fixed part.
        // NOTE(review): presumably the extra 10 bytes are the inline-cache data
        // load emitted ahead of the call by ic_call() - confirm.
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
        // Byte distance from the start of this node's code to the return
        // address. A node without an entry point is a register-indirect call
        // and is just the 3-byte call. Otherwise the fixed part is the 13-byte
        // "load address into r10, call through r10" sequence, plus
        // clear_avx_size() for every ideal opcode except CallLeafVector.
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 //
 1663 // Compute padding required for nodes which need alignment
 1664 //
 1665 
 1666 // The address of the call instruction needs to be 4-byte aligned to
 1667 // ensure that it does not span a cache line so that it can be patched.
 1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1669 {
        // Returns how many bytes must be added so that the position just past
        // the vzeroupper bytes (clear_avx_size()) and the one-byte call opcode
        // is a multiple of alignment_required().
 1670   current_offset += clear_avx_size(); // skip vzeroupper
 1671   current_offset += 1; // skip call opcode byte
 1672   return align_up(current_offset, alignment_required()) - current_offset;
 1673 }
 1674 
 1675 // The address of the call instruction needs to be 4-byte aligned to

 1874     st->print("\n\t");
 1875     st->print("# stack alignment check");
 1876 #endif
 1877   }
 1878   if (C->stub_function() != nullptr) {
 1879     st->print("\n\t");
 1880     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1881     st->print("\n\t");
 1882     st->print("je      fast_entry\t");
 1883     st->print("\n\t");
 1884     st->print("call    #nmethod_entry_barrier_stub\t");
 1885     st->print("\n\tfast_entry:");
 1886   }
 1887   st->cr();
 1888 }
 1889 #endif
 1890 
 1891 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog through verified_entry(C), adds the entry
        // barrier for non-stub compilations, binds the verified-entry label,
        // and then records the frame-complete offset and, if needed, the
        // constant table base offset on the compilation output.
 1892   Compile* C = ra_->C;
 1893 
 1894   __ verified_entry(C);








 1895 
        // The entry barrier is emitted only when no stub function is being
        // compiled.
 1896   if (ra_->C->stub_function() == nullptr) {
 1897     __ entry_barrier();




 1898   }
 1899 
        // The label is bound only in the real emission pass, not while the
        // node is being emitted for size measurement (in_scratch_emit_size).
 1900   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1901     __ bind(*_verified_entry);
 1902   }
 1903 
 1904   C->output()->set_frame_complete(__ offset());
 1905 
 1906   if (C->has_mach_constant_base_node()) {
 1907     // NOTE: We set the table base offset here because users might be
 1908     // emitted before MachConstantBaseNode.
 1909     ConstantTable& constant_table = C->output()->constant_table();
 1910     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1911   }
 1912 }
 1913 





 1914 
 1915 int MachPrologNode::reloc() const
 1916 {
        // NOTE(review): the trailing comment says "large enough" although the
        // value is 0 - presumably the prolog reports no relocation entries;
        // confirm against the callers of reloc().
 1917   return 0; // a large enough number
 1918 }
 1919 
 1920 //=============================================================================
 1921 #ifndef PRODUCT
 1922 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1923 {
 1924   Compile* C = ra_->C;
 1925   if (generate_vzeroupper(C)) {
 1926     st->print("vzeroupper");
 1927     st->cr(); st->print("\t");
 1928   }
 1929 
 1930   int framesize = C->output()->frame_size_in_bytes();
 1931   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1932   // Remove word for return adr already pushed
 1933   // and RBP

 1941   st->print_cr("popq    rbp");
 1942   if (do_polling() && C->is_method_compilation()) {
 1943     st->print("\t");
 1944     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1945                  "ja      #safepoint_stub\t"
 1946                  "# Safepoint: poll for GC");
 1947   }
 1948 }
 1949 #endif
 1950 
 1951 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1952 {
        // Emits the method epilog: optional vzeroupper, frame removal, the
        // reserved-stack check, and the return safepoint poll.
 1953   Compile* C = ra_->C;
 1954 
 1955   if (generate_vzeroupper(C)) {
 1956     // Clear upper bits of YMM registers when current compiled code uses
 1957     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1958     __ vzeroupper();
 1959   }
 1960 
 1961   // Subtract two words to account for return address and rbp
 1962   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
        // The second argument forwards C->needs_stack_repair() to remove_frame.
 1963   __ remove_frame(initial_framesize, C->needs_stack_repair());










 1964 
 1965   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1966     __ reserved_stack_check();
 1967   }
 1968 
 1969   if (do_polling() && C->is_method_compilation()) {
          // A real C2SafepointPollStub is allocated and registered only outside
          // scratch emission; otherwise the poll targets a throwaway local label.
 1970     Label dummy_label;
 1971     Label* code_stub = &dummy_label;
 1972     if (!C->output()->in_scratch_emit_size()) {
 1973       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1974       C->output()->add_stub(stub);
 1975       code_stub = &stub->entry();
 1976     }
 1977     __ relocate(relocInfo::poll_return_type);
 1978     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1979   }
 1980 }
 1981 






 1982 int MachEpilogNode::reloc() const
 1983 {
        // NOTE(review): presumably an upper bound on the relocation entries the
        // epilog can emit (emit() issues one poll_return_type relocation) — confirm.
 1984   return 2; // a large enough number
 1985 }
 1986 
 1987 const Pipeline* MachEpilogNode::pipeline() const
 1988 {
        // The epilog has no pipeline description of its own; it returns the
        // class-level one from MachNode.
 1989   return MachNode::pipeline_class();
 1990 }
 1991 
 1992 //=============================================================================
 1993 
 1994 enum RC {
        // Register-class tags. NOTE(review): the meanings are only suggested by
        // the enumerator names (invalid, integer, k/mask register, float/vector
        // register, stack slot) — confirm at the use sites.
 1995   rc_bad,
 1996   rc_int,
 1997   rc_kreg,
 1998   rc_float,
 1999   rc_stack
 2000 };
 2001 

 2563 #endif
 2564 
 2565 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2566 {
        // Loads the address rsp + offset into this node's allocated register,
        // where offset is the stack offset of the first element of input mask 0.
 2567   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2568   int reg = ra_->get_encode(this);
 2569 
 2570   __ lea(as_Register(reg), Address(rsp, offset));
 2571 }
 2572 
 2573 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2574 {
        // Returns a byte count that depends on the stack offset and on the
        // register encoding (presumably the size of the lea produced by emit()
        // — confirm). Offsets below 0x80 use the form that is 3 bytes shorter;
        // register encodings above 15 cost one extra byte.
 2575   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2576   if (ra_->get_encode(this) > 15) {
 2577     return (offset < 0x80) ? 6 : 9; // REX2
 2578   } else {
 2579     return (offset < 0x80) ? 5 : 8; // REX
 2580   }
 2581 }
 2582 
 2583 //=============================================================================
 2584 #ifndef PRODUCT
 2585 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2586 {
        // Debug listing only: prints the node name, not the emitted instructions.
 2587   st->print_cr("MachVEPNode");
 2588 }
 2589 #endif
 2590 
 2591 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2592 {
        // Emits an inline-type entry point.
        //  - !_verified: only the inline-cache check.
        //  - _verified:  optional entry barrier in a temporary frame, unpacking of
        //                the inline-type arguments, the real frame setup, then a
        //                jump to the verified entry label.
        // Non-stub compilations are padded to 4 bytes at the end (see below).
 2594   if (!_verified) {
 2595     __ ic_check(1);
 2596   } else {
 2597     if (ra_->C->stub_function() == nullptr) {
 2598       // Emit the entry barrier in a temporary frame before unpacking because
 2599       // it can deopt, which would require packing the scalarized args again.
 2600       __ verified_entry(ra_->C, 0);
 2601       __ entry_barrier();
 2602       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2603       __ remove_frame(initial_framesize, false);
 2604     }
 2605     // Unpack inline type args passed as oop and then jump to
 2606     // the verified entry point (skipping the unverified entry).
 2607     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2608     // Emit code for verified entry and save increment for stack repair on return
 2609     __ verified_entry(ra_->C, sp_inc);
          // During scratch emission the jump targets a throwaway local label
          // instead of *_verified_entry.
 2610     if (Compile::current()->output()->in_scratch_emit_size()) {
 2611       Label dummy_verified_entry;
 2612       __ jmp(dummy_verified_entry);
 2613     } else {
 2614       __ jmp(*_verified_entry);
 2615     }
 2616   }
 2617   if (ra_->C->stub_function() == nullptr) {
 2618     // Pad so that the next call to MachVEPNode::emit() starts out with the
 2619     // correct alignment.  This is needed by entry_barrier() to align the
 2620     // compare.  But unfortunately we need to align all 4 MachVEPNodes because
 2621     // entry point offsets are computed using scratch_emit_size(), so starting
 2622     // alignment must match the alignment of the scratch buffer, otherwise the sizes
 2623     // will be off.
 2624     __ align(4);
 2625   }
 2626 }
 2627 
 2628 //=============================================================================
 2629 #ifndef PRODUCT
 2630 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2631 {
        // Debug listing of the inline-cache check: load the receiver's compressed
        // klass, compare it with the speculated klass, branch to the IC miss stub
        // on mismatch.
 2632   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2633   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2634   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2635 }
 2636 #endif
 2637 
 2638 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2639 {
        // Emits the inline-cache check for the unverified entry point.
        // InteriorEntryAlignment is handed to ic_check (presumably the alignment
        // wanted for the code following the check — confirm).
 2640   __ ic_check(InteriorEntryAlignment);
 2641 }
 2642 






 2643 
 2644 //=============================================================================
 2645 
 2646 bool Matcher::supports_vector_calling_convention(void) {
        // Reports exactly the value of the EnableVectorSupport flag.
 2647   return EnableVectorSupport;
 2648 }
 2649 
 2650 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
        // True when mdef carries the platform-dependent Flag_ndd_demotable_opr1 bit.
 2651   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2652 }
 2653 
 2654 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
        // True when mdef carries the platform-dependent Flag_ndd_demotable_opr2 bit.
 2655   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2656 }
 2657 
 2658 #ifdef ASSERT
 2659 static bool is_ndd_demotable(const MachNode* mdef) {
        // True when either of the two ndd-demotable flags is set on mdef.
        // This helper is compiled only under ASSERT.
 2660   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2661 }
 2662 #endif

 4596     }
 4597     __ post_call_nop();
 4598   %}
 4599 
 4600   enc_class Java_Dynamic_Call(method meth) %{
          // Emits an inline-cache call to $meth, followed by the post-call nop.
 4601     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4602     __ post_call_nop();
 4603   %}
 4604 
 4605   enc_class call_epilog %{
          // Code emitted right after a call returns:
          //  1. with VerifyStackAtCalls, a check that the stack cookie is intact;
          //  2. for calls returning an inline type as fields, initialization of the
          //     null-marker projection and normalization of rax.
 4606     if (VerifyStackAtCalls) {
 4607       // Check that stack depth is unchanged: find majik cookie on stack
 4608       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4609       Label L;
 4610       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4611       __ jccb(Assembler::equal, L);
 4612       // Die if stack mismatch
 4613       __ int3();
 4614       __ bind(L);
 4615     }
 4616     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4617       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4618       // Search for the corresponding projection, get the register and emit code that initializes it.
            // con is the index of the last entry of the return-value range.
 4619       uint con = (tf()->range_cc()->cnt() - 1);
 4620       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4621         ProjNode* proj = fast_out(i)->as_Proj();
 4622         if (proj->_con == con) {
 4623           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4624           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4625           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
                // A stack-allocated projection is computed in rscratch1 and stored below.
 4626           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4627           __ testq(rax, rax);
 4628           __ setb(Assembler::notZero, toReg);
 4629           __ movzbl(toReg, toReg);
 4630           if (reg->is_stack()) {
 4631             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4632             __ movq(Address(rsp, st_off), toReg);
 4633           }
 4634           break;
 4635         }
 4636       }
 4637       if (return_value_is_used()) {
 4638         // An inline type is returned as fields in multiple registers.
 4639         // Rax either contains an oop if the inline type is buffered or a pointer
 4640         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4641         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4642         // rax &= (rax & 1) - 1
 4643         __ movptr(rscratch1, rax);
 4644         __ andptr(rscratch1, 0x1);
 4645         __ subptr(rscratch1, 0x1);
 4646         __ andptr(rax, rscratch1);
 4647       }
 4648     }
 4649   %}
 4650 
 4651 %}
 4652 
 4653 //----------FRAME--------------------------------------------------------------
 4654 // Definition of frame structure and management information.
 4655 //
 4656 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4657 //                             |   (to get allocators register number
 4658 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4659 //  r   CALLER     |        |
 4660 //  o     |        +--------+      pad to even-align allocators stack-slot
 4661 //  w     V        |  pad0  |        numbers; owned by CALLER
 4662 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4663 //  h     ^        |   in   |  5
 4664 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4665 //  |     |        |        |  3
 4666 //  |     |        +--------+
 4667 //  V     |        | old out|      Empty on Intel, window on Sparc
 4668 //        |    old |preserve|      Must be even aligned.

 5807   %}
 5808 %}
 5809 
 5810 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5811 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5812 %{
 5813   constraint(ALLOC_IN_RC(ptr_reg));
        // Applies only when the long type reached via in(2)->in(3)->in(1) has a
        // lower bound >= 0 (presumably the ConvI2L of idx in the match rule —
        // confirm), i.e. the index is known to be non-negative.
 5814   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5815   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5816 
 5817   op_cost(10);
 5818   format %{"[$reg + $off + $idx << $scale]" %}
 5819   interface(MEMORY_INTER) %{
 5820     base($reg);
 5821     index($idx);
 5822     scale($scale);
 5823     disp($off);
 5824   %}
 5825 %}
 5826 
 5827 // Indirect Narrow Oop Operand
 5828 operand indCompressedOop(rRegN reg) %{
        // Only usable when the compressed-oop shift equals the times_8 scale, so
        // that the DecodeN folds into [R12 + reg * 8] (scale(0x3) below).
 5829   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5830   constraint(ALLOC_IN_RC(ptr_reg));
 5831   match(DecodeN reg);
 5832 
 5833   op_cost(10);
 5834   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5835   interface(MEMORY_INTER) %{
 5836     base(0xc); // R12
 5837     index($reg);
 5838     scale(0x3);
 5839     disp(0x0);
 5840   %}
 5841 %}
 5842 
 5843 // Indirect Narrow Oop Plus Offset Operand
 5844 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5845 // so we can't free r12 even when CompressedOops::base() == nullptr.
 5846 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Same folding as the offset-less compressed-oop form, plus the
        // displacement $off: [R12 + reg * 8 + off]. Requires the compressed-oop
        // shift to equal the times_8 scale.
 5847   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5848   constraint(ALLOC_IN_RC(ptr_reg));
 5849   match(AddP (DecodeN reg) off);
 5850 
 5851   op_cost(10);
 5852   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5853   interface(MEMORY_INTER) %{
 5854     base(0xc); // R12
 5855     index($reg);
 5856     scale(0x3);
 5857     disp($off);
 5858   %}
 5859 %}
 5860 
 5861 // Indirect Memory Operand
 5862 operand indirectNarrow(rRegN reg)

 6332 %}
 6333 
 6334 // Replaces legVec during post-selection cleanup. See above.
 6335 operand legVecZ() %{
        // VecZ operand restricted to the vectorz_reg_legacy register class.
 6336   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6337   match(VecZ);
 6338 
 6339   format %{ %}
 6340   interface(REG_INTER);
 6341 %}
 6342 
 6343 //----------OPERAND CLASSES----------------------------------------------------
 6344 // Operand Classes are groups of operands that are used to simplify
 6345 // instruction definitions by not requiring the AD writer to specify separate
 6346 // instructions for every form of operand when the instruction accepts
 6347 // multiple operand types with the same basic encoding and format.  The classic
 6348 // case of this is memory operands.
 6349 
 6350 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6351                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6352                indCompressedOop, indCompressedOopOffset,
 6353                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6354                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6355                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow); // all addressing-mode operands grouped under the single "memory" class
 6356 
 6357 //----------PIPELINE-----------------------------------------------------------
 6358 // Rules which define the behavior of the target architectures pipeline.
 6359 pipeline %{
 6360 
 6361 //----------ATTRIBUTES---------------------------------------------------------
 6362 attributes %{
 6363   variable_size_instructions;        // Variable size instructions
 6364   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6365   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 6366   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6367   instruction_fetch_units = 1;       // of 16 bytes
 6368 %}
 6369 
 6370 //----------RESOURCES----------------------------------------------------------
 6371 // Resources are the functional units available to the machine
 6372 

 8962   format %{ "MEMBAR-storestore (empty encoding)" %}
 8963   ins_encode( );
 8964   ins_pipe(empty);
 8965 %}
 8966 
 8967 //----------Move Instructions--------------------------------------------------
 8968 
 8969 instruct castX2P(rRegP dst, rRegL src)
 8970 %{
 8971   match(Set dst (CastX2P src));
 8972 
 8973   format %{ "movq    $dst, $src\t# long->ptr" %}
 8974   ins_encode %{
          // Reinterprets a long as a pointer; no move is emitted when both
          // operands were allocated the same register.
 8975     if ($dst$$reg != $src$$reg) {
 8976       __ movptr($dst$$Register, $src$$Register);
 8977     }
 8978   %}
 8979   ins_pipe(ialu_reg_reg); // XXX
 8980 %}
 8981 
 8982 instruct castI2N(rRegN dst, rRegI src)
 8983 %{
 8984   match(Set dst (CastI2N src));
 8985 
 8986   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8987   ins_encode %{
          // Reinterprets an int as a narrow pointer with a 32-bit register move
          // (the format string above names movl to match). No move is emitted
          // when both operands were allocated the same register.
 8988     if ($dst$$reg != $src$$reg) {
 8989       __ movl($dst$$Register, $src$$Register);
 8990     }
 8991   %}
 8992   ins_pipe(ialu_reg_reg); // XXX
 8993 %}
 8994 
 8995 instruct castN2X(rRegL dst, rRegN src)
 8996 %{
 8997   match(Set dst (CastP2X src));
 8998 
 8999   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9000   ins_encode %{
          // CastP2X variant whose source operand is a narrow-oop register (rRegN).
          // NOTE(review): the format text still reads "ptr -> long" although the
          // source is a narrow pointer — confirm the wording is intended.
 9001     if ($dst$$reg != $src$$reg) {
 9002       __ movptr($dst$$Register, $src$$Register);
 9003     }
 9004   %}
 9005   ins_pipe(ialu_reg_reg); // XXX
 9006 %}
 9007 
 9008 instruct castP2X(rRegL dst, rRegP src)
 9009 %{
 9010   match(Set dst (CastP2X src));
 9011 
 9012   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9013   ins_encode %{
          // Reinterprets a pointer as a long; no move is emitted when both
          // operands were allocated the same register.
 9014     if ($dst$$reg != $src$$reg) {
 9015       __ movptr($dst$$Register, $src$$Register);
 9016     }
 9017   %}
 9018   ins_pipe(ialu_reg_reg); // XXX
 9019 %}
 9020 
 9021 // Convert oop into int for vectors alignment masking
 9022 instruct convP2I(rRegI dst, rRegP src)
 9023 %{
 9024   match(Set dst (ConvL2I (CastP2X src)));
 9025 
 9026   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9027   ins_encode %{

14726   effect(DEF dst, USE src);
14727   ins_cost(100);
14728   format %{ "movd    $dst,$src\t# MoveI2F" %}
14729   ins_encode %{
14730     __ movdl($dst$$XMMRegister, $src$$Register);
14731   %}
14732   ins_pipe( pipe_slow );
14733 %}
14734 
14735 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14736   match(Set dst (MoveL2D src));
14737   effect(DEF dst, USE src);
14738   ins_cost(100);
14739   format %{ "movd    $dst,$src\t# MoveL2D" %}
14740   ins_encode %{
          // Moves a long general-purpose register into an XMM register.
          // NOTE(review): the format prints "movd" while the assembler call is
          // movdq — confirm which mnemonic the listing should show.
14741      __ movdq($dst$$XMMRegister, $src$$Register);
14742   %}
14743   ins_pipe( pipe_slow );
14744 %}
14745 
14746 
14747 // Fast clearing of an array
14748 // Small non-constant length ClearArray for non-AVX512 targets.
14749 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
14750                   Universe dummy, rFlagsReg cr)
14751 %{
14752   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
14753   match(Set dummy (ClearArray (Binary cnt base) val));
14754   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
14755 
14756   format %{ $$template
14757     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14758     $$emit$$"jg      LARGE\n\t"
14759     $$emit$$"dec     rcx\n\t"
14760     $$emit$$"js      DONE\t# Zero length\n\t"
14761     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14762     $$emit$$"dec     rcx\n\t"
14763     $$emit$$"jge     LOOP\n\t"
14764     $$emit$$"jmp     DONE\n\t"
14765     $$emit$$"# LARGE:\n\t"
14766     if (UseFastStosb) {
14767        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14768        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14769     } else if (UseXMMForObjInit) {
14770        $$emit$$"movdq   $tmp, $val\n\t"
14771        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
14772        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
14773        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14774        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14775        $$emit$$"vmovdqu $tmp,(rax)\n\t"
14776        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
14777        $$emit$$"add     0x40,rax\n\t"
14778        $$emit$$"# L_zero_64_bytes:\n\t"
14779        $$emit$$"sub     0x8,rcx\n\t"
14780        $$emit$$"jge     L_loop\n\t"
14781        $$emit$$"add     0x4,rcx\n\t"
14782        $$emit$$"jl      L_tail\n\t"
14783        $$emit$$"vmovdqu $tmp,(rax)\n\t"
14784        $$emit$$"add     0x20,rax\n\t"
14785        $$emit$$"sub     0x4,rcx\n\t"
14786        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14787        $$emit$$"add     0x4,rcx\n\t"
14788        $$emit$$"jle     L_end\n\t"
14789        $$emit$$"dec     rcx\n\t"
14790        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14791        $$emit$$"vmovq   xmm0,(rax)\n\t"
14792        $$emit$$"add     0x8,rax\n\t"
14793        $$emit$$"dec     rcx\n\t"
14794        $$emit$$"jge     L_sloop\n\t"
14795        $$emit$$"# L_end:\n\t"
14796     } else {
14797        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14798     }
14799     $$emit$$"# DONE"
14800   %}
14801   ins_encode %{
          // The two boolean arguments line up with this rule's predicate:
          // false for !is_large(), false for !word_copy_only(). The parameter
          // meaning is inferred from that correspondence — confirm against
          // clear_mem's declaration.
          // NOTE(review): the UseXMMForObjInit part of the format listing stores
          // through rax, yet rax is bound to $val and the destination pointer
          // is $base (rdi); the listing may be stale — confirm.
14802     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14803                  $tmp$$XMMRegister, false, false);
14804   %}
14805   ins_pipe(pipe_slow);
14806 %}
14807 
14808 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
14809                             Universe dummy, rFlagsReg cr)
14810 %{
14811   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
14812   match(Set dummy (ClearArray (Binary cnt base) val));
14813   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
14814 
14815   format %{ $$template
14816     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14817     $$emit$$"jg      LARGE\n\t"
14818     $$emit$$"dec     rcx\n\t"
14819     $$emit$$"js      DONE\t# Zero length\n\t"
14820     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14821     $$emit$$"dec     rcx\n\t"
14822     $$emit$$"jge     LOOP\n\t"
14823     $$emit$$"jmp     DONE\n\t"
14824     $$emit$$"# LARGE:\n\t"
14825     if (UseXMMForObjInit) {
14826        $$emit$$"movdq   $tmp, $val\n\t"
14827        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
14828        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
14829        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14830        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14831        $$emit$$"vmovdqu $tmp,(rax)\n\t"
14832        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
14833        $$emit$$"add     0x40,rax\n\t"
14834        $$emit$$"# L_zero_64_bytes:\n\t"
14835        $$emit$$"sub     0x8,rcx\n\t"
14836        $$emit$$"jge     L_loop\n\t"
14837        $$emit$$"add     0x4,rcx\n\t"
14838        $$emit$$"jl      L_tail\n\t"
14839        $$emit$$"vmovdqu $tmp,(rax)\n\t"
14840        $$emit$$"add     0x20,rax\n\t"
14841        $$emit$$"sub     0x4,rcx\n\t"
14842        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14843        $$emit$$"add     0x4,rcx\n\t"
14844        $$emit$$"jle     L_end\n\t"
14845        $$emit$$"dec     rcx\n\t"
14846        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14847        $$emit$$"vmovq   xmm0,(rax)\n\t"
14848        $$emit$$"add     0x8,rax\n\t"
14849        $$emit$$"dec     rcx\n\t"
14850        $$emit$$"jge     L_sloop\n\t"
14851        $$emit$$"# L_end:\n\t"
14852     } else {
14853        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14854     }
14855     $$emit$$"# DONE"
14856   %}
14857   ins_encode %{
          // Word-copy-only variant: unlike rep_stos, the format has no
          // UseFastStosb (byte store) branch. The boolean arguments line up with
          // the predicate: false for !is_large(), true for word_copy_only().
          // The parameter meaning is inferred — confirm against clear_mem.
          // NOTE(review): the UseXMMForObjInit listing stores through rax although
          // rax is bound to $val; the listing may be stale — confirm.
14858     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14859                  $tmp$$XMMRegister, false, true);
14860   %}
14861   ins_pipe(pipe_slow);
14862 %}
14863 
14864 // Small non-constant length ClearArray for AVX512 targets.
14865 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
14866                        Universe dummy, rFlagsReg cr)
14867 %{
14868   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
14869   match(Set dummy (ClearArray (Binary cnt base) val));
14870   ins_cost(125);
14871   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
14872 
14873   format %{ $$template
14874     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14875     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14876     $$emit$$"jg      LARGE\n\t"
14877     $$emit$$"dec     rcx\n\t"
14878     $$emit$$"js      DONE\t# Zero length\n\t"
14879     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14880     $$emit$$"dec     rcx\n\t"
14881     $$emit$$"jge     LOOP\n\t"
14882     $$emit$$"jmp     DONE\n\t"
14883     $$emit$$"# LARGE:\n\t"
14884     if (UseFastStosb) {
14885        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14886        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14887     } else if (UseXMMForObjInit) {
14888        $$emit$$"mov     rdi,rax\n\t"
14889        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14890        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14891        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

14899        $$emit$$"jl      L_tail\n\t"
14900        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14901        $$emit$$"add     0x20,rax\n\t"
14902        $$emit$$"sub     0x4,rcx\n\t"
14903        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14904        $$emit$$"add     0x4,rcx\n\t"
14905        $$emit$$"jle     L_end\n\t"
14906        $$emit$$"dec     rcx\n\t"
14907        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14908        $$emit$$"vmovq   xmm0,(rax)\n\t"
14909        $$emit$$"add     0x8,rax\n\t"
14910        $$emit$$"dec     rcx\n\t"
14911        $$emit$$"jge     L_sloop\n\t"
14912        $$emit$$"# L_end:\n\t"
14913     } else {
14914        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14915     }
14916     $$emit$$"# DONE"
14917   %}
14918   ins_encode %{
14919     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14920                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
14921   %}
14922   ins_pipe(pipe_slow);
14923 %}
14924 
14925 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
14926                                  Universe dummy, rFlagsReg cr)

14927 %{
14928   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
14929   match(Set dummy (ClearArray (Binary cnt base) val));
14930   ins_cost(125);
14931   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
14932 
14933   format %{ $$template
14934     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14935     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14936     $$emit$$"jg      LARGE\n\t"
14937     $$emit$$"dec     rcx\n\t"
14938     $$emit$$"js      DONE\t# Zero length\n\t"
14939     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14940     $$emit$$"dec     rcx\n\t"
14941     $$emit$$"jge     LOOP\n\t"
14942     $$emit$$"jmp     DONE\n\t"
14943     $$emit$$"# LARGE:\n\t"
14944     if (UseFastStosb) {
14945        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14946        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14947     } else if (UseXMMForObjInit) {
14948        $$emit$$"mov     rdi,rax\n\t"
14949        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14950        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14951        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

14959        $$emit$$"jl      L_tail\n\t"
14960        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14961        $$emit$$"add     0x20,rax\n\t"
14962        $$emit$$"sub     0x4,rcx\n\t"
14963        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14964        $$emit$$"add     0x4,rcx\n\t"
14965        $$emit$$"jle     L_end\n\t"
14966        $$emit$$"dec     rcx\n\t"
14967        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14968        $$emit$$"vmovq   xmm0,(rax)\n\t"
14969        $$emit$$"add     0x8,rax\n\t"
14970        $$emit$$"dec     rcx\n\t"
14971        $$emit$$"jge     L_sloop\n\t"
14972        $$emit$$"# L_end:\n\t"
14973     } else {
14974        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14975     }
14976     $$emit$$"# DONE"
14977   %}
14978   ins_encode %{
14979     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14980                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
14981   %}
14982   ins_pipe(pipe_slow);
14983 %}
14984 
14985 // Large non-constant length ClearArray for non-AVX512 targets.
14986 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
14987                         Universe dummy, rFlagsReg cr)
14988 %{
14989   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
14990   match(Set dummy (ClearArray (Binary cnt base) val));
14991   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
14992 
14993   format %{ $$template
14994     if (UseFastStosb) {
14995        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14996        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
14997     } else if (UseXMMForObjInit) {
14998        $$emit$$"movdq   $tmp, $val\n\t"
14999        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15000        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15001        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15002        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15003        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15004        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15005        $$emit$$"add     0x40,rax\n\t"
15006        $$emit$$"# L_zero_64_bytes:\n\t"
15007        $$emit$$"sub     0x8,rcx\n\t"
15008        $$emit$$"jge     L_loop\n\t"
15009        $$emit$$"add     0x4,rcx\n\t"
15010        $$emit$$"jl      L_tail\n\t"
15011        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15012        $$emit$$"add     0x20,rax\n\t"
15013        $$emit$$"sub     0x4,rcx\n\t"
15014        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15015        $$emit$$"add     0x4,rcx\n\t"
15016        $$emit$$"jle     L_end\n\t"
15017        $$emit$$"dec     rcx\n\t"
15018        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15019        $$emit$$"vmovq   xmm0,(rax)\n\t"
15020        $$emit$$"add     0x8,rax\n\t"
15021        $$emit$$"dec     rcx\n\t"
15022        $$emit$$"jge     L_sloop\n\t"
15023        $$emit$$"# L_end:\n\t"
15024     } else {
15025        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15026     }
15027   %}
15028   ins_encode %{
          // Large-array variant: the format has no InitArrayShortSize short-loop
          // preamble. The boolean arguments line up with the predicate: true for
          // is_large(), false for !word_copy_only(). The parameter meaning is
          // inferred — confirm against clear_mem.
          // NOTE(review): the UseXMMForObjInit listing stores through rax although
          // rax is bound to $val; the listing may be stale — confirm.
15029     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15030                  $tmp$$XMMRegister, true, false);
15031   %}
15032   ins_pipe(pipe_slow);
15033 %}
15034 
15035 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15036                                   Universe dummy, rFlagsReg cr)
15037 %{
15038   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15039   match(Set dummy (ClearArray (Binary cnt base) val));
15040   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15041 
15042   format %{ $$template
15043     if (UseXMMForObjInit) {
15044        $$emit$$"movdq   $tmp, $val\n\t"
15045        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15046        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15047        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15048        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15049        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15050        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15051        $$emit$$"add     0x40,rax\n\t"
15052        $$emit$$"# L_zero_64_bytes:\n\t"
15053        $$emit$$"sub     0x8,rcx\n\t"
15054        $$emit$$"jge     L_loop\n\t"
15055        $$emit$$"add     0x4,rcx\n\t"
15056        $$emit$$"jl      L_tail\n\t"
15057        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15058        $$emit$$"add     0x20,rax\n\t"
15059        $$emit$$"sub     0x4,rcx\n\t"
15060        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15061        $$emit$$"add     0x4,rcx\n\t"
15062        $$emit$$"jle     L_end\n\t"
15063        $$emit$$"dec     rcx\n\t"
15064        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15065        $$emit$$"vmovq   xmm0,(rax)\n\t"
15066        $$emit$$"add     0x8,rax\n\t"
15067        $$emit$$"dec     rcx\n\t"
15068        $$emit$$"jge     L_sloop\n\t"
15069        $$emit$$"# L_end:\n\t"
15070     } else {
15071        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15072     }
15073   %}
15074   ins_encode %{
          // Large, word-copy-only variant: the format has neither the short-loop
          // preamble nor a UseFastStosb branch. The boolean arguments line up
          // with the predicate: true for is_large(), true for word_copy_only().
          // The parameter meaning is inferred — confirm against clear_mem.
          // NOTE(review): the UseXMMForObjInit listing stores through rax although
          // rax is bound to $val; the listing may be stale — confirm.
15075     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15076                  $tmp$$XMMRegister, true, true);
15077   %}
15078   ins_pipe(pipe_slow);
15079 %}
15080 
15081 // Large non-constant length ClearArray for AVX512 targets.
15082 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15083                              Universe dummy, rFlagsReg cr)
15084 %{
15085   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15086   match(Set dummy (ClearArray (Binary cnt base) val));
15087   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15088 
15089   format %{ $$template
15090     if (UseFastStosb) {
15091        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15092        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15093        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15094     } else if (UseXMMForObjInit) {
15095        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15096        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15097        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15098        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15099        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15100        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15101        $$emit$$"add     0x40,rax\n\t"
15102        $$emit$$"# L_zero_64_bytes:\n\t"
15103        $$emit$$"sub     0x8,rcx\n\t"
15104        $$emit$$"jge     L_loop\n\t"
15105        $$emit$$"add     0x4,rcx\n\t"
15106        $$emit$$"jl      L_tail\n\t"
15107        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15108        $$emit$$"add     0x20,rax\n\t"
15109        $$emit$$"sub     0x4,rcx\n\t"
15110        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15111        $$emit$$"add     0x4,rcx\n\t"
15112        $$emit$$"jle     L_end\n\t"
15113        $$emit$$"dec     rcx\n\t"
15114        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15115        $$emit$$"vmovq   xmm0,(rax)\n\t"
15116        $$emit$$"add     0x8,rax\n\t"
15117        $$emit$$"dec     rcx\n\t"
15118        $$emit$$"jge     L_sloop\n\t"
15119        $$emit$$"# L_end:\n\t"
15120     } else {
15121        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15122        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15123     }
15124   %}
15125   ins_encode %{
          // AVX-512 (UseAVX > 2) large-array variant; additionally hands the
          // $ktmp mask register to clear_mem. The boolean arguments line up with
          // the predicate: true for is_large(), false for !word_copy_only().
          // The parameter meaning is inferred — confirm against clear_mem.
          // NOTE(review): the format listing shows rax / ymm0 being zeroed, yet
          // $val (rax) is an input operand of this rule; the listing may be
          // stale — confirm.
15126     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15127                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15128   %}
15129   ins_pipe(pipe_slow);
15130 %}
15131 
15132 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15133                                        Universe dummy, rFlagsReg cr)

15134 %{
        // Matcher rule for ClearArray nodes that report is_large() AND
        // word_copy_only() when UseAVX > 2. Operands, effects and format text are
        // the same as in rep_stos_large_evex; only the predicate and the last
        // boolean passed to clear_mem() differ.
15135   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15136   match(Set dummy (ClearArray (Binary cnt base) val));
        // cnt, base and val are inputs that are also clobbered (USE_KILL);
        // tmp and ktmp are scratch registers; the flags register is killed.
15137   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15138 
        // Descriptive text only, selected by UseFastStosb / UseXMMForObjInit.
        // The instructions actually emitted come from clear_mem() in ins_encode.
15139   format %{ $$template
15140     if (UseFastStosb) {
15141        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15142        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15143        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15144     } else if (UseXMMForObjInit) {
15145        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15146        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15147        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15148        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15149        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15150        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15151        $$emit$$"add     0x40,rax\n\t"
15152        $$emit$$"# L_zero_64_bytes:\n\t"
15153        $$emit$$"sub     0x8,rcx\n\t"
15154        $$emit$$"jge     L_loop\n\t"
15155        $$emit$$"add     0x4,rcx\n\t"
15156        $$emit$$"jl      L_tail\n\t"
15157        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15158        $$emit$$"add     0x20,rax\n\t"
15159        $$emit$$"sub     0x4,rcx\n\t"
15160        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15161        $$emit$$"add     0x4,rcx\n\t"
15162        $$emit$$"jle     L_end\n\t"
15163        $$emit$$"dec     rcx\n\t"
15164        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15165        $$emit$$"vmovq   xmm0,(rax)\n\t"
15166        $$emit$$"add     0x8,rax\n\t"
15167        $$emit$$"dec     rcx\n\t"
15168        $$emit$$"jge     L_sloop\n\t"
15169        $$emit$$"# L_end:\n\t"
15170     } else {
15171        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15172        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15173     }
15174   %}
15175   ins_encode %{
          // Booleans are (true, true) here versus (true, false) in
          // rep_stos_large_evex; presumably (is_large, word_copy_only) -- TODO
          // confirm against clear_mem()'s signature.
15176     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15177                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15178   %}
15179   ins_pipe(pipe_slow);
15180 %}
15181 
15182 // Small constant length ClearArray for AVX512 targets.
      // Matcher rule for ClearArray whose count is an immediate (immL cnt).
      // Unlike the register-count rules, base is any rRegP and is not listed in
      // the effect clause, so it is not declared clobbered here.
15183 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15184 %{
        // Applies only to nodes that are neither is_large() nor word_copy_only(),
        // and only when MaxVectorSize >= 32 and AVX512VL is supported.
15185   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15186             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15187   match(Set dummy (ClearArray (Binary cnt base) val));
15188   ins_cost(100);
        // val (rax) is read and clobbered; tmp and ktmp are scratch; flags are killed.
15189   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15190   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15191   ins_encode %{
          // Passes the count as a compile-time constant ($cnt$$constant) rather
          // than a register, selecting a different clear_mem() overload than the
          // register-count rules use.
15192     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15193   %}
15194   ins_pipe(pipe_slow);
15195 %}
15196 
15197 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15198                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15199 %{
        // Matcher rule for StrComp nodes with encoding StrIntrinsicNode::LL, used
        // only when AVX512VL+BW is NOT supported (see predicate).
        // Operands are pinned by register class: str1 in rdi, cnt1 in rcx,
        // str2 in rsi, cnt2 in rdx, result in rax.
15200   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15201   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
        // Both string pointers and both counts are read and clobbered; tmp1 is
        // scratch; the flags register is killed.
15202   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15203 
15204   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15205   ins_encode %{
          // knoreg is passed as the final (mask register) argument; this rule
          // declares no kReg operand of its own.
15206     __ string_compare($str1$$Register, $str2$$Register,
15207                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15208                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15209   %}
15210   ins_pipe( pipe_slow );
15211 %}
15212 

17050   effect(USE meth);
17051 
17052   ins_cost(300);
17053   format %{ "call_leaf,runtime " %}
17054   ins_encode(clear_avx, Java_To_Runtime(meth));
17055   ins_pipe(pipe_slow);
17056 %}
17057 
17058 // Call runtime without safepoint and with vector arguments
      // Matches CallLeafVector. Unlike CallLeafNoFPDirect, the encoding below has
      // no clear_avx step; this agrees with MachCallRuntimeNode::ret_addr_offset(),
      // which does not add clear_avx_size() when the ideal opcode is
      // Op_CallLeafVector.
17059 instruct CallLeafDirectVector(method meth)
17060 %{
17061   match(CallLeafVector);
17062   effect(USE meth);
17063 
17064   ins_cost(300);
17065   format %{ "call_leaf,vector " %}
        // NOTE(review): clear_avx is presumably omitted so the vector arguments
        // are not disturbed before the call -- confirm.
17066   ins_encode(Java_To_Runtime(meth));
17067   ins_pipe(pipe_slow);
17068 %}
17069 
17070 // Call runtime without safepoint
17071 // entry point is null, target holds the address to call
      // Matches CallLeafNoFP nodes whose entry_point() is nullptr; the call
      // destination is supplied at run time in the register operand 'target'.
      // CallLeafNoFPDirect handles the complementary entry_point() != nullptr case.
17072 instruct CallLeafNoFPInDirect(rRegP target)
17073 %{
17074   predicate(n->as_Call()->entry_point() == nullptr);
17075   match(CallLeafNoFP target);
17076 
17077   ins_cost(300);
17078   format %{ "call_leaf_nofp,runtime indirect " %}
17079   ins_encode %{
           // Emits a single register-indirect call, with no clear_avx and no
           // Java_To_Runtime encoding.
           // NOTE(review): MachCallRuntimeNode::ret_addr_offset() is documented as
           // 13 bytes for "movq r10,#addr; callq (r10)" (plus clear_avx_size());
           // confirm the offset it reports is correct for this shorter sequence.
17080      __ call($target$$Register);
17081   %}
17082 
17083   ins_pipe(pipe_slow);
17084 %}
17085 
17086 // Call runtime without safepoint
      // Matches CallLeafNoFP nodes that carry a non-null entry_point(); the
      // nullptr case is handled by CallLeafNoFPInDirect.
17087 instruct CallLeafNoFPDirect(method meth)
17088 %{
17089   predicate(n->as_Call()->entry_point() != nullptr);
17090   match(CallLeafNoFP);
17091   effect(USE meth);
17092 
17093   ins_cost(300);
17094   format %{ "call_leaf_nofp,runtime " %}
        // Two encodings in order: clear_avx, then Java_To_Runtime(meth).
        // MachCallRuntimeNode::ret_addr_offset() accounts for the first via
        // clear_avx_size().
17095   ins_encode(clear_avx, Java_To_Runtime(meth));
17096   ins_pipe(pipe_slow);
17097 %}
17098 
17099 // Return Instruction
17100 // Remove the return address & jump to it.
17101 // Notice: We always emit a nop after a ret to make sure there is room
17102 // for safepoint patching
17103 instruct Ret()
17104 %{
17105   match(Return);
17106 
17107   format %{ "ret" %}
17108   ins_encode %{
17109     __ ret(0);
< prev index next >