src/hotspot/cpu/x86/x86.ad

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
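// A rough byte-count sketch of the offsets above (illustrative, assuming the
// usual x86-64 encodings; the constants in the methods are authoritative):
//   static Java call  : call rel32          = 1 (E8) + 4 (disp32)        ->  5 bytes
//   dynamic Java call : movq rax, #IC_data  = 10 (REX.W + B8 + imm64)
//                       call rel32          =  5                         -> 15 bytes
//   runtime call      : movq r10, #addr     = 10 (REX.W+B + BA + imm64)
//                       call *r10           =  3 (REX.B + FF /2)         -> 13 bytes
// clear_avx_size() then adds the size of a vzeroupper (3 bytes) when one has
// to be emitted before the call.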

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
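// Worked example of the padding computation (illustrative numbers): if this
// node would start at offset 0x1e, then after skipping a 3-byte vzeroupper
// and the 1-byte call opcode the patchable displacement would begin at 0x22;
// align_up(0x22, 4) - 0x22 = 2, so two bytes of padding are inserted ahead of
// the node and the 4-byte displacement ends up naturally aligned at 0x24.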
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
 1901   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
 1906     // NOTE: We set the table base offset here because users might be
 1907     // emitted before MachConstantBaseNode.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1936   // Remove word for return adr already pushed
 1937   // and RBP

 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1967   // Remove word for return adr already pushed
 1968   // and RBP
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 

 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
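// The sizes above follow from the encoding of "lea reg, [rsp + offset]"
// (a sketch, assuming the usual x86-64/APX encodings): a 1-byte REX prefix,
// or a 2-byte REX2 prefix when the destination is one of the extended GPRs
// r16-r31 (encoding > 15), plus opcode 8D, a ModRM byte and a SIB byte (rsp
// as base always requires a SIB), plus a 1-byte disp8 when offset < 0x80 or
// a 4-byte disp32 otherwise: 5/8 bytes with REX, 6/9 bytes with REX2.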
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
 2611     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2637   assert(EnableVectorSupport, "sanity");
 2638   int lo = XMM0_num;
 2639   int hi = XMM0b_num;
 2640   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2641   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2642   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2643   return OptoRegPair(hi, lo);
 2644 }
 2645 
 2646 // Is this branch offset short enough that a short branch can be used?
 2647 //
 2648 // NOTE: If the platform does not provide any short branch variants, then

 4512     }
 4513     __ post_call_nop();
 4514   %}
 4515 
 4516   enc_class Java_Dynamic_Call(method meth) %{
 4517     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4518     __ post_call_nop();
 4519   %}
 4520 
 4521   enc_class call_epilog %{
 4522     if (VerifyStackAtCalls) {
 4523       // Check that stack depth is unchanged: find majik cookie on stack
 4524       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4525       Label L;
 4526       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4527       __ jccb(Assembler::equal, L);
 4528       // Die if stack mismatch
 4529       __ int3();
 4530       __ bind(L);
 4531     }
 4532   %}
 4533 
 4534 %}
 4535 
 4536 //----------FRAME--------------------------------------------------------------
 4537 // Definition of frame structure and management information.
 4538 //
 4539 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4540 //                             |   (to get allocators register number
 4541 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4542 //  r   CALLER     |        |
 4543 //  o     |        +--------+      pad to even-align allocators stack-slot
 4544 //  w     V        |  pad0  |        numbers; owned by CALLER
 4545 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4546 //  h     ^        |   in   |  5
 4547 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4548 //  |     |        |        |  3
 4549 //  |     |        +--------+
 4550 //  V     |        | old out|      Empty on Intel, window on Sparc
 4551 //        |    old |preserve|      Must be even aligned.

 5674   %}
 5675 %}
 5676 
 5677 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5678 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5679 %{
 5680   constraint(ALLOC_IN_RC(ptr_reg));
 5681   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5682   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5683 
 5684   op_cost(10);
 5685   format %{"[$reg + $off + $idx << $scale]" %}
 5686   interface(MEMORY_INTER) %{
 5687     base($reg);
 5688     index($idx);
 5689     scale($scale);
 5690     disp($off);
 5691   %}
 5692 %}
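// A reading of the predicate above (a sketch of how the input path maps onto
// the match tree): for the outer AddP, in(2) is the matched address, i.e. the
// inner (AddP reg (LShiftL (ConvI2L idx) scale)); its in(3) is the LShiftL,
// whose in(1) is the ConvI2L of the index.  The check that its type has
// _lo >= 0 means the index is provably non-negative, which is what makes this
// "positive index" addressing form safe to match.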
 5693 
 5694 // Indirect Narrow Oop Plus Offset Operand
 5695 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5696 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5697 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5698   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5699   constraint(ALLOC_IN_RC(ptr_reg));
 5700   match(AddP (DecodeN reg) off);
 5701 
 5702   op_cost(10);
 5703   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5704   interface(MEMORY_INTER) %{
 5705     base(0xc); // R12
 5706     index($reg);
 5707     scale(0x3);
 5708     disp($off);
 5709   %}
 5710 %}
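// Put differently (illustrative): with compressed oops and shift == 3, a
// narrow oop N decodes to R12 + (N << 3), so this operand lets loads and
// stores address [R12 + $reg << 3 + $off] directly instead of first
// materializing the decoded pointer; R12 stays reserved to hold the heap base
// for exactly this purpose.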
 5711 
 5712 // Indirect Memory Operand
 5713 operand indirectNarrow(rRegN reg)

 6150 %}
 6151 
 6152 // Replaces legVec during post-selection cleanup. See above.
 6153 operand legVecZ() %{
 6154   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6155   match(VecZ);
 6156 
 6157   format %{ %}
 6158   interface(REG_INTER);
 6159 %}
 6160 
 6161 //----------OPERAND CLASSES----------------------------------------------------
 6162 // Operand Classes are groups of operands that are used to simplify
 6163 // instruction definitions by not requiring the AD writer to specify separate
 6164 // instructions for every form of operand when the instruction accepts
 6165 // multiple operand types with the same basic encoding and format.  The classic
 6166 // case of this is memory operands.
 6167 
 6168 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6169                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6170                indCompressedOopOffset,
 6171                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6172                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6173                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6174 
 6175 //----------PIPELINE-----------------------------------------------------------
 6176 // Rules which define the behavior of the target architectures pipeline.
 6177 pipeline %{
 6178 
 6179 //----------ATTRIBUTES---------------------------------------------------------
 6180 attributes %{
 6181   variable_size_instructions;        // Variable size instructions
 6182   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6183   instruction_unit_size = 1;         // An instruction is 1 byte long
 6184   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6185   instruction_fetch_units = 1;       // of 16 bytes
 6186 %}
 6187 
 6188 //----------RESOURCES----------------------------------------------------------
 6189 // Resources are the functional units available to the machine
 6190 

 8748   format %{ "MEMBAR-storestore (empty encoding)" %}
 8749   ins_encode( );
 8750   ins_pipe(empty);
 8751 %}
 8752 
 8753 //----------Move Instructions--------------------------------------------------
 8754 
 8755 instruct castX2P(rRegP dst, rRegL src)
 8756 %{
 8757   match(Set dst (CastX2P src));
 8758 
 8759   format %{ "movq    $dst, $src\t# long->ptr" %}
 8760   ins_encode %{
 8761     if ($dst$$reg != $src$$reg) {
 8762       __ movptr($dst$$Register, $src$$Register);
 8763     }
 8764   %}
 8765   ins_pipe(ialu_reg_reg); // XXX
 8766 %}
 8767 
 8768 instruct castP2X(rRegL dst, rRegP src)
 8769 %{
 8770   match(Set dst (CastP2X src));
 8771 
 8772   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8773   ins_encode %{
 8774     if ($dst$$reg != $src$$reg) {
 8775       __ movptr($dst$$Register, $src$$Register);
 8776     }
 8777   %}
 8778   ins_pipe(ialu_reg_reg); // XXX
 8779 %}
 8780 
 8781 // Convert oop into int for vectors alignment masking
 8782 instruct convP2I(rRegI dst, rRegP src)
 8783 %{
 8784   match(Set dst (ConvL2I (CastP2X src)));
 8785 
 8786   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8787   ins_encode %{

14971   effect(DEF dst, USE src);
14972   ins_cost(100);
14973   format %{ "movd    $dst,$src\t# MoveI2F" %}
14974   ins_encode %{
14975     __ movdl($dst$$XMMRegister, $src$$Register);
14976   %}
14977   ins_pipe( pipe_slow );
14978 %}
14979 
14980 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14981   match(Set dst (MoveL2D src));
14982   effect(DEF dst, USE src);
14983   ins_cost(100);
14984   format %{ "movd    $dst,$src\t# MoveL2D" %}
14985   ins_encode %{
14986      __ movdq($dst$$XMMRegister, $src$$Register);
14987   %}
14988   ins_pipe( pipe_slow );
14989 %}
14990 

14991 // Fast clearing of an array
 14992 // Small non-constant length ClearArray for non-AVX512 targets.
14993 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14994                   Universe dummy, rFlagsReg cr)
14995 %{
14996   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14997   match(Set dummy (ClearArray cnt base));
14998   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

14999 
15000   format %{ $$template
15001     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15002     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15003     $$emit$$"jg      LARGE\n\t"
15004     $$emit$$"dec     rcx\n\t"
15005     $$emit$$"js      DONE\t# Zero length\n\t"
15006     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15007     $$emit$$"dec     rcx\n\t"
15008     $$emit$$"jge     LOOP\n\t"
15009     $$emit$$"jmp     DONE\n\t"
15010     $$emit$$"# LARGE:\n\t"
15011     if (UseFastStosb) {
15012        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15013        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15014     } else if (UseXMMForObjInit) {
15015        $$emit$$"mov     rdi,rax\n\t"
15016        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15017        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15018        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15026        $$emit$$"jl      L_tail\n\t"
15027        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15028        $$emit$$"add     0x20,rax\n\t"
15029        $$emit$$"sub     0x4,rcx\n\t"
15030        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15031        $$emit$$"add     0x4,rcx\n\t"
15032        $$emit$$"jle     L_end\n\t"
15033        $$emit$$"dec     rcx\n\t"
15034        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15035        $$emit$$"vmovq   xmm0,(rax)\n\t"
15036        $$emit$$"add     0x8,rax\n\t"
15037        $$emit$$"dec     rcx\n\t"
15038        $$emit$$"jge     L_sloop\n\t"
15039        $$emit$$"# L_end:\n\t"
15040     } else {
15041        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15042     }
15043     $$emit$$"# DONE"
15044   %}
15045   ins_encode %{
15046     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15047                  $tmp$$XMMRegister, false, knoreg);
15048   %}
15049   ins_pipe(pipe_slow);
15050 %}
15051 
15052 // Small non-constant length ClearArray for AVX512 targets.
15053 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15054                        Universe dummy, rFlagsReg cr)
15055 %{
15056   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15057   match(Set dummy (ClearArray cnt base));
15058   ins_cost(125);
15059   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15060 
15061   format %{ $$template
15062     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15063     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15064     $$emit$$"jg      LARGE\n\t"
15065     $$emit$$"dec     rcx\n\t"
15066     $$emit$$"js      DONE\t# Zero length\n\t"
15067     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15068     $$emit$$"dec     rcx\n\t"
15069     $$emit$$"jge     LOOP\n\t"
15070     $$emit$$"jmp     DONE\n\t"
15071     $$emit$$"# LARGE:\n\t"
15072     if (UseFastStosb) {
15073        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15074        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15075     } else if (UseXMMForObjInit) {
15076        $$emit$$"mov     rdi,rax\n\t"
15077        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15078        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15079        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15087        $$emit$$"jl      L_tail\n\t"
15088        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15089        $$emit$$"add     0x20,rax\n\t"
15090        $$emit$$"sub     0x4,rcx\n\t"
15091        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15092        $$emit$$"add     0x4,rcx\n\t"
15093        $$emit$$"jle     L_end\n\t"
15094        $$emit$$"dec     rcx\n\t"
15095        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15096        $$emit$$"vmovq   xmm0,(rax)\n\t"
15097        $$emit$$"add     0x8,rax\n\t"
15098        $$emit$$"dec     rcx\n\t"
15099        $$emit$$"jge     L_sloop\n\t"
15100        $$emit$$"# L_end:\n\t"
15101     } else {
15102        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15103     }
15104     $$emit$$"# DONE"
15105   %}
15106   ins_encode %{
15107     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15108                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15109   %}
15110   ins_pipe(pipe_slow);
15111 %}
15112 
15113 // Large non-constant length ClearArray for non-AVX512 targets.
15114 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15115                         Universe dummy, rFlagsReg cr)
15116 %{
15117   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15118   match(Set dummy (ClearArray cnt base));
15119   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15120 
15121   format %{ $$template
15122     if (UseFastStosb) {
15123        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15124        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15125        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15126     } else if (UseXMMForObjInit) {
15127        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15128        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15129        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15130        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15131        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15132        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15133        $$emit$$"add     0x40,rax\n\t"
15134        $$emit$$"# L_zero_64_bytes:\n\t"
15135        $$emit$$"sub     0x8,rcx\n\t"
15136        $$emit$$"jge     L_loop\n\t"
15137        $$emit$$"add     0x4,rcx\n\t"
15138        $$emit$$"jl      L_tail\n\t"
15139        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15140        $$emit$$"add     0x20,rax\n\t"
15141        $$emit$$"sub     0x4,rcx\n\t"
15142        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15143        $$emit$$"add     0x4,rcx\n\t"
15144        $$emit$$"jle     L_end\n\t"
15145        $$emit$$"dec     rcx\n\t"
15146        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15147        $$emit$$"vmovq   xmm0,(rax)\n\t"
15148        $$emit$$"add     0x8,rax\n\t"
15149        $$emit$$"dec     rcx\n\t"
15150        $$emit$$"jge     L_sloop\n\t"
15151        $$emit$$"# L_end:\n\t"
15152     } else {
15153        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15154        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15155     }
15156   %}
15157   ins_encode %{
15158     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15159                  $tmp$$XMMRegister, true, knoreg);
15160   %}
15161   ins_pipe(pipe_slow);
15162 %}
15163 
15164 // Large non-constant length ClearArray for AVX512 targets.
15165 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15166                              Universe dummy, rFlagsReg cr)
15167 %{
15168   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15169   match(Set dummy (ClearArray cnt base));
15170   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15171 
15172   format %{ $$template
15173     if (UseFastStosb) {
15174        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15175        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15176        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15177     } else if (UseXMMForObjInit) {
15178        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15179        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15180        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15181        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15182        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15183        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15184        $$emit$$"add     0x40,rax\n\t"
15185        $$emit$$"# L_zero_64_bytes:\n\t"
15186        $$emit$$"sub     0x8,rcx\n\t"
15187        $$emit$$"jge     L_loop\n\t"
15188        $$emit$$"add     0x4,rcx\n\t"
15189        $$emit$$"jl      L_tail\n\t"
15190        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15191        $$emit$$"add     0x20,rax\n\t"
15192        $$emit$$"sub     0x4,rcx\n\t"
15193        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15194        $$emit$$"add     0x4,rcx\n\t"
15195        $$emit$$"jle     L_end\n\t"
15196        $$emit$$"dec     rcx\n\t"
15197        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15198        $$emit$$"vmovq   xmm0,(rax)\n\t"
15199        $$emit$$"add     0x8,rax\n\t"
15200        $$emit$$"dec     rcx\n\t"
15201        $$emit$$"jge     L_sloop\n\t"
15202        $$emit$$"# L_end:\n\t"
15203     } else {
15204        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15205        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15206     }
15207   %}
15208   ins_encode %{
15209     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15210                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15211   %}
15212   ins_pipe(pipe_slow);
15213 %}
15214 
15215 // Small constant length ClearArray for AVX512 targets.
15216 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15217 %{
15218   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15219   match(Set dummy (ClearArray cnt base));

15220   ins_cost(100);
15221   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
 15222   format %{ "clear_mem_imm $base, $cnt\n\t" %}
15223   ins_encode %{
15224    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15225   %}
15226   ins_pipe(pipe_slow);
15227 %}
15228 
15229 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15230                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15231 %{
15232   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15233   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15234   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15235 
15236   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15237   ins_encode %{
15238     __ string_compare($str1$$Register, $str2$$Register,
15239                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15240                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15241   %}
15242   ins_pipe( pipe_slow );
15243 %}
15244 

17049   effect(USE meth);
17050 
17051   ins_cost(300);
17052   format %{ "call_leaf,runtime " %}
17053   ins_encode(clear_avx, Java_To_Runtime(meth));
17054   ins_pipe(pipe_slow);
17055 %}
17056 
17057 // Call runtime without safepoint and with vector arguments
17058 instruct CallLeafDirectVector(method meth)
17059 %{
17060   match(CallLeafVector);
17061   effect(USE meth);
17062 
17063   ins_cost(300);
17064   format %{ "call_leaf,vector " %}
17065   ins_encode(Java_To_Runtime(meth));
17066   ins_pipe(pipe_slow);
17067 %}
17068 
17069 // Call runtime without safepoint
17070 instruct CallLeafNoFPDirect(method meth)
17071 %{

17072   match(CallLeafNoFP);
17073   effect(USE meth);
17074 
17075   ins_cost(300);
17076   format %{ "call_leaf_nofp,runtime " %}
17077   ins_encode(clear_avx, Java_To_Runtime(meth));
17078   ins_pipe(pipe_slow);
17079 %}
17080 
17081 // Return Instruction
17082 // Remove the return address & jump to it.
17083 // Notice: We always emit a nop after a ret to make sure there is room
17084 // for safepoint patching
17085 instruct Ret()
17086 %{
17087   match(Return);
17088 
17089   format %{ "ret" %}
17090   ins_encode %{
17091     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1868     st->print("\n\t");
 1869     st->print("# stack alignment check");
 1870 #endif
 1871   }
 1872   if (C->stub_function() != nullptr) {
 1873     st->print("\n\t");
 1874     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1875     st->print("\n\t");
 1876     st->print("je      fast_entry\t");
 1877     st->print("\n\t");
 1878     st->print("call    #nmethod_entry_barrier_stub\t");
 1879     st->print("\n\tfast_entry:");
 1880   }
 1881   st->cr();
 1882 }
 1883 #endif
 1884 
 1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1886   Compile* C = ra_->C;
 1887 
 1888   __ verified_entry(C);
 1889 
 1890   if (ra_->C->stub_function() == nullptr) {
 1891     __ entry_barrier();
 1892   }
 1893 
 1894   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1895     __ bind(*_verified_entry);
 1896   }
 1897 
 1898   C->output()->set_frame_complete(__ offset());
 1899 
 1900   if (C->has_mach_constant_base_node()) {
 1901     // NOTE: We set the table base offset here because users might be
 1902     // emitted before MachConstantBaseNode.
 1903     ConstantTable& constant_table = C->output()->constant_table();
 1904     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1905   }
 1906 }
 1907 
 1908 
 1909 int MachPrologNode::reloc() const
 1910 {
 1911   return 0; // a large enough number
 1912 }
 1913 
 1914 //=============================================================================
 1915 #ifndef PRODUCT
 1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1917 {
 1918   Compile* C = ra_->C;
 1919   if (generate_vzeroupper(C)) {
 1920     st->print("vzeroupper");
 1921     st->cr(); st->print("\t");
 1922   }
 1923 
 1924   int framesize = C->output()->frame_size_in_bytes();
 1925   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1926   // Remove word for return adr already pushed
 1927   // and RBP

 1935   st->print_cr("popq    rbp");
 1936   if (do_polling() && C->is_method_compilation()) {
 1937     st->print("\t");
 1938     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1939                  "ja      #safepoint_stub\t"
 1940                  "# Safepoint: poll for GC");
 1941   }
 1942 }
 1943 #endif
 1944 
 1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1946 {
 1947   Compile* C = ra_->C;
 1948 
 1949   if (generate_vzeroupper(C)) {
 1950     // Clear upper bits of YMM registers when current compiled code uses
 1951     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1952     __ vzeroupper();
 1953   }
 1954 
 1955   // Subtract two words to account for return address and rbp
 1956   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1957   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1958 
 1959   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1960     __ reserved_stack_check();
 1961   }
 1962 
 1963   if (do_polling() && C->is_method_compilation()) {
 1964     Label dummy_label;
 1965     Label* code_stub = &dummy_label;
 1966     if (!C->output()->in_scratch_emit_size()) {
 1967       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1968       C->output()->add_stub(stub);
 1969       code_stub = &stub->entry();
 1970     }
 1971     __ relocate(relocInfo::poll_return_type);
 1972     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1973   }
 1974 }
 1975 
 1976 int MachEpilogNode::reloc() const
 1977 {
 1978   return 2; // a large enough number
 1979 }
 1980 
 1981 const Pipeline* MachEpilogNode::pipeline() const
 1982 {
 1983   return MachNode::pipeline_class();
 1984 }
 1985 
 1986 //=============================================================================
 1987 
 1988 enum RC {
 1989   rc_bad,
 1990   rc_int,
 1991   rc_kreg,
 1992   rc_float,
 1993   rc_stack
 1994 };
 1995 

 2557 #endif
 2558 
 2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2560 {
 2561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2562   int reg = ra_->get_encode(this);
 2563 
 2564   __ lea(as_Register(reg), Address(rsp, offset));
 2565 }
 2566 
 2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2568 {
 2569   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2570   if (ra_->get_encode(this) > 15) {
 2571     return (offset < 0x80) ? 6 : 9; // REX2
 2572   } else {
 2573     return (offset < 0x80) ? 5 : 8; // REX
 2574   }
 2575 }
 2576 
 2577 //=============================================================================
 2578 #ifndef PRODUCT
 2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2580 {
 2581   st->print_cr("MachVEPNode");
 2582 }
 2583 #endif
 2584 
 2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   CodeBuffer* cbuf = masm->code();
 2588   uint insts_size = cbuf->insts_size();
 2589   if (!_verified) {
 2590     __ ic_check(1);
 2591   } else {
 2592     // TODO 8284443 Avoid creation of temporary frame
 2593     if (ra_->C->stub_function() == nullptr) {
 2594       __ verified_entry(ra_->C, 0);
 2595       __ entry_barrier();
 2596       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2597       __ remove_frame(initial_framesize, false);
 2598     }
 2599     // Unpack inline type args passed as oop and then jump to
 2600     // the verified entry point (skipping the unverified entry).
 2601     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2602     // Emit code for verified entry and save increment for stack repair on return
 2603     __ verified_entry(ra_->C, sp_inc);
 2604     if (Compile::current()->output()->in_scratch_emit_size()) {
 2605       Label dummy_verified_entry;
 2606       __ jmp(dummy_verified_entry);
 2607     } else {
 2608       __ jmp(*_verified_entry);
 2609     }
 2610   }
 2611   /* WARNING these NOPs are critical so that verified entry point is properly
 2612      4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2613   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2614   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2615   if (nops_cnt > 0) {
 2616     __ nop(nops_cnt);
 2617   }
 2618 }
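// Example of the nop arithmetic above (illustrative): if the code emitted for
// this node so far is 6 bytes, 4 - (6 & 0x3) = 2 nops are added to reach the
// next 4-byte boundary; if the size is already a multiple of 4, the first
// expression yields 4 and the final "& 0x3" reduces it to 0, so no nops are
// emitted.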
 2619 
 2620 //=============================================================================
 2621 #ifndef PRODUCT
 2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2623 {
 2624   if (UseCompressedClassPointers) {
 2625     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2626     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2627   } else {
 2628     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2629     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2630   }
 2631   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2632 }
 2633 #endif
 2634 
 2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2636 {
 2637   __ ic_check(InteriorEntryAlignment);
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2648   assert(EnableVectorSupport, "sanity");
 2649   int lo = XMM0_num;
 2650   int hi = XMM0b_num;
 2651   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2652   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2653   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2654   return OptoRegPair(hi, lo);
 2655 }
 2656 
 2657 // Is this branch offset short enough that a short branch can be used?
 2658 //
 2659 // NOTE: If the platform does not provide any short branch variants, then

 4523     }
 4524     __ post_call_nop();
 4525   %}
 4526 
 4527   enc_class Java_Dynamic_Call(method meth) %{
 4528     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4529     __ post_call_nop();
 4530   %}
 4531 
 4532   enc_class call_epilog %{
 4533     if (VerifyStackAtCalls) {
 4534       // Check that stack depth is unchanged: find majik cookie on stack
 4535       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4536       Label L;
 4537       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4538       __ jccb(Assembler::equal, L);
 4539       // Die if stack mismatch
 4540       __ int3();
 4541       __ bind(L);
 4542     }
 4543     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4544       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4545       // Search for the corresponding projection, get the register and emit code that initializes it.
 4546       uint con = (tf()->range_cc()->cnt() - 1);
 4547       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4548         ProjNode* proj = fast_out(i)->as_Proj();
 4549         if (proj->_con == con) {
 4550           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4551           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4552           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4553           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4554           __ testq(rax, rax);
 4555           __ setb(Assembler::notZero, toReg);
 4556           __ movzbl(toReg, toReg);
 4557           if (reg->is_stack()) {
 4558             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4559             __ movq(Address(rsp, st_off), toReg);
 4560           }
 4561           break;
 4562         }
 4563       }
 4564       if (return_value_is_used()) {
 4565         // An inline type is returned as fields in multiple registers.
 4566         // Rax either contains an oop if the inline type is buffered or a pointer
 4567         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4568         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4569         // rax &= (rax & 1) - 1
 4570         __ movptr(rscratch1, rax);
 4571         __ andptr(rscratch1, 0x1);
 4572         __ subptr(rscratch1, 0x1);
 4573         __ andptr(rax, rscratch1);
 4574       }
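      // Worked out (illustrative): if rax carries an InlineKlass pointer with
      // its low bit set, (rax & 1) - 1 == 0 and the final and clears rax; if
      // rax carries a buffered oop, the low bit is 0, (rax & 1) - 1 == -1
      // (all ones) and rax is left unchanged.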
 4575     }
 4576   %}
 4577 
 4578 %}
 4579 
 4580 //----------FRAME--------------------------------------------------------------
 4581 // Definition of frame structure and management information.
 4582 //
 4583 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4584 //                             |   (to get allocators register number
 4585 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4586 //  r   CALLER     |        |
 4587 //  o     |        +--------+      pad to even-align allocators stack-slot
 4588 //  w     V        |  pad0  |        numbers; owned by CALLER
 4589 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4590 //  h     ^        |   in   |  5
 4591 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4592 //  |     |        |        |  3
 4593 //  |     |        +--------+
 4594 //  V     |        | old out|      Empty on Intel, window on Sparc
 4595 //        |    old |preserve|      Must be even aligned.

 5718   %}
 5719 %}
 5720 
 5721 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5722 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5723 %{
 5724   constraint(ALLOC_IN_RC(ptr_reg));
 5725   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5726   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5727 
 5728   op_cost(10);
 5729   format %{"[$reg + $off + $idx << $scale]" %}
 5730   interface(MEMORY_INTER) %{
 5731     base($reg);
 5732     index($idx);
 5733     scale($scale);
 5734     disp($off);
 5735   %}
 5736 %}
 5737 
 5738 // Indirect Narrow Oop Operand
 5739 operand indCompressedOop(rRegN reg) %{
 5740   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5741   constraint(ALLOC_IN_RC(ptr_reg));
 5742   match(DecodeN reg);
 5743 
 5744   op_cost(10);
 5745   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5746   interface(MEMORY_INTER) %{
 5747     base(0xc); // R12
 5748     index($reg);
 5749     scale(0x3);
 5750     disp(0x0);
 5751   %}
 5752 %}
 5753 
 5754 // Indirect Narrow Oop Plus Offset Operand
 5755 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5756 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5757 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5758   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5759   constraint(ALLOC_IN_RC(ptr_reg));
 5760   match(AddP (DecodeN reg) off);
 5761 
 5762   op_cost(10);
 5763   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5764   interface(MEMORY_INTER) %{
 5765     base(0xc); // R12
 5766     index($reg);
 5767     scale(0x3);
 5768     disp($off);
 5769   %}
 5770 %}
 5771 
 5772 // Indirect Memory Operand
 5773 operand indirectNarrow(rRegN reg)

 6210 %}
 6211 
 6212 // Replaces legVec during post-selection cleanup. See above.
 6213 operand legVecZ() %{
 6214   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6215   match(VecZ);
 6216 
 6217   format %{ %}
 6218   interface(REG_INTER);
 6219 %}
 6220 
 6221 //----------OPERAND CLASSES----------------------------------------------------
 6222 // Operand Classes are groups of operands that are used to simplify
 6223 // instruction definitions by not requiring the AD writer to specify separate
 6224 // instructions for every form of operand when the instruction accepts
 6225 // multiple operand types with the same basic encoding and format.  The classic
 6226 // case of this is memory operands.
 6227 
 6228 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6229                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6230                indCompressedOop, indCompressedOopOffset,
 6231                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6232                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6233                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6234 
 6235 //----------PIPELINE-----------------------------------------------------------
 6236 // Rules which define the behavior of the target architectures pipeline.
 6237 pipeline %{
 6238 
 6239 //----------ATTRIBUTES---------------------------------------------------------
 6240 attributes %{
 6241   variable_size_instructions;        // Variable size instructions
 6242   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6243   instruction_unit_size = 1;         // An instruction is 1 byte long
 6244   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6245   instruction_fetch_units = 1;       // of 16 bytes
 6246 %}
 6247 
 6248 //----------RESOURCES----------------------------------------------------------
 6249 // Resources are the functional units available to the machine
 6250 

 8808   format %{ "MEMBAR-storestore (empty encoding)" %}
 8809   ins_encode( );
 8810   ins_pipe(empty);
 8811 %}
 8812 
 8813 //----------Move Instructions--------------------------------------------------
 8814 
 8815 instruct castX2P(rRegP dst, rRegL src)
 8816 %{
 8817   match(Set dst (CastX2P src));
 8818 
 8819   format %{ "movq    $dst, $src\t# long->ptr" %}
 8820   ins_encode %{
 8821     if ($dst$$reg != $src$$reg) {
 8822       __ movptr($dst$$Register, $src$$Register);
 8823     }
 8824   %}
 8825   ins_pipe(ialu_reg_reg); // XXX
 8826 %}
 8827 
 8828 instruct castI2N(rRegN dst, rRegI src)
 8829 %{
 8830   match(Set dst (CastI2N src));
 8831 
 8832   format %{ "movq    $dst, $src\t# int -> narrow ptr" %}
 8833   ins_encode %{
 8834     if ($dst$$reg != $src$$reg) {
 8835       __ movl($dst$$Register, $src$$Register);
 8836     }
 8837   %}
 8838   ins_pipe(ialu_reg_reg); // XXX
 8839 %}
 8840 
 8841 instruct castN2X(rRegL dst, rRegN src)
 8842 %{
 8843   match(Set dst (CastP2X src));
 8844 
 8845   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8846   ins_encode %{
 8847     if ($dst$$reg != $src$$reg) {
 8848       __ movptr($dst$$Register, $src$$Register);
 8849     }
 8850   %}
 8851   ins_pipe(ialu_reg_reg); // XXX
 8852 %}
 8853 
 8854 instruct castP2X(rRegL dst, rRegP src)
 8855 %{
 8856   match(Set dst (CastP2X src));
 8857 
 8858   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8859   ins_encode %{
 8860     if ($dst$$reg != $src$$reg) {
 8861       __ movptr($dst$$Register, $src$$Register);
 8862     }
 8863   %}
 8864   ins_pipe(ialu_reg_reg); // XXX
 8865 %}
 8866 
 8867 // Convert oop into int for vectors alignment masking
 8868 instruct convP2I(rRegI dst, rRegP src)
 8869 %{
 8870   match(Set dst (ConvL2I (CastP2X src)));
 8871 
 8872   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8873   ins_encode %{

15057   effect(DEF dst, USE src);
15058   ins_cost(100);
15059   format %{ "movd    $dst,$src\t# MoveI2F" %}
15060   ins_encode %{
15061     __ movdl($dst$$XMMRegister, $src$$Register);
15062   %}
15063   ins_pipe( pipe_slow );
15064 %}
15065 
15066 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15067   match(Set dst (MoveL2D src));
15068   effect(DEF dst, USE src);
15069   ins_cost(100);
15070   format %{ "movd    $dst,$src\t# MoveL2D" %}
15071   ins_encode %{
15072      __ movdq($dst$$XMMRegister, $src$$Register);
15073   %}
15074   ins_pipe( pipe_slow );
15075 %}
15076 
15077 
15078 // Fast clearing of an array
 15079 // Small non-constant length ClearArray for non-AVX512 targets.
15080 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15081                   Universe dummy, rFlagsReg cr)
15082 %{
15083   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15084   match(Set dummy (ClearArray (Binary cnt base) val));
15085   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15086 
15087   format %{ $$template
15088     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15089     $$emit$$"jg      LARGE\n\t"
15090     $$emit$$"dec     rcx\n\t"
15091     $$emit$$"js      DONE\t# Zero length\n\t"
15092     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15093     $$emit$$"dec     rcx\n\t"
15094     $$emit$$"jge     LOOP\n\t"
15095     $$emit$$"jmp     DONE\n\t"
15096     $$emit$$"# LARGE:\n\t"
15097     if (UseFastStosb) {
15098        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15099        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15100     } else if (UseXMMForObjInit) {
15101        $$emit$$"movdq   $tmp, $val\n\t"
15102        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15103        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15104        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15105        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15106        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15107        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15108        $$emit$$"add     0x40,rax\n\t"
15109        $$emit$$"# L_zero_64_bytes:\n\t"
15110        $$emit$$"sub     0x8,rcx\n\t"
15111        $$emit$$"jge     L_loop\n\t"
15112        $$emit$$"add     0x4,rcx\n\t"
15113        $$emit$$"jl      L_tail\n\t"
15114        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15115        $$emit$$"add     0x20,rax\n\t"
15116        $$emit$$"sub     0x4,rcx\n\t"
15117        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15118        $$emit$$"add     0x4,rcx\n\t"
15119        $$emit$$"jle     L_end\n\t"
15120        $$emit$$"dec     rcx\n\t"
15121        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15122        $$emit$$"vmovq   xmm0,(rax)\n\t"
15123        $$emit$$"add     0x8,rax\n\t"
15124        $$emit$$"dec     rcx\n\t"
15125        $$emit$$"jge     L_sloop\n\t"
15126        $$emit$$"# L_end:\n\t"
15127     } else {
15128        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15129     }
15130     $$emit$$"# DONE"
15131   %}
15132   ins_encode %{
15133     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15134                  $tmp$$XMMRegister, false, false);
15135   %}
15136   ins_pipe(pipe_slow);
15137 %}
15138 
15139 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15140                             Universe dummy, rFlagsReg cr)
15141 %{
15142   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15143   match(Set dummy (ClearArray (Binary cnt base) val));
15144   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15145 
15146   format %{ $$template
15147     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15148     $$emit$$"jg      LARGE\n\t"
15149     $$emit$$"dec     rcx\n\t"
15150     $$emit$$"js      DONE\t# Zero length\n\t"
15151     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15152     $$emit$$"dec     rcx\n\t"
15153     $$emit$$"jge     LOOP\n\t"
15154     $$emit$$"jmp     DONE\n\t"
15155     $$emit$$"# LARGE:\n\t"
15156     if (UseXMMForObjInit) {
15157        $$emit$$"movdq   $tmp, $val\n\t"
15158        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15159        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15160        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15161        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15162        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15163        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15164        $$emit$$"add     0x40,rax\n\t"
15165        $$emit$$"# L_zero_64_bytes:\n\t"
15166        $$emit$$"sub     0x8,rcx\n\t"
15167        $$emit$$"jge     L_loop\n\t"
15168        $$emit$$"add     0x4,rcx\n\t"
15169        $$emit$$"jl      L_tail\n\t"
15170        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15171        $$emit$$"add     0x20,rax\n\t"
15172        $$emit$$"sub     0x4,rcx\n\t"
15173        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15174        $$emit$$"add     0x4,rcx\n\t"
15175        $$emit$$"jle     L_end\n\t"
15176        $$emit$$"dec     rcx\n\t"
15177        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15178        $$emit$$"vmovq   xmm0,(rax)\n\t"
15179        $$emit$$"add     0x8,rax\n\t"
15180        $$emit$$"dec     rcx\n\t"
15181        $$emit$$"jge     L_sloop\n\t"
15182        $$emit$$"# L_end:\n\t"
15183     } else {
15184        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15185     }
15186     $$emit$$"# DONE"
15187   %}
15188   ins_encode %{
15189     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15190                  $tmp$$XMMRegister, false, true);
15191   %}
15192   ins_pipe(pipe_slow);
15193 %}
15194 
15195 // Small non-constant length ClearArray for AVX512 targets.
15196 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15197                        Universe dummy, rFlagsReg cr)
15198 %{
15199   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15200   match(Set dummy (ClearArray (Binary cnt base) val));
15201   ins_cost(125);
15202   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15203 
15204   format %{ $$template
15205     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15206     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15207     $$emit$$"jg      LARGE\n\t"
15208     $$emit$$"dec     rcx\n\t"
15209     $$emit$$"js      DONE\t# Zero length\n\t"
15210     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15211     $$emit$$"dec     rcx\n\t"
15212     $$emit$$"jge     LOOP\n\t"
15213     $$emit$$"jmp     DONE\n\t"
15214     $$emit$$"# LARGE:\n\t"
15215     if (UseFastStosb) {
15216        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15217        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15218     } else if (UseXMMForObjInit) {
15219        $$emit$$"mov     rdi,rax\n\t"
15220        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15221        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15222        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15230        $$emit$$"jl      L_tail\n\t"
15231        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15232        $$emit$$"add     0x20,rax\n\t"
15233        $$emit$$"sub     0x4,rcx\n\t"
15234        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15235        $$emit$$"add     0x4,rcx\n\t"
15236        $$emit$$"jle     L_end\n\t"
15237        $$emit$$"dec     rcx\n\t"
15238        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15239        $$emit$$"vmovq   xmm0,(rax)\n\t"
15240        $$emit$$"add     0x8,rax\n\t"
15241        $$emit$$"dec     rcx\n\t"
15242        $$emit$$"jge     L_sloop\n\t"
15243        $$emit$$"# L_end:\n\t"
15244     } else {
15245        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15246     }
15247     $$emit$$"# DONE"
15248   %}
15249   ins_encode %{
15250     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15251                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15252   %}
15253   ins_pipe(pipe_slow);
15254 %}
15255 
15256 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15257                                  Universe dummy, rFlagsReg cr)

15258 %{
15259   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15260   match(Set dummy (ClearArray (Binary cnt base) val));
15261   ins_cost(125);
15262   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15263 
15264   format %{ $$template
15265     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15266     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15267     $$emit$$"jg      LARGE\n\t"
15268     $$emit$$"dec     rcx\n\t"
15269     $$emit$$"js      DONE\t# Zero length\n\t"
15270     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15271     $$emit$$"dec     rcx\n\t"
15272     $$emit$$"jge     LOOP\n\t"
15273     $$emit$$"jmp     DONE\n\t"
15274     $$emit$$"# LARGE:\n\t"
15275     if (UseFastStosb) {
15276        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15277        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15278     } else if (UseXMMForObjInit) {
15279        $$emit$$"mov     rdi,rax\n\t"
15280        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15281        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15282        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15290        $$emit$$"jl      L_tail\n\t"
15291        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15292        $$emit$$"add     0x20,rax\n\t"
15293        $$emit$$"sub     0x4,rcx\n\t"
15294        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15295        $$emit$$"add     0x4,rcx\n\t"
15296        $$emit$$"jle     L_end\n\t"
15297        $$emit$$"dec     rcx\n\t"
15298        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15299        $$emit$$"vmovq   xmm0,(rax)\n\t"
15300        $$emit$$"add     0x8,rax\n\t"
15301        $$emit$$"dec     rcx\n\t"
15302        $$emit$$"jge     L_sloop\n\t"
15303        $$emit$$"# L_end:\n\t"
15304     } else {
15305        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15306     }
15307     $$emit$$"# DONE"
15308   %}
15309   ins_encode %{
15310     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15311                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15312   %}
15313   ins_pipe(pipe_slow);
15314 %}
15315 
15316 // Large non-constant length ClearArray for non-AVX512 targets.
15317 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15318                         Universe dummy, rFlagsReg cr)
15319 %{
15320   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15321   match(Set dummy (ClearArray (Binary cnt base) val));
15322   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15323 
15324   format %{ $$template
15325     if (UseFastStosb) {
15326        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15327        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15328     } else if (UseXMMForObjInit) {
15329        $$emit$$"movdq   $tmp, $val\n\t"
15330        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15331        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15332        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15333        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15334        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15335        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15336        $$emit$$"add     0x40,rax\n\t"
15337        $$emit$$"# L_zero_64_bytes:\n\t"
15338        $$emit$$"sub     0x8,rcx\n\t"
15339        $$emit$$"jge     L_loop\n\t"
15340        $$emit$$"add     0x4,rcx\n\t"
15341        $$emit$$"jl      L_tail\n\t"
15342        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15343        $$emit$$"add     0x20,rax\n\t"
15344        $$emit$$"sub     0x4,rcx\n\t"
15345        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15346        $$emit$$"add     0x4,rcx\n\t"
15347        $$emit$$"jle     L_end\n\t"
15348        $$emit$$"dec     rcx\n\t"
15349        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15350        $$emit$$"vmovq   xmm0,(rax)\n\t"
15351        $$emit$$"add     0x8,rax\n\t"
15352        $$emit$$"dec     rcx\n\t"
15353        $$emit$$"jge     L_sloop\n\t"
15354        $$emit$$"# L_end:\n\t"
15355     } else {
15356        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15357     }
15358   %}
15359   ins_encode %{
15360     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15361                  $tmp$$XMMRegister, true, false);
15362   %}
15363   ins_pipe(pipe_slow);
15364 %}
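The movdq/punpcklqdq/vinserti128_high sequence in the format above replicates the 64-bit fill value from val into every lane of the 256-bit temp before the 64-byte loop runs. A short equivalent with AVX2 intrinsics (a sketch, not the HotSpot code path):

#include <immintrin.h>
#include <cstdint>

// Broadcast 'val' into a YMM register and write one 64-byte block, mirroring
// the two 32-byte stores per iteration of "# L_loop" above.
static void fill_64_bytes_sketch(uint64_t* dst, uint64_t val) {
  __m256i v = _mm256_set1_epi64x((long long)val);
  _mm256_storeu_si256((__m256i*)dst, v);
  _mm256_storeu_si256((__m256i*)(dst + 4), v);
}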
15365 
15366 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15367                                   Universe dummy, rFlagsReg cr)
15368 %{
15369   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15370   match(Set dummy (ClearArray (Binary cnt base) val));
15371   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15372 
15373   format %{ $$template
15374     if (UseXMMForObjInit) {
15375        $$emit$$"movdq   $tmp, $val\n\t"
15376        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15377        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15378        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15379        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15380        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15381        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15382        $$emit$$"add     0x40,rax\n\t"
15383        $$emit$$"# L_zero_64_bytes:\n\t"
15384        $$emit$$"sub     0x8,rcx\n\t"
15385        $$emit$$"jge     L_loop\n\t"
15386        $$emit$$"add     0x4,rcx\n\t"
15387        $$emit$$"jl      L_tail\n\t"
15388        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15389        $$emit$$"add     0x20,rax\n\t"
15390        $$emit$$"sub     0x4,rcx\n\t"
15391        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15392        $$emit$$"add     0x4,rcx\n\t"
15393        $$emit$$"jle     L_end\n\t"
15394        $$emit$$"dec     rcx\n\t"
15395        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15396        $$emit$$"vmovq   xmm0,(rax)\n\t"
15397        $$emit$$"add     0x8,rax\n\t"
15398        $$emit$$"dec     rcx\n\t"
15399        $$emit$$"jge     L_sloop\n\t"
15400        $$emit$$"# L_end:\n\t"
15401     } else {
15402        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15403     }
15404   %}
15405   ins_encode %{
15406     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15407                  $tmp$$XMMRegister, true, true);
15408   %}
15409   ins_pipe(pipe_slow);
15410 %}
15411 
15412 // Large non-constant length ClearArray for AVX512 targets.
15413 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15414                              Universe dummy, rFlagsReg cr)
15415 %{
15416   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15417   match(Set dummy (ClearArray (Binary cnt base) val));
15418   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15419 
15420   format %{ $$template
15421     if (UseFastStosb) {
15422        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15423        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15424        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15425     } else if (UseXMMForObjInit) {
15426        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15427        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15428        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15429        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15430        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15431        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15432        $$emit$$"add     0x40,rax\n\t"
15433        $$emit$$"# L_zero_64_bytes:\n\t"
15434        $$emit$$"sub     0x8,rcx\n\t"
15435        $$emit$$"jge     L_loop\n\t"
15436        $$emit$$"add     0x4,rcx\n\t"
15437        $$emit$$"jl      L_tail\n\t"
15438        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15439        $$emit$$"add     0x20,rax\n\t"
15440        $$emit$$"sub     0x4,rcx\n\t"
15441        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15442        $$emit$$"add     0x4,rcx\n\t"
15443        $$emit$$"jle     L_end\n\t"
15444        $$emit$$"dec     rcx\n\t"
15445        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15446        $$emit$$"vmovq   xmm0,(rax)\n\t"
15447        $$emit$$"add     0x8,rax\n\t"
15448        $$emit$$"dec     rcx\n\t"
15449        $$emit$$"jge     L_sloop\n\t"
15450        $$emit$$"# L_end:\n\t"
15451     } else {
15452        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15453        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15454     }
15455   %}
15456   ins_encode %{
15457     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15458                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15459   %}
15460   ins_pipe(pipe_slow);
15461 %}
15462 
15463 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15464                                        Universe dummy, rFlagsReg cr)

15465 %{
15466   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15467   match(Set dummy (ClearArray (Binary cnt base) val));
15468   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15469 
15470   format %{ $$template
15471     if (UseFastStosb) {
15472        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15473        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15474        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15475     } else if (UseXMMForObjInit) {
15476        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15477        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15478        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15479        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15480        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15481        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15482        $$emit$$"add     0x40,rax\n\t"
15483        $$emit$$"# L_zero_64_bytes:\n\t"
15484        $$emit$$"sub     0x8,rcx\n\t"
15485        $$emit$$"jge     L_loop\n\t"
15486        $$emit$$"add     0x4,rcx\n\t"
15487        $$emit$$"jl      L_tail\n\t"
15488        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15489        $$emit$$"add     0x20,rax\n\t"
15490        $$emit$$"sub     0x4,rcx\n\t"
15491        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15492        $$emit$$"add     0x4,rcx\n\t"
15493        $$emit$$"jle     L_end\n\t"
15494        $$emit$$"dec     rcx\n\t"
15495        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15496        $$emit$$"vmovq   xmm0,(rax)\n\t"
15497        $$emit$$"add     0x8,rax\n\t"
15498        $$emit$$"dec     rcx\n\t"
15499        $$emit$$"jge     L_sloop\n\t"
15500        $$emit$$"# L_end:\n\t"
15501     } else {
15502        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15503        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15504     }
15505   %}
15506   ins_encode %{
15507     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15508                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15509   %}
15510   ins_pipe(pipe_slow);
15511 %}
15512 
15513 // Small constant length ClearArray for AVX512 targets.
15514 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15515 %{
15516   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15517             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15518   match(Set dummy (ClearArray (Binary cnt base) val));
15519   ins_cost(100);
15520   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15521   format %{ "clear_mem_imm $base, $cnt\n\t" %}
15522   ins_encode %{
15523     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15524   %}
15525   ins_pipe(pipe_slow);
15526 %}
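Here cnt is an immL operand, so the element count is a compile-time constant and clear_mem receives $cnt$$constant rather than a register; the emitted sequence can therefore be fixed and loop-free. As a rough C++ analogue (illustrative only), a constant trip count lets the compiler fully unroll the fill:

#include <cstdint>

template <int QWORDS>                 // count fixed at compile time, like immL cnt
static inline void clear_const_sketch(uint64_t* base, uint64_t val) {
  for (int i = 0; i < QWORDS; i++) {  // constant trip count: fully unrollable,
    base[i] = val;                    // no loop counter or flags needed at run time
  }
}
// e.g. clear_const_sketch<6>(p, 0) clears 48 bytes with six plain stores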
15527 
15528 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15529                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15530 %{
15531   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15532   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15533   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15534 
15535   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15536   ins_encode %{
15537     __ string_compare($str1$$Register, $str2$$Register,
15538                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15539                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15540   %}
15541   ins_pipe( pipe_slow );
15542 %}
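StrIntrinsicNode::LL means both inputs are Latin-1 byte arrays. The instruct hands the registers to string_compare, which emits vectorized code; the scalar semantics it has to preserve are the usual compareTo rules, sketched below as a plain C++ reference model (not the emitted code).

#include <cstdint>
#include <algorithm>

// Compare byte-by-byte (unsigned), then by length, as String.compareTo does
// for two Latin-1 strings.
static int string_compare_LL_sketch(const uint8_t* s1, int cnt1,
                                    const uint8_t* s2, int cnt2) {
  int n = std::min(cnt1, cnt2);
  for (int i = 0; i < n; i++) {
    int diff = (int)s1[i] - (int)s2[i];
    if (diff != 0) return diff;       // first differing byte decides
  }
  return cnt1 - cnt2;                 // otherwise the shorter string sorts first
}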
15543 

17348   effect(USE meth);
17349 
17350   ins_cost(300);
17351   format %{ "call_leaf,runtime " %}
17352   ins_encode(clear_avx, Java_To_Runtime(meth));
17353   ins_pipe(pipe_slow);
17354 %}
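The clear_avx sub-encoding used by the plain leaf-call instructs is, on this reading, a conditional vzeroupper: dirty upper YMM/ZMM state is cleared before jumping into out-of-line runtime code so that legacy-SSE code in the callee avoids an AVX/SSE transition penalty. A hedged C++ illustration (assumes AVX; the callee is a stand-in):

#include <immintrin.h>

// 'runtime_leaf' stands in for the runtime entry point named by 'meth'.
static void call_leaf_sketch(void (*runtime_leaf)(void)) {
  _mm256_zeroupper();   // roughly what clear_avx contributes before the call
  runtime_leaf();       // the call itself, as in Java_To_Runtime(meth)
}

Note that CallLeafDirectVector below omits clear_avx, presumably because its vector arguments must remain live across the call.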
17355 
17356 // Call runtime without safepoint and with vector arguments
17357 instruct CallLeafDirectVector(method meth)
17358 %{
17359   match(CallLeafVector);
17360   effect(USE meth);
17361 
17362   ins_cost(300);
17363   format %{ "call_leaf,vector " %}
17364   ins_encode(Java_To_Runtime(meth));
17365   ins_pipe(pipe_slow);
17366 %}
17367 
17368 // Call runtime without safepoint
17369 // The entry point is null; the target register holds the address to call.
17370 instruct CallLeafNoFPInDirect(rRegP target)
17371 %{
17372   predicate(n->as_Call()->entry_point() == nullptr);
17373   match(CallLeafNoFP target);
17374 
17375   ins_cost(300);
17376   format %{ "call_leaf_nofp,runtime indirect " %}
17377   ins_encode %{
17378      __ call($target$$Register);
17379   %}
17380 
17381   ins_pipe(pipe_slow);
17382 %}
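CallLeafNoFPInDirect is selected when the call node carries no fixed entry point; the destination arrives in a register and the call is emitted through that register. A C++ analogue of the indirect form (illustrative only):

// Calling through a value rather than a fixed immediate address, as
// "__ call($target$$Register)" does above.
typedef void (*leaf_fn)(void);

static void call_indirect_sketch(leaf_fn target) {
  target();   // destination is data in a register, not a patchable immediate
}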
17383 
17384 // Call runtime without safepoint
17385 instruct CallLeafNoFPDirect(method meth)
17386 %{
17387   predicate(n->as_Call()->entry_point() != nullptr);
17388   match(CallLeafNoFP);
17389   effect(USE meth);
17390 
17391   ins_cost(300);
17392   format %{ "call_leaf_nofp,runtime " %}
17393   ins_encode(clear_avx, Java_To_Runtime(meth));
17394   ins_pipe(pipe_slow);
17395 %}
17396 
17397 // Return Instruction
17398 // Remove the return address & jump to it.
17399 // Note: we always emit a nop after a ret to make sure there is room
17400 // for safepoint patching.
17401 instruct Ret()
17402 %{
17403   match(Return);
17404 
17405   format %{ "ret" %}
17406   ins_encode %{
17407     __ ret(0);