
src/hotspot/cpu/x86/x86.ad


 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
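      (Worked example, illustrative only: suppose the node would start at
       current_offset == 10 and clear_avx_size() == 3. Then 10 + 3 + 1 == 14 is
       where the 4-byte call displacement would begin; align_up(14, 4) - 14 == 2,
       so two bytes of padding are emitted and the displacement lands at offsets
       16..19, wholly inside one aligned dword, so the patched word can never
       straddle a cache line.)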
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
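      (Pseudo-code for the nmethod entry barrier printed above; illustrative only,
       field and stub names are descriptive rather than exact:

           // #disarmed_guard_value is an immediate patched into the nmethod.
           if (r15_thread->disarmed_guard_value == disarmed_guard_value)  // cmpl; je
               goto fast_entry;                   // barrier already disarmed
           call nmethod_entry_barrier_stub;       // still armed: run the barrier stub
           fast_entry: ...
       )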
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
 1901   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
 1906     // NOTE: We set the table base offset here because users might be
 1907     // emitted before MachConstantBaseNode.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1936   // Remove word for return adr already pushed
 1937   // and RBP

 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1967   // Remove word for return adr already pushed
 1968   // and RBP
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 

 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
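      (Illustrative size breakdown: "lea reg, [rsp + disp]" always needs a SIB byte
       because the base is rsp, so with a one-byte REX prefix the encoding is
       REX + 8D + ModRM + SIB + disp8 = 5 bytes, or 8 bytes with a 4-byte
       displacement; an extended register (encoding > 15) needs the two-byte REX2
       prefix instead, hence 6 or 9.)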
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
 2611     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2637   assert(EnableVectorSupport, "sanity");
 2638   int lo = XMM0_num;
 2639   int hi = XMM0b_num;
 2640   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2641   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2642   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2643   return OptoRegPair(hi, lo);
 2644 }
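      (Illustrative: XMM0 is modelled as 16 32-bit slots named XMM0, XMM0b, ...,
       XMM0p, so the returned pair spans exactly the vector width: XMM0..XMM0b for
       the 64-bit default, ..XMM0d for VecX (128 bit), ..XMM0h for VecY (256 bit)
       and ..XMM0p for VecZ (512 bit).)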
 2645 
 2646 // Is this branch offset short enough that a short branch can be used?
 2647 //
 2648 // NOTE: If the platform does not provide any short branch variants, then

 4505     }
 4506     __ post_call_nop();
 4507   %}
 4508 
 4509   enc_class Java_Dynamic_Call(method meth) %{
 4510     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4511     __ post_call_nop();
 4512   %}
 4513 
 4514   enc_class call_epilog %{
 4515     if (VerifyStackAtCalls) {
 4516       // Check that stack depth is unchanged: find majik cookie on stack
 4517       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4518       Label L;
 4519       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4520       __ jccb(Assembler::equal, L);
 4521       // Die if stack mismatch
 4522       __ int3();
 4523       __ bind(L);
 4524     }
 4525   %}
 4526 
 4527 %}
 4528 
 4529 //----------FRAME--------------------------------------------------------------
 4530 // Definition of frame structure and management information.
 4531 //
 4532 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4533 //                             |   (to get allocators register number
 4534 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4535 //  r   CALLER     |        |
 4536 //  o     |        +--------+      pad to even-align allocators stack-slot
 4537 //  w     V        |  pad0  |        numbers; owned by CALLER
 4538 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4539 //  h     ^        |   in   |  5
 4540 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4541 //  |     |        |        |  3
 4542 //  |     |        +--------+
 4543 //  V     |        | old out|      Empty on Intel, window on Sparc
 4544 //        |    old |preserve|      Must be even aligned.

 5667   %}
 5668 %}
 5669 
 5670 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5671 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5672 %{
 5673   constraint(ALLOC_IN_RC(ptr_reg));
 5674   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5675   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5676 
 5677   op_cost(10);
 5678   format %{"[$reg + $off + $idx << $scale]" %}
 5679   interface(MEMORY_INTER) %{
 5680     base($reg);
 5681     index($idx);
 5682     scale($scale);
 5683     disp($off);
 5684   %}
 5685 %}
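      (Reading the predicate, illustrative: in the matched tree
       (AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off), n->in(2) is the inner
       AddP (the Address input), its in(3) is the LShiftL (the Offset input), and
       that node's in(1) is the ConvI2L of idx; requiring _lo >= 0 means this
       operand only matches when the int index is known to be non-negative.)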
 5686 
 5687 // Indirect Narrow Oop Plus Offset Operand
 5688 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5689 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5690 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5691   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5692   constraint(ALLOC_IN_RC(ptr_reg));
 5693   match(AddP (DecodeN reg) off);
 5694 
 5695   op_cost(10);
 5696   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5697   interface(MEMORY_INTER) %{
 5698     base(0xc); // R12
 5699     index($reg);
 5700     scale(0x3);
 5701     disp($off);
 5702   %}
 5703 %}
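      (A minimal decode sketch for this addressing mode, assuming zero- or
       heap-based compressed oops with shift == 3; the function and parameter
       names are illustrative, not HotSpot API:

           // r12 holds CompressedOops::base(), which may be 0.
           uintptr_t decoded_address(uint32_t narrow, uintptr_t r12_base, int32_t off) {
             return r12_base + ((uintptr_t)narrow << 3) + off;   // [R12 + reg << 3 + off]
           }

       The operand encodes exactly this computation, which is why r12 stays
       reserved as the base register even when the base value is zero.)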
 5704 
 5705 // Indirect Memory Operand
 5706 operand indirectNarrow(rRegN reg)

 6143 %}
 6144 
 6145 // Replaces legVec during post-selection cleanup. See above.
 6146 operand legVecZ() %{
 6147   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6148   match(VecZ);
 6149 
 6150   format %{ %}
 6151   interface(REG_INTER);
 6152 %}
 6153 
 6154 //----------OPERAND CLASSES----------------------------------------------------
 6155 // Operand Classes are groups of operands that are used to simplify
 6156 // instruction definitions by not requiring the AD writer to specify separate
 6157 // instructions for every form of operand when the instruction accepts
 6158 // multiple operand types with the same basic encoding and format.  The classic
 6159 // case of this is memory operands.
 6160 
 6161 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6162                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6163                indCompressedOopOffset,
 6164                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6165                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6166                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
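      (Illustrative use of the "memory" opclass; roughly how a load rule in this
       file consumes it, details abbreviated:

           instruct loadI(rRegI dst, memory mem) %{
             match(Set dst (LoadI mem));
             ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
             ins_pipe(ialu_reg_mem);
           %}

       ADLC expands the single rule for every addressing form listed in the class.)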
 6167 
 6168 //----------PIPELINE-----------------------------------------------------------
 6169 // Rules which define the behavior of the target architectures pipeline.
 6170 pipeline %{
 6171 
 6172 //----------ATTRIBUTES---------------------------------------------------------
 6173 attributes %{
 6174   variable_size_instructions;        // Variable size instructions
 6175   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6176   instruction_unit_size = 1;         // An instruction is 1 byte long
 6177   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6178   instruction_fetch_units = 1;       // of 16 bytes
 6179 %}
 6180 
 6181 //----------RESOURCES----------------------------------------------------------
 6182 // Resources are the functional units available to the machine
 6183 

 8741   format %{ "MEMBAR-storestore (empty encoding)" %}
 8742   ins_encode( );
 8743   ins_pipe(empty);
 8744 %}
 8745 
 8746 //----------Move Instructions--------------------------------------------------
 8747 
 8748 instruct castX2P(rRegP dst, rRegL src)
 8749 %{
 8750   match(Set dst (CastX2P src));
 8751 
 8752   format %{ "movq    $dst, $src\t# long->ptr" %}
 8753   ins_encode %{
 8754     if ($dst$$reg != $src$$reg) {
 8755       __ movptr($dst$$Register, $src$$Register);
 8756     }
 8757   %}
 8758   ins_pipe(ialu_reg_reg); // XXX
 8759 %}
 8760 
 8761 instruct castP2X(rRegL dst, rRegP src)
 8762 %{
 8763   match(Set dst (CastP2X src));
 8764 
 8765   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8766   ins_encode %{
 8767     if ($dst$$reg != $src$$reg) {
 8768       __ movptr($dst$$Register, $src$$Register);
 8769     }
 8770   %}
 8771   ins_pipe(ialu_reg_reg); // XXX
 8772 %}
 8773 
 8774 // Convert oop into int for vector alignment masking
 8775 instruct convP2I(rRegI dst, rRegP src)
 8776 %{
 8777   match(Set dst (ConvL2I (CastP2X src)));
 8778 
 8779   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8780   ins_encode %{

14964   effect(DEF dst, USE src);
14965   ins_cost(100);
14966   format %{ "movd    $dst,$src\t# MoveI2F" %}
14967   ins_encode %{
14968     __ movdl($dst$$XMMRegister, $src$$Register);
14969   %}
14970   ins_pipe( pipe_slow );
14971 %}
14972 
14973 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14974   match(Set dst (MoveL2D src));
14975   effect(DEF dst, USE src);
14976   ins_cost(100);
14977   format %{ "movd    $dst,$src\t# MoveL2D" %}
14978   ins_encode %{
14979      __ movdq($dst$$XMMRegister, $src$$Register);
14980   %}
14981   ins_pipe( pipe_slow );
14982 %}
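      (Note, for clarity: MoveI2F and MoveL2D are raw bit moves, not numeric
       conversions; the Java-level equivalents are Float.intBitsToFloat(int) and
       Double.longBitsToDouble(long). movdl/movdq copy the GPR bits into the low
       lanes of the XMM register unchanged.)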
14983 

14984 // Fast clearing of an array
14985 // Small non-constant length ClearArray for non-AVX512 targets.
14986 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14987                   Universe dummy, rFlagsReg cr)
14988 %{
14989   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14990   match(Set dummy (ClearArray cnt base));
14991   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14992 
14993   format %{ $$template
14994     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14995     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14996     $$emit$$"jg      LARGE\n\t"
14997     $$emit$$"dec     rcx\n\t"
14998     $$emit$$"js      DONE\t# Zero length\n\t"
14999     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15000     $$emit$$"dec     rcx\n\t"
15001     $$emit$$"jge     LOOP\n\t"
15002     $$emit$$"jmp     DONE\n\t"
15003     $$emit$$"# LARGE:\n\t"
15004     if (UseFastStosb) {
15005        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15006        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15007     } else if (UseXMMForObjInit) {
15008        $$emit$$"mov     rdi,rax\n\t"
15009        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15010        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15011        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15019        $$emit$$"jl      L_tail\n\t"
15020        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15021        $$emit$$"add     0x20,rax\n\t"
15022        $$emit$$"sub     0x4,rcx\n\t"
15023        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15024        $$emit$$"add     0x4,rcx\n\t"
15025        $$emit$$"jle     L_end\n\t"
15026        $$emit$$"dec     rcx\n\t"
15027        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15028        $$emit$$"vmovq   xmm0,(rax)\n\t"
15029        $$emit$$"add     0x8,rax\n\t"
15030        $$emit$$"dec     rcx\n\t"
15031        $$emit$$"jge     L_sloop\n\t"
15032        $$emit$$"# L_end:\n\t"
15033     } else {
15034        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15035     }
15036     $$emit$$"# DONE"
15037   %}
15038   ins_encode %{
15039     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15040                  $tmp$$XMMRegister, false, knoreg);
15041   %}
15042   ins_pipe(pipe_slow);
15043 %}
15044 
15045 // Small non-constant length ClearArray for AVX512 targets.
15046 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15047                        Universe dummy, rFlagsReg cr)
15048 %{
15049   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15050   match(Set dummy (ClearArray cnt base));
15051   ins_cost(125);
15052   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15053 
15054   format %{ $$template
15055     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15056     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15057     $$emit$$"jg      LARGE\n\t"
15058     $$emit$$"dec     rcx\n\t"
15059     $$emit$$"js      DONE\t# Zero length\n\t"
15060     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15061     $$emit$$"dec     rcx\n\t"
15062     $$emit$$"jge     LOOP\n\t"
15063     $$emit$$"jmp     DONE\n\t"
15064     $$emit$$"# LARGE:\n\t"
15065     if (UseFastStosb) {
15066        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15067        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15068     } else if (UseXMMForObjInit) {
15069        $$emit$$"mov     rdi,rax\n\t"
15070        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15071        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15072        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15080        $$emit$$"jl      L_tail\n\t"
15081        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15082        $$emit$$"add     0x20,rax\n\t"
15083        $$emit$$"sub     0x4,rcx\n\t"
15084        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15085        $$emit$$"add     0x4,rcx\n\t"
15086        $$emit$$"jle     L_end\n\t"
15087        $$emit$$"dec     rcx\n\t"
15088        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15089        $$emit$$"vmovq   xmm0,(rax)\n\t"
15090        $$emit$$"add     0x8,rax\n\t"
15091        $$emit$$"dec     rcx\n\t"
15092        $$emit$$"jge     L_sloop\n\t"
15093        $$emit$$"# L_end:\n\t"
15094     } else {
15095        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15096     }
15097     $$emit$$"# DONE"
15098   %}
15099   ins_encode %{
15100     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15101                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15102   %}
15103   ins_pipe(pipe_slow);
15104 %}
15105 
15106 // Large non-constant length ClearArray for non-AVX512 targets.
15107 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15108                         Universe dummy, rFlagsReg cr)
15109 %{
15110   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15111   match(Set dummy (ClearArray cnt base));
15112   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15113 
15114   format %{ $$template
15115     if (UseFastStosb) {
15116        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15117        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15118        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15119     } else if (UseXMMForObjInit) {
15120        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15121        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15122        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15123        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15124        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15125        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15126        $$emit$$"add     0x40,rax\n\t"
15127        $$emit$$"# L_zero_64_bytes:\n\t"
15128        $$emit$$"sub     0x8,rcx\n\t"
15129        $$emit$$"jge     L_loop\n\t"
15130        $$emit$$"add     0x4,rcx\n\t"
15131        $$emit$$"jl      L_tail\n\t"
15132        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15133        $$emit$$"add     0x20,rax\n\t"
15134        $$emit$$"sub     0x4,rcx\n\t"
15135        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15136        $$emit$$"add     0x4,rcx\n\t"
15137        $$emit$$"jle     L_end\n\t"
15138        $$emit$$"dec     rcx\n\t"
15139        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15140        $$emit$$"vmovq   xmm0,(rax)\n\t"
15141        $$emit$$"add     0x8,rax\n\t"
15142        $$emit$$"dec     rcx\n\t"
15143        $$emit$$"jge     L_sloop\n\t"
15144        $$emit$$"# L_end:\n\t"
15145     } else {
15146        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15147        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15148     }
15149   %}
15150   ins_encode %{
15151     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15152                  $tmp$$XMMRegister, true, knoreg);
15153   %}
15154   ins_pipe(pipe_slow);
15155 %}
15156 
15157 // Large non-constant length ClearArray for AVX512 targets.
15158 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15159                              Universe dummy, rFlagsReg cr)
15160 %{
15161   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15162   match(Set dummy (ClearArray cnt base));
15163   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15164 
15165   format %{ $$template
15166     if (UseFastStosb) {
15167        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15168        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15169        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15170     } else if (UseXMMForObjInit) {
15171        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15172        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15173        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15174        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15175        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15176        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15177        $$emit$$"add     0x40,rax\n\t"
15178        $$emit$$"# L_zero_64_bytes:\n\t"
15179        $$emit$$"sub     0x8,rcx\n\t"
15180        $$emit$$"jge     L_loop\n\t"
15181        $$emit$$"add     0x4,rcx\n\t"
15182        $$emit$$"jl      L_tail\n\t"
15183        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15184        $$emit$$"add     0x20,rax\n\t"
15185        $$emit$$"sub     0x4,rcx\n\t"
15186        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15187        $$emit$$"add     0x4,rcx\n\t"
15188        $$emit$$"jle     L_end\n\t"
15189        $$emit$$"dec     rcx\n\t"
15190        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15191        $$emit$$"vmovq   xmm0,(rax)\n\t"
15192        $$emit$$"add     0x8,rax\n\t"
15193        $$emit$$"dec     rcx\n\t"
15194        $$emit$$"jge     L_sloop\n\t"
15195        $$emit$$"# L_end:\n\t"
15196     } else {
15197        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15198        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15199     }
15200   %}
15201   ins_encode %{
15202     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15203                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15204   %}
15205   ins_pipe(pipe_slow);
15206 %}
15207 
15208 // Small constant length ClearArray for AVX512 targets.
15209 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15210 %{
15211   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15212   match(Set dummy (ClearArray cnt base));

15213   ins_cost(100);
15214   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15215   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15216   ins_encode %{
15217    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15218   %}
15219   ins_pipe(pipe_slow);
15220 %}
15221 
15222 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15223                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15224 %{
15225   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15226   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15227   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15228 
15229   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15230   ins_encode %{
15231     __ string_compare($str1$$Register, $str2$$Register,
15232                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15233                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15234   %}
15235   ins_pipe( pipe_slow );
15236 %}
15237 

17042   effect(USE meth);
17043 
17044   ins_cost(300);
17045   format %{ "call_leaf,runtime " %}
17046   ins_encode(clear_avx, Java_To_Runtime(meth));
17047   ins_pipe(pipe_slow);
17048 %}
17049 
17050 // Call runtime without safepoint and with vector arguments
17051 instruct CallLeafDirectVector(method meth)
17052 %{
17053   match(CallLeafVector);
17054   effect(USE meth);
17055 
17056   ins_cost(300);
17057   format %{ "call_leaf,vector " %}
17058   ins_encode(Java_To_Runtime(meth));
17059   ins_pipe(pipe_slow);
17060 %}
17061 
17062 // Call runtime without safepoint
17063 instruct CallLeafNoFPDirect(method meth)
17064 %{

17065   match(CallLeafNoFP);
17066   effect(USE meth);
17067 
17068   ins_cost(300);
17069   format %{ "call_leaf_nofp,runtime " %}
17070   ins_encode(clear_avx, Java_To_Runtime(meth));
17071   ins_pipe(pipe_slow);
17072 %}
17073 
17074 // Return Instruction
17075 // Remove the return address & jump to it.
17076 // Notice: We always emit a nop after a ret to make sure there is room
17077 // for safepoint patching
17078 instruct Ret()
17079 %{
17080   match(Return);
17081 
17082   format %{ "ret" %}
17083   ins_encode %{
17084     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
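      (Illustrative encoding breakdown: the 13-byte runtime call site is
       49 ba <imm64> "movq r10, #addr" = 10 bytes followed by 41 ff d2
       "callq *%r10" = 3 bytes; the new _entry_point == nullptr branch covers a
       plain indirect call through a register, which is just the 3-byte
       REX-prefixed "callq *reg". clear_avx_size() adds the size of the optional
       vzeroupper emitted first, typically 3 bytes.)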
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1868     st->print("\n\t");
 1869     st->print("# stack alignment check");
 1870 #endif
 1871   }
 1872   if (C->stub_function() != nullptr) {
 1873     st->print("\n\t");
 1874     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1875     st->print("\n\t");
 1876     st->print("je      fast_entry\t");
 1877     st->print("\n\t");
 1878     st->print("call    #nmethod_entry_barrier_stub\t");
 1879     st->print("\n\tfast_entry:");
 1880   }
 1881   st->cr();
 1882 }
 1883 #endif
 1884 
 1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1886   Compile* C = ra_->C;
 1887 
 1888   __ verified_entry(C);





 1889 
 1890   if (ra_->C->stub_function() == nullptr) {
 1891     __ entry_barrier();







 1892   }
 1893 
 1894   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1895     __ bind(*_verified_entry);
 1896   }
 1897 
 1898   C->output()->set_frame_complete(__ offset());
 1899 
 1900   if (C->has_mach_constant_base_node()) {
 1901     // NOTE: We set the table base offset here because users might be
 1902     // emitted before MachConstantBaseNode.
 1903     ConstantTable& constant_table = C->output()->constant_table();
 1904     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1905   }
 1906 }
 1907 





 1908 
 1909 int MachPrologNode::reloc() const
 1910 {
 1911   return 0; // a large enough number
 1912 }
 1913 
 1914 //=============================================================================
 1915 #ifndef PRODUCT
 1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1917 {
 1918   Compile* C = ra_->C;
 1919   if (generate_vzeroupper(C)) {
 1920     st->print("vzeroupper");
 1921     st->cr(); st->print("\t");
 1922   }
 1923 
 1924   int framesize = C->output()->frame_size_in_bytes();
 1925   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1926   // Remove word for return adr already pushed
 1927   // and RBP

 1935   st->print_cr("popq    rbp");
 1936   if (do_polling() && C->is_method_compilation()) {
 1937     st->print("\t");
 1938     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1939                  "ja      #safepoint_stub\t"
 1940                  "# Safepoint: poll for GC");
 1941   }
 1942 }
 1943 #endif
 1944 
 1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1946 {
 1947   Compile* C = ra_->C;
 1948 
 1949   if (generate_vzeroupper(C)) {
 1950     // Clear upper bits of YMM registers when current compiled code uses
 1951     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1952     __ vzeroupper();
 1953   }
 1954 
 1955   // Subtract two words to account for return address and rbp
 1956   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1957   __ remove_frame(initial_framesize, C->needs_stack_repair());
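  // Example (illustrative): with wordSize == 8 and frame_size_in_bytes() == 48,
  // initial_framesize == 32; the remaining 16 bytes are the saved rbp and the
  // return address, which are given back by remove_frame() and the final ret.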










 1958 
 1959   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1960     __ reserved_stack_check();
 1961   }
 1962 
 1963   if (do_polling() && C->is_method_compilation()) {
 1964     Label dummy_label;
 1965     Label* code_stub = &dummy_label;
 1966     if (!C->output()->in_scratch_emit_size()) {
 1967       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1968       C->output()->add_stub(stub);
 1969       code_stub = &stub->entry();
 1970     }
 1971     __ relocate(relocInfo::poll_return_type);
 1972     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1973   }
 1974 }
 1975 






 1976 int MachEpilogNode::reloc() const
 1977 {
 1978   return 2; // a large enough number
 1979 }
 1980 
 1981 const Pipeline* MachEpilogNode::pipeline() const
 1982 {
 1983   return MachNode::pipeline_class();
 1984 }
 1985 
 1986 //=============================================================================
 1987 
 1988 enum RC {
 1989   rc_bad,
 1990   rc_int,
 1991   rc_kreg,
 1992   rc_float,
 1993   rc_stack
 1994 };
 1995 

 2557 #endif
 2558 
 2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2560 {
 2561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2562   int reg = ra_->get_encode(this);
 2563 
 2564   __ lea(as_Register(reg), Address(rsp, offset));
 2565 }
 2566 
 2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2568 {
 2569   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2570   if (ra_->get_encode(this) > 15) {
 2571     return (offset < 0x80) ? 6 : 9; // REX2
 2572   } else {
 2573     return (offset < 0x80) ? 5 : 8; // REX
 2574   }
 2575 }
 2576 
 2577 //=============================================================================
 2578 #ifndef PRODUCT
 2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2580 {
 2581   st->print_cr("MachVEPNode");
 2582 }
 2583 #endif
 2584 
 2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   CodeBuffer* cbuf = masm->code();
 2588   uint insts_size = cbuf->insts_size();
 2589   if (!_verified) {
 2590     __ ic_check(1);
 2591   } else {
 2592     // TODO 8284443 Avoid creation of temporary frame
 2593     if (ra_->C->stub_function() == nullptr) {
 2594       __ verified_entry(ra_->C, 0);
 2595       __ entry_barrier();
 2596       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2597       __ remove_frame(initial_framesize, false);
 2598     }
 2599     // Unpack inline type args passed as oop and then jump to
 2600     // the verified entry point (skipping the unverified entry).
 2601     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2602     // Emit code for verified entry and save increment for stack repair on return
 2603     __ verified_entry(ra_->C, sp_inc);
 2604     if (Compile::current()->output()->in_scratch_emit_size()) {
 2605       Label dummy_verified_entry;
 2606       __ jmp(dummy_verified_entry);
 2607     } else {
 2608       __ jmp(*_verified_entry);
 2609     }
 2610   }
 2611   /* WARNING: these NOPs are critical so that the verified entry point is properly
 2612      4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2613   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2614   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2615   if (nops_cnt > 0) {
 2616     __ nop(nops_cnt);
 2617   }
 2618 }
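      (Worked example, illustrative: if the code emitted for this node is 7 bytes
       so far, 4 - (7 & 0x3) == 1, so one nop is added and the entry point that
       follows starts 4-byte aligned; if the size is already a multiple of 4 the
       second mask leaves nops_cnt == 0 and nothing is emitted.)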
 2619 
 2620 //=============================================================================
 2621 #ifndef PRODUCT
 2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2623 {
 2624   if (UseCompressedClassPointers) {
 2625     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2626     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2627   } else {
 2628     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2629     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2630   }
 2631   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2632 }
 2633 #endif
 2634 
 2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2636 {
 2637   __ ic_check(InteriorEntryAlignment);
 2638 }
 2639 






 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2648   assert(EnableVectorSupport, "sanity");
 2649   int lo = XMM0_num;
 2650   int hi = XMM0b_num;
 2651   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2652   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2653   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2654   return OptoRegPair(hi, lo);
 2655 }
 2656 
 2657 // Is this branch offset short enough that a short branch can be used?
 2658 //
 2659 // NOTE: If the platform does not provide any short branch variants, then

 4516     }
 4517     __ post_call_nop();
 4518   %}
 4519 
 4520   enc_class Java_Dynamic_Call(method meth) %{
 4521     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4522     __ post_call_nop();
 4523   %}
 4524 
 4525   enc_class call_epilog %{
 4526     if (VerifyStackAtCalls) {
 4527       // Check that stack depth is unchanged: find majik cookie on stack
 4528       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4529       Label L;
 4530       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4531       __ jccb(Assembler::equal, L);
 4532       // Die if stack mismatch
 4533       __ int3();
 4534       __ bind(L);
 4535     }
 4536     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4537       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4538       // Search for the corresponding projection, get its register, and emit code that initializes it.
 4539       uint con = (tf()->range_cc()->cnt() - 1);
 4540       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4541         ProjNode* proj = fast_out(i)->as_Proj();
 4542         if (proj->_con == con) {
 4543           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4544           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4545           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4546           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4547           __ testq(rax, rax);
 4548           __ setb(Assembler::notZero, toReg);
 4549           __ movzbl(toReg, toReg);
 4550           if (reg->is_stack()) {
 4551             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4552             __ movq(Address(rsp, st_off), toReg);
 4553           }
 4554           break;
 4555         }
 4556       }
 4557       if (return_value_is_used()) {
 4558         // An inline type is returned as fields in multiple registers.
 4559         // Rax either contains an oop if the inline type is buffered or a pointer
 4560         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4561         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4562         // rax &= (rax & 1) - 1
 4563         __ movptr(rscratch1, rax);
 4564         __ andptr(rscratch1, 0x1);
 4565         __ subptr(rscratch1, 0x1);
 4566         __ andptr(rax, rscratch1);
 4567       }
 4568     }
 4569   %}
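      (Worked example for the masking above: (rax & 1) - 1 evaluates to 0 when the
       low bit is set (a tagged InlineKlass pointer) and to all ones when it is
       clear (a buffered oop or null), so the final "rax &= rscratch1" zeroes rax
       exactly in the tagged case and leaves it untouched otherwise.)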
 4570 
 4571 %}
 4572 
 4573 //----------FRAME--------------------------------------------------------------
 4574 // Definition of frame structure and management information.
 4575 //
 4576 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4577 //                             |   (to get allocators register number
 4578 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4579 //  r   CALLER     |        |
 4580 //  o     |        +--------+      pad to even-align allocators stack-slot
 4581 //  w     V        |  pad0  |        numbers; owned by CALLER
 4582 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4583 //  h     ^        |   in   |  5
 4584 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4585 //  |     |        |        |  3
 4586 //  |     |        +--------+
 4587 //  V     |        | old out|      Empty on Intel, window on Sparc
 4588 //        |    old |preserve|      Must be even aligned.

 5711   %}
 5712 %}
 5713 
 5714 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5715 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5716 %{
 5717   constraint(ALLOC_IN_RC(ptr_reg));
 5718   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5719   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5720 
 5721   op_cost(10);
 5722   format %{"[$reg + $off + $idx << $scale]" %}
 5723   interface(MEMORY_INTER) %{
 5724     base($reg);
 5725     index($idx);
 5726     scale($scale);
 5727     disp($off);
 5728   %}
 5729 %}
 5730 
 5731 // Indirect Narrow Oop Operand
 5732 operand indCompressedOop(rRegN reg) %{
 5733   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5734   constraint(ALLOC_IN_RC(ptr_reg));
 5735   match(DecodeN reg);
 5736 
 5737   op_cost(10);
 5738   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5739   interface(MEMORY_INTER) %{
 5740     base(0xc); // R12
 5741     index($reg);
 5742     scale(0x3);
 5743     disp(0x0);
 5744   %}
 5745 %}
 5746 
 5747 // Indirect Narrow Oop Plus Offset Operand
 5748 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5749 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5750 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5751   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5752   constraint(ALLOC_IN_RC(ptr_reg));
 5753   match(AddP (DecodeN reg) off);
 5754 
 5755   op_cost(10);
 5756   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5757   interface(MEMORY_INTER) %{
 5758     base(0xc); // R12
 5759     index($reg);
 5760     scale(0x3);
 5761     disp($off);
 5762   %}
 5763 %}
 5764 
 5765 // Indirect Memory Operand
 5766 operand indirectNarrow(rRegN reg)

 6203 %}
 6204 
 6205 // Replaces legVec during post-selection cleanup. See above.
 6206 operand legVecZ() %{
 6207   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6208   match(VecZ);
 6209 
 6210   format %{ %}
 6211   interface(REG_INTER);
 6212 %}
 6213 
 6214 //----------OPERAND CLASSES----------------------------------------------------
 6215 // Operand Classes are groups of operands that are used to simplify
 6216 // instruction definitions by not requiring the AD writer to specify separate
 6217 // instructions for every form of operand when the instruction accepts
 6218 // multiple operand types with the same basic encoding and format.  The classic
 6219 // case of this is memory operands.
 6220 
 6221 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6222                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6223                indCompressedOop, indCompressedOopOffset,
 6224                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6225                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6226                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6227 
 6228 //----------PIPELINE-----------------------------------------------------------
 6229 // Rules which define the behavior of the target architectures pipeline.
 6230 pipeline %{
 6231 
 6232 //----------ATTRIBUTES---------------------------------------------------------
 6233 attributes %{
 6234   variable_size_instructions;        // Variable size instructions
 6235   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6236   instruction_unit_size = 1;         // An instruction is 1 byte long
 6237   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6238   instruction_fetch_units = 1;       // of 16 bytes
 6239 %}
 6240 
 6241 //----------RESOURCES----------------------------------------------------------
 6242 // Resources are the functional units available to the machine
 6243 

 8801   format %{ "MEMBAR-storestore (empty encoding)" %}
 8802   ins_encode( );
 8803   ins_pipe(empty);
 8804 %}
 8805 
 8806 //----------Move Instructions--------------------------------------------------
 8807 
 8808 instruct castX2P(rRegP dst, rRegL src)
 8809 %{
 8810   match(Set dst (CastX2P src));
 8811 
 8812   format %{ "movq    $dst, $src\t# long->ptr" %}
 8813   ins_encode %{
 8814     if ($dst$$reg != $src$$reg) {
 8815       __ movptr($dst$$Register, $src$$Register);
 8816     }
 8817   %}
 8818   ins_pipe(ialu_reg_reg); // XXX
 8819 %}
 8820 
 8821 instruct castI2N(rRegN dst, rRegI src)
 8822 %{
 8823   match(Set dst (CastI2N src));
 8824 
 8825   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8826   ins_encode %{
 8827     if ($dst$$reg != $src$$reg) {
 8828       __ movl($dst$$Register, $src$$Register);
 8829     }
 8830   %}
 8831   ins_pipe(ialu_reg_reg); // XXX
 8832 %}
 8833 
 8834 instruct castN2X(rRegL dst, rRegN src)
 8835 %{
 8836   match(Set dst (CastP2X src));
 8837 
 8838   format %{ "movq    $dst, $src\t# narrow ptr -> long" %}
 8839   ins_encode %{
 8840     if ($dst$$reg != $src$$reg) {
 8841       __ movptr($dst$$Register, $src$$Register);
 8842     }
 8843   %}
 8844   ins_pipe(ialu_reg_reg); // XXX
 8845 %}
 8846 
 8847 instruct castP2X(rRegL dst, rRegP src)
 8848 %{
 8849   match(Set dst (CastP2X src));
 8850 
 8851   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8852   ins_encode %{
 8853     if ($dst$$reg != $src$$reg) {
 8854       __ movptr($dst$$Register, $src$$Register);
 8855     }
 8856   %}
 8857   ins_pipe(ialu_reg_reg); // XXX
 8858 %}
 8859 
 8860 // Convert oop into int for vector alignment masking
 8861 instruct convP2I(rRegI dst, rRegP src)
 8862 %{
 8863   match(Set dst (ConvL2I (CastP2X src)));
 8864 
 8865   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8866   ins_encode %{

15050   effect(DEF dst, USE src);
15051   ins_cost(100);
15052   format %{ "movd    $dst,$src\t# MoveI2F" %}
15053   ins_encode %{
15054     __ movdl($dst$$XMMRegister, $src$$Register);
15055   %}
15056   ins_pipe( pipe_slow );
15057 %}
15058 
15059 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15060   match(Set dst (MoveL2D src));
15061   effect(DEF dst, USE src);
15062   ins_cost(100);
15063   format %{ "movd    $dst,$src\t# MoveL2D" %}
15064   ins_encode %{
15065      __ movdq($dst$$XMMRegister, $src$$Register);
15066   %}
15067   ins_pipe( pipe_slow );
15068 %}
15069 
15070 
15071 // Fast clearing of an array
15072 // Small non-constant length ClearArray for non-AVX512 targets.
15073 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15074                   Universe dummy, rFlagsReg cr)
15075 %{
15076   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15077   match(Set dummy (ClearArray (Binary cnt base) val));
15078   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15079 
15080   format %{ $$template
15081     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15082     $$emit$$"jg      LARGE\n\t"
15083     $$emit$$"dec     rcx\n\t"
15084     $$emit$$"js      DONE\t# Zero length\n\t"
15085     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15086     $$emit$$"dec     rcx\n\t"
15087     $$emit$$"jge     LOOP\n\t"
15088     $$emit$$"jmp     DONE\n\t"
15089     $$emit$$"# LARGE:\n\t"
15090     if (UseFastStosb) {
15091        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15092        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15093     } else if (UseXMMForObjInit) {
15094        $$emit$$"movdq   $tmp, $val\n\t"
15095        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15096        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15097        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15098        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15099        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15100        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15101        $$emit$$"add     0x40,rax\n\t"
15102        $$emit$$"# L_zero_64_bytes:\n\t"
15103        $$emit$$"sub     0x8,rcx\n\t"
15104        $$emit$$"jge     L_loop\n\t"
15105        $$emit$$"add     0x4,rcx\n\t"
15106        $$emit$$"jl      L_tail\n\t"
15107        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15108        $$emit$$"add     0x20,rax\n\t"
15109        $$emit$$"sub     0x4,rcx\n\t"
15110        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15111        $$emit$$"add     0x4,rcx\n\t"
15112        $$emit$$"jle     L_end\n\t"
15113        $$emit$$"dec     rcx\n\t"
15114        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15115        $$emit$$"vmovq   xmm0,(rax)\n\t"
15116        $$emit$$"add     0x8,rax\n\t"
15117        $$emit$$"dec     rcx\n\t"
15118        $$emit$$"jge     L_sloop\n\t"
15119        $$emit$$"# L_end:\n\t"
15120     } else {
15121        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15122     }
15123     $$emit$$"# DONE"
15124   %}
15125   ins_encode %{
15126     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15127                  $tmp$$XMMRegister, false, false);
15128   %}
15129   ins_pipe(pipe_slow);
15130 %}
15131 
15132 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15133                             Universe dummy, rFlagsReg cr)
15134 %{
15135   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15136   match(Set dummy (ClearArray (Binary cnt base) val));
15137   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15138 
15139   format %{ $$template
15140     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15141     $$emit$$"jg      LARGE\n\t"
15142     $$emit$$"dec     rcx\n\t"
15143     $$emit$$"js      DONE\t# Zero length\n\t"
15144     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15145     $$emit$$"dec     rcx\n\t"
15146     $$emit$$"jge     LOOP\n\t"
15147     $$emit$$"jmp     DONE\n\t"
15148     $$emit$$"# LARGE:\n\t"
15149     if (UseXMMForObjInit) {
15150        $$emit$$"movdq   $tmp, $val\n\t"
15151        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15152        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15153        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15154        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15155        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15156        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15157        $$emit$$"add     0x40,rax\n\t"
15158        $$emit$$"# L_zero_64_bytes:\n\t"
15159        $$emit$$"sub     0x8,rcx\n\t"
15160        $$emit$$"jge     L_loop\n\t"
15161        $$emit$$"add     0x4,rcx\n\t"
15162        $$emit$$"jl      L_tail\n\t"
15163        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15164        $$emit$$"add     0x20,rax\n\t"
15165        $$emit$$"sub     0x4,rcx\n\t"
15166        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15167        $$emit$$"add     0x4,rcx\n\t"
15168        $$emit$$"jle     L_end\n\t"
15169        $$emit$$"dec     rcx\n\t"
15170        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15171        $$emit$$"vmovq   xmm0,(rax)\n\t"
15172        $$emit$$"add     0x8,rax\n\t"
15173        $$emit$$"dec     rcx\n\t"
15174        $$emit$$"jge     L_sloop\n\t"
15175        $$emit$$"# L_end:\n\t"
15176     } else {
15177        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15178     }
15179     $$emit$$"# DONE"
15180   %}
15181   ins_encode %{
15182     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15183                  $tmp$$XMMRegister, false, true);
15184   %}
15185   ins_pipe(pipe_slow);
15186 %}
15187 
15188 // Small non-constant length ClearArray for AVX512 targets.
15189 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15190                        Universe dummy, rFlagsReg cr)
15191 %{
15192   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15193   match(Set dummy (ClearArray (Binary cnt base) val));
15194   ins_cost(125);
15195   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15196 
15197   format %{ $$template
15198     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15199     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15200     $$emit$$"jg      LARGE\n\t"
15201     $$emit$$"dec     rcx\n\t"
15202     $$emit$$"js      DONE\t# Zero length\n\t"
15203     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15204     $$emit$$"dec     rcx\n\t"
15205     $$emit$$"jge     LOOP\n\t"
15206     $$emit$$"jmp     DONE\n\t"
15207     $$emit$$"# LARGE:\n\t"
15208     if (UseFastStosb) {
15209        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15210        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15211     } else if (UseXMMForObjInit) {
15212        $$emit$$"mov     rdi,rax\n\t"
15213        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15214        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15215        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15223        $$emit$$"jl      L_tail\n\t"
15224        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15225        $$emit$$"add     0x20,rax\n\t"
15226        $$emit$$"sub     0x4,rcx\n\t"
15227        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15228        $$emit$$"add     0x4,rcx\n\t"
15229        $$emit$$"jle     L_end\n\t"
15230        $$emit$$"dec     rcx\n\t"
15231        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15232        $$emit$$"vmovq   xmm0,(rax)\n\t"
15233        $$emit$$"add     0x8,rax\n\t"
15234        $$emit$$"dec     rcx\n\t"
15235        $$emit$$"jge     L_sloop\n\t"
15236        $$emit$$"# L_end:\n\t"
15237     } else {
15238        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15239     }
15240     $$emit$$"# DONE"
15241   %}
15242   ins_encode %{
15243     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15244                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15245   %}
15246   ins_pipe(pipe_slow);
15247 %}
15248 
15249 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15250                                  Universe dummy, rFlagsReg cr)

15251 %{
15252   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15253   match(Set dummy (ClearArray (Binary cnt base) val));
15254   ins_cost(125);
15255   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15256 
15257   format %{ $$template
15258     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15259     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15260     $$emit$$"jg      LARGE\n\t"
15261     $$emit$$"dec     rcx\n\t"
15262     $$emit$$"js      DONE\t# Zero length\n\t"
15263     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15264     $$emit$$"dec     rcx\n\t"
15265     $$emit$$"jge     LOOP\n\t"
15266     $$emit$$"jmp     DONE\n\t"
15267     $$emit$$"# LARGE:\n\t"
15268     if (UseFastStosb) {
15269        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15270        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15271     } else if (UseXMMForObjInit) {
15272        $$emit$$"mov     rdi,rax\n\t"
15273        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15274        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15275        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15276        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15277        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15278        $$emit$$"add     0x40,rax\n\t"
15279        $$emit$$"# L_zero_64_bytes:\n\t"
15280        $$emit$$"sub     0x8,rcx\n\t"
15281        $$emit$$"jge     L_loop\n\t"
15282        $$emit$$"add     0x4,rcx\n\t"
15283        $$emit$$"jl      L_tail\n\t"
15284        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15285        $$emit$$"add     0x20,rax\n\t"
15286        $$emit$$"sub     0x4,rcx\n\t"
15287        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15288        $$emit$$"add     0x4,rcx\n\t"
15289        $$emit$$"jle     L_end\n\t"
15290        $$emit$$"dec     rcx\n\t"
15291        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15292        $$emit$$"vmovq   xmm0,(rax)\n\t"
15293        $$emit$$"add     0x8,rax\n\t"
15294        $$emit$$"dec     rcx\n\t"
15295        $$emit$$"jge     L_sloop\n\t"
15296        $$emit$$"# L_end:\n\t"
15297     } else {
15298        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15299     }
15300     $$emit$$"# DONE"
15301   %}
15302   ins_encode %{
15303     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15304                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15305   %}
15306   ins_pipe(pipe_slow);
15307 %}
15308 
15309 // Large non-constant length ClearArray for non-AVX512 targets.
15310 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15311                         Universe dummy, rFlagsReg cr)
15312 %{
15313   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15314   match(Set dummy (ClearArray (Binary cnt base) val));
15315   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15316 
15317   format %{ $$template
15318     if (UseFastStosb) {
15319        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15320        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15321     } else if (UseXMMForObjInit) {
15322        $$emit$$"movdq   $tmp, $val\n\t"
15323        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15324        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15325        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15326        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15327        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15328        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15329        $$emit$$"add     0x40,rax\n\t"
15330        $$emit$$"# L_zero_64_bytes:\n\t"
15331        $$emit$$"sub     0x8,rcx\n\t"
15332        $$emit$$"jge     L_loop\n\t"
15333        $$emit$$"add     0x4,rcx\n\t"
15334        $$emit$$"jl      L_tail\n\t"
15335        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15336        $$emit$$"add     0x20,rax\n\t"
15337        $$emit$$"sub     0x4,rcx\n\t"
15338        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15339        $$emit$$"add     0x4,rcx\n\t"
15340        $$emit$$"jle     L_end\n\t"
15341        $$emit$$"dec     rcx\n\t"
15342        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15343        $$emit$$"vmovq   xmm0,(rax)\n\t"
15344        $$emit$$"add     0x8,rax\n\t"
15345        $$emit$$"dec     rcx\n\t"
15346        $$emit$$"jge     L_sloop\n\t"
15347        $$emit$$"# L_end:\n\t"
15348     } else {
15349        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15350     }
15351   %}
15352   ins_encode %{
15353     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15354                  $tmp$$XMMRegister, true, false);
15355   %}
15356   ins_pipe(pipe_slow);
15357 %}
15358 
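// Large non-constant length ClearArray for non-AVX512 targets, word_copy_only() variant.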
15359 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15360                                   Universe dummy, rFlagsReg cr)
15361 %{
15362   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15363   match(Set dummy (ClearArray (Binary cnt base) val));
15364   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15365 
15366   format %{ $$template
15367     if (UseXMMForObjInit) {
15368        $$emit$$"movdq   $tmp, $val\n\t"
15369        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15370        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15371        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15372        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15373        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15374        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15375        $$emit$$"add     0x40,rax\n\t"
15376        $$emit$$"# L_zero_64_bytes:\n\t"
15377        $$emit$$"sub     0x8,rcx\n\t"
15378        $$emit$$"jge     L_loop\n\t"
15379        $$emit$$"add     0x4,rcx\n\t"
15380        $$emit$$"jl      L_tail\n\t"
15381        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15382        $$emit$$"add     0x20,rax\n\t"
15383        $$emit$$"sub     0x4,rcx\n\t"
15384        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15385        $$emit$$"add     0x4,rcx\n\t"
15386        $$emit$$"jle     L_end\n\t"
15387        $$emit$$"dec     rcx\n\t"
15388        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15389        $$emit$$"vmovq   xmm0,(rax)\n\t"
15390        $$emit$$"add     0x8,rax\n\t"
15391        $$emit$$"dec     rcx\n\t"
15392        $$emit$$"jge     L_sloop\n\t"
15393        $$emit$$"# L_end:\n\t"
15394     } else {
15395        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15396     }
15397   %}
15398   ins_encode %{
15399     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15400                  $tmp$$XMMRegister, true, true);
15401   %}
15402   ins_pipe(pipe_slow);
15403 %}
15404 
15405 // Large non-constant length ClearArray for AVX512 targets.
15406 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15407                              Universe dummy, rFlagsReg cr)
15408 %{
15409   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15410   match(Set dummy (ClearArray (Binary cnt base) val));
15411   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15412 
15413   format %{ $$template
15414     if (UseFastStosb) {
15415        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15416        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15417        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15418     } else if (UseXMMForObjInit) {
15419        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15420        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15421        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15422        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15423        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15424        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15425        $$emit$$"add     0x40,rax\n\t"
15426        $$emit$$"# L_zero_64_bytes:\n\t"
15427        $$emit$$"sub     0x8,rcx\n\t"
15428        $$emit$$"jge     L_loop\n\t"
15429        $$emit$$"add     0x4,rcx\n\t"
15430        $$emit$$"jl      L_tail\n\t"
15431        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15432        $$emit$$"add     0x20,rax\n\t"
15433        $$emit$$"sub     0x4,rcx\n\t"
15434        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15435        $$emit$$"add     0x4,rcx\n\t"
15436        $$emit$$"jle     L_end\n\t"
15437        $$emit$$"dec     rcx\n\t"
15438        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15439        $$emit$$"vmovq   xmm0,(rax)\n\t"
15440        $$emit$$"add     0x8,rax\n\t"
15441        $$emit$$"dec     rcx\n\t"
15442        $$emit$$"jge     L_sloop\n\t"
15443        $$emit$$"# L_end:\n\t"
15444     } else {
15445        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15446        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15447     }
15448   %}
15449   ins_encode %{
15450     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15451                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15452   %}
15453   ins_pipe(pipe_slow);
15454 %}
15455 
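// Large non-constant length ClearArray for AVX512 targets, word_copy_only() variant.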
15456 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15457                                        Universe dummy, rFlagsReg cr)

15458 %{
15459   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15460   match(Set dummy (ClearArray (Binary cnt base) val));
15461   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15462 
15463   format %{ $$template
15464     if (UseFastStosb) {
15465        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15466        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15467        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15468     } else if (UseXMMForObjInit) {
15469        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15470        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15471        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15472        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15473        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15474        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15475        $$emit$$"add     0x40,rax\n\t"
15476        $$emit$$"# L_zero_64_bytes:\n\t"
15477        $$emit$$"sub     0x8,rcx\n\t"
15478        $$emit$$"jge     L_loop\n\t"
15479        $$emit$$"add     0x4,rcx\n\t"
15480        $$emit$$"jl      L_tail\n\t"
15481        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15482        $$emit$$"add     0x20,rax\n\t"
15483        $$emit$$"sub     0x4,rcx\n\t"
15484        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15485        $$emit$$"add     0x4,rcx\n\t"
15486        $$emit$$"jle     L_end\n\t"
15487        $$emit$$"dec     rcx\n\t"
15488        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15489        $$emit$$"vmovq   xmm0,(rax)\n\t"
15490        $$emit$$"add     0x8,rax\n\t"
15491        $$emit$$"dec     rcx\n\t"
15492        $$emit$$"jge     L_sloop\n\t"
15493        $$emit$$"# L_end:\n\t"
15494     } else {
15495        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15496        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15497     }
15498   %}
15499   ins_encode %{
15500     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15501                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15502   %}
15503   ins_pipe(pipe_slow);
15504 %}
15505 
15506 // Small constant length ClearArray for AVX512 targets.
15507 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15508 %{
15509   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15510             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15511   match(Set dummy (ClearArray (Binary cnt base) val));
15512   ins_cost(100);
15513   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15514   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15515   ins_encode %{
15516     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15517   %}
15518   ins_pipe(pipe_slow);
15519 %}
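Read together with the small non-AVX512 ClearArray rules earlier in the file, the predicates above partition ClearArray lowering by ClearArrayNode::is_large(), word_copy_only(), and the target's AVX level, with the constant-length rep_stos_im rule being the cheapest match (ins_cost(100) versus 125). A compact editor's sketch of that partition follows; the helper name and parameters are illustrative, but each condition is copied from a predicate() clause above.

// Illustrative only: summarizes which of the rules above can match a given
// ClearArray shape. Rules not shown in this hunk return nullptr here.
const char* clear_array_rule(bool is_large, bool word_copy_only,
                             bool cnt_is_constant, int UseAVX,
                             int MaxVectorSize, bool avx512vl) {
  if (!is_large && !word_copy_only && cnt_is_constant &&
      MaxVectorSize >= 32 && avx512vl) {
    return "rep_stos_im";                       // ins_cost(100): preferred
  }
  if (UseAVX > 2) {                             // AVX512 targets
    if (is_large) return word_copy_only ? "rep_stos_large_evex_word_copy"
                                        : "rep_stos_large_evex";
    return word_copy_only ? "rep_stos_evex_word_copy" : "rep_stos_evex";
  }
  if (is_large)   return word_copy_only ? "rep_stos_large_word_copy"
                                        : "rep_stos_large";
  return nullptr;  // small non-AVX512 cases: handled by rules earlier in the file
}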
15520 
15521 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15522                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15523 %{
15524   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15525   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15526   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15527 
15528   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15529   ins_encode %{
15530     __ string_compare($str1$$Register, $str2$$Register,
15531                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15532                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15533   %}
15534   ins_pipe( pipe_slow );
15535 %}
15536 

17341   effect(USE meth);
17342 
17343   ins_cost(300);
17344   format %{ "call_leaf,runtime " %}
17345   ins_encode(clear_avx, Java_To_Runtime(meth));
17346   ins_pipe(pipe_slow);
17347 %}
17348 
17349 // Call runtime without safepoint and with vector arguments
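// Note: this rule's encoding omits the clear_avx (vzeroupper) step used by the
// other leaf-call rules, so the upper lanes of vector arguments are not
// clobbered before the call.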
17350 instruct CallLeafDirectVector(method meth)
17351 %{
17352   match(CallLeafVector);
17353   effect(USE meth);
17354 
17355   ins_cost(300);
17356   format %{ "call_leaf,vector " %}
17357   ins_encode(Java_To_Runtime(meth));
17358   ins_pipe(pipe_slow);
17359 %}
17360 
17361 // Call runtime without safepoint
17362 // Entry point is null; the target register holds the address to call.
17363 instruct CallLeafNoFPInDirect(rRegP target)
17364 %{
17365   predicate(n->as_Call()->entry_point() == nullptr);
17366   match(CallLeafNoFP target);
17367 
17368   ins_cost(300);
17369   format %{ "call_leaf_nofp,runtime indirect " %}
17370   ins_encode %{
17371      __ call($target$$Register);
17372   %}
17373 
17374   ins_pipe(pipe_slow);
17375 %}
17376 
17377 // Call runtime without safepoint
17378 instruct CallLeafNoFPDirect(method meth)
17379 %{
17380   predicate(n->as_Call()->entry_point() != nullptr);
17381   match(CallLeafNoFP);
17382   effect(USE meth);
17383 
17384   ins_cost(300);
17385   format %{ "call_leaf_nofp,runtime " %}
17386   ins_encode(clear_avx, Java_To_Runtime(meth));
17387   ins_pipe(pipe_slow);
17388 %}
17389 
17390 // Return Instruction
17391 // Remove the return address & jump to it.
17392 // Notice: We always emit a nop after a ret to make sure there is room
17393 // for safepoint patching
17394 instruct Ret()
17395 %{
17396   match(Return);
17397 
17398   format %{ "ret" %}
17399   ins_encode %{
17400     __ ret(0);