< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {




 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }

 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;

 1864     st->print("\n\t");
 1865     st->print("# stack alignment check");
 1866 #endif
 1867   }
 1868   if (C->stub_function() != nullptr) {
 1869     st->print("\n\t");
 1870     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1871     st->print("\n\t");
 1872     st->print("je      fast_entry\t");
 1873     st->print("\n\t");
 1874     st->print("call    #nmethod_entry_barrier_stub\t");
 1875     st->print("\n\tfast_entry:");
 1876   }
 1877   st->cr();
 1878 }
 1879 #endif
 1880 
 1881 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1882   Compile* C = ra_->C;
 1883 
 1884   int framesize = C->output()->frame_size_in_bytes();
 1885   int bangsize = C->output()->bang_size_in_bytes();
 1886 
      // Fast class-initialization barrier: if the method's holder class is
      // still initializing, branch off to the wrong-method stub instead of
      // entering this compiled code.
 1887   if (C->clinit_barrier_on_entry()) {
 1888     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1889     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1890 
 1891     Label L_skip_barrier;
 1892     Register klass = rscratch1;
 1893 
 1894     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1895     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1896 
 1897     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1898 
 1899     __ bind(L_skip_barrier);
 1900   }
 1901 
      // Build the frame; the stack is banged only when need_stack_bang says
      // the frame is large enough to require it.
 1902   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);


 1903 
      // From this code offset on, the frame is fully set up (used for
      // debug/frame walking).
 1904   C->output()->set_frame_complete(__ offset());
 1905 
 1906   if (C->has_mach_constant_base_node()) {
 1907     // NOTE: We set the table base offset here because users might be
 1908     // emitted before MachConstantBaseNode.
 1909     ConstantTable& constant_table = C->output()->constant_table();
 1910     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1911   }
 1912 }
 1913 
 1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1915 {
      // Prolog length varies (clinit barrier, stack bang, vzeroupper), so
      // defer to the generic emit-and-measure implementation.
 1916   return MachNode::size(ra_); // too many variables; just compute it
 1917                               // the hard way
 1918 }
 1919 
 1920 int MachPrologNode::reloc() const
 1921 {
      // Upper-bound estimate of relocation entries the prolog may emit.
 1922   return 0; // a large enough number
 1923 }
 1924 
 1925 //=============================================================================
 1926 #ifndef PRODUCT
 1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1928 {
 1929   Compile* C = ra_->C;
 1930   if (generate_vzeroupper(C)) {
 1931     st->print("vzeroupper");
 1932     st->cr(); st->print("\t");
 1933   }
 1934 
 1935   int framesize = C->output()->frame_size_in_bytes();
 1936   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1937   // Remove word for return adr already pushed
 1938   // and RBP

 1946   st->print_cr("popq    rbp");
 1947   if (do_polling() && C->is_method_compilation()) {
 1948     st->print("\t");
 1949     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1950                  "ja      #safepoint_stub\t"
 1951                  "# Safepoint: poll for GC");
 1952   }
 1953 }
 1954 #endif
 1955 
 1956 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1957 {
 1958   Compile* C = ra_->C;
 1959 
 1960   if (generate_vzeroupper(C)) {
 1961     // Clear upper bits of YMM registers when current compiled code uses
 1962     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1963     __ vzeroupper();
 1964   }
 1965 
 1966   int framesize = C->output()->frame_size_in_bytes();
 1967   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1968   // Remove word for return adr already pushed
 1969   // and RBP
 1970   framesize -= 2*wordSize;
 1971 
 1972   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1973 
 1974   if (framesize) {
 1975     __ addq(rsp, framesize);
 1976   }
 1977 
 1978   __ popq(rbp);
 1979 
 1980   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1981     __ reserved_stack_check();
 1982   }
 1983 
      // Return poll: compare rsp against the thread-local poll word and jump
      // to an out-of-line safepoint stub when a safepoint is pending (see the
      // matching format() above).
 1984   if (do_polling() && C->is_method_compilation()) {
 1985     Label dummy_label;
 1986     Label* code_stub = &dummy_label;
      // During scratch-buffer size estimation no stub may be registered; the
      // dummy label keeps safepoint_poll well-formed without side effects.
 1987     if (!C->output()->in_scratch_emit_size()) {
 1988       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1989       C->output()->add_stub(stub);
 1990       code_stub = &stub->entry();
 1991     }
 1992     __ relocate(relocInfo::poll_return_type);
 1993     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1994   }
 1995 }
 1996 
 1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1998 {
      // Epilog length varies (vzeroupper, frame pop, safepoint poll), so
      // defer to the generic emit-and-measure implementation.
 1999   return MachNode::size(ra_); // too many variables; just compute it
 2000                               // the hard way
 2001 }
 2002 
 2003 int MachEpilogNode::reloc() const
 2004 {
      // Upper-bound estimate of relocation entries the epilog may emit
      // (e.g. the poll_return relocation).
 2005   return 2; // a large enough number
 2006 }
 2007 
 2008 const Pipeline* MachEpilogNode::pipeline() const
 2009 {
      // Use the default pipeline description; the epilog needs no special
      // scheduling class.
 2010   return MachNode::pipeline_class();
 2011 }
 2012 
 2013 //=============================================================================
 2014 
      // Coarse register classes used when classifying registers/stack slots
      // (presumably for spill-copy generation — confirm against the uses
      // below this enum in the full file).
 2015 enum RC {
 2016   rc_bad,    // not a valid register class
 2017   rc_int,    // general-purpose register
 2018   rc_kreg,   // AVX-512 opmask (k) register
 2019   rc_float,  // XMM/vector register
 2020   rc_stack   // stack slot
 2021 };
 2022 

 2584 #endif
 2585 
 2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2587 {
      // Materialize the address of this BoxLock's stack slot into the
      // register chosen by the register allocator: lea reg, [rsp + offset].
 2588   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2589   int reg = ra_->get_encode(this);
 2590 
 2591   __ lea(as_Register(reg), Address(rsp, offset));
 2592 }
 2593 
 2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2595 {
      // Encoded size of the lea emitted by BoxLockNode::emit: the
      // displacement is 1 byte (disp8) when offset < 0x80, else 4 bytes
      // (disp32); destinations with encoding > 15 need the 2-byte REX2
      // prefix instead of the 1-byte REX prefix.
 2596   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2597   if (ra_->get_encode(this) > 15) {
 2598     return (offset < 0x80) ? 6 : 9; // REX2
 2599   } else {
 2600     return (offset < 0x80) ? 5 : 8; // REX
 2601   }
 2602 }
 2603 











































 2604 //=============================================================================
 2605 #ifndef PRODUCT
 2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2607 {
 2608   if (UseCompressedClassPointers) {
 2609     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2610     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2611   } else {
 2612     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2613     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2614   }
 2615   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2616 }
 2617 #endif
 2618 
 2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2620 {
      // Unverified entry point: emit the inline-cache receiver-klass check,
      // aligned to InteriorEntryAlignment.
 2621   __ ic_check(InteriorEntryAlignment);
 2622 }
 2623 
 2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2625 {
      // ic_check size depends on alignment padding, so emit-and-measure.
 2626   return MachNode::size(ra_); // too many variables; just compute it
 2627                               // the hard way
 2628 }
 2629 
 2630 
 2631 //=============================================================================
 2632 
 2633 bool Matcher::supports_vector_calling_convention(void) {
      // Vector calling convention is available whenever Vector API support
      // is enabled; there is no additional CPU-feature gating here.
 2634   return EnableVectorSupport;
 2635 }
 2636 
      // True when the matcher set the platform-dependent
      // Flag_ndd_demotable_opr1 bit on this node, i.e. its first operand can
      // be demoted from an NDD (new-data-destination) encoding — TODO
      // confirm exact semantics against the flag's definition.
 2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2638   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2639 }
 2640 
      // Same as is_ndd_demotable_opr1, but for the node's second operand.
 2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2642   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2643 }
 2644 
 2645 #ifdef ASSERT
      // Debug-only: true if either operand of this node carries an
      // ndd_demotable flag.
 2646 static bool is_ndd_demotable(const MachNode* mdef) {
 2647   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2648 }
 2649 #endif

 4583     }
 4584     __ post_call_nop();
 4585   %}
 4586 
      // Dynamic (inline-cache) Java call: ic_call emits the IC sequence for
      // the target method; post_call_nop is the trailing nop emitted after
      // every call in this file.
 4587   enc_class Java_Dynamic_Call(method meth) %{
 4588     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4589     __ post_call_nop();
 4590   %}
 4591 
      // Emitted after calls. With -XX:+VerifyStackAtCalls, verify that the
      // 0xbadb100d "majik cookie" (presumably placed at frame setup — not
      // visible in this chunk) is still at its slot, i.e. the callee left
      // the caller's stack depth unchanged; trap with int3 otherwise.
 4592   enc_class call_epilog %{
 4593     if (VerifyStackAtCalls) {
 4594       // Check that stack depth is unchanged: find majik cookie on stack
 4595       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4596       Label L;
 4597       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4598       __ jccb(Assembler::equal, L);
 4599       // Die if stack mismatch
 4600       __ int3();
 4601       __ bind(L);
 4602     }

































 4603   %}
 4604 
 4605 %}
 4606 
 4607 //----------FRAME--------------------------------------------------------------
 4608 // Definition of frame structure and management information.
 4609 //
 4610 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4611 //                             |   (to get allocators register number
 4612 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4613 //  r   CALLER     |        |
 4614 //  o     |        +--------+      pad to even-align allocators stack-slot
 4615 //  w     V        |  pad0  |        numbers; owned by CALLER
 4616 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4617 //  h     ^        |   in   |  5
 4618 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4619 //  |     |        |        |  3
 4620 //  |     |        +--------+
 4621 //  V     |        | old out|      Empty on Intel, window on Sparc
 4622 //        |    old |preserve|      Must be even aligned.

 5761   %}
 5762 %}
 5763 
 5764 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Memory operand: [reg + off + idx << scale] where idx is an int
      // converted to long.
 5765 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5766 %{
 5767   constraint(ALLOC_IN_RC(ptr_reg));
      // Only matches when the int index is known non-negative
      // (type lower bound >= 0), so the i2l widening is benign.
 5768   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5769   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5770 
 5771   op_cost(10);
 5772   format %{"[$reg + $off + $idx << $scale]" %}
 5773   interface(MEMORY_INTER) %{
 5774     base($reg);
 5775     index($idx);
 5776     scale($scale);
 5777     disp($off);
 5778   %}
 5779 %}
 5780 
















 5781 // Indirect Narrow Oop Plus Offset Operand
 5782 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5783 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Addresses a field of a compressed oop as
      // [R12(heap base) + narrow_oop << 3 + off]; only valid when the
      // decode shift is exactly 3 (times_8).
 5784 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5785   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5786   constraint(ALLOC_IN_RC(ptr_reg));
 5787   match(AddP (DecodeN reg) off);
 5788 
 5789   op_cost(10);
 5790   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5791   interface(MEMORY_INTER) %{
 5792     base(0xc); // R12
 5793     index($reg);
 5794     scale(0x3);
 5795     disp($off);
 5796   %}
 5797 %}
 5798 
 5799 // Indirect Memory Operand
 5800 operand indirectNarrow(rRegN reg)

 6270 %}
 6271 
 6272 // Replaces legVec during post-selection cleanup. See above.
      // 512-bit vector operand restricted to the legacy vector register
      // class (vectorz_reg_legacy); substituted for legVec during
      // post-selection cleanup.
 6273 operand legVecZ() %{
 6274   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6275   match(VecZ);
 6276 
 6277   format %{ %}
 6278   interface(REG_INTER);
 6279 %}
 6280 
 6281 //----------OPERAND CLASSES----------------------------------------------------
 6282 // Operand Classes are groups of operands that are used as to simplify
 6283 // instruction definitions by not requiring the AD writer to specify separate
 6284 // instructions for every form of operand when the instruction accepts
 6285 // multiple operand types with the same basic encoding and format.  The classic
 6286 // case of this is memory operands.
 6287 
 6288 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6289                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6290                indCompressedOopOffset,
 6291                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6292                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6293                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6294 
 6295 //----------PIPELINE-----------------------------------------------------------
 6296 // Rules which define the behavior of the target architectures pipeline.
 6297 pipeline %{
 6298 
 6299 //----------ATTRIBUTES---------------------------------------------------------
 6300 attributes %{
 6301   variable_size_instructions;        // x86 instructions are variable length
 6302   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6303   instruction_unit_size = 1;         // An instruction is 1 byte long
 6304   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6305   instruction_fetch_units = 1;       // of 16 bytes
 6306 %}
 6307 
 6308 //----------RESOURCES----------------------------------------------------------
 6309 // Resources are the functional units available to the machine
 6310 

 8911   format %{ "MEMBAR-storestore (empty encoding)" %}
 8912   ins_encode( );
 8913   ins_pipe(empty);
 8914 %}
 8915 
 8916 //----------Move Instructions--------------------------------------------------
 8917 
      // Reinterpret a long as a pointer: a plain register move, elided
      // entirely when the allocator assigned src and dst the same register.
 8918 instruct castX2P(rRegP dst, rRegL src)
 8919 %{
 8920   match(Set dst (CastX2P src));
 8921 
 8922   format %{ "movq    $dst, $src\t# long->ptr" %}
 8923   ins_encode %{
 8924     if ($dst$$reg != $src$$reg) {
 8925       __ movptr($dst$$Register, $src$$Register);
 8926     }
 8927   %}
 8928   ins_pipe(ialu_reg_reg); // XXX
 8929 %}
 8930 


























      // Reinterpret a pointer as a long: a plain register move, elided
      // entirely when the allocator assigned src and dst the same register.
 8931 instruct castP2X(rRegL dst, rRegP src)
 8932 %{
 8933   match(Set dst (CastP2X src));
 8934 
 8935   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8936   ins_encode %{
 8937     if ($dst$$reg != $src$$reg) {
 8938       __ movptr($dst$$Register, $src$$Register);
 8939     }
 8940   %}
 8941   ins_pipe(ialu_reg_reg); // XXX
 8942 %}
 8943 
 8944 // Convert oop into int for vectors alignment masking
 8945 instruct convP2I(rRegI dst, rRegP src)
 8946 %{
 8947   match(Set dst (ConvL2I (CastP2X src)));
 8948 
 8949   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8950   ins_encode %{

15198   effect(DEF dst, USE src);
15199   ins_cost(100);
15200   format %{ "movd    $dst,$src\t# MoveI2F" %}
15201   ins_encode %{
15202     __ movdl($dst$$XMMRegister, $src$$Register);
15203   %}
15204   ins_pipe( pipe_slow );
15205 %}
15206 
      // Bitwise move of a 64-bit GPR into an XMM register (no conversion).
 15207 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
 15208   match(Set dst (MoveL2D src));
 15209   effect(DEF dst, USE src);
 15210   ins_cost(100);
 15211   format %{ "movd    $dst,$src\t# MoveL2D" %}
 15212   ins_encode %{
 15213      __ movdq($dst$$XMMRegister, $src$$Register);
 15214   %}
 15215   ins_pipe( pipe_slow );
 15216 %}
15217 

15218 // Fast clearing of an array
 15219 // Small non-constant length ClearArray for non-AVX512 targets.
15220 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15221                   Universe dummy, rFlagsReg cr)
15222 %{
15223   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15224   match(Set dummy (ClearArray cnt base));
15225   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































15226 
15227   format %{ $$template
15228     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15229     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15230     $$emit$$"jg      LARGE\n\t"
15231     $$emit$$"dec     rcx\n\t"
15232     $$emit$$"js      DONE\t# Zero length\n\t"
15233     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15234     $$emit$$"dec     rcx\n\t"
15235     $$emit$$"jge     LOOP\n\t"
15236     $$emit$$"jmp     DONE\n\t"
15237     $$emit$$"# LARGE:\n\t"
15238     if (UseFastStosb) {
15239        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15240        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15241     } else if (UseXMMForObjInit) {
15242        $$emit$$"mov     rdi,rax\n\t"
15243        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15244        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15245        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15253        $$emit$$"jl      L_tail\n\t"
15254        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15255        $$emit$$"add     0x20,rax\n\t"
15256        $$emit$$"sub     0x4,rcx\n\t"
15257        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15258        $$emit$$"add     0x4,rcx\n\t"
15259        $$emit$$"jle     L_end\n\t"
15260        $$emit$$"dec     rcx\n\t"
15261        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15262        $$emit$$"vmovq   xmm0,(rax)\n\t"
15263        $$emit$$"add     0x8,rax\n\t"
15264        $$emit$$"dec     rcx\n\t"
15265        $$emit$$"jge     L_sloop\n\t"
15266        $$emit$$"# L_end:\n\t"
15267     } else {
15268        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15269     }
15270     $$emit$$"# DONE"
15271   %}
15272   ins_encode %{
15273     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15274                  $tmp$$XMMRegister, false, knoreg);
15275   %}
15276   ins_pipe(pipe_slow);
15277 %}
15278 
15279 // Small non-constant length ClearArray for AVX512 targets.
15280 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15281                        Universe dummy, rFlagsReg cr)
15282 %{
15283   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15284   match(Set dummy (ClearArray cnt base));
15285   ins_cost(125);
15286   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15287 
15288   format %{ $$template
15289     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15290     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15291     $$emit$$"jg      LARGE\n\t"
15292     $$emit$$"dec     rcx\n\t"
15293     $$emit$$"js      DONE\t# Zero length\n\t"
15294     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15295     $$emit$$"dec     rcx\n\t"
15296     $$emit$$"jge     LOOP\n\t"
15297     $$emit$$"jmp     DONE\n\t"
15298     $$emit$$"# LARGE:\n\t"
15299     if (UseFastStosb) {
15300        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15301        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15302     } else if (UseXMMForObjInit) {
15303        $$emit$$"mov     rdi,rax\n\t"
15304        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15305        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15306        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15314        $$emit$$"jl      L_tail\n\t"
15315        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15316        $$emit$$"add     0x20,rax\n\t"
15317        $$emit$$"sub     0x4,rcx\n\t"
15318        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15319        $$emit$$"add     0x4,rcx\n\t"
15320        $$emit$$"jle     L_end\n\t"
15321        $$emit$$"dec     rcx\n\t"
15322        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15323        $$emit$$"vmovq   xmm0,(rax)\n\t"
15324        $$emit$$"add     0x8,rax\n\t"
15325        $$emit$$"dec     rcx\n\t"
15326        $$emit$$"jge     L_sloop\n\t"
15327        $$emit$$"# L_end:\n\t"
15328     } else {
15329        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15330     }
15331     $$emit$$"# DONE"
15332   %}
15333   ins_encode %{
15334     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15335                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15336   %}
15337   ins_pipe(pipe_slow);
15338 %}
15339 
15340 // Large non-constant length ClearArray for non-AVX512 targets.
      // Large (is_large) non-constant-length ClearArray for pre-AVX-512
      // targets (UseAVX <= 2); passes knoreg since no opmask is available.
 15341 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
 15342                         Universe dummy, rFlagsReg cr)
 15343 %{
 15344   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
 15345   match(Set dummy (ClearArray cnt base));
 15346   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

































































































 15347 
 15348   format %{ $$template
 15349     if (UseFastStosb) {
 15350        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
 15351        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
 15352        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
 15353     } else if (UseXMMForObjInit) {
 15354        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
 15355        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
 15356        $$emit$$"jmpq    L_zero_64_bytes\n\t"
 15357        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
 15358        $$emit$$"vmovdqu ymm0,(rax)\n\t"
 15359        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
 15360        $$emit$$"add     0x40,rax\n\t"
 15361        $$emit$$"# L_zero_64_bytes:\n\t"
 15362        $$emit$$"sub     0x8,rcx\n\t"
 15363        $$emit$$"jge     L_loop\n\t"
 15364        $$emit$$"add     0x4,rcx\n\t"
 15365        $$emit$$"jl      L_tail\n\t"
 15366        $$emit$$"vmovdqu ymm0,(rax)\n\t"
 15367        $$emit$$"add     0x20,rax\n\t"
 15368        $$emit$$"sub     0x4,rcx\n\t"
 15369        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
 15370        $$emit$$"add     0x4,rcx\n\t"
 15371        $$emit$$"jle     L_end\n\t"
 15372        $$emit$$"dec     rcx\n\t"
 15373        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
 15374        $$emit$$"vmovq   xmm0,(rax)\n\t"
 15375        $$emit$$"add     0x8,rax\n\t"
 15376        $$emit$$"dec     rcx\n\t"
 15377        $$emit$$"jge     L_sloop\n\t"
 15378        $$emit$$"# L_end:\n\t"
 15379     } else {
 15380        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
 15381        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
 15382     }
 15383   %}
      // The 'true' argument selects clear_mem's large-array path (the small
      // variants pass false).
 15384   ins_encode %{
 15385     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
 15386                  $tmp$$XMMRegister, true, knoreg);
 15387   %}
 15388   ins_pipe(pipe_slow);
 15389 %}
15390 
15391 // Large non-constant length ClearArray for AVX512 targets.
      // Large (is_large) non-constant-length ClearArray for AVX-512 targets
      // (UseAVX > 2); supplies an opmask temp (ktmp) to clear_mem.
 15392 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
 15393                              Universe dummy, rFlagsReg cr)
 15394 %{
 15395   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
 15396   match(Set dummy (ClearArray cnt base));
 15397   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
 15398 
 15399   format %{ $$template
 15400     if (UseFastStosb) {
 15401        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
 15402        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
 15403        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
 15404     } else if (UseXMMForObjInit) {
 15405        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
 15406        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
 15407        $$emit$$"jmpq    L_zero_64_bytes\n\t"
 15408        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
 15409        $$emit$$"vmovdqu ymm0,(rax)\n\t"
 15410        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
 15411        $$emit$$"add     0x40,rax\n\t"
 15412        $$emit$$"# L_zero_64_bytes:\n\t"
 15413        $$emit$$"sub     0x8,rcx\n\t"
 15414        $$emit$$"jge     L_loop\n\t"
 15415        $$emit$$"add     0x4,rcx\n\t"
 15416        $$emit$$"jl      L_tail\n\t"
 15417        $$emit$$"vmovdqu ymm0,(rax)\n\t"
 15418        $$emit$$"add     0x20,rax\n\t"
 15419        $$emit$$"sub     0x4,rcx\n\t"
 15420        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
 15421        $$emit$$"add     0x4,rcx\n\t"
 15422        $$emit$$"jle     L_end\n\t"
 15423        $$emit$$"dec     rcx\n\t"
 15424        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
 15425        $$emit$$"vmovq   xmm0,(rax)\n\t"
 15426        $$emit$$"add     0x8,rax\n\t"
 15427        $$emit$$"dec     rcx\n\t"
 15428        $$emit$$"jge     L_sloop\n\t"
 15429        $$emit$$"# L_end:\n\t"
 15430     } else {
 15431        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
 15432        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
 15433     }
 15434   %}
      // The 'true' argument selects clear_mem's large-array path (the small
      // variants pass false).
 15435   ins_encode %{
 15436     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
 15437                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
 15438   %}
 15439   ins_pipe(pipe_slow);
 15440 %}
15441 
15442 // Small constant length ClearArray for AVX512 targets.
      // Small constant-length ClearArray: cnt is an immediate here, and the
      // AVX512VL + 32-byte-vector requirement lets clear_mem use masked
      // vector stores (ktmp).
 15443 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
 15444 %{
 15445   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
 15446   match(Set dummy (ClearArray cnt base));

 15447   ins_cost(100);
 15448   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
 15449   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
 15450   ins_encode %{
 15451    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
 15452   %}
 15453   ins_pipe(pipe_slow);
 15454 %}
15455 
      // Latin1 x Latin1 string comparison for targets without AVX512VLBW;
      // passes knoreg (no opmask) to the string_compare stub.
 15456 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
 15457                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
 15458 %{
 15459   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
 15460   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
 15461   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
 15462 
 15463   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
 15464   ins_encode %{
 15465     __ string_compare($str1$$Register, $str2$$Register,
 15466                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
 15467                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
 15468   %}
 15469   ins_pipe( pipe_slow );
 15470 %}
15471 

17309   effect(USE meth);
17310 
17311   ins_cost(300);
17312   format %{ "call_leaf,runtime " %}
17313   ins_encode(clear_avx, Java_To_Runtime(meth));
17314   ins_pipe(pipe_slow);
17315 %}
17316 
17317 // Call runtime without safepoint and with vector arguments
      // Leaf runtime call with vector arguments. Unlike the other leaf
      // calls, no clear_avx is emitted: vector arguments must stay live
      // across the call (MachCallRuntimeNode::ret_addr_offset likewise
      // skips clear_avx_size for Op_CallLeafVector).
 17318 instruct CallLeafDirectVector(method meth)
 17319 %{
 17320   match(CallLeafVector);
 17321   effect(USE meth);
 17322 
 17323   ins_cost(300);
 17324   format %{ "call_leaf,vector " %}
 17325   ins_encode(Java_To_Runtime(meth));
 17326   ins_pipe(pipe_slow);
 17327 %}
17328 
















17329 // Call runtime without safepoint
      // Leaf runtime call without safepoint or FP arguments; clear_avx
      // emits vzeroupper first when needed.
 17330 instruct CallLeafNoFPDirect(method meth)
 17331 %{

 17332   match(CallLeafNoFP);
 17333   effect(USE meth);
 17334 
 17335   ins_cost(300);
 17336   format %{ "call_leaf_nofp,runtime " %}
 17337   ins_encode(clear_avx, Java_To_Runtime(meth));
 17338   ins_pipe(pipe_slow);
 17339 %}
17340 
17341 // Return Instruction
17342 // Remove the return address & jump to it.
17343 // Notice: We always emit a nop after a ret to make sure there is room
17344 // for safepoint patching
17345 instruct Ret()
17346 %{
17347   match(Return);
17348 
17349   format %{ "ret" %}
17350   ins_encode %{
17351     __ ret(0);

 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
      // A null entry point means the target address is already in a register
      // and only the 3-byte indirect call is emitted.
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
      // Vector leaf calls keep vector state live across the call, so no
      // vzeroupper is emitted for them.
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;

 1869     st->print("\n\t");
 1870     st->print("# stack alignment check");
 1871 #endif
 1872   }
 1873   if (C->stub_function() != nullptr) {
 1874     st->print("\n\t");
 1875     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1876     st->print("\n\t");
 1877     st->print("je      fast_entry\t");
 1878     st->print("\n\t");
 1879     st->print("call    #nmethod_entry_barrier_stub\t");
 1880     st->print("\n\tfast_entry:");
 1881   }
 1882   st->cr();
 1883 }
 1884 #endif
 1885 
 1886 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1887   Compile* C = ra_->C;
 1888 
      // Frame setup is folded into verified_entry(C) in this version
      // (frame size/stack bang derived from Compile state — confirm against
      // MacroAssembler::verified_entry).
 1889   __ verified_entry(C);





 1890 
      // Stub compilations (stub_function != nullptr) have no nmethod and
      // therefore no nmethod entry barrier.
 1891   if (ra_->C->stub_function() == nullptr) {
 1892     __ entry_barrier();







 1893   }
 1894 
      // Bind the verified-entry label only during real emission;
      // scratch-buffer size estimation must not bind labels.
 1895   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1896     __ bind(*_verified_entry);
 1897   }
 1898 
      // From this code offset on, the frame is fully set up.
 1899   C->output()->set_frame_complete(__ offset());
 1900 
 1901   if (C->has_mach_constant_base_node()) {
 1902     // NOTE: We set the table base offset here because users might be
 1903     // emitted before MachConstantBaseNode.
 1904     ConstantTable& constant_table = C->output()->constant_table();
 1905     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1906   }
 1907 }
 1908 





 1909 
// Upper bound on the number of relocation entries this node may emit.
 1910 int MachPrologNode::reloc() const
 1911 {
 1912   return 0; // a large enough number
 1913 }
 1914 
 1915 //=============================================================================
 1916 #ifndef PRODUCT
 1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1918 {
 1919   Compile* C = ra_->C;
 1920   if (generate_vzeroupper(C)) {
 1921     st->print("vzeroupper");
 1922     st->cr(); st->print("\t");
 1923   }
 1924 
 1925   int framesize = C->output()->frame_size_in_bytes();
 1926   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1927   // Remove word for return adr already pushed
 1928   // and RBP

 1936   st->print_cr("popq    rbp");
 1937   if (do_polling() && C->is_method_compilation()) {
 1938     st->print("\t");
 1939     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1940                  "ja      #safepoint_stub\t"
 1941                  "# Safepoint: poll for GC");
 1942   }
 1943 }
 1944 #endif
 1945 
 1946 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1947 {
// Emits the method epilogue: optional vzeroupper, frame teardown, reserved
// stack check, and the return-time safepoint poll.
 1948   Compile* C = ra_->C;
 1949 
 1950   if (generate_vzeroupper(C)) {
 1951     // Clear upper bits of YMM registers when current compiled code uses
 1952     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1953     __ vzeroupper();
 1954   }
 1955 
 1956   // Subtract two words to account for return address and rbp
 1957   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1958   __ remove_frame(initial_framesize, C->needs_stack_repair());










 1959 
 1960   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1961     __ reserved_stack_check();
 1962   }
 1963 
   // Return poll: the slow path branches to a shared C2SafepointPollStub.
   // During scratch (size-estimation) emission no stub is registered; a dummy
   // label keeps the emitted size identical.
 1964   if (do_polling() && C->is_method_compilation()) {
 1965     Label dummy_label;
 1966     Label* code_stub = &dummy_label;
 1967     if (!C->output()->in_scratch_emit_size()) {
 1968       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1969       C->output()->add_stub(stub);
 1970       code_stub = &stub->entry();
 1971     }
 1972     __ relocate(relocInfo::poll_return_type);
 1973     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1974   }
 1975 }
 1976 






// Upper bound on the number of relocation entries this node may emit
// (the poll_return relocation above is one of them).
 1977 int MachEpilogNode::reloc() const
 1978 {
 1979   return 2; // a large enough number
 1980 }
 1981 
// The epilogue has no special scheduling needs; use the generic pipeline class.
 1982 const Pipeline* MachEpilogNode::pipeline() const
 1983 {
 1984   return MachNode::pipeline_class();
 1985 }
 1986 
 1987 //=============================================================================
 1988 
// Coarse register classes used when classifying spill-copy locations:
// general-purpose register, opmask (k) register, float/vector register,
// stack slot, or none (rc_bad).
 1989 enum RC {
 1990   rc_bad,
 1991   rc_int,
 1992   rc_kreg,
 1993   rc_float,
 1994   rc_stack
 1995 };
 1996 

 2558 #endif
 2559 
 2560 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2561 {
// Materializes the address of this node's on-stack lock box:
//   lea reg, [rsp + offset]
 2562   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2563   int reg = ra_->get_encode(this);
 2564 
 2565   __ lea(as_Register(reg), Address(rsp, offset));
 2566 }
 2567 
// Byte size of the lea emitted above. Must stay in sync with emit():
// encodings above 15 (extended GPRs) need the 2-byte REX2 prefix, others a
// 1-byte REX; a displacement >= 0x80 needs disp32 instead of disp8.
 2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2569 {
 2570   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2571   if (ra_->get_encode(this) > 15) {
 2572     return (offset < 0x80) ? 6 : 9; // REX2
 2573   } else {
 2574     return (offset < 0x80) ? 5 : 8; // REX
 2575   }
 2576 }
 2577 
 2578 //=============================================================================
 2579 #ifndef PRODUCT
// Debug-listing only; the actual code is produced by MachVEPNode::emit below.
 2580 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2581 {
 2582   st->print_cr("MachVEPNode");
 2583 }
 2584 #endif
 2585 
 2586 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2587 {
// Inline-type entry point: for the unverified case only the inline-cache
// check is emitted; for the verified case, scalarized inline-type arguments
// are unpacked and control jumps to the verified entry.
 2588   CodeBuffer* cbuf = masm->code();
 2589   uint insts_size = cbuf->insts_size();
 2590   if (!_verified) {
 2591     __ ic_check(1);
 2592   } else {
 2593     // TODO 8284443 Avoid creation of temporary frame
 2594     if (ra_->C->stub_function() == nullptr) {
 2595       __ verified_entry(ra_->C, 0);
 2596       __ entry_barrier();
 2597       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2598       __ remove_frame(initial_framesize, false);
 2599     }
 2600     // Unpack inline type args passed as oop and then jump to
 2601     // the verified entry point (skipping the unverified entry).
 2602     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2603     // Emit code for verified entry and save increment for stack repair on return
 2604     __ verified_entry(ra_->C, sp_inc);
     // During scratch (size-estimation) emission the real label is not bound
     // yet, so jump to a dummy of identical encoded size.
 2605     if (Compile::current()->output()->in_scratch_emit_size()) {
 2606       Label dummy_verified_entry;
 2607       __ jmp(dummy_verified_entry);
 2608     } else {
 2609       __ jmp(*_verified_entry);
 2610     }
 2611   }
 2612   /* WARNING these NOPs are critical so that verified entry point is properly
 2613      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 2614   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2615   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2616   if (nops_cnt > 0) {
 2617     __ nop(nops_cnt);
 2618   }
 2619 }
 2620 
 2621 //=============================================================================
 2622 #ifndef PRODUCT
 2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2624 {
 2625   if (UseCompressedClassPointers) {
 2626     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2627     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2628   } else {
 2629     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2630     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2631   }
 2632   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2633 }
 2634 #endif
 2635 
// Unverified entry point: emit the inline-cache check, aligned to
// InteriorEntryAlignment.
 2636 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2637 {
 2638   __ ic_check(InteriorEntryAlignment);
 2639 }
 2640 






 2641 
 2642 //=============================================================================
 2643 
// Vector calling convention is available whenever the Vector API runtime
// support (EnableVectorSupport) is on.
 2644 bool Matcher::supports_vector_calling_convention(void) {
 2645   return EnableVectorSupport;
 2646 }
 2647 
// True iff the matcher flagged operand 1 of this node as demotable from the
// EVEX new-data-destination (NDD) form (Flag_ndd_demotable_opr1 is set).
 2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2649   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2650 }
 2651 
// True iff the matcher flagged operand 2 of this node as demotable from the
// EVEX new-data-destination (NDD) form (Flag_ndd_demotable_opr2 is set).
 2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2653   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2654 }
 2655 
 2656 #ifdef ASSERT
// Debug-only helper: true if either operand of the node is NDD-demotable.
 2657 static bool is_ndd_demotable(const MachNode* mdef) {
 2658   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2659 }
 2660 #endif

 4594     }
 4595     __ post_call_nop();
 4596   %}
 4597 
   // Dynamic (inline-cache) Java call: dispatch through the IC to the target
   // method, then emit the post-call nop.
 4598   enc_class Java_Dynamic_Call(method meth) %{
 4599     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4600     __ post_call_nop();
 4601   %}
 4602 
   // Emitted after every call: optional stack-depth verification, then
   // fix-up of inline-type returns (null marker projection and rax cleanup).
 4603   enc_class call_epilog %{
 4604     if (VerifyStackAtCalls) {
 4605       // Check that stack depth is unchanged: find majik cookie on stack
 4606       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4607       Label L;
 4608       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4609       __ jccb(Assembler::equal, L);
 4610       // Die if stack mismatch
 4611       __ int3();
 4612       __ bind(L);
 4613     }
 4614     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4615       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4616       // Search for the corresponding projection, get the register and emit code that initializes it.
 4617       uint con = (tf()->range_cc()->cnt() - 1);
 4618       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4619         ProjNode* proj = fast_out(i)->as_Proj();
 4620         if (proj->_con == con) {
 4621           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4622           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4623           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4624           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4625           __ testq(rax, rax);
 4626           __ setb(Assembler::notZero, toReg);
 4627           __ movzbl(toReg, toReg);
          // When the projection lives on the stack, spill the marker there.
 4628           if (reg->is_stack()) {
 4629             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4630             __ movq(Address(rsp, st_off), toReg);
 4631           }
 4632           break;
 4633         }
 4634       }
 4635       if (return_value_is_used()) {
 4636         // An inline type is returned as fields in multiple registers.
 4637         // Rax either contains an oop if the inline type is buffered or a pointer
 4638         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4639         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4640         // rax &= (rax & 1) - 1
 4641         __ movptr(rscratch1, rax);
 4642         __ andptr(rscratch1, 0x1);
 4643         __ subptr(rscratch1, 0x1);
 4644         __ andptr(rax, rscratch1);
 4645       }
 4646     }
 4647   %}
 4648 
 4649 %}
 4650 
 4651 //----------FRAME--------------------------------------------------------------
 4652 // Definition of frame structure and management information.
 4653 //
 4654 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4655 //                             |   (to get allocators register number
 4656 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4657 //  r   CALLER     |        |
 4658 //  o     |        +--------+      pad to even-align allocators stack-slot
 4659 //  w     V        |  pad0  |        numbers; owned by CALLER
 4660 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4661 //  h     ^        |   in   |  5
 4662 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4663 //  |     |        |        |  3
 4664 //  |     |        +--------+
 4665 //  V     |        | old out|      Empty on Intel, window on Sparc
 4666 //        |    old |preserve|      Must be even aligned.

 5805   %}
 5806 %}
 5807 
 5808 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// The predicate requires the int index's type to be known non-negative
// (_lo >= 0), so matching through the ConvI2L is safe.
 5809 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5810 %{
 5811   constraint(ALLOC_IN_RC(ptr_reg));
 5812   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5813   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5814 
 5815   op_cost(10);
 5816   format %{"[$reg + $off + $idx << $scale]" %}
 5817   interface(MEMORY_INTER) %{
 5818     base($reg);
 5819     index($idx);
 5820     scale($scale);
 5821     disp($off);
 5822   %}
 5823 %}
 5824 
 5825 // Indirect Narrow Oop Operand
// Addresses a DecodeN'd narrow oop directly as [R12 + narrow << 3]; only
// valid when the compressed-oop shift is times_8 (see predicate). R12 holds
// the compressed-oop base (see the note on indCompressedOopOffset below).
 5826 operand indCompressedOop(rRegN reg) %{
 5827   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5828   constraint(ALLOC_IN_RC(ptr_reg));
 5829   match(DecodeN reg);
 5830 
 5831   op_cost(10);
 5832   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5833   interface(MEMORY_INTER) %{
 5834     base(0xc); // R12
 5835     index($reg);
 5836     scale(0x3);
 5837     disp(0x0);
 5838   %}
 5839 %}
 5840 
 5841 // Indirect Narrow Oop Plus Offset Operand
 5842 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5843 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5844 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5845   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5846   constraint(ALLOC_IN_RC(ptr_reg));
 5847   match(AddP (DecodeN reg) off);
 5848 
 5849   op_cost(10);
 5850   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5851   interface(MEMORY_INTER) %{
 5852     base(0xc); // R12
 5853     index($reg);
 5854     scale(0x3);
 5855     disp($off);
 5856   %}
 5857 %}
 5858 
 5859 // Indirect Memory Operand
 5860 operand indirectNarrow(rRegN reg)

 6330 %}
 6331 
 6332 // Replaces legVec during post-selection cleanup. See above.
// Z-width vector operand restricted to the legacy vector register class.
 6333 operand legVecZ() %{
 6334   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6335   match(VecZ);
 6336 
 6337   format %{ %}
 6338   interface(REG_INTER);
 6339 %}
 6340 
 6341 //----------OPERAND CLASSES----------------------------------------------------
 6342 // Operand Classes are groups of operands that are used as to simplify
 6343 // instruction definitions by not requiring the AD writer to specify separate
 6344 // instructions for every form of operand when the instruction accepts
 6345 // multiple operand types with the same basic encoding and format.  The classic
 6346 // case of this is memory operands.
 6347 
 // All addressing-mode operands (plain and narrow-oop variants) accepted by
 // instructions that declare a 'memory' operand.
 6348 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6349                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6350                indCompressedOop, indCompressedOopOffset,
 6351                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6352                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6353                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6354 
 6355 //----------PIPELINE-----------------------------------------------------------
 6356 // Rules which define the behavior of the target architectures pipeline.
 6357 pipeline %{
 6358 
 6359 //----------ATTRIBUTES---------------------------------------------------------
 6360 attributes %{
 6361   variable_size_instructions;        // Variable-size instructions
 6362   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6363   instruction_unit_size = 1;         // An instruction is 1 byte long
 6364   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6365   instruction_fetch_units = 1;       // of 16 bytes
 6366 %}
 6367 
 6368 //----------RESOURCES----------------------------------------------------------
 6369 // Resources are the functional units available to the machine
 6370 

 8971   format %{ "MEMBAR-storestore (empty encoding)" %}
 8972   ins_encode( );
 8973   ins_pipe(empty);
 8974 %}
 8975 
 8976 //----------Move Instructions--------------------------------------------------
 8977 
 8978 instruct castX2P(rRegP dst, rRegL src)
 8979 %{
 8980   match(Set dst (CastX2P src));
 8981 
   // Reinterpret a long as a pointer; the move is elided when the register
   // allocator assigned src and dst to the same register.
 8982   format %{ "movq    $dst, $src\t# long->ptr" %}
 8983   ins_encode %{
 8984     if ($dst$$reg != $src$$reg) {
 8985       __ movptr($dst$$Register, $src$$Register);
 8986     }
 8987   %}
 8988   ins_pipe(ialu_reg_reg); // XXX
 8989 %}
 8990 
 8991 instruct castI2N(rRegN dst, rRegI src)
 8992 %{
 8993   match(Set dst (CastI2N src));
 8994 
   // Debug format fixed to match the emitted 32-bit move (was "movq");
   // the move is elided when src and dst share a register.
 8995   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8996   ins_encode %{
 8997     if ($dst$$reg != $src$$reg) {
 8998       __ movl($dst$$Register, $src$$Register);
 8999     }
 9000   %}
 9001   ins_pipe(ialu_reg_reg); // XXX
 9002 %}
 9003 
 9004 instruct castN2X(rRegL dst, rRegN src)
 9005 %{
   // Matches CastP2X applied to a narrow (compressed) register: exposes the
   // raw narrow-oop bits as a long. Move elided if registers coincide.
 9006   match(Set dst (CastP2X src));
 9007 
 9008   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9009   ins_encode %{
 9010     if ($dst$$reg != $src$$reg) {
 9011       __ movptr($dst$$Register, $src$$Register);
 9012     }
 9013   %}
 9014   ins_pipe(ialu_reg_reg); // XXX
 9015 %}
 9016 
 9017 instruct castP2X(rRegL dst, rRegP src)
 9018 %{
   // Expose the raw pointer bits as a long; move elided when the register
   // allocator assigned src and dst to the same register.
 9019   match(Set dst (CastP2X src));
 9020 
 9021   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9022   ins_encode %{
 9023     if ($dst$$reg != $src$$reg) {
 9024       __ movptr($dst$$Register, $src$$Register);
 9025     }
 9026   %}
 9027   ins_pipe(ialu_reg_reg); // XXX
 9028 %}
 9029 
 9030 // Convert oop into int for vectors alignment masking
 9031 instruct convP2I(rRegI dst, rRegP src)
 9032 %{
 9033   match(Set dst (ConvL2I (CastP2X src)));
 9034 
 9035   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9036   ins_encode %{

15284   effect(DEF dst, USE src);
15285   ins_cost(100);
15286   format %{ "movd    $dst,$src\t# MoveI2F" %}
15287   ins_encode %{
15288     __ movdl($dst$$XMMRegister, $src$$Register);
15289   %}
15290   ins_pipe( pipe_slow );
15291 %}
15292 
15293 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  // Raw bit-pattern move of a 64-bit GPR into an XMM register (no numeric
  // conversion) via movdq.
15294   match(Set dst (MoveL2D src));
15295   effect(DEF dst, USE src);
15296   ins_cost(100);
15297   format %{ "movd    $dst,$src\t# MoveL2D" %}
15298   ins_encode %{
15299      __ movdq($dst$$XMMRegister, $src$$Register);
15300   %}
15301   ins_pipe( pipe_slow );
15302 %}
15303 
15304 
15305 // Fast clearing of an array
15306 // Small non-constant length ClearArray for non-AVX512 targets.
15307 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15308                   Universe dummy, rFlagsReg cr)
15309 %{
15310   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15311   match(Set dummy (ClearArray (Binary cnt base) val));
15312   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15313 
  // Short arrays take an inline 8-byte store loop; longer ones branch to a
  // bulk path (rep stosb / XMM loop / rep stosq, per flags). Actual code is
  // produced by MacroAssembler::clear_mem below.
15314   format %{ $$template
15315     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15316     $$emit$$"jg      LARGE\n\t"
15317     $$emit$$"dec     rcx\n\t"
15318     $$emit$$"js      DONE\t# Zero length\n\t"
15319     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15320     $$emit$$"dec     rcx\n\t"
15321     $$emit$$"jge     LOOP\n\t"
15322     $$emit$$"jmp     DONE\n\t"
15323     $$emit$$"# LARGE:\n\t"
15324     if (UseFastStosb) {
15325        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15326        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15327     } else if (UseXMMForObjInit) {
15328        $$emit$$"movdq   $tmp, $val\n\t"
15329        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15330        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15331        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15332        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15333        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15334        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15335        $$emit$$"add     0x40,rax\n\t"
15336        $$emit$$"# L_zero_64_bytes:\n\t"
15337        $$emit$$"sub     0x8,rcx\n\t"
15338        $$emit$$"jge     L_loop\n\t"
15339        $$emit$$"add     0x4,rcx\n\t"
15340        $$emit$$"jl      L_tail\n\t"
15341        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15342        $$emit$$"add     0x20,rax\n\t"
15343        $$emit$$"sub     0x4,rcx\n\t"
15344        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15345        $$emit$$"add     0x4,rcx\n\t"
15346        $$emit$$"jle     L_end\n\t"
15347        $$emit$$"dec     rcx\n\t"
15348        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15349        $$emit$$"vmovq   xmm0,(rax)\n\t"
15350        $$emit$$"add     0x8,rax\n\t"
15351        $$emit$$"dec     rcx\n\t"
15352        $$emit$$"jge     L_sloop\n\t"
15353        $$emit$$"# L_end:\n\t"
15354     } else {
15355        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15356     }
15357     $$emit$$"# DONE"
15358   %}
15359   ins_encode %{
15360     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15361                  $tmp$$XMMRegister, false, false);
15362   %}
15363   ins_pipe(pipe_slow);
15364 %}
15365 
// word_copy_only() variant of rep_stos: no byte-granular rep stosb path,
// only word-sized stores (XMM loop or rep stosq).
15366 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15367                             Universe dummy, rFlagsReg cr)
15368 %{
15369   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15370   match(Set dummy (ClearArray (Binary cnt base) val));
15371   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15372 
15373   format %{ $$template
15374     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15375     $$emit$$"jg      LARGE\n\t"
15376     $$emit$$"dec     rcx\n\t"
15377     $$emit$$"js      DONE\t# Zero length\n\t"
15378     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15379     $$emit$$"dec     rcx\n\t"
15380     $$emit$$"jge     LOOP\n\t"
15381     $$emit$$"jmp     DONE\n\t"
15382     $$emit$$"# LARGE:\n\t"
15383     if (UseXMMForObjInit) {
15384        $$emit$$"movdq   $tmp, $val\n\t"
15385        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15386        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15387        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15388        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15389        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15390        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15391        $$emit$$"add     0x40,rax\n\t"
15392        $$emit$$"# L_zero_64_bytes:\n\t"
15393        $$emit$$"sub     0x8,rcx\n\t"
15394        $$emit$$"jge     L_loop\n\t"
15395        $$emit$$"add     0x4,rcx\n\t"
15396        $$emit$$"jl      L_tail\n\t"
15397        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15398        $$emit$$"add     0x20,rax\n\t"
15399        $$emit$$"sub     0x4,rcx\n\t"
15400        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15401        $$emit$$"add     0x4,rcx\n\t"
15402        $$emit$$"jle     L_end\n\t"
15403        $$emit$$"dec     rcx\n\t"
15404        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15405        $$emit$$"vmovq   xmm0,(rax)\n\t"
15406        $$emit$$"add     0x8,rax\n\t"
15407        $$emit$$"dec     rcx\n\t"
15408        $$emit$$"jge     L_sloop\n\t"
15409        $$emit$$"# L_end:\n\t"
15410     } else {
15411        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15412     }
15413     $$emit$$"# DONE"
15414   %}
15415   ins_encode %{
15416     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15417                  $tmp$$XMMRegister, false, true);
15418   %}
15419   ins_pipe(pipe_slow);
15420 %}
15421 
15422 // Small non-constant length ClearArray for AVX512 targets.
15423 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15424                        Universe dummy, rFlagsReg cr)
15425 %{
15426   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15427   match(Set dummy (ClearArray (Binary cnt base) val));
15428   ins_cost(125);
15429   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15430 
15431   format %{ $$template
15432     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15433     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15434     $$emit$$"jg      LARGE\n\t"
15435     $$emit$$"dec     rcx\n\t"
15436     $$emit$$"js      DONE\t# Zero length\n\t"
15437     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15438     $$emit$$"dec     rcx\n\t"
15439     $$emit$$"jge     LOOP\n\t"
15440     $$emit$$"jmp     DONE\n\t"
15441     $$emit$$"# LARGE:\n\t"
15442     if (UseFastStosb) {
15443        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15444        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15445     } else if (UseXMMForObjInit) {
15446        $$emit$$"mov     rdi,rax\n\t"
15447        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15448        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15449        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15457        $$emit$$"jl      L_tail\n\t"
15458        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15459        $$emit$$"add     0x20,rax\n\t"
15460        $$emit$$"sub     0x4,rcx\n\t"
15461        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15462        $$emit$$"add     0x4,rcx\n\t"
15463        $$emit$$"jle     L_end\n\t"
15464        $$emit$$"dec     rcx\n\t"
15465        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15466        $$emit$$"vmovq   xmm0,(rax)\n\t"
15467        $$emit$$"add     0x8,rax\n\t"
15468        $$emit$$"dec     rcx\n\t"
15469        $$emit$$"jge     L_sloop\n\t"
15470        $$emit$$"# L_end:\n\t"
15471     } else {
15472        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15473     }
15474     $$emit$$"# DONE"
15475   %}
15476   ins_encode %{
15477     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15478                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15479   %}
15480   ins_pipe(pipe_slow);
15481 %}
15482 
15483 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15484                                  Universe dummy, rFlagsReg cr)

15485 %{
15486   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15487   match(Set dummy (ClearArray (Binary cnt base) val));
15488   ins_cost(125);
15489   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15490 
15491   format %{ $$template
15492     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15493     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15494     $$emit$$"jg      LARGE\n\t"
15495     $$emit$$"dec     rcx\n\t"
15496     $$emit$$"js      DONE\t# Zero length\n\t"
15497     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15498     $$emit$$"dec     rcx\n\t"
15499     $$emit$$"jge     LOOP\n\t"
15500     $$emit$$"jmp     DONE\n\t"
15501     $$emit$$"# LARGE:\n\t"
15502     if (UseFastStosb) {
15503        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15504        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15505     } else if (UseXMMForObjInit) {
15506        $$emit$$"mov     rdi,rax\n\t"
15507        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15508        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15509        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

15517        $$emit$$"jl      L_tail\n\t"
15518        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15519        $$emit$$"add     0x20,rax\n\t"
15520        $$emit$$"sub     0x4,rcx\n\t"
15521        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15522        $$emit$$"add     0x4,rcx\n\t"
15523        $$emit$$"jle     L_end\n\t"
15524        $$emit$$"dec     rcx\n\t"
15525        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15526        $$emit$$"vmovq   xmm0,(rax)\n\t"
15527        $$emit$$"add     0x8,rax\n\t"
15528        $$emit$$"dec     rcx\n\t"
15529        $$emit$$"jge     L_sloop\n\t"
15530        $$emit$$"# L_end:\n\t"
15531     } else {
15532        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15533     }
15534     $$emit$$"# DONE"
15535   %}
15536   ins_encode %{
15537     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15538                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15539   %}
15540   ins_pipe(pipe_slow);
15541 %}
15542 
15543 // Large non-constant length ClearArray for non-AVX512 targets.
// is_large() case: no inline short-length loop, goes straight to the bulk
// clear (rep stosb / XMM loop / rep stosq, per flags).
15544 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15545                         Universe dummy, rFlagsReg cr)
15546 %{
15547   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15548   match(Set dummy (ClearArray (Binary cnt base) val));
15549   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15550 
15551   format %{ $$template
15552     if (UseFastStosb) {
15553        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15554        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15555     } else if (UseXMMForObjInit) {
15556        $$emit$$"movdq   $tmp, $val\n\t"
15557        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15558        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15559        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15560        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15561        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15562        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15563        $$emit$$"add     0x40,rax\n\t"
15564        $$emit$$"# L_zero_64_bytes:\n\t"
15565        $$emit$$"sub     0x8,rcx\n\t"
15566        $$emit$$"jge     L_loop\n\t"
15567        $$emit$$"add     0x4,rcx\n\t"
15568        $$emit$$"jl      L_tail\n\t"
15569        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15570        $$emit$$"add     0x20,rax\n\t"
15571        $$emit$$"sub     0x4,rcx\n\t"
15572        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15573        $$emit$$"add     0x4,rcx\n\t"
15574        $$emit$$"jle     L_end\n\t"
15575        $$emit$$"dec     rcx\n\t"
15576        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15577        $$emit$$"vmovq   xmm0,(rax)\n\t"
15578        $$emit$$"add     0x8,rax\n\t"
15579        $$emit$$"dec     rcx\n\t"
15580        $$emit$$"jge     L_sloop\n\t"
15581        $$emit$$"# L_end:\n\t"
15582     } else {
15583        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15584     }
15585   %}
15586   ins_encode %{
15587     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15588                  $tmp$$XMMRegister, true, false);
15589   %}
15590   ins_pipe(pipe_slow);
15591 %}
15592 
// word_copy_only() variant of rep_stos_large: word-sized stores only
// (XMM loop or rep stosq), no rep stosb path.
15593 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15594                                   Universe dummy, rFlagsReg cr)
15595 %{
15596   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15597   match(Set dummy (ClearArray (Binary cnt base) val));
15598   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15599 
15600   format %{ $$template
15601     if (UseXMMForObjInit) {
15602        $$emit$$"movdq   $tmp, $val\n\t"
15603        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15604        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15605        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15606        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15607        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15608        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15609        $$emit$$"add     0x40,rax\n\t"
15610        $$emit$$"# L_zero_64_bytes:\n\t"
15611        $$emit$$"sub     0x8,rcx\n\t"
15612        $$emit$$"jge     L_loop\n\t"
15613        $$emit$$"add     0x4,rcx\n\t"
15614        $$emit$$"jl      L_tail\n\t"
15615        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15616        $$emit$$"add     0x20,rax\n\t"
15617        $$emit$$"sub     0x4,rcx\n\t"
15618        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15619        $$emit$$"add     0x4,rcx\n\t"
15620        $$emit$$"jle     L_end\n\t"
15621        $$emit$$"dec     rcx\n\t"
15622        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15623        $$emit$$"vmovq   xmm0,(rax)\n\t"
15624        $$emit$$"add     0x8,rax\n\t"
15625        $$emit$$"dec     rcx\n\t"
15626        $$emit$$"jge     L_sloop\n\t"
15627        $$emit$$"# L_end:\n\t"
15628     } else {
15629        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15630     }
15631   %}
15632   ins_encode %{
15633     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15634                  $tmp$$XMMRegister, true, true);
15635   %}
15636   ins_pipe(pipe_slow);
15637 %}
15638 
15639 // Large non-constant length ClearArray for AVX512 targets.
// AVX512 (UseAVX > 2) variant: additionally reserves an opmask register
// (ktmp) which clear_mem uses for the masked tail stores.
15640 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15641                              Universe dummy, rFlagsReg cr)
15642 %{
15643   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15644   match(Set dummy (ClearArray (Binary cnt base) val));
15645   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15646 
15647   format %{ $$template
15648     if (UseFastStosb) {
15649        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15650        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15651        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15652     } else if (UseXMMForObjInit) {
15653        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15654        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15655        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15656        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15657        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15658        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15659        $$emit$$"add     0x40,rax\n\t"
15660        $$emit$$"# L_zero_64_bytes:\n\t"
15661        $$emit$$"sub     0x8,rcx\n\t"
15662        $$emit$$"jge     L_loop\n\t"
15663        $$emit$$"add     0x4,rcx\n\t"
15664        $$emit$$"jl      L_tail\n\t"
15665        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15666        $$emit$$"add     0x20,rax\n\t"
15667        $$emit$$"sub     0x4,rcx\n\t"
15668        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15669        $$emit$$"add     0x4,rcx\n\t"
15670        $$emit$$"jle     L_end\n\t"
15671        $$emit$$"dec     rcx\n\t"
15672        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15673        $$emit$$"vmovq   xmm0,(rax)\n\t"
15674        $$emit$$"add     0x8,rax\n\t"
15675        $$emit$$"dec     rcx\n\t"
15676        $$emit$$"jge     L_sloop\n\t"
15677        $$emit$$"# L_end:\n\t"
15678     } else {
15679        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15680        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15681     }
15682   %}
15683   ins_encode %{
15684     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15685                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15686   %}
15687   ins_pipe(pipe_slow);
15688 %}
15689 
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)

%{
  // Same as rep_stos_large_evex but matched when the ClearArray node demands
  // word-sized (not byte-granular) stores: word_copy_only() is true.
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  // cnt (rcx), base (rdi) and val (rax) are consumed/clobbered by the fill
  // code; tmp and ktmp are scratch vector/opmask registers; flags are killed.
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Debug-output template only: shows the three code shapes clear_mem may
  // emit depending on UseFastStosb / UseXMMForObjInit.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing bools: presumably is_large=true, word_copy_only=true — this
    // mirrors the predicate (the non-word-copy sibling passes (true, false));
    // confirm against MacroAssembler::clear_mem.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15739 
15740 // Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Matched for small (!is_large), byte-granular clears when 32-byte vectors
  // and AVX-512VL are both available.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  // Low cost so the matcher prefers this form whenever the predicate holds.
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
    // cnt is a compile-time immediate here ($cnt$$constant), unlike the
    // register-count variants above.
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15754 
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  // Latin-1 (LL encoding) byte[] compare, non-AVX512 path: matched only when
  // avx512vlbw is absent (presumably an AVX-512 twin covers the other case —
  // verify against the neighboring string_compare rules).
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // All four fixed input registers are clobbered; tmp1 is XMM scratch.
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register is needed on this non-AVX512 path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15770 

17608   effect(USE meth);
17609 
17610   ins_cost(300);
17611   format %{ "call_leaf,runtime " %}
17612   ins_encode(clear_avx, Java_To_Runtime(meth));
17613   ins_pipe(pipe_slow);
17614 %}
17615 
17616 // Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Deliberately no clear_avx before the call (unlike CallLeafNoFPDirect):
  // vector arguments must stay live across the call setup.
  // MachCallRuntimeNode::ret_addr_offset() likewise skips clear_avx_size()
  // for Op_CallLeafVector.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17627 
17628 // Call runtime without safepoint
17629 // entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Indirect form: used when the call node carries no static entry point;
  // the target address arrives in a register instead.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
     __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17643 
17644 // Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  // Direct form: the runtime entry point is known at compile time
  // (complements CallLeafNoFPInDirect, whose predicate is == nullptr).
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // clear_avx emits a vzeroupper when needed before calling out to the
  // runtime (see clear_avx_size() uses in ret_addr_offset()/compute_padding).
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17656 
17657 // Return Instruction
17658 // Remove the return address & jump to it.
17659 // Notice: We always emit a nop after a ret to make sure there is room
17660 // for safepoint patching
17661 instruct Ret()
17662 %{
17663   match(Return);
17664 
17665   format %{ "ret" %}
17666   ins_encode %{
17667     __ ret(0);
< prev index next >