1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
// Emit the method prologue: optional class-initialization barrier, frame
// setup (with stack banging when required), and constant-table base fixup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  // Fast class-initialization check: if the holder class is not yet fully
  // initialized for the current thread, divert to the wrong-method stub.
  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Build the frame; bang the stack only when the computed bang size demands it.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  // From this offset on the frame is completely laid out.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1912
// Prologue size depends on too many inputs (barriers, bang, vzeroupper) to
// predict; fall back to the generic emit-and-measure computation.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1918
// Upper bound on relocation entries the prologue may need.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1923
1924 //=============================================================================
#ifndef PRODUCT
// Debug-only pseudo-assembly listing of the epilogue; mirrors emit() below.
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  st->print_cr("popq rbp");
  if (do_polling() && C->is_method_compilation()) {
    st->print("\t");
    st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
                 "ja #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
#endif
1954
// Emit the method epilogue: optional vzeroupper, frame teardown, reserved
// stack check, and the return-safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-polling safepoint: compare rsp against the thread's poll word
    // and branch to an out-of-line stub when a safepoint is pending.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Only register a real stub when actually emitting (not when sizing).
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1995
// Epilogue size varies with vzeroupper/poll/reserved-stack options; defer
// to the generic emit-and-measure computation.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2001
// Upper bound on relocation entries the epilogue may need (e.g. the
// return-poll relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
2006
// No epilogue-specific scheduling information; use the generic class pipeline.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2011
2012 //=============================================================================
2013
// Coarse register classes used when deciding how to emit spill copies.
enum RC {
  rc_bad,    // not a register / unassigned
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
2021
2583 #endif
2584
// Materialize the address of the on-stack lock box: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
// Emit the inline-cache check for the unverified entry point; the macro
// assembler handles the compressed/uncompressed variants and alignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2622
// Size depends on compressed-class mode and alignment padding; measure it.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2628
2629
2630 //=============================================================================
2631
// Whether C2 may pass vectors in registers across calls; gated solely on
// the EnableVectorSupport flag.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2635
2636 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2637 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2638 }
2639
2640 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2641 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2642 }
2643
#ifdef ASSERT
// Assert-only helper: demotable through either operand.
static bool is_ndd_demotable(const MachNode* mdef) {
  return is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef);
}
#endif
4584 }
4585 __ post_call_nop();
4586 %}
4587
  // Dynamic (inline-cache) Java call followed by the trailing post-call nop.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4592
  // After-call check used with -XX:+VerifyStackAtCalls: verify the magic
  // cookie planted below the frame is intact, i.e. the callee restored rsp.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
4605
4606 %}
4607
4608 //----------FRAME--------------------------------------------------------------
4609 // Definition of frame structure and management information.
4610 //
4611 // S T A C K L A Y O U T Allocators stack-slot number
4612 // | (to get allocators register number
4613 // G Owned by | | v add OptoReg::stack0())
4614 // r CALLER | |
4615 // o | +--------+ pad to even-align allocators stack-slot
4616 // w V | pad0 | numbers; owned by CALLER
4617 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4618 // h ^ | in | 5
4619 // | | args | 4 Holes in incoming args owned by SELF
4620 // | | | | 3
4621 // | | +--------+
4622 // V | | old out| Empty on Intel, window on Sparc
4623 // | old |preserve| Must be even aligned.
5746 %}
5747 %}
5748
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the index is provably non-negative, so the implicit
  // zero-extension of the 32-bit index is safe.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5765
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Requires shift-by-3 decoding so the narrow oop can serve as the index
  // with R12 (heap base register) as the base.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5783
5784 // Indirect Memory Operand
5785 operand indirectNarrow(rRegN reg)
6222 %}
6223
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector restricted to the legacy (non-extended) register set.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6232
6233 //----------OPERAND CLASSES----------------------------------------------------
6234 // Operand Classes are groups of operands that are used as to simplify
6235 // instruction definitions by not requiring the AD writer to specify separate
6236 // instructions for every form of operand when the instruction accepts
6237 // multiple operand types with the same basic encoding and format. The classic
6238 // case of this is memory operands.
6239
// All memory-addressing operand forms, plain and narrow-oop variants alike.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6246
6247 //----------PIPELINE-----------------------------------------------------------
6248 // Rules which define the behavior of the target architectures pipeline.
6249 pipeline %{
6250
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are of variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6259
6260 //----------RESOURCES----------------------------------------------------------
6261 // Resources are the functional units available to the machine
6262
8820 format %{ "MEMBAR-storestore (empty encoding)" %}
8821 ins_encode( );
8822 ins_pipe(empty);
8823 %}
8824
8825 //----------Move Instructions--------------------------------------------------
8826
8827 instruct castX2P(rRegP dst, rRegL src)
8828 %{
8829 match(Set dst (CastX2P src));
8830
8831 format %{ "movq $dst, $src\t# long->ptr" %}
8832 ins_encode %{
8833 if ($dst$$reg != $src$$reg) {
8834 __ movptr($dst$$Register, $src$$Register);
8835 }
8836 %}
8837 ins_pipe(ialu_reg_reg); // XXX
8838 %}
8839
// pointer -> long reinterpretation; mirror of castX2P above.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8852
8853 // Convert oop into int for vectors alignment masking
8854 instruct convP2I(rRegI dst, rRegP src)
8855 %{
8856 match(Set dst (ConvL2I (CastP2X src)));
8857
8858 format %{ "movl $dst, $src\t# ptr -> int" %}
8859 ins_encode %{
15066 effect(DEF dst, USE src);
15067 ins_cost(100);
15068 format %{ "movd $dst,$src\t# MoveI2F" %}
15069 ins_encode %{
15070 __ movdl($dst$$XMMRegister, $src$$Register);
15071 %}
15072 ins_pipe( pipe_slow );
15073 %}
15074
// Bit-preserving move of a long GPR into an XMM register (MoveL2D).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15085
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // Pseudo-assembly listing only; the real sequence is selected inside
  // MacroAssembler::clear_mem based on the same flags.
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, no opmask register on the non-AVX512 path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15146
// Small non-constant length ClearArray for AVX512 targets.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // Pseudo-assembly listing only; the real sequence is selected inside
  // MacroAssembler::clear_mem based on the same flags.
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false; pass the temporary opmask register for the
    // AVX512 masked-tail path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15207
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // Pseudo-assembly listing only; the real sequence is selected inside
  // MacroAssembler::clear_mem based on the same flags.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, no opmask register on the non-AVX512 path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15258
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // Pseudo-assembly listing only; the real sequence is selected inside
  // MacroAssembler::clear_mem based on the same flags.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true; pass the temporary opmask register for the
    // AVX512 masked-tail path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15309
// Small constant length ClearArray for AVX512 targets.
// The length is a compile-time constant, so a fully unrolled masked-store
// sequence is selected by the constant-count overload of clear_mem.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15323
// Latin-1 vs. Latin-1 (byte[]) string comparison; non-AVX512vlbw path,
// hence the knoreg (no opmask) argument to the intrinsic.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15339
17146 effect(USE meth);
17147
17148 ins_cost(300);
17149 format %{ "call_leaf,runtime " %}
17150 ins_encode(clear_avx, Java_To_Runtime(meth));
17151 ins_pipe(pipe_slow);
17152 %}
17153
// Call runtime without safepoint and with vector arguments
// Note: no clear_avx here — vector arguments must survive in the upper
// YMM/ZMM lanes, so the vzeroupper that other leaf calls emit is omitted
// (matching MachCallRuntimeNode::ret_addr_offset for Op_CallLeafVector).
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17165
// Call runtime without safepoint
// Leaf call that does not preserve/record a Java frame pointer state.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17177
17178 // Return Instruction
17179 // Remove the return address & jump to it.
17180 // Notice: We always emit a nop after a ret to make sure there is room
17181 // for safepoint patching
17182 instruct Ret()
17183 %{
17184 match(Return);
17185
17186 format %{ "ret" %}
17187 ins_encode %{
17188 __ ret(0);
|
1632 }
1633
// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset()
{
  int offset = 5; // 5 bytes from start of call to where return address points
  offset += clear_avx_size(); // optional vzeroupper emitted before the call
  return offset;
}
1643
// Inline-cache call: cache-value move plus the call itself.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  int offset = 15; // 15 bytes from start of call to where return address points
  offset += clear_avx_size(); // optional vzeroupper emitted before the call
  return offset;
}
1650
// Runtime calls normally load the target into r10 and call through it;
// a null entry point signals a register-indirect leaf call instead.
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // Vector leaf calls omit the vzeroupper so vector args stay intact.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  // Pad so the 4-byte displacement that follows is naturally aligned.
  return align_up(current_offset, alignment_required()) - current_offset;
}
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1868 st->print("\n\t");
1869 st->print("# stack alignment check");
1870 #endif
1871 }
1872 if (C->stub_function() != nullptr) {
1873 st->print("\n\t");
1874 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1875 st->print("\n\t");
1876 st->print("je fast_entry\t");
1877 st->print("\n\t");
1878 st->print("call #nmethod_entry_barrier_stub\t");
1879 st->print("\n\tfast_entry:");
1880 }
1881 st->cr();
1882 }
1883 #endif
1884
// Emit the method prologue: frame setup, nmethod entry barrier, binding of
// the verified-entry label, and constant-table base fixup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Stub frames do not need the nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // Bind the label that the value-type entry point (MachVEPNode) jumps to;
  // skipped while sizing into a scratch buffer where the label is unused.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  // From this offset on the frame is completely laid out.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1907
1908
// Upper bound on relocation entries the prologue may need.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1913
1914 //=============================================================================
#ifndef PRODUCT
// Debug-only pseudo-assembly listing of the epilogue; mirrors emit() below.
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  st->print_cr("popq rbp");
  if (do_polling() && C->is_method_compilation()) {
    st->print("\t");
    st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
                 "ja #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
#endif
1944
// Emit the method epilogue: optional vzeroupper, frame removal (with stack
// repair when needed), reserved stack check, and the return-safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-polling safepoint; a real out-of-line stub is only registered
    // when actually emitting (not while sizing into a scratch buffer).
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1975
// Upper bound on relocation entries the epilogue may need.
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1980
// No epilogue-specific scheduling information; use the generic class pipeline.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1985
1986 //=============================================================================
1987
// Coarse register classes used when deciding how to emit spill copies.
enum RC {
  rc_bad,    // not a register / unassigned
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
1995
2557 #endif
2558
// Materialize the address of the on-stack lock box: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2566
// Predicted size of the lea emitted above: 8-bit vs. 32-bit displacement
// form, with a 2-byte REX2 prefix for extended registers (encoding > 15).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
2576
2577 //=============================================================================
#ifndef PRODUCT
// Debug-only placeholder listing for the value-type entry point.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
#endif
2584
// Emit the (un)verified inline-type entry point: either the inline-cache
// check (unverified) or the arg-unpacking path that scalarizes inline-type
// arguments and jumps to the verified entry bound in MachPrologNode::emit.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size(); // remember start for alignment padding below
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Temporary frame so entry_barrier() can run; torn down right after.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // While sizing, the real label doesn't exist yet; jump to a dummy.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2619
2620 //=============================================================================
2621 #ifndef PRODUCT
2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2623 {
2624 if (UseCompressedClassPointers) {
2625 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2626 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2627 } else {
2628 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2629 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2630 }
2631 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2632 }
2633 #endif
2634
// Unverified entry point: emit the inline-cache check;
// InteriorEntryAlignment is passed through to ic_check().
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2639
2640
2641 //=============================================================================
2642
// The vector calling convention is available exactly when the
// EnableVectorSupport flag is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2646
2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2648 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2649 }
2650
2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2652 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2653 }
2654
2655 #ifdef ASSERT
2656 static bool is_ndd_demotable(const MachNode* mdef) {
2657 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2658 }
2659 #endif
4595 }
4596 __ post_call_nop();
4597 %}
4598
  // Dynamic (inline-cache) Java call: ic_call passes the resolved-method
  // index for the IC; the trailing nop marks the call site (also emitted
  // after static calls, see enc_class above).
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4603
  // Code emitted immediately after a Java call returns: optional stack
  // depth check, then inline-type return-value fix-ups.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            // Marker lives in a stack slot: spill it from the scratch register.
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4649
4650 %}
4651
4652 //----------FRAME--------------------------------------------------------------
4653 // Definition of frame structure and management information.
4654 //
4655 // S T A C K L A Y O U T Allocators stack-slot number
4656 // | (to get allocators register number
4657 // G Owned by | | v add OptoReg::stack0())
4658 // r CALLER | |
4659 // o | +--------+ pad to even-align allocators stack-slot
4660 // w V | pad0 | numbers; owned by CALLER
4661 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4662 // h ^ | in | 5
4663 // | | args | 4 Holes in incoming args owned by SELF
4664 // | | | | 3
4665 // | | +--------+
4666 // V | | old out| Empty on Intel, window on Sparc
4667 // | old |preserve| Must be even aligned.
5790 %}
5791 %}
5792
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Require a provably non-negative int index (type lower bound >= 0) --
  // presumably so the ConvI2L widening cannot yield a negative scaled
  // index; confirm against the matching rules for this ideal shape.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5809
// Indirect Narrow Oop Operand
// Decodes a narrow oop directly in the addressing mode:
// address = R12 (heap base) + (narrow_oop << 3). Only valid when oops
// are compressed with shift == times_8.
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5825
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset"
// without a base register, so we can't free R12 even when
// CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5843
5844 // Indirect Memory Operand
5845 operand indirectNarrow(rRegN reg)
6282 %}
6283
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector constrained to the legacy vector register set.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6292
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.

// Every memory-addressing form accepted by instructions declared with the
// "memory" operand class, including the narrow-oop variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6306
6307 //----------PIPELINE-----------------------------------------------------------
6308 // Rules which define the behavior of the target architectures pipeline.
6309 pipeline %{
6310
  //----------ATTRIBUTES---------------------------------------------------------
  attributes %{
    variable_size_instructions;        // Instructions are of variable size (x86)
    max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
    instruction_unit_size = 1;         // An instruction is 1 bytes long
    instruction_fetch_unit_size = 16;  // The processor fetches one line
    instruction_fetch_units = 1;       // of 16 bytes
  %}
6319
6320 //----------RESOURCES----------------------------------------------------------
6321 // Resources are the functional units available to the machine
6322
8880 format %{ "MEMBAR-storestore (empty encoding)" %}
8881 ins_encode( );
8882 ins_pipe(empty);
8883 %}
8884
8885 //----------Move Instructions--------------------------------------------------
8886
// Cast a long's raw bits to a pointer.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator placed src and dst in one register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8899
// Cast an int's raw bits to a narrow pointer (32-bit register move).
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // Fix: the encoding emits a 32-bit movl, not movq; make the printed
  // mnemonic match the generated instruction.
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    // Elide the move when the allocator placed src and dst in one register.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8912
// Cast a narrow (compressed) pointer's raw bits to a long. Note: the
// ideal opcode is CastP2X; the rRegN source operand restricts this rule
// to narrow inputs, distinguishing it from castP2X below.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator placed src and dst in one register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8925
// Cast a pointer's raw bits to a long.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator placed src and dst in one register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8938
8939 // Convert oop into int for vectors alignment masking
8940 instruct convP2I(rRegI dst, rRegP src)
8941 %{
8942 match(Set dst (ConvL2I (CastP2X src)));
8943
8944 format %{ "movl $dst, $src\t# ptr -> int" %}
8945 ins_encode %{
15152 effect(DEF dst, USE src);
15153 ins_cost(100);
15154 format %{ "movd $dst,$src\t# MoveI2F" %}
15155 ins_encode %{
15156 __ movdl($dst$$XMMRegister, $src$$Register);
15157 %}
15158 ins_pipe( pipe_slow );
15159 %}
15160
// Reinterpret a long's bits as a double: raw GPR -> XMM move (movdq),
// no numeric conversion.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15171
15172
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Fills $cnt words at $base with the value in rax ($val); the actual code
// is generated by MacroAssembler::clear_mem().
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, count, fill value, xmm tmp, is_large=false, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15233
// As rep_stos, but for ClearArray nodes that must be written a word at a
// time (word_copy_only): the byte-granular UseFastStosb path is omitted
// and clear_mem() is invoked with word_copy_only = true.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, count, fill value, xmm tmp, is_large=false, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15289
15290 // Small non-constant length ClearArray for AVX512 targets.
15291 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15292 Universe dummy, rFlagsReg cr)
15293 %{
15294 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15295 match(Set dummy (ClearArray (Binary cnt base) val));
15296 ins_cost(125);
15297 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15298
15299 format %{ $$template
15300 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15301 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15302 $$emit$$"jg LARGE\n\t"
15303 $$emit$$"dec rcx\n\t"
15304 $$emit$$"js DONE\t# Zero length\n\t"
15305 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15306 $$emit$$"dec rcx\n\t"
15307 $$emit$$"jge LOOP\n\t"
15308 $$emit$$"jmp DONE\n\t"
15309 $$emit$$"# LARGE:\n\t"
15310 if (UseFastStosb) {
15311 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15312 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15313 } else if (UseXMMForObjInit) {
15314 $$emit$$"mov rdi,rax\n\t"
15315 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15316 $$emit$$"jmpq L_zero_64_bytes\n\t"
15317 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15325 $$emit$$"jl L_tail\n\t"
15326 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15327 $$emit$$"add 0x20,rax\n\t"
15328 $$emit$$"sub 0x4,rcx\n\t"
15329 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15330 $$emit$$"add 0x4,rcx\n\t"
15331 $$emit$$"jle L_end\n\t"
15332 $$emit$$"dec rcx\n\t"
15333 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15334 $$emit$$"vmovq xmm0,(rax)\n\t"
15335 $$emit$$"add 0x8,rax\n\t"
15336 $$emit$$"dec rcx\n\t"
15337 $$emit$$"jge L_sloop\n\t"
15338 $$emit$$"# L_end:\n\t"
15339 } else {
15340 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15341 }
15342 $$emit$$"# DONE"
15343 %}
15344 ins_encode %{
15345 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15346 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15347 %}
15348 ins_pipe(pipe_slow);
15349 %}
15350
15351 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15352 Universe dummy, rFlagsReg cr)
15353 %{
15354 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15355 match(Set dummy (ClearArray (Binary cnt base) val));
15356 ins_cost(125);
15357 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15358
15359 format %{ $$template
15360 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15361 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15362 $$emit$$"jg LARGE\n\t"
15363 $$emit$$"dec rcx\n\t"
15364 $$emit$$"js DONE\t# Zero length\n\t"
15365 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15366 $$emit$$"dec rcx\n\t"
15367 $$emit$$"jge LOOP\n\t"
15368 $$emit$$"jmp DONE\n\t"
15369 $$emit$$"# LARGE:\n\t"
15370 if (UseFastStosb) {
15371 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15372 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15373 } else if (UseXMMForObjInit) {
15374 $$emit$$"mov rdi,rax\n\t"
15375 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15376 $$emit$$"jmpq L_zero_64_bytes\n\t"
15377 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15385 $$emit$$"jl L_tail\n\t"
15386 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15387 $$emit$$"add 0x20,rax\n\t"
15388 $$emit$$"sub 0x4,rcx\n\t"
15389 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15390 $$emit$$"add 0x4,rcx\n\t"
15391 $$emit$$"jle L_end\n\t"
15392 $$emit$$"dec rcx\n\t"
15393 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15394 $$emit$$"vmovq xmm0,(rax)\n\t"
15395 $$emit$$"add 0x8,rax\n\t"
15396 $$emit$$"dec rcx\n\t"
15397 $$emit$$"jge L_sloop\n\t"
15398 $$emit$$"# L_end:\n\t"
15399 } else {
15400 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15401 }
15402 $$emit$$"# DONE"
15403 %}
15404 ins_encode %{
15405 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15406 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15407 %}
15408 ins_pipe(pipe_slow);
15409 %}
15410
// Large non-constant length ClearArray for non-AVX512 targets.
// is_large(): the short-length pre-check is skipped; clear_mem() is
// called with is_large = true.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, count, fill value, xmm tmp, is_large=true, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15460
// As rep_stos_large, but word-at-a-time only (word_copy_only): the
// byte-granular UseFastStosb path is omitted.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, count, fill value, xmm tmp, is_large=true, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15506
// Large non-constant length ClearArray for AVX512 targets.
// Same as rep_stos_large, plus an opmask temp ($ktmp) passed through to
// the AVX-512 variant of clear_mem().
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, count, fill value, xmm tmp, is_large=true, word_copy_only=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15557
// As rep_stos_large_evex, but clear_mem() is told word_copy_only = true,
// matching the node's word_copy_only() predicate.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, count, fill value, xmm tmp, is_large=true, word_copy_only=true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15607
// Small constant length ClearArray for AVX512 targets.
// The element count is an immediate ($cnt$$constant), so clear_mem() can
// emit a fixed sequence; requires 32-byte vectors and AVX512VL.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15622
// Latin-1 (byte[]) vs Latin-1 string compare; result in rax.
// This rule passes knoreg (no opmask) and its predicate excludes
// avx512vlbw machines -- those are presumably covered by a separate
// opmask-using rule not visible here.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15638
17445 effect(USE meth);
17446
17447 ins_cost(300);
17448 format %{ "call_leaf,runtime " %}
17449 ins_encode(clear_avx, Java_To_Runtime(meth));
17450 ins_pipe(pipe_slow);
17451 %}
17452
// Call runtime without safepoint and with vector arguments
// Note: deliberately no clear_avx here -- vector arguments must survive
// in full; MachCallRuntimeNode::ret_addr_offset likewise skips
// clear_avx_size() for Op_CallLeafVector.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17464
// Call runtime without safepoint
// entry point is null, target holds the address to call
// (indirect form: selected only when the ideal call node carries no
// static entry point).
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17480
// Call runtime without safepoint
// Direct form: requires a known entry point; clears the AVX upper state
// (clear_avx) before the runtime call, unlike the vector variant above.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17493
17494 // Return Instruction
17495 // Remove the return address & jump to it.
17496 // Notice: We always emit a nop after a ret to make sure there is room
17497 // for safepoint patching
17498 instruct Ret()
17499 %{
17500 match(Return);
17501
17502 format %{ "ret" %}
17503 ins_encode %{
17504 __ ret(0);
|