1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
// Emit the method prologue: an optional class-initialization barrier,
// the verified entry (stack bang + frame setup), then record the
// frame-complete offset and fix up the constant-table base.
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882 
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885 
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1889 
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892 
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895 
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897 
1898 __ bind(L_skip_barrier);
1899 }
1900 
1901 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1902 
1903 C->output()->set_frame_complete(__ offset());
1904 
1905 if (C->has_mach_constant_base_node()) {
1906 // NOTE: We set the table base offset here because users might be
1907 // emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
// Size of the emitted prologue; delegated to the generic emit-and-measure
// path because the prologue length depends on too many runtime flags.
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
// Upper bound on the number of relocations the prologue can produce.
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1936 // Remove word for return adr already pushed
1937 // and RBP
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
// Emit the method epilogue: optional vzeroupper, frame teardown
// (addq rsp / popq rbp), reserved-stack check, and the return-polling
// safepoint test. Statement order here mirrors the format() output above.
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958 
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964 
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1967 // Remove word for return adr already pushed
1968 // and RBP
1969 framesize -= 2*wordSize;
1970 
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972 
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976 
1977 __ popq(rbp);
1978 
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982 
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub); // real stub only when actually emitting, not when sizing
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
// Size of the emitted epilogue; measured generically since it varies
// with vzeroupper, polling and reserved-stack configuration.
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
// Upper bound on relocations in the epilogue (the return poll relocates).
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
// Use the generic pipeline description for the epilogue node.
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
// Coarse register classes used by the spill-copy logic below.
2014 enum RC {
2015 rc_bad, // not a register
2016 rc_int, // general purpose register
2017 rc_kreg, // opmask (k) register
2018 rc_float, // xmm/vector register
2019 rc_stack // stack slot
2020 };
2021
2583 #endif
2584
// Materialize the address of this node's stack slot into its assigned
// register: lea reg, [rsp + offset].
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589 
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
// Debug listing of the unverified entry point: load the receiver klass,
// compare against the inline-cache's speculated klass, miss to the stub.
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
// Emit the inline-cache check; the verified entry that follows is
// aligned to InteriorEntryAlignment.
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
// Size of the unverified entry point; measured generically.
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
// Vector calling convention is available whenever the Vector API
// support flag is on.
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
// True if this node carries the platform flag marking it demotable from
// an NDD (new data destination) form.
2636 static bool is_ndd_demotable(const MachNode* mdef) {
2637 return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
2638 }
2639
// True if the node is NDD-demotable after commuting its operands.
2640 static bool is_ndd_demotable_commutative(const MachNode* mdef) {
2641 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
2642 }
2643
// A node is a demotion candidate if either NDD-demotable flag is set.
2644 static bool is_demotion_candidate(const MachNode* mdef) {
2645 return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
2646 }
2647
2648 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
4583 }
4584 __ post_call_nop();
4585 %}
4586
// Encoding for a dynamic (inline-cache) Java call, followed by the nop
// required after calls for post-call processing.
4587 enc_class Java_Dynamic_Call(method meth) %{
4588 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4589 __ post_call_nop();
4590 %}
4591
// Post-call check: under VerifyStackAtCalls, confirm the 0xbadb100d
// cookie is still at its expected slot (stack depth unchanged across
// the call); trap with int3 on mismatch.
4592 enc_class call_epilog %{
4593 if (VerifyStackAtCalls) {
4594 // Check that stack depth is unchanged: find majik cookie on stack
4595 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4596 Label L;
4597 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4598 __ jccb(Assembler::equal, L);
4599 // Die if stack mismatch
4600 __ int3();
4601 __ bind(L);
4602 }
4603 %}
4604
4605 %}
4606
4607 //----------FRAME--------------------------------------------------------------
4608 // Definition of frame structure and management information.
4609 //
4610 // S T A C K L A Y O U T Allocators stack-slot number
4611 // | (to get allocators register number
4612 // G Owned by | | v add OptoReg::stack0())
4613 // r CALLER | |
4614 // o | +--------+ pad to even-align allocators stack-slot
4615 // w V | pad0 | numbers; owned by CALLER
4616 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4617 // h ^ | in | 5
4618 // | | args | 4 Holes in incoming args owned by SELF
4619 // | | | | 3
4620 // | | +--------+
4621 // V | | old out| Empty on Intel, window on Sparc
4622 // | old |preserve| Must be even aligned.
5745 %}
5746 %}
5747
5748 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Memory operand [reg + off + idx << scale] where the scaled index is
// known non-negative (predicate checks the index's type range), so the
// int->long conversion can be folded into the addressing mode.
5749 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5750 %{
5751 constraint(ALLOC_IN_RC(ptr_reg));
5752 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5753 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5754 
5755 op_cost(10);
5756 format %{"[$reg + $off + $idx << $scale]" %}
5757 interface(MEMORY_INTER) %{
5758 base($reg);
5759 index($idx);
5760 scale($scale);
5761 disp($off);
5762 %}
5763 %}
5764
5765 // Indirect Narrow Oop Plus Offset Operand
5766 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5767 // we can't free r12 even with CompressedOops::base() == nullptr.
// Address a compressed-oop field directly as [R12 + narrow_oop << 3 + off],
// folding the DecodeN into the addressing mode (only valid when the
// heap-base register R12 and shift 3 are in effect).
5768 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5769 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5770 constraint(ALLOC_IN_RC(ptr_reg));
5771 match(AddP (DecodeN reg) off);
5772 
5773 op_cost(10);
5774 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5775 interface(MEMORY_INTER) %{
5776 base(0xc); // R12
5777 index($reg);
5778 scale(0x3);
5779 disp($off);
5780 %}
5781 %}
5782
5783 // Indirect Memory Operand
5784 operand indirectNarrow(rRegN reg)
6221 %}
6222
6223 // Replaces legVec during post-selection cleanup. See above.
// 512-bit vector operand restricted to the legacy (non-extended)
// register file; substituted for legVec in post-selection cleanup.
6224 operand legVecZ() %{
6225 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6226 match(VecZ);
6227 
6228 format %{ %}
6229 interface(REG_INTER);
6230 %}
6231
6232 //----------OPERAND CLASSES----------------------------------------------------
6233 // Operand Classes are groups of operands that are used as to simplify
6234 // instruction definitions by not requiring the AD writer to specify separate
6235 // instructions for every form of operand when the instruction accepts
6236 // multiple operand types with the same basic encoding and format. The classic
6237 // case of this is memory operands.
6238
// All addressing-mode operands an instruction's "memory" operand may
// take, covering both normal and narrow-oop (compressed) base forms.
6239 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6240 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6241 indCompressedOopOffset,
6242 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6243 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6244 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6245
6246 //----------PIPELINE-----------------------------------------------------------
6247 // Rules which define the behavior of the target architectures pipeline.
6248 pipeline %{
6249
6250 //----------ATTRIBUTES---------------------------------------------------------
6251 attributes %{
6252 variable_size_instructions; // Variable size instructions
6253 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6254 instruction_unit_size = 1; // An instruction is 1 bytes long
6255 instruction_fetch_unit_size = 16; // The processor fetches one line
6256 instruction_fetch_units = 1; // of 16 bytes
6257 %}
6258
6259 //----------RESOURCES----------------------------------------------------------
6260 // Resources are the functional units available to the machine
6261
8819 format %{ "MEMBAR-storestore (empty encoding)" %}
8820 ins_encode( );
8821 ins_pipe(empty);
8822 %}
8823
8824 //----------Move Instructions--------------------------------------------------
8825
// Reinterpret a long as a pointer; a plain register move, elided when
// source and destination registers coincide.
8826 instruct castX2P(rRegP dst, rRegL src)
8827 %{
8828 match(Set dst (CastX2P src));
8829 
8830 format %{ "movq $dst, $src\t# long->ptr" %}
8831 ins_encode %{
8832 if ($dst$$reg != $src$$reg) {
8833 __ movptr($dst$$Register, $src$$Register);
8834 }
8835 %}
8836 ins_pipe(ialu_reg_reg); // XXX
8837 %}
8838
// Reinterpret a pointer as a long; a plain register move, elided when
// source and destination registers coincide.
8839 instruct castP2X(rRegL dst, rRegP src)
8840 %{
8841 match(Set dst (CastP2X src));
8842 
8843 format %{ "movq $dst, $src\t# ptr -> long" %}
8844 ins_encode %{
8845 if ($dst$$reg != $src$$reg) {
8846 __ movptr($dst$$Register, $src$$Register);
8847 }
8848 %}
8849 ins_pipe(ialu_reg_reg); // XXX
8850 %}
8851
8852 // Convert oop into int for vectors alignment masking
8853 instruct convP2I(rRegI dst, rRegP src)
8854 %{
8855 match(Set dst (ConvL2I (CastP2X src)));
8856
8857 format %{ "movl $dst, $src\t# ptr -> int" %}
8858 ins_encode %{
15065 effect(DEF dst, USE src);
15066 ins_cost(100);
15067 format %{ "movd $dst,$src\t# MoveI2F" %}
15068 ins_encode %{
15069 __ movdl($dst$$XMMRegister, $src$$Register);
15070 %}
15071 ins_pipe( pipe_slow );
15072 %}
15073
// Bit-preserving move of a long GPR into a double XMM register
// (Double.longBitsToDouble-style reinterpretation, via movdq).
15074 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15075 match(Set dst (MoveL2D src));
15076 effect(DEF dst, USE src);
15077 ins_cost(100);
15078 format %{ "movd $dst,$src\t# MoveL2D" %}
15079 ins_encode %{
15080 __ movdq($dst$$XMMRegister, $src$$Register);
15081 %}
15082 ins_pipe( pipe_slow );
15083 %}
15084
15085 // Fast clearing of an array
15086 // Small non-constant length ClearArray for non-AVX512 targets.
15087 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15088 Universe dummy, rFlagsReg cr)
15089 %{
15090 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15091 match(Set dummy (ClearArray cnt base));
15092 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15093
15094 format %{ $$template
15095 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15096 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15097 $$emit$$"jg LARGE\n\t"
15098 $$emit$$"dec rcx\n\t"
15099 $$emit$$"js DONE\t# Zero length\n\t"
15100 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15101 $$emit$$"dec rcx\n\t"
15102 $$emit$$"jge LOOP\n\t"
15103 $$emit$$"jmp DONE\n\t"
15104 $$emit$$"# LARGE:\n\t"
15105 if (UseFastStosb) {
15106 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15107 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15108 } else if (UseXMMForObjInit) {
15109 $$emit$$"mov rdi,rax\n\t"
15110 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15111 $$emit$$"jmpq L_zero_64_bytes\n\t"
15112 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15120 $$emit$$"jl L_tail\n\t"
15121 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15122 $$emit$$"add 0x20,rax\n\t"
15123 $$emit$$"sub 0x4,rcx\n\t"
15124 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15125 $$emit$$"add 0x4,rcx\n\t"
15126 $$emit$$"jle L_end\n\t"
15127 $$emit$$"dec rcx\n\t"
15128 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15129 $$emit$$"vmovq xmm0,(rax)\n\t"
15130 $$emit$$"add 0x8,rax\n\t"
15131 $$emit$$"dec rcx\n\t"
15132 $$emit$$"jge L_sloop\n\t"
15133 $$emit$$"# L_end:\n\t"
15134 } else {
15135 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15136 }
15137 $$emit$$"# DONE"
15138 %}
15139 ins_encode %{
15140 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15141 $tmp$$XMMRegister, false, knoreg);
15142 %}
15143 ins_pipe(pipe_slow);
15144 %}
15145
15146 // Small non-constant length ClearArray for AVX512 targets.
15147 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15148 Universe dummy, rFlagsReg cr)
15149 %{
15150 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15151 match(Set dummy (ClearArray cnt base));
15152 ins_cost(125);
15153 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15154
15155 format %{ $$template
15156 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15157 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15158 $$emit$$"jg LARGE\n\t"
15159 $$emit$$"dec rcx\n\t"
15160 $$emit$$"js DONE\t# Zero length\n\t"
15161 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15162 $$emit$$"dec rcx\n\t"
15163 $$emit$$"jge LOOP\n\t"
15164 $$emit$$"jmp DONE\n\t"
15165 $$emit$$"# LARGE:\n\t"
15166 if (UseFastStosb) {
15167 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15168 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15169 } else if (UseXMMForObjInit) {
15170 $$emit$$"mov rdi,rax\n\t"
15171 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15172 $$emit$$"jmpq L_zero_64_bytes\n\t"
15173 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15181 $$emit$$"jl L_tail\n\t"
15182 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15183 $$emit$$"add 0x20,rax\n\t"
15184 $$emit$$"sub 0x4,rcx\n\t"
15185 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15186 $$emit$$"add 0x4,rcx\n\t"
15187 $$emit$$"jle L_end\n\t"
15188 $$emit$$"dec rcx\n\t"
15189 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15190 $$emit$$"vmovq xmm0,(rax)\n\t"
15191 $$emit$$"add 0x8,rax\n\t"
15192 $$emit$$"dec rcx\n\t"
15193 $$emit$$"jge L_sloop\n\t"
15194 $$emit$$"# L_end:\n\t"
15195 } else {
15196 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15197 }
15198 $$emit$$"# DONE"
15199 %}
15200 ins_encode %{
15201 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15202 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15203 %}
15204 ins_pipe(pipe_slow);
15205 %}
15206
15207 // Large non-constant length ClearArray for non-AVX512 targets.
// Large-array clear (non-AVX512): delegates to MacroAssembler::clear_mem
// with is_large=true and no opmask register; the format block mirrors the
// three strategies clear_mem can pick (rep stosb / XMM loop / rep stosq).
15208 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15209 Universe dummy, rFlagsReg cr)
15210 %{
15211 predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15212 match(Set dummy (ClearArray cnt base));
15213 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15214 
15215 format %{ $$template
15216 if (UseFastStosb) {
15217 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15218 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15219 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15220 } else if (UseXMMForObjInit) {
15221 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15222 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15223 $$emit$$"jmpq L_zero_64_bytes\n\t"
15224 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15225 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15226 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15227 $$emit$$"add 0x40,rax\n\t"
15228 $$emit$$"# L_zero_64_bytes:\n\t"
15229 $$emit$$"sub 0x8,rcx\n\t"
15230 $$emit$$"jge L_loop\n\t"
15231 $$emit$$"add 0x4,rcx\n\t"
15232 $$emit$$"jl L_tail\n\t"
15233 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15234 $$emit$$"add 0x20,rax\n\t"
15235 $$emit$$"sub 0x4,rcx\n\t"
15236 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15237 $$emit$$"add 0x4,rcx\n\t"
15238 $$emit$$"jle L_end\n\t"
15239 $$emit$$"dec rcx\n\t"
15240 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15241 $$emit$$"vmovq xmm0,(rax)\n\t"
15242 $$emit$$"add 0x8,rax\n\t"
15243 $$emit$$"dec rcx\n\t"
15244 $$emit$$"jge L_sloop\n\t"
15245 $$emit$$"# L_end:\n\t"
15246 } else {
15247 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15248 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15249 }
15250 %}
15251 ins_encode %{
15252 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15253 $tmp$$XMMRegister, true, knoreg);
15254 %}
15255 ins_pipe(pipe_slow);
15256 %}
15257
15258 // Large non-constant length ClearArray for AVX512 targets.
// Large-array clear, AVX512 variant: same as rep_stos_large but passes a
// temporary opmask register to clear_mem for masked tail handling.
15259 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15260 Universe dummy, rFlagsReg cr)
15261 %{
15262 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15263 match(Set dummy (ClearArray cnt base));
15264 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15265 
15266 format %{ $$template
15267 if (UseFastStosb) {
15268 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15269 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15270 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15271 } else if (UseXMMForObjInit) {
15272 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15273 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15274 $$emit$$"jmpq L_zero_64_bytes\n\t"
15275 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15276 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15277 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15278 $$emit$$"add 0x40,rax\n\t"
15279 $$emit$$"# L_zero_64_bytes:\n\t"
15280 $$emit$$"sub 0x8,rcx\n\t"
15281 $$emit$$"jge L_loop\n\t"
15282 $$emit$$"add 0x4,rcx\n\t"
15283 $$emit$$"jl L_tail\n\t"
15284 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15285 $$emit$$"add 0x20,rax\n\t"
15286 $$emit$$"sub 0x4,rcx\n\t"
15287 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15288 $$emit$$"add 0x4,rcx\n\t"
15289 $$emit$$"jle L_end\n\t"
15290 $$emit$$"dec rcx\n\t"
15291 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15292 $$emit$$"vmovq xmm0,(rax)\n\t"
15293 $$emit$$"add 0x8,rax\n\t"
15294 $$emit$$"dec rcx\n\t"
15295 $$emit$$"jge L_sloop\n\t"
15296 $$emit$$"# L_end:\n\t"
15297 } else {
15298 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15299 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15300 }
15301 %}
15302 ins_encode %{
15303 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15304 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15305 %}
15306 ins_pipe(pipe_slow);
15307 %}
15308
15309 // Small constant length ClearArray for AVX512 targets.
// Clear of a small array whose length is a compile-time constant, using
// the immediate-count clear_mem overload (requires AVX512VL).
15310 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15311 %{
15312 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15313 match(Set dummy (ClearArray cnt base));
15314 ins_cost(100);
15315 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15316 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15317 ins_encode %{
15318 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15319 %}
15320 ins_pipe(pipe_slow);
15321 %}
15322
// Latin1-vs-Latin1 string comparison intrinsic (non-AVX512VLBW path);
// delegates to MacroAssembler::string_compare with the LL encoding.
15323 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15324 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15325 %{
15326 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15327 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15328 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15329 
15330 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15331 ins_encode %{
15332 __ string_compare($str1$$Register, $str2$$Register,
15333 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15334 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15335 %}
15336 ins_pipe( pipe_slow );
15337 %}
15338
17145 effect(USE meth);
17146
17147 ins_cost(300);
17148 format %{ "call_leaf,runtime " %}
17149 ins_encode(clear_avx, Java_To_Runtime(meth));
17150 ins_pipe(pipe_slow);
17151 %}
17152
17153 // Call runtime without safepoint and with vector arguments
// Leaf runtime call carrying vector arguments: note no clear_avx in the
// encoding — vzeroupper would destroy the vector registers being passed.
17154 instruct CallLeafDirectVector(method meth)
17155 %{
17156 match(CallLeafVector);
17157 effect(USE meth);
17158 
17159 ins_cost(300);
17160 format %{ "call_leaf,vector " %}
17161 ins_encode(Java_To_Runtime(meth));
17162 ins_pipe(pipe_slow);
17163 %}
17164
17165 // Call runtime without safepoint
// Leaf runtime call with no FP arguments and no safepoint; clears AVX
// state before the call like other non-vector runtime calls.
17166 instruct CallLeafNoFPDirect(method meth)
17167 %{
17168 match(CallLeafNoFP);
17169 effect(USE meth);
17170 
17171 ins_cost(300);
17172 format %{ "call_leaf_nofp,runtime " %}
17173 ins_encode(clear_avx, Java_To_Runtime(meth));
17174 ins_pipe(pipe_slow);
17175 %}
17176
17177 // Return Instruction
17178 // Remove the return address & jump to it.
17179 // Notice: We always emit a nop after a ret to make sure there is room
17180 // for safepoint patching
17181 instruct Ret()
17182 %{
17183 match(Return);
17184
17185 format %{ "ret" %}
17186 ins_encode %{
17187 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
// Return-address offset for a static Java call: 5-byte call instruction
// plus the optional preceding vzeroupper.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
// Return-address offset for a dynamic (inline-cache) Java call: 15-byte
// call sequence plus the optional preceding vzeroupper.
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
// Return-address offset for a runtime call. A null entry point means an
// indirect leaf call (3-byte callq through a register); otherwise it is
// the 13-byte movq+callq sequence plus the optional vzeroupper (skipped
// for vector leaf calls, which pass arguments in vector registers).
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
// Padding so the call's 4-byte displacement is aligned for atomic
// patching (must not span a cache line).
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1868 st->print("\n\t");
1869 st->print("# stack alignment check");
1870 #endif
1871 }
1872 if (C->stub_function() != nullptr) {
1873 st->print("\n\t");
1874 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1875 st->print("\n\t");
1876 st->print("je fast_entry\t");
1877 st->print("\n\t");
1878 st->print("call #nmethod_entry_barrier_stub\t");
1879 st->print("\n\tfast_entry:");
1880 }
1881 st->cr();
1882 }
1883 #endif
1884
// Emit the prologue: verified entry (frame setup), nmethod entry barrier
// for normal methods, bind the verified-entry label (skipped while only
// sizing), then record frame-complete and the constant-table base.
1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1886 Compile* C = ra_->C;
1887 
1888 __ verified_entry(C);
1889 
1890 if (ra_->C->stub_function() == nullptr) {
1891 __ entry_barrier();
1892 }
1893 
1894 if (!Compile::current()->output()->in_scratch_emit_size()) {
1895 __ bind(*_verified_entry);
1896 }
1897 
1898 C->output()->set_frame_complete(__ offset());
1899 
1900 if (C->has_mach_constant_base_node()) {
1901 // NOTE: We set the table base offset here because users might be
1902 // emitted before MachConstantBaseNode.
1903 ConstantTable& constant_table = C->output()->constant_table();
1904 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1905 }
1906 }
1907
1908
// Upper bound on the number of relocations the prologue can produce.
1909 int MachPrologNode::reloc() const
1910 {
1911 return 0; // a large enough number
1912 }
1913
1914 //=============================================================================
1915 #ifndef PRODUCT
1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1917 {
1918 Compile* C = ra_->C;
1919 if (generate_vzeroupper(C)) {
1920 st->print("vzeroupper");
1921 st->cr(); st->print("\t");
1922 }
1923
1924 int framesize = C->output()->frame_size_in_bytes();
1925 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1926 // Remove word for return adr already pushed
1927 // and RBP
1935 st->print_cr("popq rbp");
1936 if (do_polling() && C->is_method_compilation()) {
1937 st->print("\t");
1938 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1939 "ja #safepoint_stub\t"
1940 "# Safepoint: poll for GC");
1941 }
1942 }
1943 #endif
1944
// Emit the epilogue: optional vzeroupper, frame removal (with stack
// repair when needed), reserved-stack check, and the return safepoint poll.
1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1946 {
1947 Compile* C = ra_->C;
1948 
1949 if (generate_vzeroupper(C)) {
1950 // Clear upper bits of YMM registers when current compiled code uses
1951 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1952 __ vzeroupper();
1953 }
1954 
1955 // Subtract two words to account for return address and rbp
1956 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1957 __ remove_frame(initial_framesize, C->needs_stack_repair());
1958 
1959 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1960 __ reserved_stack_check();
1961 }
1962 
1963 if (do_polling() && C->is_method_compilation()) {
1964 Label dummy_label;
1965 Label* code_stub = &dummy_label;
1966 if (!C->output()->in_scratch_emit_size()) {
1967 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1968 C->output()->add_stub(stub); // real stub only when emitting for real, not when sizing
1969 code_stub = &stub->entry();
1970 }
1971 __ relocate(relocInfo::poll_return_type);
1972 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1973 }
1974 }
1975
// Upper bound on relocations in the epilogue (the return poll relocates).
1976 int MachEpilogNode::reloc() const
1977 {
1978 return 2; // a large enough number
1979 }
1980
// Use the generic pipeline description for the epilogue node.
1981 const Pipeline* MachEpilogNode::pipeline() const
1982 {
1983 return MachNode::pipeline_class();
1984 }
1985
1986 //=============================================================================
1987
// Coarse register classes used by the spill-copy logic below.
1988 enum RC {
1989 rc_bad, // not a register
1990 rc_int, // general purpose register
1991 rc_kreg, // opmask (k) register
1992 rc_float, // xmm/vector register
1993 rc_stack // stack slot
1994 };
1995
2557 #endif
2558
// Materialize the address of this node's stack slot into its assigned
// register: lea reg, [rsp + offset].
2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2560 {
2561 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2562 int reg = ra_->get_encode(this);
2563 
2564 __ lea(as_Register(reg), Address(rsp, offset));
2565 }
2566
// Encoded size of the lea above: base 5 bytes (REX) or 6 (REX2 for
// extended GPRs); a displacement >= 0x80 widens disp8 to disp32 (+3).
2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2568 {
2569 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2570 if (ra_->get_encode(this) > 15) {
2571 return (offset < 0x80) ? 6 : 9; // REX2
2572 } else {
2573 return (offset < 0x80) ? 5 : 8; // REX
2574 }
2575 }
2576
2577 //=============================================================================
2578 #ifndef PRODUCT
// Debug listing placeholder for the value-type entry point.
2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2580 {
2581 st->print_cr("MachVEPNode");
2582 }
2583 #endif
2584
// Emit the inline-type entry point. Unverified flavor: just the IC
// check. Verified flavor: set up a (temporary) frame, unpack inline-type
// arguments passed as oops, then jump to the verified entry; finally pad
// with nops so the verified entry stays 4-byte aligned for patching.
2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 CodeBuffer* cbuf = masm->code();
2588 uint insts_size = cbuf->insts_size();
2589 if (!_verified) {
2590 __ ic_check(1);
2591 } else {
2592 // TODO 8284443 Avoid creation of temporary frame
2593 if (ra_->C->stub_function() == nullptr) {
2594 __ verified_entry(ra_->C, 0);
2595 __ entry_barrier();
2596 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2597 __ remove_frame(initial_framesize, false);
2598 }
2599 // Unpack inline type args passed as oop and then jump to
2600 // the verified entry point (skipping the unverified entry).
2601 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2602 // Emit code for verified entry and save increment for stack repair on return
2603 __ verified_entry(ra_->C, sp_inc);
2604 if (Compile::current()->output()->in_scratch_emit_size()) {
2605 Label dummy_verified_entry;
2606 __ jmp(dummy_verified_entry);
2607 } else {
2608 __ jmp(*_verified_entry);
2609 }
2610 }
2611 /* WARNING these NOPs are critical so that verified entry point is properly
2612 4 bytes aligned for patching by NativeJump::patch_verified_entry() */
2613 int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
2614 nops_cnt &= 0x3; // Do not add nops if code is aligned.
2615 if (nops_cnt > 0) {
2616 __ nop(nops_cnt);
2617 }
2618 }
2619
2620 //=============================================================================
2621 #ifndef PRODUCT
// Debug-only disassembly of the unverified entry point: load the receiver's
// klass (compressed or full-width, per UseCompressedClassPointers), compare
// against the inline cache's speculated klass, and jump to the miss stub on
// mismatch. Mirrors the code produced by ic_check() in emit() below.
2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2623 {
2624   if (UseCompressedClassPointers) {
2625     st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2626     st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2627   } else {
// Fixed: this branch loads the full-width klass pointer (movq), so do not
// label it "compressed" in the disassembly.
2628     st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2629     st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2630   }
2631   st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2632 }
2633 #endif
2634
// Emit the unverified entry point: an inline-cache check aligned to
// InteriorEntryAlignment. The assembler helper emits the klass compare
// and the jump to the IC miss stub.
2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2636 {
2637   __ ic_check(InteriorEntryAlignment);
2638 }
2639
2640
2641 //=============================================================================
2642
// Vector calling convention (passing vectors in vector registers) is
// available whenever the Vector API support flag is on.
2643 bool Matcher::supports_vector_calling_convention(void) {
2644   return EnableVectorSupport;
2645 }
2646
// True if this node is flagged as demotable from an NDD (new data
// destination, APX) form to a legacy two-operand form.
2647 static bool is_ndd_demotable(const MachNode* mdef) {
2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
2649 }
2650
// True if this node is NDD-demotable with commutative operands, i.e. the
// demotion may swap the two sources.
2651 static bool is_ndd_demotable_commutative(const MachNode* mdef) {
2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
2653 }
2654
// A node is a demotion candidate if either NDD-demotable flag is set.
2655 static bool is_demotion_candidate(const MachNode* mdef) {
2656   return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
2657 }
2658
2659 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
4594 }
4595 __ post_call_nop();
4596 %}
4597
// Encoding for a dynamic (inline-cache dispatched) Java call: emit the
// ic_call with the resolved-method index, followed by the post-call nop
// required for deoptimization/patching support.
4598 enc_class Java_Dynamic_Call(method meth) %{
4599 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4600 __ post_call_nop();
4601 %}
4602
// Epilog emitted after Java calls. Optionally verifies stack depth via a
// magic cookie, then handles methods returning inline types as fields:
// initializes the null-marker projection register and normalizes rax
// (clearing it when it holds a tagged InlineKlass pointer rather than an oop).
4603 enc_class call_epilog %{
4604 if (VerifyStackAtCalls) {
4605 // Check that stack depth is unchanged: find majik cookie on stack
4606 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4607 Label L;
4608 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4609 __ jccb(Assembler::equal, L);
4610 // Die if stack mismatch
4611 __ int3();
4612 __ bind(L);
4613 }
4614 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4615 // The last return value is not set by the callee but used to pass the null marker to compiled code.
4616 // Search for the corresponding projection, get the register and emit code that initialized it.
4617 uint con = (tf()->range_cc()->cnt() - 1);
4618 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4619 ProjNode* proj = fast_out(i)->as_Proj();
4620 if (proj->_con == con) {
4621 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4622 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4623 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4624 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4625 __ testq(rax, rax);
4626 __ setb(Assembler::notZero, toReg);
4627 __ movzbl(toReg, toReg);
4628 if (reg->is_stack()) {
// Projection was allocated to a stack slot: spill the computed marker.
4629 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4630 __ movq(Address(rsp, st_off), toReg);
4631 }
4632 break;
4633 }
4634 }
4635 if (return_value_is_used()) {
4636 // An inline type is returned as fields in multiple registers.
4637 // Rax either contains an oop if the inline type is buffered or a pointer
4638 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4639 // if the lowest bit is set to allow C2 to use the oop after null checking.
4640 // rax &= (rax & 1) - 1
4641 __ movptr(rscratch1, rax);
4642 __ andptr(rscratch1, 0x1);
4643 __ subptr(rscratch1, 0x1);
4644 __ andptr(rax, rscratch1);
4645 }
4646 }
4647 %}
4648
4649 %}
4650
4651 //----------FRAME--------------------------------------------------------------
4652 // Definition of frame structure and management information.
4653 //
4654 // S T A C K L A Y O U T Allocators stack-slot number
4655 // | (to get allocators register number
4656 // G Owned by | | v add OptoReg::stack0())
4657 // r CALLER | |
4658 // o | +--------+ pad to even-align allocators stack-slot
4659 // w V | pad0 | numbers; owned by CALLER
4660 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4661 // h ^ | in | 5
4662 // | | args | 4 Holes in incoming args owned by SELF
4663 // | | | | 3
4664 // | | +--------+
4665 // V | | old out| Empty on Intel, window on Sparc
4666 // | old |preserve| Must be even aligned.
5789 %}
5790 %}
5791
5792 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5793 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5794 %{
5795 constraint(ALLOC_IN_RC(ptr_reg));
5796 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5797 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5798
5799 op_cost(10);
5800 format %{"[$reg + $off + $idx << $scale]" %}
5801 interface(MEMORY_INTER) %{
5802 base($reg);
5803 index($idx);
5804 scale($scale);
5805 disp($off);
5806 %}
5807 %}
5808
5809 // Indirect Narrow Oop Operand
5810 operand indCompressedOop(rRegN reg) %{
5811 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5812 constraint(ALLOC_IN_RC(ptr_reg));
5813 match(DecodeN reg);
5814
5815 op_cost(10);
5816 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5817 interface(MEMORY_INTER) %{
5818 base(0xc); // R12
5819 index($reg);
5820 scale(0x3);
5821 disp(0x0);
5822 %}
5823 %}
5824
5825 // Indirect Narrow Oop Plus Offset Operand
5826 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5827 // we can't free r12 even with CompressedOops::base() == nullptr.
5828 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5829 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5830 constraint(ALLOC_IN_RC(ptr_reg));
5831 match(AddP (DecodeN reg) off);
5832
5833 op_cost(10);
5834 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5835 interface(MEMORY_INTER) %{
5836 base(0xc); // R12
5837 index($reg);
5838 scale(0x3);
5839 disp($off);
5840 %}
5841 %}
5842
5843 // Indirect Memory Operand
5844 operand indirectNarrow(rRegN reg)
6281 %}
6282
6283 // Replaces legVec during post-selection cleanup. See above.
6284 operand legVecZ() %{
6285 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6286 match(VecZ);
6287
6288 format %{ %}
6289 interface(REG_INTER);
6290 %}
6291
6292 //----------OPERAND CLASSES----------------------------------------------------
6293 // Operand Classes are groups of operands that are used to simplify
6294 // instruction definitions by not requiring the AD writer to specify separate
6295 // instructions for every form of operand when the instruction accepts
6296 // multiple operand types with the same basic encoding and format. The classic
6297 // case of this is memory operands.
6298
// All memory addressing modes accepted by generic memory-operand
// instructions, covering both regular and narrow-oop (compressed) forms.
6299 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6300 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6301 indCompressedOop, indCompressedOopOffset,
6302 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6303 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6304 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6305
6306 //----------PIPELINE-----------------------------------------------------------
6307 // Rules which define the behavior of the target architectures pipeline.
6308 pipeline %{
6309
6310 //----------ATTRIBUTES---------------------------------------------------------
6311 attributes %{
6312 variable_size_instructions; // Instructions are variable-sized (x86)
6313 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6314 instruction_unit_size = 1; // An instruction is 1 byte long
6315 instruction_fetch_unit_size = 16; // The processor fetches one line
6316 instruction_fetch_units = 1; // of 16 bytes
6317 %}
6318
6319 //----------RESOURCES----------------------------------------------------------
6320 // Resources are the functional units available to the machine
6321
8879 format %{ "MEMBAR-storestore (empty encoding)" %}
8880 ins_encode( );
8881 ins_pipe(empty);
8882 %}
8883
8884 //----------Move Instructions--------------------------------------------------
8885
// Reinterpret a long as a pointer; a plain register move (elided when
// src and dst were allocated to the same register).
8886 instruct castX2P(rRegP dst, rRegL src)
8887 %{
8888 match(Set dst (CastX2P src));
8889
8890 format %{ "movq $dst, $src\t# long->ptr" %}
8891 ins_encode %{
8892 if ($dst$$reg != $src$$reg) {
8893 __ movptr($dst$$Register, $src$$Register);
8894 }
8895 %}
8896 ins_pipe(ialu_reg_reg); // XXX
8897 %}
8898
// Reinterpret an int as a narrow pointer; a 32-bit register move (elided
// when src and dst were allocated to the same register).
8899 instruct castI2N(rRegN dst, rRegI src)
8900 %{
8901 match(Set dst (CastI2N src));
8902
// Fixed: format said "movq" but the encoding emits a 32-bit movl;
// the printed disassembly now matches the generated code.
8903 format %{ "movl $dst, $src\t# int -> narrow ptr" %}
8904 ins_encode %{
8905 if ($dst$$reg != $src$$reg) {
8906 __ movl($dst$$Register, $src$$Register);
8907 }
8908 %}
8909 ins_pipe(ialu_reg_reg); // XXX
8910 %}
8911
// Reinterpret a narrow pointer as a long (matches CastP2X with a narrow
// source); a plain register move, elided when registers coincide.
8912 instruct castN2X(rRegL dst, rRegN src)
8913 %{
8914 match(Set dst (CastP2X src));
8915
8916 format %{ "movq $dst, $src\t# ptr -> long" %}
8917 ins_encode %{
8918 if ($dst$$reg != $src$$reg) {
8919 __ movptr($dst$$Register, $src$$Register);
8920 }
8921 %}
8922 ins_pipe(ialu_reg_reg); // XXX
8923 %}
8924
// Reinterpret a pointer as a long; a plain register move, elided when
// src and dst were allocated to the same register.
8925 instruct castP2X(rRegL dst, rRegP src)
8926 %{
8927 match(Set dst (CastP2X src));
8928
8929 format %{ "movq $dst, $src\t# ptr -> long" %}
8930 ins_encode %{
8931 if ($dst$$reg != $src$$reg) {
8932 __ movptr($dst$$Register, $src$$Register);
8933 }
8934 %}
8935 ins_pipe(ialu_reg_reg); // XXX
8936 %}
8937
8938 // Convert oop into int for vectors alignment masking
8939 instruct convP2I(rRegI dst, rRegP src)
8940 %{
8941 match(Set dst (ConvL2I (CastP2X src)));
8942
8943 format %{ "movl $dst, $src\t# ptr -> int" %}
8944 ins_encode %{
15151 effect(DEF dst, USE src);
15152 ins_cost(100);
15153 format %{ "movd $dst,$src\t# MoveI2F" %}
15154 ins_encode %{
15155 __ movdl($dst$$XMMRegister, $src$$Register);
15156 %}
15157 ins_pipe( pipe_slow );
15158 %}
15159
// Bitwise move of a long GPR into an XMM register (no conversion);
// emitted via movdq (64-bit GPR->XMM transfer).
15160 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15161 match(Set dst (MoveL2D src));
15162 effect(DEF dst, USE src);
15163 ins_cost(100);
15164 format %{ "movd $dst,$src\t# MoveL2D" %}
15165 ins_encode %{
15166 __ movdq($dst$$XMMRegister, $src$$Register);
15167 %}
15168 ins_pipe( pipe_slow );
15169 %}
15170
15171
15172 // Fast clearing of an array
15173 // Small non-constant length ClearArray for non-AVX512 targets.
// ClearArray, small non-constant length, non-AVX512 (UseAVX <= 2).
// cnt = word count (rcx), base = destination (rdi), val = fill word (rax).
15174 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15175 Universe dummy, rFlagsReg cr)
15176 %{
15177 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15178 match(Set dummy (ClearArray (Binary cnt base) val));
15179 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15180
15181 format %{ $$template
15182 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15183 $$emit$$"jg LARGE\n\t"
15184 $$emit$$"dec rcx\n\t"
15185 $$emit$$"js DONE\t# Zero length\n\t"
15186 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15187 $$emit$$"dec rcx\n\t"
15188 $$emit$$"jge LOOP\n\t"
15189 $$emit$$"jmp DONE\n\t"
15190 $$emit$$"# LARGE:\n\t"
15191 if (UseFastStosb) {
15192 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15193 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15194 } else if (UseXMMForObjInit) {
15195 $$emit$$"movdq $tmp, $val\n\t"
15196 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15197 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15198 $$emit$$"jmpq L_zero_64_bytes\n\t"
15199 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15200 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15201 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15202 $$emit$$"add 0x40,rax\n\t"
15203 $$emit$$"# L_zero_64_bytes:\n\t"
15204 $$emit$$"sub 0x8,rcx\n\t"
15205 $$emit$$"jge L_loop\n\t"
15206 $$emit$$"add 0x4,rcx\n\t"
15207 $$emit$$"jl L_tail\n\t"
15208 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15209 $$emit$$"add 0x20,rax\n\t"
15210 $$emit$$"sub 0x4,rcx\n\t"
15211 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15212 $$emit$$"add 0x4,rcx\n\t"
15213 $$emit$$"jle L_end\n\t"
15214 $$emit$$"dec rcx\n\t"
15215 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15216 $$emit$$"vmovq xmm0,(rax)\n\t"
15217 $$emit$$"add 0x8,rax\n\t"
15218 $$emit$$"dec rcx\n\t"
15219 $$emit$$"jge L_sloop\n\t"
15220 $$emit$$"# L_end:\n\t"
15221 } else {
15222 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15223 }
15224 $$emit$$"# DONE"
15225 %}
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=false)
15226 ins_encode %{
15227 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15228 $tmp$$XMMRegister, false, false);
15229 %}
15230 ins_pipe(pipe_slow);
15231 %}
15232
// ClearArray, small non-constant length, non-AVX512, word-copy-only
// variant (no rep-stosb byte path; stores stay word-granular).
15233 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15234 Universe dummy, rFlagsReg cr)
15235 %{
15236 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15237 match(Set dummy (ClearArray (Binary cnt base) val));
15238 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15239
15240 format %{ $$template
15241 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15242 $$emit$$"jg LARGE\n\t"
15243 $$emit$$"dec rcx\n\t"
15244 $$emit$$"js DONE\t# Zero length\n\t"
15245 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15246 $$emit$$"dec rcx\n\t"
15247 $$emit$$"jge LOOP\n\t"
15248 $$emit$$"jmp DONE\n\t"
15249 $$emit$$"# LARGE:\n\t"
15250 if (UseXMMForObjInit) {
15251 $$emit$$"movdq $tmp, $val\n\t"
15252 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15253 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15254 $$emit$$"jmpq L_zero_64_bytes\n\t"
15255 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15256 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15257 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15258 $$emit$$"add 0x40,rax\n\t"
15259 $$emit$$"# L_zero_64_bytes:\n\t"
15260 $$emit$$"sub 0x8,rcx\n\t"
15261 $$emit$$"jge L_loop\n\t"
15262 $$emit$$"add 0x4,rcx\n\t"
15263 $$emit$$"jl L_tail\n\t"
15264 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15265 $$emit$$"add 0x20,rax\n\t"
15266 $$emit$$"sub 0x4,rcx\n\t"
15267 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15268 $$emit$$"add 0x4,rcx\n\t"
15269 $$emit$$"jle L_end\n\t"
15270 $$emit$$"dec rcx\n\t"
15271 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15272 $$emit$$"vmovq xmm0,(rax)\n\t"
15273 $$emit$$"add 0x8,rax\n\t"
15274 $$emit$$"dec rcx\n\t"
15275 $$emit$$"jge L_sloop\n\t"
15276 $$emit$$"# L_end:\n\t"
15277 } else {
15278 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15279 }
15280 $$emit$$"# DONE"
15281 %}
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=true)
15282 ins_encode %{
15283 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15284 $tmp$$XMMRegister, false, true);
15285 %}
15286 ins_pipe(pipe_slow);
15287 %}
15288
15289 // Small non-constant length ClearArray for AVX512 targets.
// ClearArray, small non-constant length, AVX512 (UseAVX > 2) variant:
// adds an opmask temp (ktmp) for masked vector stores.
15290 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15291 Universe dummy, rFlagsReg cr)
15292 %{
15293 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15294 match(Set dummy (ClearArray (Binary cnt base) val));
15295 ins_cost(125);
15296 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15297
15298 format %{ $$template
15299 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15300 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15301 $$emit$$"jg LARGE\n\t"
15302 $$emit$$"dec rcx\n\t"
15303 $$emit$$"js DONE\t# Zero length\n\t"
15304 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15305 $$emit$$"dec rcx\n\t"
15306 $$emit$$"jge LOOP\n\t"
15307 $$emit$$"jmp DONE\n\t"
15308 $$emit$$"# LARGE:\n\t"
15309 if (UseFastStosb) {
15310 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15311 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15312 } else if (UseXMMForObjInit) {
15313 $$emit$$"mov rdi,rax\n\t"
15314 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15315 $$emit$$"jmpq L_zero_64_bytes\n\t"
15316 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15324 $$emit$$"jl L_tail\n\t"
15325 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15326 $$emit$$"add 0x20,rax\n\t"
15327 $$emit$$"sub 0x4,rcx\n\t"
15328 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15329 $$emit$$"add 0x4,rcx\n\t"
15330 $$emit$$"jle L_end\n\t"
15331 $$emit$$"dec rcx\n\t"
15332 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15333 $$emit$$"vmovq xmm0,(rax)\n\t"
15334 $$emit$$"add 0x8,rax\n\t"
15335 $$emit$$"dec rcx\n\t"
15336 $$emit$$"jge L_sloop\n\t"
15337 $$emit$$"# L_end:\n\t"
15338 } else {
15339 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15340 }
15341 $$emit$$"# DONE"
15342 %}
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=false, ktmp)
15343 ins_encode %{
15344 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15345 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15346 %}
15347 ins_pipe(pipe_slow);
15348 %}
15349
// ClearArray, small non-constant length, AVX512, word-copy-only variant.
15350 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15351 Universe dummy, rFlagsReg cr)
15352 %{
15353 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15354 match(Set dummy (ClearArray (Binary cnt base) val));
15355 ins_cost(125);
15356 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15357
15358 format %{ $$template
15359 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15360 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15361 $$emit$$"jg LARGE\n\t"
15362 $$emit$$"dec rcx\n\t"
15363 $$emit$$"js DONE\t# Zero length\n\t"
15364 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15365 $$emit$$"dec rcx\n\t"
15366 $$emit$$"jge LOOP\n\t"
15367 $$emit$$"jmp DONE\n\t"
15368 $$emit$$"# LARGE:\n\t"
15369 if (UseFastStosb) {
15370 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15371 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15372 } else if (UseXMMForObjInit) {
15373 $$emit$$"mov rdi,rax\n\t"
15374 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15375 $$emit$$"jmpq L_zero_64_bytes\n\t"
15376 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15384 $$emit$$"jl L_tail\n\t"
15385 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15386 $$emit$$"add 0x20,rax\n\t"
15387 $$emit$$"sub 0x4,rcx\n\t"
15388 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15389 $$emit$$"add 0x4,rcx\n\t"
15390 $$emit$$"jle L_end\n\t"
15391 $$emit$$"dec rcx\n\t"
15392 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15393 $$emit$$"vmovq xmm0,(rax)\n\t"
15394 $$emit$$"add 0x8,rax\n\t"
15395 $$emit$$"dec rcx\n\t"
15396 $$emit$$"jge L_sloop\n\t"
15397 $$emit$$"# L_end:\n\t"
15398 } else {
15399 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15400 }
15401 $$emit$$"# DONE"
15402 %}
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=true, ktmp)
15403 ins_encode %{
15404 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15405 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15406 %}
15407 ins_pipe(pipe_slow);
15408 %}
15409
15410 // Large non-constant length ClearArray for non-AVX512 targets.
// ClearArray, large non-constant length, non-AVX512: skips the short-size
// fast path entirely.
15411 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15412 Universe dummy, rFlagsReg cr)
15413 %{
15414 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15415 match(Set dummy (ClearArray (Binary cnt base) val));
15416 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15417
15418 format %{ $$template
15419 if (UseFastStosb) {
15420 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15421 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15422 } else if (UseXMMForObjInit) {
15423 $$emit$$"movdq $tmp, $val\n\t"
15424 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15425 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15426 $$emit$$"jmpq L_zero_64_bytes\n\t"
15427 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15428 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15429 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15430 $$emit$$"add 0x40,rax\n\t"
15431 $$emit$$"# L_zero_64_bytes:\n\t"
15432 $$emit$$"sub 0x8,rcx\n\t"
15433 $$emit$$"jge L_loop\n\t"
15434 $$emit$$"add 0x4,rcx\n\t"
15435 $$emit$$"jl L_tail\n\t"
15436 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15437 $$emit$$"add 0x20,rax\n\t"
15438 $$emit$$"sub 0x4,rcx\n\t"
15439 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15440 $$emit$$"add 0x4,rcx\n\t"
15441 $$emit$$"jle L_end\n\t"
15442 $$emit$$"dec rcx\n\t"
15443 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15444 $$emit$$"vmovq xmm0,(rax)\n\t"
15445 $$emit$$"add 0x8,rax\n\t"
15446 $$emit$$"dec rcx\n\t"
15447 $$emit$$"jge L_sloop\n\t"
15448 $$emit$$"# L_end:\n\t"
15449 } else {
15450 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15451 }
15452 %}
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=false)
15453 ins_encode %{
15454 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15455 $tmp$$XMMRegister, true, false);
15456 %}
15457 ins_pipe(pipe_slow);
15458 %}
15459
// ClearArray, large non-constant length, non-AVX512, word-copy-only
// variant (no rep-stosb byte path).
15460 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15461 Universe dummy, rFlagsReg cr)
15462 %{
15463 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15464 match(Set dummy (ClearArray (Binary cnt base) val));
15465 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15466
15467 format %{ $$template
15468 if (UseXMMForObjInit) {
15469 $$emit$$"movdq $tmp, $val\n\t"
15470 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15471 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15472 $$emit$$"jmpq L_zero_64_bytes\n\t"
15473 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15474 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15475 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15476 $$emit$$"add 0x40,rax\n\t"
15477 $$emit$$"# L_zero_64_bytes:\n\t"
15478 $$emit$$"sub 0x8,rcx\n\t"
15479 $$emit$$"jge L_loop\n\t"
15480 $$emit$$"add 0x4,rcx\n\t"
15481 $$emit$$"jl L_tail\n\t"
15482 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15483 $$emit$$"add 0x20,rax\n\t"
15484 $$emit$$"sub 0x4,rcx\n\t"
15485 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15486 $$emit$$"add 0x4,rcx\n\t"
15487 $$emit$$"jle L_end\n\t"
15488 $$emit$$"dec rcx\n\t"
15489 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15490 $$emit$$"vmovq xmm0,(rax)\n\t"
15491 $$emit$$"add 0x8,rax\n\t"
15492 $$emit$$"dec rcx\n\t"
15493 $$emit$$"jge L_sloop\n\t"
15494 $$emit$$"# L_end:\n\t"
15495 } else {
15496 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15497 }
15498 %}
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=true)
15499 ins_encode %{
15500 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15501 $tmp$$XMMRegister, true, true);
15502 %}
15503 ins_pipe(pipe_slow);
15504 %}
15505
15506 // Large non-constant length ClearArray for AVX512 targets.
// ClearArray, large non-constant length, AVX512 (adds opmask temp ktmp).
15507 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15508 Universe dummy, rFlagsReg cr)
15509 %{
15510 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15511 match(Set dummy (ClearArray (Binary cnt base) val));
15512 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15513
15514 format %{ $$template
15515 if (UseFastStosb) {
15516 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15517 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15518 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15519 } else if (UseXMMForObjInit) {
15520 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15521 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15522 $$emit$$"jmpq L_zero_64_bytes\n\t"
15523 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15524 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15525 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15526 $$emit$$"add 0x40,rax\n\t"
15527 $$emit$$"# L_zero_64_bytes:\n\t"
15528 $$emit$$"sub 0x8,rcx\n\t"
15529 $$emit$$"jge L_loop\n\t"
15530 $$emit$$"add 0x4,rcx\n\t"
15531 $$emit$$"jl L_tail\n\t"
15532 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15533 $$emit$$"add 0x20,rax\n\t"
15534 $$emit$$"sub 0x4,rcx\n\t"
15535 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15536 $$emit$$"add 0x4,rcx\n\t"
15537 $$emit$$"jle L_end\n\t"
15538 $$emit$$"dec rcx\n\t"
15539 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15540 $$emit$$"vmovq xmm0,(rax)\n\t"
15541 $$emit$$"add 0x8,rax\n\t"
15542 $$emit$$"dec rcx\n\t"
15543 $$emit$$"jge L_sloop\n\t"
15544 $$emit$$"# L_end:\n\t"
15545 } else {
15546 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15547 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15548 }
15549 %}
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=false, ktmp)
15550 ins_encode %{
15551 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15552 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15553 %}
15554 ins_pipe(pipe_slow);
15555 %}
15556
// ClearArray, large non-constant length, AVX512, word-copy-only variant.
15557 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15558 Universe dummy, rFlagsReg cr)
15559 %{
15560 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15561 match(Set dummy (ClearArray (Binary cnt base) val));
15562 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15563
15564 format %{ $$template
15565 if (UseFastStosb) {
15566 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15567 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15568 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15569 } else if (UseXMMForObjInit) {
15570 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15571 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15572 $$emit$$"jmpq L_zero_64_bytes\n\t"
15573 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15574 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15575 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15576 $$emit$$"add 0x40,rax\n\t"
15577 $$emit$$"# L_zero_64_bytes:\n\t"
15578 $$emit$$"sub 0x8,rcx\n\t"
15579 $$emit$$"jge L_loop\n\t"
15580 $$emit$$"add 0x4,rcx\n\t"
15581 $$emit$$"jl L_tail\n\t"
15582 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15583 $$emit$$"add 0x20,rax\n\t"
15584 $$emit$$"sub 0x4,rcx\n\t"
15585 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15586 $$emit$$"add 0x4,rcx\n\t"
15587 $$emit$$"jle L_end\n\t"
15588 $$emit$$"dec rcx\n\t"
15589 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15590 $$emit$$"vmovq xmm0,(rax)\n\t"
15591 $$emit$$"add 0x8,rax\n\t"
15592 $$emit$$"dec rcx\n\t"
15593 $$emit$$"jge L_sloop\n\t"
15594 $$emit$$"# L_end:\n\t"
15595 } else {
15596 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15597 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15598 }
15599 %}
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=true, ktmp)
15600 ins_encode %{
15601 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15602 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15603 %}
15604 ins_pipe(pipe_slow);
15605 %}
15606
15607 // Small constant length ClearArray for AVX512 targets.
// ClearArray with a small compile-time constant length; requires
// AVX512VL with MaxVectorSize >= 32 (masked vector stores via ktmp).
15608 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15609 %{
15610 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15611 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15612 match(Set dummy (ClearArray (Binary cnt base) val));
15613 ins_cost(100);
15614 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15615 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15616 ins_encode %{
// Constant-count overload: cnt is an immediate, not a register.
15617 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15618 %}
15619 ins_pipe(pipe_slow);
15620 %}
15621
// Latin1 x Latin1 string compare (byte arrays), non-AVX512VLBW path:
// passes knoreg since no opmask register is available/needed.
15622 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15623 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15624 %{
15625 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15626 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15627 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15628
15629 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15630 ins_encode %{
15631 __ string_compare($str1$$Register, $str2$$Register,
15632 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15633 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15634 %}
15635 ins_pipe( pipe_slow );
15636 %}
15637
17444 effect(USE meth);
17445
17446 ins_cost(300);
17447 format %{ "call_leaf,runtime " %}
17448 ins_encode(clear_avx, Java_To_Runtime(meth));
17449 ins_pipe(pipe_slow);
17450 %}
17451
17452 // Call runtime without safepoint and with vector arguments
17453 instruct CallLeafDirectVector(method meth)
17454 %{
17455 match(CallLeafVector);
17456 effect(USE meth);
17457
17458 ins_cost(300);
17459 format %{ "call_leaf,vector " %}
17460 ins_encode(Java_To_Runtime(meth));
17461 ins_pipe(pipe_slow);
17462 %}
17463
17464 // Call runtime without safepoint
17465 // entry point is null, target holds the address to call
17466 instruct CallLeafNoFPInDirect(rRegP target)
17467 %{
17468 predicate(n->as_Call()->entry_point() == nullptr);
17469 match(CallLeafNoFP target);
17470
17471 ins_cost(300);
17472 format %{ "call_leaf_nofp,runtime indirect " %}
17473 ins_encode %{
17474 __ call($target$$Register);
17475 %}
17476
17477 ins_pipe(pipe_slow);
17478 %}
17479
17480 // Call runtime without safepoint
17481 instruct CallLeafNoFPDirect(method meth)
17482 %{
17483 predicate(n->as_Call()->entry_point() != nullptr);
17484 match(CallLeafNoFP);
17485 effect(USE meth);
17486
17487 ins_cost(300);
17488 format %{ "call_leaf_nofp,runtime " %}
17489 ins_encode(clear_avx, Java_To_Runtime(meth));
17490 ins_pipe(pipe_slow);
17491 %}
17492
17493 // Return Instruction
17494 // Remove the return address & jump to it.
17495 // Notice: We always emit a nop after a ret to make sure there is room
17496 // for safepoint patching
17497 instruct Ret()
17498 %{
17499 match(Return);
17500
17501 format %{ "ret" %}
17502 ins_encode %{
17503 __ ret(0);
|