1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1864 st->print("\n\t");
1865 st->print("# stack alignment check");
1866 #endif
1867 }
1868 if (C->stub_function() != nullptr) {
1869 st->print("\n\t");
1870 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1871 st->print("\n\t");
1872 st->print("je fast_entry\t");
1873 st->print("\n\t");
1874 st->print("call #nmethod_entry_barrier_stub\t");
1875 st->print("\n\tfast_entry:");
1876 }
1877 st->cr();
1878 }
1879 #endif
1880
// Emit the C2 method prologue: optional class-initialization barrier for
// static methods, stack bang + frame setup, and constant table bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Fast class-initialization check: load the holder klass and run the
    // barrier; the slow path tail-calls the wrong-method stub.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Stack bang (when needed) and frame construction.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  // Frame is fully built at this point; record the pc offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1913
1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1915 {
1916 return MachNode::size(ra_); // too many variables; just compute it
1917 // the hard way
1918 }
1919
1920 int MachPrologNode::reloc() const
1921 {
1922 return 0; // a large enough number
1923 }
1924
1925 //=============================================================================
1926 #ifndef PRODUCT
1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1928 {
1929 Compile* C = ra_->C;
1930 if (generate_vzeroupper(C)) {
1931 st->print("vzeroupper");
1932 st->cr(); st->print("\t");
1933 }
1934
1935 int framesize = C->output()->frame_size_in_bytes();
1936 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1937 // Remove word for return adr already pushed
1938 // and RBP
1946 st->print_cr("popq rbp");
1947 if (do_polling() && C->is_method_compilation()) {
1948 st->print("\t");
1949 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1950 "ja #safepoint_stub\t"
1951 "# Safepoint: poll for GC");
1952 }
1953 }
1954 #endif
1955
// Emit the C2 method epilogue: tear down the frame and poll for a safepoint
// on return.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return poll: branch to an out-of-line safepoint stub. During the
    // scratch (sizing) emission pass no stub is registered, so the branch
    // targets a local dummy label instead.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1996
1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1998 {
1999 return MachNode::size(ra_); // too many variables; just compute it
2000 // the hard way
2001 }
2002
2003 int MachEpilogNode::reloc() const
2004 {
2005 return 2; // a large enough number
2006 }
2007
2008 const Pipeline* MachEpilogNode::pipeline() const
2009 {
2010 return MachNode::pipeline_class();
2011 }
2012
2013 //=============================================================================
2014
// Register class of a spill/copy operand, used to select the proper move
// sequence between locations.
enum RC {
  rc_bad,    // not a register operand
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
2022
2584 #endif
2585
2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2587 {
2588 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2589 int reg = ra_->get_encode(this);
2590
2591 __ lea(as_Register(reg), Address(rsp, offset));
2592 }
2593
2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2595 {
2596 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2597 if (ra_->get_encode(this) > 15) {
2598 return (offset < 0x80) ? 6 : 9; // REX2
2599 } else {
2600 return (offset < 0x80) ? 5 : 8; // REX
2601 }
2602 }
2603
2604 //=============================================================================
2605 #ifndef PRODUCT
2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2607 {
2608 if (UseCompressedClassPointers) {
2609 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2610 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2611 } else {
2612 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2613 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2614 }
2615 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2616 }
2617 #endif
2618
2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2620 {
2621 __ ic_check(InteriorEntryAlignment);
2622 }
2623
2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2625 {
2626 return MachNode::size(ra_); // too many variables; just compute it
2627 // the hard way
2628 }
2629
2630
2631 //=============================================================================
2632
2633 bool Matcher::supports_vector_calling_convention(void) {
2634 return EnableVectorSupport;
2635 }
2636
2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2638 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2639 }
2640
2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2642 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2643 }
2644
2645 #ifdef ASSERT
2646 static bool is_ndd_demotable(const MachNode* mdef) {
2647 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2648 }
2649 #endif
4585 }
4586 __ post_call_nop();
4587 %}
4588
  // Inline-cache (dynamic) Java call to the resolved method, followed by a
  // post-call nop.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4593
  // With -XX:+VerifyStackAtCalls: after a call, verify the 0xbadb100d stack
  // cookie is still at its expected slot (i.e. stack depth unchanged across
  // the call) and trap with int3 on mismatch.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
4606
4607 %}
4608
4609 //----------FRAME--------------------------------------------------------------
4610 // Definition of frame structure and management information.
4611 //
4612 // S T A C K L A Y O U T Allocators stack-slot number
4613 // | (to get allocators register number
4614 // G Owned by | | v add OptoReg::stack0())
4615 // r CALLER | |
4616 // o | +--------+ pad to even-align allocators stack-slot
4617 // w V | pad0 | numbers; owned by CALLER
4618 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4619 // h ^ | in | 5
4620 // | | args | 4 Holes in incoming args owned by SELF
4621 // | | | | 3
4622 // | | +--------+
4623 // V | | old out| Empty on Intel, window on Sparc
4624 // | old |preserve| Must be even aligned.
5756 %}
5757 %}
5758
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches [reg + off + idx << scale] where the int index is known to be
// non-negative, so its zero-extension to 64 bits is safe.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // in(2)->in(3)->in(1) reaches the ConvI2L input; require its type's
  // lower bound to be >= 0.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5775
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when compressed oops use shift 3 (times_8): the address is
  // then R12 (heap base) + narrow_oop << 3 + offset.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5793
5794 // Indirect Memory Operand
5795 operand indirectNarrow(rRegN reg)
6265 %}
6266
// Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  // 512-bit vector constrained to the legacy register set.
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6275
6276 //----------OPERAND CLASSES----------------------------------------------------
6277 // Operand Classes are groups of operands that are used as to simplify
6278 // instruction definitions by not requiring the AD writer to specify separate
6279 // instructions for every form of operand when the instruction accepts
6280 // multiple operand types with the same basic encoding and format. The classic
6281 // case of this is memory operands.
6282
// All addressing-mode operands accepted by instructions taking a generic
// "memory" operand, covering both plain and narrow-oop (Narrow) bases.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6289
6290 //----------PIPELINE-----------------------------------------------------------
6291 // Rules which define the behavior of the target architectures pipeline.
6292 pipeline %{
6293
6294 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6302
6303 //----------RESOURCES----------------------------------------------------------
6304 // Resources are the functional units available to the machine
6305
8863 format %{ "MEMBAR-storestore (empty encoding)" %}
8864 ins_encode( );
8865 ins_pipe(empty);
8866 %}
8867
8868 //----------Move Instructions--------------------------------------------------
8869
// Cast a long to a pointer. Emits nothing when source and destination were
// allocated to the same register.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Cast a pointer to a long; same move-or-nothing encoding as castX2P.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8895
8896 // Convert oop into int for vectors alignment masking
8897 instruct convP2I(rRegI dst, rRegP src)
8898 %{
8899 match(Set dst (ConvL2I (CastP2X src)));
8900
8901 format %{ "movl $dst, $src\t# ptr -> int" %}
8902 ins_encode %{
15150 effect(DEF dst, USE src);
15151 ins_cost(100);
15152 format %{ "movd $dst,$src\t# MoveI2F" %}
15153 ins_encode %{
15154 __ movdl($dst$$XMMRegister, $src$$Register);
15155 %}
15156 ins_pipe( pipe_slow );
15157 %}
15158
// Raw 64-bit GPR -> XMM move: the bit pattern is reinterpreted, not
// converted.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15169
15170 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15172 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15173 Universe dummy, rFlagsReg cr)
15174 %{
15175 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15176 match(Set dummy (ClearArray cnt base));
15177 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15178
15179 format %{ $$template
15180 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15181 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15182 $$emit$$"jg LARGE\n\t"
15183 $$emit$$"dec rcx\n\t"
15184 $$emit$$"js DONE\t# Zero length\n\t"
15185 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15186 $$emit$$"dec rcx\n\t"
15187 $$emit$$"jge LOOP\n\t"
15188 $$emit$$"jmp DONE\n\t"
15189 $$emit$$"# LARGE:\n\t"
15190 if (UseFastStosb) {
15191 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15192 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15193 } else if (UseXMMForObjInit) {
15194 $$emit$$"mov rdi,rax\n\t"
15195 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15196 $$emit$$"jmpq L_zero_64_bytes\n\t"
15197 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15205 $$emit$$"jl L_tail\n\t"
15206 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15207 $$emit$$"add 0x20,rax\n\t"
15208 $$emit$$"sub 0x4,rcx\n\t"
15209 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15210 $$emit$$"add 0x4,rcx\n\t"
15211 $$emit$$"jle L_end\n\t"
15212 $$emit$$"dec rcx\n\t"
15213 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15214 $$emit$$"vmovq xmm0,(rax)\n\t"
15215 $$emit$$"add 0x8,rax\n\t"
15216 $$emit$$"dec rcx\n\t"
15217 $$emit$$"jge L_sloop\n\t"
15218 $$emit$$"# L_end:\n\t"
15219 } else {
15220 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15221 }
15222 $$emit$$"# DONE"
15223 %}
15224 ins_encode %{
15225 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15226 $tmp$$XMMRegister, false, knoreg);
15227 %}
15228 ins_pipe(pipe_slow);
15229 %}
15230
15231 // Small non-constant length ClearArray for AVX512 targets.
15232 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15233 Universe dummy, rFlagsReg cr)
15234 %{
15235 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15236 match(Set dummy (ClearArray cnt base));
15237 ins_cost(125);
15238 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15239
15240 format %{ $$template
15241 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15242 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15243 $$emit$$"jg LARGE\n\t"
15244 $$emit$$"dec rcx\n\t"
15245 $$emit$$"js DONE\t# Zero length\n\t"
15246 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15247 $$emit$$"dec rcx\n\t"
15248 $$emit$$"jge LOOP\n\t"
15249 $$emit$$"jmp DONE\n\t"
15250 $$emit$$"# LARGE:\n\t"
15251 if (UseFastStosb) {
15252 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15253 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15254 } else if (UseXMMForObjInit) {
15255 $$emit$$"mov rdi,rax\n\t"
15256 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15257 $$emit$$"jmpq L_zero_64_bytes\n\t"
15258 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15266 $$emit$$"jl L_tail\n\t"
15267 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15268 $$emit$$"add 0x20,rax\n\t"
15269 $$emit$$"sub 0x4,rcx\n\t"
15270 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15271 $$emit$$"add 0x4,rcx\n\t"
15272 $$emit$$"jle L_end\n\t"
15273 $$emit$$"dec rcx\n\t"
15274 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15275 $$emit$$"vmovq xmm0,(rax)\n\t"
15276 $$emit$$"add 0x8,rax\n\t"
15277 $$emit$$"dec rcx\n\t"
15278 $$emit$$"jge L_sloop\n\t"
15279 $$emit$$"# L_end:\n\t"
15280 } else {
15281 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15282 }
15283 $$emit$$"# DONE"
15284 %}
15285 ins_encode %{
15286 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15287 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15288 %}
15289 ins_pipe(pipe_slow);
15290 %}
15291
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  // Only 'large' arrays; small ones are handled by rep_stos.
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // is_large = true selects clear_mem's large-array path; knoreg: no
  // AVX-512 mask register on this non-EVEX path.
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15342
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  // Only 'large' arrays; small ones are handled by rep_stos_evex.
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // is_large = true; the ktmp opmask register is handed to clear_mem for
  // the EVEX masked-store path.
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15393
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Constant-length variant: clear_mem receives the immediate count, so no
  // count register is consumed.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15407
// StrComp with LL encoding (byte[] vs byte[]) on targets without
// AVX512VL+BW; no opmask register is used (knoreg).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15423
17261 effect(USE meth);
17262
17263 ins_cost(300);
17264 format %{ "call_leaf,runtime " %}
17265 ins_encode(clear_avx, Java_To_Runtime(meth));
17266 ins_pipe(pipe_slow);
17267 %}
17268
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // No clear_avx here — this matches the Op_CallLeafVector special case in
  // MachCallRuntimeNode::ret_addr_offset(), which also omits it.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}

// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17292
17293 // Return Instruction
17294 // Remove the return address & jump to it.
17295 // Notice: We always emit a nop after a ret to make sure there is room
17296 // for safepoint patching
17297 instruct Ret()
17298 %{
17299 match(Return);
17300
17301 format %{ "ret" %}
17302 ins_encode %{
17303 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1869 st->print("\n\t");
1870 st->print("# stack alignment check");
1871 #endif
1872 }
1873 if (C->stub_function() != nullptr) {
1874 st->print("\n\t");
1875 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1876 st->print("\n\t");
1877 st->print("je fast_entry\t");
1878 st->print("\n\t");
1879 st->print("call #nmethod_entry_barrier_stub\t");
1880 st->print("\n\tfast_entry:");
1881 }
1882 st->cr();
1883 }
1884 #endif
1885
// Emit the C2 method prologue: frame setup, nmethod entry barrier (methods
// only), and binding of the verified-entry label targeted by MachVEPNode.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Stubs do not get an nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // Bind the label MachVEPNode::emit() jumps to; skipped during the
  // scratch (sizing) emission pass.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  // Frame is fully built here; record the pc offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1908
1909
1910 int MachPrologNode::reloc() const
1911 {
1912 return 0; // a large enough number
1913 }
1914
1915 //=============================================================================
1916 #ifndef PRODUCT
1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1918 {
1919 Compile* C = ra_->C;
1920 if (generate_vzeroupper(C)) {
1921 st->print("vzeroupper");
1922 st->cr(); st->print("\t");
1923 }
1924
1925 int framesize = C->output()->frame_size_in_bytes();
1926 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1927 // Remove word for return adr already pushed
1928 // and RBP
1936 st->print_cr("popq rbp");
1937 if (do_polling() && C->is_method_compilation()) {
1938 st->print("\t");
1939 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1940 "ja #safepoint_stub\t"
1941 "# Safepoint: poll for GC");
1942 }
1943 }
1944 #endif
1945
// Emit the C2 method epilogue: tear down the frame (with optional stack
// repair) and poll for a safepoint on return.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return poll: branch to an out-of-line safepoint stub. During the
    // scratch (sizing) emission pass no stub is registered, so branch to a
    // local dummy label instead.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1976
1977 int MachEpilogNode::reloc() const
1978 {
1979 return 2; // a large enough number
1980 }
1981
1982 const Pipeline* MachEpilogNode::pipeline() const
1983 {
1984 return MachNode::pipeline_class();
1985 }
1986
1987 //=============================================================================
1988
// Register class of a spill/copy operand, used to select the proper move
// sequence between locations.
enum RC {
  rc_bad,    // not a register operand
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
1996
2558 #endif
2559
2560 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2561 {
2562 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2563 int reg = ra_->get_encode(this);
2564
2565 __ lea(as_Register(reg), Address(rsp, offset));
2566 }
2567
2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2569 {
2570 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2571 if (ra_->get_encode(this) > 15) {
2572 return (offset < 0x80) ? 6 : 9; // REX2
2573 } else {
2574 return (offset < 0x80) ? 5 : 8; // REX
2575 }
2576 }
2577
2578 //=============================================================================
2579 #ifndef PRODUCT
// Debug listing only; the actual code is produced by MachVEPNode::emit().
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2584 #endif
2585
// Emit the (un)verified inline-type entry point. For the unverified flavor
// this is just the inline-cache check; for the verified flavor, inline-type
// arguments passed as oops are unpacked and control jumps to the verified
// entry bound in MachPrologNode::emit().
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build a temporary frame so the entry barrier can run, then tear it
      // down again before unpacking arguments.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Sizing pass: the jump target is irrelevant, use a local dummy label.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2620
2621 //=============================================================================
2622 #ifndef PRODUCT
// Debug-only listing of the unverified entry point: load the receiver's
// klass, compare it against the inline cache's speculated klass, and jump
// to the ic-miss stub on mismatch (the code itself is emitted by
// MachUEPNode::emit via ic_check()).
2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2624 {
2625 if (UseCompressedClassPointers) {
2626 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2627 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2628 } else {
// This branch loads a full-width (uncompressed) klass pointer, so do not
// label it "compressed klass" as the movl branch does.
2629 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2630 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2631 }
2632 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2633 }
2634 #endif
2635
// Unverified entry point: emit the inline-cache check, passing
// InteriorEntryAlignment as the alignment argument to ic_check().
2636 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2637 {
2638 __ ic_check(InteriorEntryAlignment);
2639 }
2640
2641
2642 //=============================================================================
2643
// Vector calling convention (vector arguments/returns in vector registers)
// is available whenever the Vector API support flag is on.
2644 bool Matcher::supports_vector_calling_convention(void) {
2645 return EnableVectorSupport;
2646 }
2647
// True if the node's first operand is flagged as demotable from an APX NDD
// (new data destination) form to a legacy two-operand encoding.
2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2649 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2650 }
2651
// True if the node's second operand is flagged as demotable from an APX NDD
// form to a legacy two-operand encoding.
2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2653 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2654 }
2655
2656 #ifdef ASSERT
// Debug-only helper: the node is NDD-demotable via either operand.
2657 static bool is_ndd_demotable(const MachNode* mdef) {
2658 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2659 }
2660 #endif
4596 }
4597 __ post_call_nop();
4598 %}
4599
// Dynamically-bound Java call: emit an inline-cache call to the method,
// recording the resolved-method index, followed by the trailing
// post-call nop.
4600 enc_class Java_Dynamic_Call(method meth) %{
4601 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4602 __ post_call_nop();
4603 %}
4604
// Code emitted immediately after a call: optional stack-depth verification,
// plus Valhalla-specific fixups when the callee returns an inline type as
// scalarized fields (setting the null-marker projection and normalizing rax).
4605 enc_class call_epilog %{
4606 if (VerifyStackAtCalls) {
4607 // Check that stack depth is unchanged: find magic cookie (0xbadb100d) on stack
4608 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4609 Label L;
4610 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4611 __ jccb(Assembler::equal, L);
4612 // Die if stack mismatch
4613 __ int3();
4614 __ bind(L);
4615 }
4616 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4617 // The last return value is not set by the callee but used to pass the null marker to compiled code.
4618 // Search for the corresponding projection, get the register and emit code that initializes it.
4619 uint con = (tf()->range_cc()->cnt() - 1);
4620 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4621 ProjNode* proj = fast_out(i)->as_Proj();
4622 if (proj->_con == con) {
4623 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4624 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4625 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
// Use rscratch1 as a staging register when the projection lives on the stack.
4626 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4627 __ testq(rax, rax);
4628 __ setb(Assembler::notZero, toReg);
4629 __ movzbl(toReg, toReg);
4630 if (reg->is_stack()) {
4631 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4632 __ movq(Address(rsp, st_off), toReg);
4633 }
4634 break;
4635 }
4636 }
4637 if (return_value_is_used()) {
4638 // An inline type is returned as fields in multiple registers.
4639 // Rax either contains an oop if the inline type is buffered or a pointer
4640 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4641 // if the lowest bit is set to allow C2 to use the oop after null checking.
4642 // rax &= (rax & 1) - 1
4643 __ movptr(rscratch1, rax);
4644 __ andptr(rscratch1, 0x1);
4645 __ subptr(rscratch1, 0x1);
4646 __ andptr(rax, rscratch1);
4647 }
4648 }
4649 %}
4650
4651 %}
4652
4653 //----------FRAME--------------------------------------------------------------
4654 // Definition of frame structure and management information.
4655 //
4656 // S T A C K L A Y O U T Allocators stack-slot number
4657 // | (to get allocators register number
4658 // G Owned by | | v add OptoReg::stack0())
4659 // r CALLER | |
4660 // o | +--------+ pad to even-align allocators stack-slot
4661 // w V | pad0 | numbers; owned by CALLER
4662 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4663 // h ^ | in | 5
4664 // | | args | 4 Holes in incoming args owned by SELF
4665 // | | | | 3
4666 // | | +--------+
4667 // V | | old out| Empty on Intel, window on Sparc
4668 // | old |preserve| Must be even aligned.
5800 %}
5801 %}
5802
5803 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Memory operand [reg + off + (idx << scale)]; the predicate requires the
// index's type to be provably non-negative so sign extension of the 32-bit
// index is safe.
5804 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5805 %{
5806 constraint(ALLOC_IN_RC(ptr_reg));
5807 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5808 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5809
5810 op_cost(10);
5811 format %{"[$reg + $off + $idx << $scale]" %}
5812 interface(MEMORY_INTER) %{
5813 base($reg);
5814 index($idx);
5815 scale($scale);
5816 disp($off);
5817 %}
5818 %}
5819
5820 // Indirect Narrow Oop Operand
// Addressing for a compressed oop decode: [r12_heapbase + narrow_oop << 3].
// Only valid when compressed oops use shift == 3 (times_8).
5821 operand indCompressedOop(rRegN reg) %{
5822 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5823 constraint(ALLOC_IN_RC(ptr_reg));
5824 match(DecodeN reg);
5825
5826 op_cost(10);
5827 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5828 interface(MEMORY_INTER) %{
5829 base(0xc); // R12
5830 index($reg);
5831 scale(0x3);
5832 disp(0x0);
5833 %}
5834 %}
5835
5836 // Indirect Narrow Oop Plus Offset Operand
5837 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
5838 // so we can't free r12 even with CompressedOops::base() == nullptr.
// Compressed oop decode plus constant offset:
// [r12_heapbase + narrow_oop << 3 + off]. Requires shift == 3 (times_8).
5839 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5840 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5841 constraint(ALLOC_IN_RC(ptr_reg));
5842 match(AddP (DecodeN reg) off);
5843
5844 op_cost(10);
5845 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5846 interface(MEMORY_INTER) %{
5847 base(0xc); // R12
5848 index($reg);
5849 scale(0x3);
5850 disp($off);
5851 %}
5852 %}
5853
5854 // Indirect Memory Operand
5855 operand indirectNarrow(rRegN reg)
6325 %}
6326
6327 // Replaces legVec during post-selection cleanup. See above.
// 512-bit vector operand restricted to the legacy (xmm0-xmm15) register
// file; replaces legVec during post-selection cleanup (see note above).
6328 operand legVecZ() %{
6329 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6330 match(VecZ);
6331
6332 format %{ %}
6333 interface(REG_INTER);
6334 %}
6335
6336 //----------OPERAND CLASSES----------------------------------------------------
6337 // Operand Classes are groups of operands that are used as to simplify
6338 // instruction definitions by not requiring the AD writer to specify separate
6339 // instructions for every form of operand when the instruction accepts
6340 // multiple operand types with the same basic encoding and format. The classic
6341 // case of this is memory operands.
6342
// All memory addressing modes accepted by generic memory-using
// instructions, covering plain, indexed, scaled, and narrow-oop forms.
6343 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6344 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6345 indCompressedOop, indCompressedOopOffset,
6346 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6347 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6348 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6349
6350 //----------PIPELINE-----------------------------------------------------------
6351 // Rules which define the behavior of the target architectures pipeline.
6352 pipeline %{
6353
6354 //----------ATTRIBUTES---------------------------------------------------------
6355 attributes %{
6356 variable_size_instructions; // x86 instructions are variable length
6357 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6358 instruction_unit_size = 1; // An instruction is 1 byte long
6359 instruction_fetch_unit_size = 16; // The processor fetches one line
6360 instruction_fetch_units = 1; // of 16 bytes
6361 %}
6362
6363 //----------RESOURCES----------------------------------------------------------
6364 // Resources are the functional units available to the machine
6365
8923 format %{ "MEMBAR-storestore (empty encoding)" %}
8924 ins_encode( );
8925 ins_pipe(empty);
8926 %}
8927
8928 //----------Move Instructions--------------------------------------------------
8929
// Reinterpret a long as a pointer; the move is elided when source and
// destination were allocated to the same register.
8930 instruct castX2P(rRegP dst, rRegL src)
8931 %{
8932 match(Set dst (CastX2P src));
8933
8934 format %{ "movq $dst, $src\t# long->ptr" %}
8935 ins_encode %{
8936 if ($dst$$reg != $src$$reg) {
8937 __ movptr($dst$$Register, $src$$Register);
8938 }
8939 %}
8940 ins_pipe(ialu_reg_reg); // XXX
8941 %}
8942
// Reinterpret an int as a narrow pointer; the move is elided when source
// and destination share a register. The encoding emits a 32-bit movl, so
// the debug format now says "movl" (it previously claimed "movq").
8943 instruct castI2N(rRegN dst, rRegI src)
8944 %{
8945 match(Set dst (CastI2N src));
8946
8947 format %{ "movl $dst, $src\t# int -> narrow ptr" %}
8948 ins_encode %{
8949 if ($dst$$reg != $src$$reg) {
8950 __ movl($dst$$Register, $src$$Register);
8951 }
8952 %}
8953 ins_pipe(ialu_reg_reg); // XXX
8954 %}
8955
// Reinterpret a narrow (compressed) pointer as a long: matches CastP2X when
// the input is in a narrow-oop register. Move elided when registers match.
8956 instruct castN2X(rRegL dst, rRegN src)
8957 %{
8958 match(Set dst (CastP2X src));
8959
8960 format %{ "movq $dst, $src\t# ptr -> long" %}
8961 ins_encode %{
8962 if ($dst$$reg != $src$$reg) {
8963 __ movptr($dst$$Register, $src$$Register);
8964 }
8965 %}
8966 ins_pipe(ialu_reg_reg); // XXX
8967 %}
8968
// Reinterpret a pointer as a long; the move is elided when source and
// destination were allocated to the same register.
8969 instruct castP2X(rRegL dst, rRegP src)
8970 %{
8971 match(Set dst (CastP2X src));
8972
8973 format %{ "movq $dst, $src\t# ptr -> long" %}
8974 ins_encode %{
8975 if ($dst$$reg != $src$$reg) {
8976 __ movptr($dst$$Register, $src$$Register);
8977 }
8978 %}
8979 ins_pipe(ialu_reg_reg); // XXX
8980 %}
8981
8982 // Convert oop into int for vectors alignment masking
8983 instruct convP2I(rRegI dst, rRegP src)
8984 %{
8985 match(Set dst (ConvL2I (CastP2X src)));
8986
8987 format %{ "movl $dst, $src\t# ptr -> int" %}
8988 ins_encode %{
15236 effect(DEF dst, USE src);
15237 ins_cost(100);
15238 format %{ "movd $dst,$src\t# MoveI2F" %}
15239 ins_encode %{
15240 __ movdl($dst$$XMMRegister, $src$$Register);
15241 %}
15242 ins_pipe( pipe_slow );
15243 %}
15244
// Bitwise move of a long GPR into a double XMM register (MoveL2D),
// implemented with movdq (64-bit GPR -> XMM transfer).
15245 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15246 match(Set dst (MoveL2D src));
15247 effect(DEF dst, USE src);
15248 ins_cost(100);
15249 format %{ "movd $dst,$src\t# MoveL2D" %}
15250 ins_encode %{
15251 __ movdq($dst$$XMMRegister, $src$$Register);
15252 %}
15253 ins_pipe( pipe_slow );
15254 %}
15255
15256
15257 // Fast clearing of an array
15258 // Small non-constant length ClearArray for non-AVX512 targets.
// ClearArray, small non-constant length, non-AVX512 (UseAVX <= 2) path.
// Fills cnt 8-byte words at base with val; delegates to
// MacroAssembler::clear_mem with is_large=false, word_copy_only=false.
15259 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15260 Universe dummy, rFlagsReg cr)
15261 %{
15262 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15263 match(Set dummy (ClearArray (Binary cnt base) val));
15264 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15265
// The template below is debug-listing text only; codegen happens in ins_encode.
15266 format %{ $$template
15267 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15268 $$emit$$"jg LARGE\n\t"
15269 $$emit$$"dec rcx\n\t"
15270 $$emit$$"js DONE\t# Zero length\n\t"
15271 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15272 $$emit$$"dec rcx\n\t"
15273 $$emit$$"jge LOOP\n\t"
15274 $$emit$$"jmp DONE\n\t"
15275 $$emit$$"# LARGE:\n\t"
15276 if (UseFastStosb) {
15277 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15278 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15279 } else if (UseXMMForObjInit) {
15280 $$emit$$"movdq $tmp, $val\n\t"
15281 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15282 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15283 $$emit$$"jmpq L_zero_64_bytes\n\t"
15284 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15285 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15286 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15287 $$emit$$"add 0x40,rax\n\t"
15288 $$emit$$"# L_zero_64_bytes:\n\t"
15289 $$emit$$"sub 0x8,rcx\n\t"
15290 $$emit$$"jge L_loop\n\t"
15291 $$emit$$"add 0x4,rcx\n\t"
15292 $$emit$$"jl L_tail\n\t"
15293 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15294 $$emit$$"add 0x20,rax\n\t"
15295 $$emit$$"sub 0x4,rcx\n\t"
15296 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15297 $$emit$$"add 0x4,rcx\n\t"
15298 $$emit$$"jle L_end\n\t"
15299 $$emit$$"dec rcx\n\t"
15300 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15301 $$emit$$"vmovq xmm0,(rax)\n\t"
15302 $$emit$$"add 0x8,rax\n\t"
15303 $$emit$$"dec rcx\n\t"
15304 $$emit$$"jge L_sloop\n\t"
15305 $$emit$$"# L_end:\n\t"
15306 } else {
15307 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15308 }
15309 $$emit$$"# DONE"
15310 %}
15311 ins_encode %{
15312 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15313 $tmp$$XMMRegister, false, false);
15314 %}
15315 ins_pipe(pipe_slow);
15316 %}
15317
// ClearArray, small non-constant length, non-AVX512 path, word-copy-only
// variant (no rep-stosb byte fill): clear_mem with is_large=false,
// word_copy_only=true.
15318 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15319 Universe dummy, rFlagsReg cr)
15320 %{
15321 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15322 match(Set dummy (ClearArray (Binary cnt base) val));
15323 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15324
// Debug-listing template only; codegen happens in ins_encode.
15325 format %{ $$template
15326 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15327 $$emit$$"jg LARGE\n\t"
15328 $$emit$$"dec rcx\n\t"
15329 $$emit$$"js DONE\t# Zero length\n\t"
15330 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15331 $$emit$$"dec rcx\n\t"
15332 $$emit$$"jge LOOP\n\t"
15333 $$emit$$"jmp DONE\n\t"
15334 $$emit$$"# LARGE:\n\t"
15335 if (UseXMMForObjInit) {
15336 $$emit$$"movdq $tmp, $val\n\t"
15337 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15338 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15339 $$emit$$"jmpq L_zero_64_bytes\n\t"
15340 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15341 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15342 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15343 $$emit$$"add 0x40,rax\n\t"
15344 $$emit$$"# L_zero_64_bytes:\n\t"
15345 $$emit$$"sub 0x8,rcx\n\t"
15346 $$emit$$"jge L_loop\n\t"
15347 $$emit$$"add 0x4,rcx\n\t"
15348 $$emit$$"jl L_tail\n\t"
15349 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15350 $$emit$$"add 0x20,rax\n\t"
15351 $$emit$$"sub 0x4,rcx\n\t"
15352 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15353 $$emit$$"add 0x4,rcx\n\t"
15354 $$emit$$"jle L_end\n\t"
15355 $$emit$$"dec rcx\n\t"
15356 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15357 $$emit$$"vmovq xmm0,(rax)\n\t"
15358 $$emit$$"add 0x8,rax\n\t"
15359 $$emit$$"dec rcx\n\t"
15360 $$emit$$"jge L_sloop\n\t"
15361 $$emit$$"# L_end:\n\t"
15362 } else {
15363 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15364 }
15365 $$emit$$"# DONE"
15366 %}
15367 ins_encode %{
15368 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15369 $tmp$$XMMRegister, false, true);
15370 %}
15371 ins_pipe(pipe_slow);
15372 %}
15373
15374 // Small non-constant length ClearArray for AVX512 targets.
15375 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15376 Universe dummy, rFlagsReg cr)
15377 %{
15378 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15379 match(Set dummy (ClearArray (Binary cnt base) val));
15380 ins_cost(125);
15381 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15382
15383 format %{ $$template
15384 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15385 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15386 $$emit$$"jg LARGE\n\t"
15387 $$emit$$"dec rcx\n\t"
15388 $$emit$$"js DONE\t# Zero length\n\t"
15389 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15390 $$emit$$"dec rcx\n\t"
15391 $$emit$$"jge LOOP\n\t"
15392 $$emit$$"jmp DONE\n\t"
15393 $$emit$$"# LARGE:\n\t"
15394 if (UseFastStosb) {
15395 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15396 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15397 } else if (UseXMMForObjInit) {
15398 $$emit$$"mov rdi,rax\n\t"
15399 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15400 $$emit$$"jmpq L_zero_64_bytes\n\t"
15401 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15409 $$emit$$"jl L_tail\n\t"
15410 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15411 $$emit$$"add 0x20,rax\n\t"
15412 $$emit$$"sub 0x4,rcx\n\t"
15413 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15414 $$emit$$"add 0x4,rcx\n\t"
15415 $$emit$$"jle L_end\n\t"
15416 $$emit$$"dec rcx\n\t"
15417 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15418 $$emit$$"vmovq xmm0,(rax)\n\t"
15419 $$emit$$"add 0x8,rax\n\t"
15420 $$emit$$"dec rcx\n\t"
15421 $$emit$$"jge L_sloop\n\t"
15422 $$emit$$"# L_end:\n\t"
15423 } else {
15424 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15425 }
15426 $$emit$$"# DONE"
15427 %}
15428 ins_encode %{
15429 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15430 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15431 %}
15432 ins_pipe(pipe_slow);
15433 %}
15434
15435 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15436 Universe dummy, rFlagsReg cr)
15437 %{
15438 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15439 match(Set dummy (ClearArray (Binary cnt base) val));
15440 ins_cost(125);
15441 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15442
15443 format %{ $$template
15444 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15445 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15446 $$emit$$"jg LARGE\n\t"
15447 $$emit$$"dec rcx\n\t"
15448 $$emit$$"js DONE\t# Zero length\n\t"
15449 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15450 $$emit$$"dec rcx\n\t"
15451 $$emit$$"jge LOOP\n\t"
15452 $$emit$$"jmp DONE\n\t"
15453 $$emit$$"# LARGE:\n\t"
15454 if (UseFastStosb) {
15455 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15456 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15457 } else if (UseXMMForObjInit) {
15458 $$emit$$"mov rdi,rax\n\t"
15459 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15460 $$emit$$"jmpq L_zero_64_bytes\n\t"
15461 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15469 $$emit$$"jl L_tail\n\t"
15470 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15471 $$emit$$"add 0x20,rax\n\t"
15472 $$emit$$"sub 0x4,rcx\n\t"
15473 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15474 $$emit$$"add 0x4,rcx\n\t"
15475 $$emit$$"jle L_end\n\t"
15476 $$emit$$"dec rcx\n\t"
15477 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15478 $$emit$$"vmovq xmm0,(rax)\n\t"
15479 $$emit$$"add 0x8,rax\n\t"
15480 $$emit$$"dec rcx\n\t"
15481 $$emit$$"jge L_sloop\n\t"
15482 $$emit$$"# L_end:\n\t"
15483 } else {
15484 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15485 }
15486 $$emit$$"# DONE"
15487 %}
15488 ins_encode %{
15489 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15490 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15491 %}
15492 ins_pipe(pipe_slow);
15493 %}
15494
15495 // Large non-constant length ClearArray for non-AVX512 targets.
// ClearArray, large non-constant length, non-AVX512 path: clear_mem with
// is_large=true, word_copy_only=false.
15496 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15497 Universe dummy, rFlagsReg cr)
15498 %{
15499 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15500 match(Set dummy (ClearArray (Binary cnt base) val));
15501 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15502
// Debug-listing template only; codegen happens in ins_encode.
15503 format %{ $$template
15504 if (UseFastStosb) {
15505 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15506 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15507 } else if (UseXMMForObjInit) {
15508 $$emit$$"movdq $tmp, $val\n\t"
15509 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15510 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15511 $$emit$$"jmpq L_zero_64_bytes\n\t"
15512 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15513 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15514 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15515 $$emit$$"add 0x40,rax\n\t"
15516 $$emit$$"# L_zero_64_bytes:\n\t"
15517 $$emit$$"sub 0x8,rcx\n\t"
15518 $$emit$$"jge L_loop\n\t"
15519 $$emit$$"add 0x4,rcx\n\t"
15520 $$emit$$"jl L_tail\n\t"
15521 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15522 $$emit$$"add 0x20,rax\n\t"
15523 $$emit$$"sub 0x4,rcx\n\t"
15524 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15525 $$emit$$"add 0x4,rcx\n\t"
15526 $$emit$$"jle L_end\n\t"
15527 $$emit$$"dec rcx\n\t"
15528 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15529 $$emit$$"vmovq xmm0,(rax)\n\t"
15530 $$emit$$"add 0x8,rax\n\t"
15531 $$emit$$"dec rcx\n\t"
15532 $$emit$$"jge L_sloop\n\t"
15533 $$emit$$"# L_end:\n\t"
15534 } else {
15535 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15536 }
15537 %}
15538 ins_encode %{
15539 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15540 $tmp$$XMMRegister, true, false);
15541 %}
15542 ins_pipe(pipe_slow);
15543 %}
15544
// ClearArray, large non-constant length, non-AVX512 path, word-copy-only
// variant: clear_mem with is_large=true, word_copy_only=true.
15545 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15546 Universe dummy, rFlagsReg cr)
15547 %{
15548 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15549 match(Set dummy (ClearArray (Binary cnt base) val));
15550 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15551
// Debug-listing template only; codegen happens in ins_encode.
15552 format %{ $$template
15553 if (UseXMMForObjInit) {
15554 $$emit$$"movdq $tmp, $val\n\t"
15555 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15556 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15557 $$emit$$"jmpq L_zero_64_bytes\n\t"
15558 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15559 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15560 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15561 $$emit$$"add 0x40,rax\n\t"
15562 $$emit$$"# L_zero_64_bytes:\n\t"
15563 $$emit$$"sub 0x8,rcx\n\t"
15564 $$emit$$"jge L_loop\n\t"
15565 $$emit$$"add 0x4,rcx\n\t"
15566 $$emit$$"jl L_tail\n\t"
15567 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15568 $$emit$$"add 0x20,rax\n\t"
15569 $$emit$$"sub 0x4,rcx\n\t"
15570 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15571 $$emit$$"add 0x4,rcx\n\t"
15572 $$emit$$"jle L_end\n\t"
15573 $$emit$$"dec rcx\n\t"
15574 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15575 $$emit$$"vmovq xmm0,(rax)\n\t"
15576 $$emit$$"add 0x8,rax\n\t"
15577 $$emit$$"dec rcx\n\t"
15578 $$emit$$"jge L_sloop\n\t"
15579 $$emit$$"# L_end:\n\t"
15580 } else {
15581 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15582 }
15583 %}
15584 ins_encode %{
15585 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15586 $tmp$$XMMRegister, true, true);
15587 %}
15588 ins_pipe(pipe_slow);
15589 %}
15590
15591 // Large non-constant length ClearArray for AVX512 targets.
// ClearArray, large non-constant length, AVX512 (UseAVX > 2) path: adds a
// kReg temp and calls clear_mem with is_large=true, word_copy_only=false.
15592 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15593 Universe dummy, rFlagsReg cr)
15594 %{
15595 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15596 match(Set dummy (ClearArray (Binary cnt base) val));
15597 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15598
// Debug-listing template only; codegen happens in ins_encode.
15599 format %{ $$template
15600 if (UseFastStosb) {
15601 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15602 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15603 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15604 } else if (UseXMMForObjInit) {
15605 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15606 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15607 $$emit$$"jmpq L_zero_64_bytes\n\t"
15608 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15609 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15610 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15611 $$emit$$"add 0x40,rax\n\t"
15612 $$emit$$"# L_zero_64_bytes:\n\t"
15613 $$emit$$"sub 0x8,rcx\n\t"
15614 $$emit$$"jge L_loop\n\t"
15615 $$emit$$"add 0x4,rcx\n\t"
15616 $$emit$$"jl L_tail\n\t"
15617 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15618 $$emit$$"add 0x20,rax\n\t"
15619 $$emit$$"sub 0x4,rcx\n\t"
15620 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15621 $$emit$$"add 0x4,rcx\n\t"
15622 $$emit$$"jle L_end\n\t"
15623 $$emit$$"dec rcx\n\t"
15624 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15625 $$emit$$"vmovq xmm0,(rax)\n\t"
15626 $$emit$$"add 0x8,rax\n\t"
15627 $$emit$$"dec rcx\n\t"
15628 $$emit$$"jge L_sloop\n\t"
15629 $$emit$$"# L_end:\n\t"
15630 } else {
15631 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15632 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15633 }
15634 %}
15635 ins_encode %{
15636 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15637 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15638 %}
15639 ins_pipe(pipe_slow);
15640 %}
15641
// ClearArray, large non-constant length, AVX512 path, word-copy-only
// variant: clear_mem with is_large=true, word_copy_only=true plus kReg temp.
15642 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15643 Universe dummy, rFlagsReg cr)
15644 %{
15645 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15646 match(Set dummy (ClearArray (Binary cnt base) val));
15647 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15648
// Debug-listing template only; codegen happens in ins_encode.
15649 format %{ $$template
15650 if (UseFastStosb) {
15651 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15652 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15653 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15654 } else if (UseXMMForObjInit) {
15655 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15656 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15657 $$emit$$"jmpq L_zero_64_bytes\n\t"
15658 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15659 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15660 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15661 $$emit$$"add 0x40,rax\n\t"
15662 $$emit$$"# L_zero_64_bytes:\n\t"
15663 $$emit$$"sub 0x8,rcx\n\t"
15664 $$emit$$"jge L_loop\n\t"
15665 $$emit$$"add 0x4,rcx\n\t"
15666 $$emit$$"jl L_tail\n\t"
15667 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15668 $$emit$$"add 0x20,rax\n\t"
15669 $$emit$$"sub 0x4,rcx\n\t"
15670 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15671 $$emit$$"add 0x4,rcx\n\t"
15672 $$emit$$"jle L_end\n\t"
15673 $$emit$$"dec rcx\n\t"
15674 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15675 $$emit$$"vmovq xmm0,(rax)\n\t"
15676 $$emit$$"add 0x8,rax\n\t"
15677 $$emit$$"dec rcx\n\t"
15678 $$emit$$"jge L_sloop\n\t"
15679 $$emit$$"# L_end:\n\t"
15680 } else {
15681 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15682 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15683 }
15684 %}
15685 ins_encode %{
15686 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15687 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15688 %}
15689 ins_pipe(pipe_slow);
15690 %}
15691
15692 // Small constant length ClearArray for AVX512 targets.
// ClearArray with a small compile-time-constant length on AVX512VL-capable
// targets (MaxVectorSize >= 32): calls the immediate-count clear_mem
// overload using masked vector stores via the kReg temp.
15693 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15694 %{
15695 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15696 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15697 match(Set dummy (ClearArray (Binary cnt base) val));
15698 ins_cost(100);
15699 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15700 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15701 ins_encode %{
15702 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15703 %}
15704 ins_pipe(pipe_slow);
15705 %}
15706
// Latin-1 (byte[]) x Latin-1 string compare for targets without
// AVX512VL+BW; passes knoreg since no opmask register is available/needed.
15707 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15708 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15709 %{
15710 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15711 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15712 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15713
15714 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15715 ins_encode %{
15716 __ string_compare($str1$$Register, $str2$$Register,
15717 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15718 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15719 %}
15720 ins_pipe( pipe_slow );
15721 %}
15722
17560 effect(USE meth);
17561
17562 ins_cost(300);
17563 format %{ "call_leaf,runtime " %}
17564 ins_encode(clear_avx, Java_To_Runtime(meth));
17565 ins_pipe(pipe_slow);
17566 %}
17567
17568 // Call runtime without safepoint and with vector arguments
// Leaf runtime call (no safepoint) with vector arguments. Deliberately does
// NOT emit clear_avx/vzeroupper — the callee consumes vector registers —
// matching MachCallRuntimeNode::ret_addr_offset, which skips
// clear_avx_size() for Op_CallLeafVector.
17569 instruct CallLeafDirectVector(method meth)
17570 %{
17571 match(CallLeafVector);
17572 effect(USE meth);
17573
17574 ins_cost(300);
17575 format %{ "call_leaf,vector " %}
17576 ins_encode(Java_To_Runtime(meth));
17577 ins_pipe(pipe_slow);
17578 %}
17579
17580 // Call runtime without safepoint
17581 // entry point is null, target holds the address to call
// Leaf runtime call (no safepoint, no FP arguments) through a register:
// selected when the call node has no static entry point, so the target
// address is taken from $target at runtime.
17582 instruct CallLeafNoFPInDirect(rRegP target)
17583 %{
17584 predicate(n->as_Call()->entry_point() == nullptr);
17585 match(CallLeafNoFP target);
17586
17587 ins_cost(300);
17588 format %{ "call_leaf_nofp,runtime indirect " %}
17589 ins_encode %{
17590 __ call($target$$Register);
17591 %}
17592
17593 ins_pipe(pipe_slow);
17594 %}
17595
17596 // Call runtime without safepoint
// Leaf runtime call (no safepoint, no FP arguments) to a known entry point;
// clears AVX upper state (clear_avx) before transferring to runtime code.
17597 instruct CallLeafNoFPDirect(method meth)
17598 %{
17599 predicate(n->as_Call()->entry_point() != nullptr);
17600 match(CallLeafNoFP);
17601 effect(USE meth);
17602
17603 ins_cost(300);
17604 format %{ "call_leaf_nofp,runtime " %}
17605 ins_encode(clear_avx, Java_To_Runtime(meth));
17606 ins_pipe(pipe_slow);
17607 %}
17608
17609 // Return Instruction
17610 // Remove the return address & jump to it.
17611 // Notice: We always emit a nop after a ret to make sure there is room
17612 // for safepoint patching
17613 instruct Ret()
17614 %{
17615 match(Return);
17616
17617 format %{ "ret" %}
17618 ins_encode %{
17619 __ ret(0);
|