1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
// Return-address offset for runtime calls. The sequence is
// movq r10,#addr (10 bytes) + callq (r10) (3 bytes) = 13 bytes, preceded
// by vzeroupper except for vector-calling-convention calls, which must
// keep the upper vector register bits live across the call.
int MachCallRuntimeNode::ret_addr_offset() {
  int offset = 13; // movq r10,#addr; callq (r10)
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    // Ordinary runtime calls may be preceded by a vzeroupper.
    offset += clear_avx_size();
  }
  return offset;
}
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1864 st->print("\n\t");
1865 st->print("# stack alignment check");
1866 #endif
1867 }
1868 if (C->stub_function() != nullptr) {
1869 st->print("\n\t");
1870 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1871 st->print("\n\t");
1872 st->print("je fast_entry\t");
1873 st->print("\n\t");
1874 st->print("call #nmethod_entry_barrier_stub\t");
1875 st->print("\n\tfast_entry:");
1876 }
1877 st->cr();
1878 }
1879 #endif
1880
// Emit the method prologue: optional class-initialization barrier,
// the verified entry (frame setup with optional stack bang), and the
// constant-table base offset when the method has a constant table.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Re-enter the runtime via the wrong-method stub until the holder
    // class finishes initialization; fast path falls through.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Build the frame; bang the stack only when the frame is big enough to
  // need it, and use the stub variant for stub compilations.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1913
// Prologue length varies with barriers, stack banging, and frame size,
// so fall back to the generic (re-emission based) size computation.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1919
// Upper bound on the number of relocation entries the prologue may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1924
1925 //=============================================================================
1926 #ifndef PRODUCT
1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1928 {
1929 Compile* C = ra_->C;
1930 if (generate_vzeroupper(C)) {
1931 st->print("vzeroupper");
1932 st->cr(); st->print("\t");
1933 }
1934
1935 int framesize = C->output()->frame_size_in_bytes();
1936 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1937 // Remove word for return adr already pushed
1938 // and RBP
1946 st->print_cr("popq rbp");
1947 if (do_polling() && C->is_method_compilation()) {
1948 st->print("\t");
1949 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1950 "ja #safepoint_stub\t"
1951 "# Safepoint: poll for GC");
1952 }
1953 }
1954 #endif
1955
// Emit the method epilogue: optional vzeroupper, frame teardown,
// reserved-stack check, and the return-polling safepoint.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Poll for a safepoint at return; the real emission pass registers a
    // C2SafepointPollStub, while scratch sizing uses a dummy label.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1996
// Epilogue length varies (vzeroupper, frame size, polling), so fall back
// to the generic (re-emission based) size computation.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2002
// Upper bound on relocation entries the epilogue may emit
// (e.g. the poll-return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
2007
// Use the default pipeline description for the epilogue.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2012
2013 //=============================================================================
2014
// Coarse register classes used to pick the right move flavor when
// copying between locations (presumably by the spill-copy code that
// follows — confirm against the full file).
enum RC {
  rc_bad,   // not a register operand
  rc_int,   // general-purpose register
  rc_kreg,  // AVX-512 opmask (k) register
  rc_float, // XMM/vector register
  rc_stack  // stack slot
};
2022
2584 #endif
2585
// Materialize the address of the on-stack lock box into the node's
// assigned register: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2593
2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2595 {
2596 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2597 if (ra_->get_encode(this) > 15) {
2598 return (offset < 0x80) ? 6 : 9; // REX2
2599 } else {
2600 return (offset < 0x80) ? 5 : 8; // REX
2601 }
2602 }
2603
2604 //=============================================================================
2605 #ifndef PRODUCT
2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2607 {
2608 if (UseCompressedClassPointers) {
2609 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2610 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2611 } else {
2612 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2613 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2614 }
2615 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2616 }
2617 #endif
2618
// Emit the inline-cache check for the unverified entry point, aligned
// to InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2623
// IC-check length depends on compressed-pointer mode and alignment, so
// fall back to the generic (re-emission based) size computation.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2629
2630
2631 //=============================================================================
2632
// The vector calling convention is available whenever the Vector API
// support flag is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2636
// True when the node carries the platform flag marking its first operand
// as NDD-demotable (APX new-data-destination form — TODO confirm).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
2640
// True when the node carries the platform flag marking its second operand
// as NDD-demotable (APX new-data-destination form — TODO confirm).
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
2644
2645 #ifdef ASSERT
// Debug-only: true when either operand of the node is NDD-demotable.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
2649 #endif
4590 }
4591 __ post_call_nop();
4592 %}
4593
  // Inline-cache dispatched dynamic Java call, followed by the post-call
  // nop the runtime expects after compiled calls.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4598
  // After-call verification: with -XX:+VerifyStackAtCalls, check that the
  // magic cookie is still at the expected stack depth; a moved cookie
  // means the callee changed the stack depth, so stop hard with int3.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
4611
4612 %}
4613
4614 //----------FRAME--------------------------------------------------------------
4615 // Definition of frame structure and management information.
4616 //
4617 // S T A C K L A Y O U T Allocators stack-slot number
4618 // | (to get allocators register number
4619 // G Owned by | | v add OptoReg::stack0())
4620 // r CALLER | |
4621 // o | +--------+ pad to even-align allocators stack-slot
4622 // w V | pad0 | numbers; owned by CALLER
4623 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4624 // h ^ | in | 5
4625 // | | args | 4 Holes in incoming args owned by SELF
4626 // | | | | 3
4627 // | | +--------+
4628 // V | | old out| Empty on Intel, window on Sparc
4629 // | old |preserve| Must be even aligned.
5768 %}
5769 %}
5770
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches [reg + off + idx << scale]. The predicate requires the index's
// type to be provably non-negative so the int->long conversion can be
// folded into the address computation safely.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5787
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Addresses a compressed oop directly: [R12 + narrow_oop << 3 + off],
// where R12 holds the heap base. Only valid when the decode shift is 3.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5805
5806 // Indirect Memory Operand
5807 operand indirectNarrow(rRegN reg)
6277 %}
6278
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector operand restricted to the legacy (non-extended)
// vector register file.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6287
6288 //----------OPERAND CLASSES----------------------------------------------------
6289 // Operand Classes are groups of operands that are used as to simplify
6290 // instruction definitions by not requiring the AD writer to specify separate
6291 // instructions for every form of operand when the instruction accepts
6292 // multiple operand types with the same basic encoding and format. The classic
6293 // case of this is memory operands.
6294
// All addressing-mode operands an instruction may accept as a generic
// "memory" operand, covering both plain and narrow-oop based forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6301
6302 //----------PIPELINE-----------------------------------------------------------
6303 // Rules which define the behavior of the target architectures pipeline.
6304 pipeline %{
6305
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6314
6315 //----------RESOURCES----------------------------------------------------------
6316 // Resources are the functional units available to the machine
6317
8888 format %{ "MEMBAR-storestore (empty encoding)" %}
8889 ins_encode( );
8890 ins_pipe(empty);
8891 %}
8892
8893 //----------Move Instructions--------------------------------------------------
8894
// Reinterpret a long as a pointer (no conversion, just a register move).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8907
// Reinterpret a pointer as a long (no conversion, just a register move).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8920
8921 // Convert oop into int for vectors alignment masking
8922 instruct convP2I(rRegI dst, rRegP src)
8923 %{
8924 match(Set dst (ConvL2I (CastP2X src)));
8925
8926 format %{ "movl $dst, $src\t# ptr -> int" %}
8927 ins_encode %{
15175 effect(DEF dst, USE src);
15176 ins_cost(100);
15177 format %{ "movd $dst,$src\t# MoveI2F" %}
15178 ins_encode %{
15179 __ movdl($dst$$XMMRegister, $src$$Register);
15180 %}
15181 ins_pipe( pipe_slow );
15182 %}
15183
// Bitwise move of a long's raw bits into a double register (no
// numeric conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15194
15195 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15197 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15198 Universe dummy, rFlagsReg cr)
15199 %{
15200 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15201 match(Set dummy (ClearArray cnt base));
15202 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15203
15204 format %{ $$template
15205 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15206 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15207 $$emit$$"jg LARGE\n\t"
15208 $$emit$$"dec rcx\n\t"
15209 $$emit$$"js DONE\t# Zero length\n\t"
15210 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15211 $$emit$$"dec rcx\n\t"
15212 $$emit$$"jge LOOP\n\t"
15213 $$emit$$"jmp DONE\n\t"
15214 $$emit$$"# LARGE:\n\t"
15215 if (UseFastStosb) {
15216 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15217 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15218 } else if (UseXMMForObjInit) {
15219 $$emit$$"mov rdi,rax\n\t"
15220 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15221 $$emit$$"jmpq L_zero_64_bytes\n\t"
15222 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15230 $$emit$$"jl L_tail\n\t"
15231 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15232 $$emit$$"add 0x20,rax\n\t"
15233 $$emit$$"sub 0x4,rcx\n\t"
15234 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15235 $$emit$$"add 0x4,rcx\n\t"
15236 $$emit$$"jle L_end\n\t"
15237 $$emit$$"dec rcx\n\t"
15238 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15239 $$emit$$"vmovq xmm0,(rax)\n\t"
15240 $$emit$$"add 0x8,rax\n\t"
15241 $$emit$$"dec rcx\n\t"
15242 $$emit$$"jge L_sloop\n\t"
15243 $$emit$$"# L_end:\n\t"
15244 } else {
15245 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15246 }
15247 $$emit$$"# DONE"
15248 %}
15249 ins_encode %{
15250 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15251 $tmp$$XMMRegister, false, knoreg);
15252 %}
15253 ins_pipe(pipe_slow);
15254 %}
15255
15256 // Small non-constant length ClearArray for AVX512 targets.
15257 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15258 Universe dummy, rFlagsReg cr)
15259 %{
15260 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15261 match(Set dummy (ClearArray cnt base));
15262 ins_cost(125);
15263 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15264
15265 format %{ $$template
15266 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15267 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15268 $$emit$$"jg LARGE\n\t"
15269 $$emit$$"dec rcx\n\t"
15270 $$emit$$"js DONE\t# Zero length\n\t"
15271 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15272 $$emit$$"dec rcx\n\t"
15273 $$emit$$"jge LOOP\n\t"
15274 $$emit$$"jmp DONE\n\t"
15275 $$emit$$"# LARGE:\n\t"
15276 if (UseFastStosb) {
15277 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15278 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15279 } else if (UseXMMForObjInit) {
15280 $$emit$$"mov rdi,rax\n\t"
15281 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15282 $$emit$$"jmpq L_zero_64_bytes\n\t"
15283 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15291 $$emit$$"jl L_tail\n\t"
15292 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15293 $$emit$$"add 0x20,rax\n\t"
15294 $$emit$$"sub 0x4,rcx\n\t"
15295 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15296 $$emit$$"add 0x4,rcx\n\t"
15297 $$emit$$"jle L_end\n\t"
15298 $$emit$$"dec rcx\n\t"
15299 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15300 $$emit$$"vmovq xmm0,(rax)\n\t"
15301 $$emit$$"add 0x8,rax\n\t"
15302 $$emit$$"dec rcx\n\t"
15303 $$emit$$"jge L_sloop\n\t"
15304 $$emit$$"# L_end:\n\t"
15305 } else {
15306 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15307 }
15308 $$emit$$"# DONE"
15309 %}
15310 ins_encode %{
15311 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15312 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15313 %}
15314 ins_pipe(pipe_slow);
15315 %}
15316
// Large non-constant length ClearArray for non-AVX512 targets.
// Delegates to MacroAssembler::clear_mem with is_large=true; the format
// template below mirrors the three strategies clear_mem chooses between
// (rep stosb, XMM stores, rep stosq).
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15367
// Large non-constant length ClearArray for AVX512 targets.
// Same as rep_stos_large, but on AVX-512 capable CPUs clear_mem is given
// an opmask temp register; the format template mirrors the strategies
// clear_mem chooses between.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15418
// Small constant length ClearArray for AVX512 targets.
// With a compile-time constant length, clear_mem can emit a straight-line
// sequence; requires 256-bit vectors and AVX512VL for the masked stores.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15432
// Compare two Latin-1 (byte[]) strings; used when AVX512VLBW is not
// available (a separate rule handles that case). Inputs are fixed to the
// registers the string_compare stub expects and are clobbered.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15448
17286 effect(USE meth);
17287
17288 ins_cost(300);
17289 format %{ "call_leaf,runtime " %}
17290 ins_encode(clear_avx, Java_To_Runtime(meth));
17291 ins_pipe(pipe_slow);
17292 %}
17293
// Call runtime without safepoint and with vector arguments
// Note: unlike the other leaf-call rules, no clear_avx is emitted — the
// vector arguments must stay live in the vector registers across the call.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17305
// Call runtime without safepoint
// Leaf call that does not preserve FP state; clears AVX upper state first.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17317
17318 // Return Instruction
17319 // Remove the return address & jump to it.
17320 // Notice: We always emit a nop after a ret to make sure there is room
17321 // for safepoint patching
17322 instruct Ret()
17323 %{
17324 match(Return);
17325
17326 format %{ "ret" %}
17327 ins_encode %{
17328 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
// Return-address offset for runtime calls. An indirect leaf call with no
// static entry point is a bare 3-byte callq through a register; otherwise
// the sequence is movq r10,#addr (10 bytes) + callq (r10) (3 bytes),
// preceded by vzeroupper except for vector-calling-convention calls.
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    // Ordinary runtime calls may be preceded by a vzeroupper.
    offset += clear_avx_size();
  }
  return offset;
}
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1869 st->print("\n\t");
1870 st->print("# stack alignment check");
1871 #endif
1872 }
1873 if (C->stub_function() != nullptr) {
1874 st->print("\n\t");
1875 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1876 st->print("\n\t");
1877 st->print("je fast_entry\t");
1878 st->print("\n\t");
1879 st->print("call #nmethod_entry_barrier_stub\t");
1880 st->print("\n\tfast_entry:");
1881 }
1882 st->cr();
1883 }
1884 #endif
1885
// Emit the method prologue: verified entry (frame setup), nmethod entry
// barrier for non-stub compilations, the verified-entry label used by
// the inline-type entry points, and constant-table base setup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Stubs don't need the nmethod entry barrier.
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the shared verified-entry label only during real emission;
    // the scratch sizing pass must not bind it.
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1908
1909
// Upper bound on the number of relocation entries the prologue may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1914
1915 //=============================================================================
1916 #ifndef PRODUCT
1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1918 {
1919 Compile* C = ra_->C;
1920 if (generate_vzeroupper(C)) {
1921 st->print("vzeroupper");
1922 st->cr(); st->print("\t");
1923 }
1924
1925 int framesize = C->output()->frame_size_in_bytes();
1926 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1927 // Remove word for return adr already pushed
1928 // and RBP
1936 st->print_cr("popq rbp");
1937 if (do_polling() && C->is_method_compilation()) {
1938 st->print("\t");
1939 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1940 "ja #safepoint_stub\t"
1941 "# Safepoint: poll for GC");
1942 }
1943 }
1944 #endif
1945
// Emit the method epilogue: optional vzeroupper, frame removal (with
// stack repair when inline-type calling conventions require it),
// reserved-stack check, and the return-polling safepoint.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Poll for a safepoint at return; the real emission pass registers a
    // C2SafepointPollStub, while scratch sizing uses a dummy label.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1976
// Upper bound on relocation entries the epilogue may emit
// (e.g. the poll-return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1981
// Use the default pipeline description for the epilogue.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1986
1987 //=============================================================================
1988
// Coarse register classes used to pick the right move flavor when
// copying between locations (presumably by the spill-copy code that
// follows — confirm against the full file).
enum RC {
  rc_bad,   // not a register operand
  rc_int,   // general-purpose register
  rc_kreg,  // AVX-512 opmask (k) register
  rc_float, // XMM/vector register
  rc_stack  // stack slot
};
1996
2558 #endif
2559
// Materialize the address of the on-stack lock box into the node's
// assigned register: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2567
2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2569 {
2570 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2571 if (ra_->get_encode(this) > 15) {
2572 return (offset < 0x80) ? 6 : 9; // REX2
2573 } else {
2574 return (offset < 0x80) ? 5 : 8; // REX
2575 }
2576 }
2577
2578 //=============================================================================
2579 #ifndef PRODUCT
// Debug listing placeholder for the inline-type value entry point; the
// actual code emitted is described by MachVEPNode::emit.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2584 #endif
2585
// Emit an inline-type entry point. The unverified variant is just the
// inline cache check; the verified variant unpacks inline-type arguments
// passed as oops and jumps to the regular verified entry.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build a throwaway frame so the entry barrier can run, then tear
      // it down again before unpacking the arguments.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Sizing pass: use a local dummy label so the shared verified-entry
      // label is not referenced outside real emission.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2620
2621 //=============================================================================
2622 #ifndef PRODUCT
2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2624 {
2625 if (UseCompressedClassPointers) {
2626 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2627 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2628 } else {
2629 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2630 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2631 }
2632 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2633 }
2634 #endif
2635
// Emit the unverified entry point: an inline-cache klass check that jumps
// to the IC miss stub on mismatch, aligned to InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2640
2641
2642 //=============================================================================
2643
// The vector calling convention is available whenever Vector API support
// is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2647
// True if the matcher flagged this node's first operand as NDD-demotable
// (candidate for demotion from the APX new-data-destination form).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
2651
// True if the matcher flagged this node's second operand as NDD-demotable.
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
2655
#ifdef ASSERT
// Debug helper: node is NDD-demotable through either operand.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
#endif
4601 }
4602 __ post_call_nop();
4603 %}
4604
  // Dynamic (inline-cache) Java call: emits the IC call followed by a
  // post-call nop.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4609
  // Code emitted after every call returns: optional stack-depth check, plus
  // Valhalla handling of inline types returned as fields (null marker and
  // InlineKlass-pointer masking in rax).
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; // spill through rscratch1 if target is a stack slot
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4655
4656 %}
4657
4658 //----------FRAME--------------------------------------------------------------
4659 // Definition of frame structure and management information.
4660 //
4661 // S T A C K L A Y O U T Allocators stack-slot number
4662 // | (to get allocators register number
4663 // G Owned by | | v add OptoReg::stack0())
4664 // r CALLER | |
4665 // o | +--------+ pad to even-align allocators stack-slot
4666 // w V | pad0 | numbers; owned by CALLER
4667 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4668 // h ^ | in | 5
4669 // | | args | 4 Holes in incoming args owned by SELF
4670 // | | | | 3
4671 // | | +--------+
4672 // V | | old out| Empty on Intel, window on Sparc
4673 // | old |preserve| Must be even aligned.
5812 %}
5813 %}
5814
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Match only when the int index is known non-negative, so folding the
  // ConvI2L widening into the addressing mode cannot change the address.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5831
// Indirect Narrow Oop Operand
// Addresses through a compressed oop directly: heap base in R12 plus the
// narrow oop shifted left by 3. Only valid when the compressed-oops shift
// is exactly times_8.
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5847
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5865
5866 // Indirect Memory Operand
5867 operand indirectNarrow(rRegN reg)
6337 %}
6338
// Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy)); // ZMM restricted to the legacy register set
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6347
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.

// All addressing modes (plain, compressed-oop and narrow-base forms)
// accepted by instructions that take a generic "memory" operand.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6361
6362 //----------PIPELINE-----------------------------------------------------------
6363 // Rules which define the behavior of the target architectures pipeline.
6364 pipeline %{
6365
6366 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // x86 instructions are variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6374
6375 //----------RESOURCES----------------------------------------------------------
6376 // Resources are the functional units available to the machine
6377
8948 format %{ "MEMBAR-storestore (empty encoding)" %}
8949 ins_encode( );
8950 ins_pipe(empty);
8951 %}
8952
8953 //----------Move Instructions--------------------------------------------------
8954
// Reinterpret a long as a pointer; a register move, elided when source and
// destination already coincide.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8967
// Reinterpret an int as a narrow pointer; a 32-bit register move, elided
// when source and destination already coincide.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // Format fixed to "movl": the encoder emits a 32-bit movl (a narrow ptr
  // is 32 bits), so the previous "movq" annotation was misleading.
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8980
// Reinterpret a narrow (compressed) pointer's raw bits as a long.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8993
// Reinterpret a pointer's raw bits as a long.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9006
9007 // Convert oop into int for vectors alignment masking
9008 instruct convP2I(rRegI dst, rRegP src)
9009 %{
9010 match(Set dst (ConvL2I (CastP2X src)));
9011
9012 format %{ "movl $dst, $src\t# ptr -> int" %}
9013 ins_encode %{
15261 effect(DEF dst, USE src);
15262 ins_cost(100);
15263 format %{ "movd $dst,$src\t# MoveI2F" %}
15264 ins_encode %{
15265 __ movdl($dst$$XMMRegister, $src$$Register);
15266 %}
15267 ins_pipe( pipe_slow );
15268 %}
15269
// Bit-preserving move of a long's raw bits into a double (XMM) register —
// no numeric conversion.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15280
15281
15282 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Clear memory at [base, base + cnt*8) with the value in val:
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=false).
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15342
// Word-copy-only variant of rep_stos (never uses rep stosb):
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=true).
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15398
// Small non-constant length ClearArray for AVX512 targets.
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=false, ktmp).
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15459
// Word-copy-only variant of rep_stos_evex:
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=true, ktmp).
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15519
// Large non-constant length ClearArray for non-AVX512 targets.
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=false).
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15569
// Word-copy-only variant of rep_stos_large (never uses rep stosb):
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=true).
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15615
// Large non-constant length ClearArray for AVX512 targets.
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=false, ktmp).
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15666
// Word-copy-only variant of rep_stos_large_evex:
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=true, ktmp).
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15716
// Small constant length ClearArray for AVX512 targets.
// Length is known at compile time: clear_mem(base, imm cnt, val, tmp, ktmp).
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15731
// Compare two Latin-1 byte[] strings (LL encoding); result left in rax.
// Non-AVX512VLBW variant (see predicate); argument registers are killed.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15747
17585 effect(USE meth);
17586
17587 ins_cost(300);
17588 format %{ "call_leaf,runtime " %}
17589 ins_encode(clear_avx, Java_To_Runtime(meth));
17590 ins_pipe(pipe_slow);
17591 %}
17592
// Call runtime without safepoint and with vector arguments
// Note: no clear_avx (vzeroupper) here — vector registers carry live
// arguments across this call (MachCallRuntimeNode::ret_addr_offset likewise
// skips the vzeroupper size for CallLeafVector).
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17604
// Call runtime without safepoint
// entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Selected only when the call has no static entry point; the target
  // address is computed at runtime in a register.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17620
// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  // Direct counterpart of CallLeafNoFPInDirect: static entry point known.
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth)); // vzeroupper before calling out
  ins_pipe(pipe_slow);
%}
17633
17634 // Return Instruction
17635 // Remove the return address & jump to it.
17636 // Notice: We always emit a nop after a ret to make sure there is room
17637 // for safepoint patching
17638 instruct Ret()
17639 %{
17640 match(Return);
17641
17642 format %{ "ret" %}
17643 ins_encode %{
17644 __ ret(0);
|