1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1864 st->print("\n\t");
1865 st->print("# stack alignment check");
1866 #endif
1867 }
1868 if (C->stub_function() != nullptr) {
1869 st->print("\n\t");
1870 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1871 st->print("\n\t");
1872 st->print("je fast_entry\t");
1873 st->print("\n\t");
1874 st->print("call #nmethod_entry_barrier_stub\t");
1875 st->print("\n\tfast_entry:");
1876 }
1877 st->cr();
1878 }
1879 #endif
1880
// Emit the method prologue: optional class-initialization barrier (for
// methods compiled before their holder is fully initialized), then the
// verified entry (stack bang + frame setup).
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Fast path: if the holder class is initialized, fall through past the barrier.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Frame setup; stack banging only when need_stack_bang says the frame requires it.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1913
// Prologue length varies (clinit barrier, stack bang, frame size), so
// defer to the generic measure-by-emitting implementation.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1919
int MachPrologNode::reloc() const
{
  // NOTE(review): returns 0 although the trailing comment claims "large
  // enough"; the clinit-barrier path in emit() does emit metadata/runtime
  // relocations -- confirm callers do not rely on this as an upper bound.
  return 0; // a large enough number
}
1924
1925 //=============================================================================
1926 #ifndef PRODUCT
1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1928 {
1929 Compile* C = ra_->C;
1930 if (generate_vzeroupper(C)) {
1931 st->print("vzeroupper");
1932 st->cr(); st->print("\t");
1933 }
1934
1935 int framesize = C->output()->frame_size_in_bytes();
1936 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1937 // Remove word for return adr already pushed
1938 // and RBP
1946 st->print_cr("popq rbp");
1947 if (do_polling() && C->is_method_compilation()) {
1948 st->print("\t");
1949 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1950 "ja #safepoint_stub\t"
1951 "# Safepoint: poll for GC");
1952 }
1953 }
1954 #endif
1955
// Emit the method epilogue: optional vzeroupper, frame teardown, reserved
// stack check, and the return-path safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission: branch to a shared out-of-line safepoint stub.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1996
// Epilogue length varies (vzeroupper, frame size, poll), so defer to the
// generic measure-by-emitting implementation.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2002
int MachEpilogNode::reloc() const
{
  // Upper bound on relocation entries (e.g. the poll_return relocation).
  return 2; // a large enough number
}
2007
// No special scheduling needs; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2012
2013 //=============================================================================
2014
// Register classes used by the spill/copy code below to classify where a
// value lives so the correct move form can be selected.
enum RC {
  rc_bad,   // no register / unclassified
  rc_int,   // general-purpose register
  rc_kreg,  // opmask (k) register -- presumed from kReg usage elsewhere; confirm
  rc_float, // XMM register -- presumably covers all vector widths; verify in users
  rc_stack  // spilled to a stack slot
};
2022
2584 #endif
2585
2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2587 {
2588 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2589 int reg = ra_->get_encode(this);
2590
2591 __ lea(as_Register(reg), Address(rsp, offset));
2592 }
2593
2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2595 {
2596 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2597 if (ra_->get_encode(this) > 15) {
2598 return (offset < 0x80) ? 6 : 9; // REX2
2599 } else {
2600 return (offset < 0x80) ? 5 : 8; // REX
2601 }
2602 }
2603
2604 //=============================================================================
2605 #ifndef PRODUCT
2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2607 {
2608 if (UseCompressedClassPointers) {
2609 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2610 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2611 } else {
2612 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2613 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2614 }
2615 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2616 }
2617 #endif
2618
// Unverified entry point: emit the inline-cache receiver-klass check,
// aligned to InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2623
// ic_check length depends on alignment and compressed-klass mode; measure it.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2629
2630
2631 //=============================================================================
2632
// Whether C2 may use the vector calling convention (CallLeafVector);
// gated solely on EnableVectorSupport here.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2636
2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2638 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2639 }
2640
2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2642 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2643 }
2644
2645 #ifdef ASSERT
2646 static bool is_ndd_demotable(const MachNode* mdef) {
2647 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2648 }
2649 #endif
4583 }
4584 __ post_call_nop();
4585 %}
4586
  // Call to a dynamically bound Java method through the inline cache;
  // post_call_nop() pads after the call (see MacroAssembler::post_call_nop).
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4591
  // Emitted after every call; under VerifyStackAtCalls, assert that the
  // stack depth is unchanged by locating the magic cookie pushed at entry.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
4604
4605 %}
4606
4607 //----------FRAME--------------------------------------------------------------
4608 // Definition of frame structure and management information.
4609 //
4610 // S T A C K L A Y O U T Allocators stack-slot number
4611 // | (to get allocators register number
4612 // G Owned by | | v add OptoReg::stack0())
4613 // r CALLER | |
4614 // o | +--------+ pad to even-align allocators stack-slot
4615 // w V | pad0 | numbers; owned by CALLER
4616 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4617 // h ^ | in | 5
4618 // | | args | 4 Holes in incoming args owned by SELF
4619 // | | | | 3
4620 // | | +--------+
4621 // V | | old out| Empty on Intel, window on Sparc
4622 // | old |preserve| Must be even aligned.
5761 %}
5762 %}
5763
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Addressing mode [reg + off + (idx << scale)]; only legal when the ideal
// graph proves the int index is non-negative (see predicate), so the
// ConvI2L sign-extension cannot change the address.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Walks to the ConvI2L input of the match tree and requires type lo >= 0.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5780
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Decodes a narrow oop inline: address = R12 + (narrow << 3) + off.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when the heap uses an 8-byte compressed-oop shift.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12 -- reserved for the compressed-oop base (see note above)
    index($reg);
    scale(0x3); // matches CompressedOops::shift() == times_8 from the predicate
    disp($off);
  %}
%}
5798
5799 // Indirect Memory Operand
5800 operand indirectNarrow(rRegN reg)
6270 %}
6271
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector constrained to the legacy register class
// (vectorz_reg_legacy -- presumably xmm0-15; confirm in register definitions).
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6280
6281 //----------OPERAND CLASSES----------------------------------------------------
6282 // Operand Classes are groups of operands that are used as to simplify
6283 // instruction definitions by not requiring the AD writer to specify separate
6284 // instructions for every form of operand when the instruction accepts
6285 // multiple operand types with the same basic encoding and format. The classic
6286 // case of this is memory operands.
6287
// Every memory-addressing operand accepted where an instruction takes a
// generic "memory" operand -- both regular and narrow-oop (N) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6294
6295 //----------PIPELINE-----------------------------------------------------------
6296 // Rules which define the behavior of the target architectures pipeline.
6297 pipeline %{
6298
6299 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 is not fixed-width)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6307
6308 //----------RESOURCES----------------------------------------------------------
6309 // Resources are the functional units available to the machine
6310
8911 format %{ "MEMBAR-storestore (empty encoding)" %}
8912 ins_encode( );
8913 ins_pipe(empty);
8914 %}
8915
8916 //----------Move Instructions--------------------------------------------------
8917
// Reinterpret a raw long as a pointer; at most a register-to-register move.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when allocation put src and dst in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8930
// Reinterpret a pointer as a raw long; at most a register-to-register move.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when allocation put src and dst in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8943
8944 // Convert oop into int for vectors alignment masking
8945 instruct convP2I(rRegI dst, rRegP src)
8946 %{
8947 match(Set dst (ConvL2I (CastP2X src)));
8948
8949 format %{ "movl $dst, $src\t# ptr -> int" %}
8950 ins_encode %{
15198 effect(DEF dst, USE src);
15199 ins_cost(100);
15200 format %{ "movd $dst,$src\t# MoveI2F" %}
15201 ins_encode %{
15202 __ movdl($dst$$XMMRegister, $src$$Register);
15203 %}
15204 ins_pipe( pipe_slow );
15205 %}
15206
// Bit-preserving move of a long GPR into a double XMM register (MoveL2D).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15217
15218 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15220 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15221 Universe dummy, rFlagsReg cr)
15222 %{
15223 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15224 match(Set dummy (ClearArray cnt base));
15225 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15226
15227 format %{ $$template
15228 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15229 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15230 $$emit$$"jg LARGE\n\t"
15231 $$emit$$"dec rcx\n\t"
15232 $$emit$$"js DONE\t# Zero length\n\t"
15233 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15234 $$emit$$"dec rcx\n\t"
15235 $$emit$$"jge LOOP\n\t"
15236 $$emit$$"jmp DONE\n\t"
15237 $$emit$$"# LARGE:\n\t"
15238 if (UseFastStosb) {
15239 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15240 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15241 } else if (UseXMMForObjInit) {
15242 $$emit$$"mov rdi,rax\n\t"
15243 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15244 $$emit$$"jmpq L_zero_64_bytes\n\t"
15245 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15253 $$emit$$"jl L_tail\n\t"
15254 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15255 $$emit$$"add 0x20,rax\n\t"
15256 $$emit$$"sub 0x4,rcx\n\t"
15257 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15258 $$emit$$"add 0x4,rcx\n\t"
15259 $$emit$$"jle L_end\n\t"
15260 $$emit$$"dec rcx\n\t"
15261 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15262 $$emit$$"vmovq xmm0,(rax)\n\t"
15263 $$emit$$"add 0x8,rax\n\t"
15264 $$emit$$"dec rcx\n\t"
15265 $$emit$$"jge L_sloop\n\t"
15266 $$emit$$"# L_end:\n\t"
15267 } else {
15268 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15269 }
15270 $$emit$$"# DONE"
15271 %}
15272 ins_encode %{
15273 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15274 $tmp$$XMMRegister, false, knoreg);
15275 %}
15276 ins_pipe(pipe_slow);
15277 %}
15278
15279 // Small non-constant length ClearArray for AVX512 targets.
15280 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15281 Universe dummy, rFlagsReg cr)
15282 %{
15283 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15284 match(Set dummy (ClearArray cnt base));
15285 ins_cost(125);
15286 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15287
15288 format %{ $$template
15289 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15290 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15291 $$emit$$"jg LARGE\n\t"
15292 $$emit$$"dec rcx\n\t"
15293 $$emit$$"js DONE\t# Zero length\n\t"
15294 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15295 $$emit$$"dec rcx\n\t"
15296 $$emit$$"jge LOOP\n\t"
15297 $$emit$$"jmp DONE\n\t"
15298 $$emit$$"# LARGE:\n\t"
15299 if (UseFastStosb) {
15300 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15301 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15302 } else if (UseXMMForObjInit) {
15303 $$emit$$"mov rdi,rax\n\t"
15304 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15305 $$emit$$"jmpq L_zero_64_bytes\n\t"
15306 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15314 $$emit$$"jl L_tail\n\t"
15315 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15316 $$emit$$"add 0x20,rax\n\t"
15317 $$emit$$"sub 0x4,rcx\n\t"
15318 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15319 $$emit$$"add 0x4,rcx\n\t"
15320 $$emit$$"jle L_end\n\t"
15321 $$emit$$"dec rcx\n\t"
15322 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15323 $$emit$$"vmovq xmm0,(rax)\n\t"
15324 $$emit$$"add 0x8,rax\n\t"
15325 $$emit$$"dec rcx\n\t"
15326 $$emit$$"jge L_sloop\n\t"
15327 $$emit$$"# L_end:\n\t"
15328 } else {
15329 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15330 }
15331 $$emit$$"# DONE"
15332 %}
15333 ins_encode %{
15334 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15335 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15336 %}
15337 ins_pipe(pipe_slow);
15338 %}
15339
// Large non-constant length ClearArray for non-AVX512 targets.
// Zeroes cnt doublewords starting at base; rcx/rdi are consumed, rax is
// zeroed and used as the store source.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the is_large path; knoreg: no opmask on non-AVX512.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15390
// Large non-constant length ClearArray for AVX512 targets.
// Same as rep_stos_large but reserves an opmask temp for the masked
// tail handling in MacroAssembler::clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the is_large path; ktmp supplies the AVX-512 opmask.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15441
// Small constant length ClearArray for AVX512 targets.
// The element count is a compile-time constant, so clear_mem can emit a
// fully unrolled / masked sequence instead of a loop.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100); // preferred over the variable-length forms when applicable
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15455
// Compare two Latin-1 (LL encoding) byte[] strings; non-AVX512VLBW variant
// (see predicate), so no opmask register is passed to string_compare.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15471
17309 effect(USE meth);
17310
17311 ins_cost(300);
17312 format %{ "call_leaf,runtime " %}
17313 ins_encode(clear_avx, Java_To_Runtime(meth));
17314 ins_pipe(pipe_slow);
17315 %}
17316
// Call runtime without safepoint and with vector arguments
// Note: unlike the other leaf-call forms, no clear_avx is emitted --
// presumably because the vector arguments must be preserved at full width.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17328
// Call runtime without safepoint
// Leaf call that neither polls for safepoints nor preserves FP state;
// clear_avx first to avoid AVX<->SSE transition penalties in the callee.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17340
17341 // Return Instruction
17342 // Remove the return address & jump to it.
17343 // Notice: We always emit a nop after a ret to make sure there is room
17344 // for safepoint patching
17345 instruct Ret()
17346 %{
17347 match(Return);
17348
17349 format %{ "ret" %}
17350 ins_encode %{
17351 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
// Byte offset from the start of the runtime-call sequence to the return address.
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    // Vector calls skip vzeroupper so wide vector arguments stay intact.
    offset += clear_avx_size();
  }
  return offset;
}
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1869 st->print("\n\t");
1870 st->print("# stack alignment check");
1871 #endif
1872 }
1873 if (C->stub_function() != nullptr) {
1874 st->print("\n\t");
1875 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1876 st->print("\n\t");
1877 st->print("je fast_entry\t");
1878 st->print("\n\t");
1879 st->print("call #nmethod_entry_barrier_stub\t");
1880 st->print("\n\tfast_entry:");
1881 }
1882 st->cr();
1883 }
1884 #endif
1885
// Emit the method prologue: verified entry (frame setup), nmethod entry
// barrier for normal methods, and binding of the verified-entry label.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Stubs do not need an nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // During scratch sizing no real label exists to bind.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1908
1909
int MachPrologNode::reloc() const
{
  // NOTE(review): returns 0 although the trailing comment claims "large
  // enough" -- confirm callers do not rely on this as an upper bound.
  return 0; // a large enough number
}
1914
1915 //=============================================================================
1916 #ifndef PRODUCT
1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1918 {
1919 Compile* C = ra_->C;
1920 if (generate_vzeroupper(C)) {
1921 st->print("vzeroupper");
1922 st->cr(); st->print("\t");
1923 }
1924
1925 int framesize = C->output()->frame_size_in_bytes();
1926 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1927 // Remove word for return adr already pushed
1928 // and RBP
1936 st->print_cr("popq rbp");
1937 if (do_polling() && C->is_method_compilation()) {
1938 st->print("\t");
1939 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1940 "ja #safepoint_stub\t"
1941 "# Safepoint: poll for GC");
1942 }
1943 }
1944 #endif
1945
// Emit the method epilogue: optional vzeroupper, frame removal (with stack
// repair if needed), reserved stack check, and the return safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission: branch to a shared out-of-line safepoint stub.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1976
int MachEpilogNode::reloc() const
{
  // Upper bound on relocation entries (e.g. the poll_return relocation).
  return 2; // a large enough number
}
1981
// No special scheduling needs; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1986
1987 //=============================================================================
1988
// Register classes used by the spill/copy code below to classify where a
// value lives so the correct move form can be selected.
enum RC {
  rc_bad,   // no register / unclassified
  rc_int,   // general-purpose register
  rc_kreg,  // opmask (k) register -- presumed from kReg usage elsewhere; confirm
  rc_float, // XMM register -- presumably covers all vector widths; verify in users
  rc_stack  // spilled to a stack slot
};
1996
2558 #endif
2559
2560 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2561 {
2562 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2563 int reg = ra_->get_encode(this);
2564
2565 __ lea(as_Register(reg), Address(rsp, offset));
2566 }
2567
2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2569 {
2570 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2571 if (ra_->get_encode(this) > 15) {
2572 return (offset < 0x80) ? 6 : 9; // REX2
2573 } else {
2574 return (offset < 0x80) ? 5 : 8; // REX
2575 }
2576 }
2577
2578 //=============================================================================
2579 #ifndef PRODUCT
// Placeholder disassembly text; the real code is produced by emit().
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2584 #endif
2585
// Value-type entry point: either the unverified inline-cache check, or the
// verified entry that unpacks inline-type (scalarized) arguments and jumps
// to the verified entry point.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build (and immediately tear down) a temporary frame around the
      // entry barrier.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Sizing pass: no real label to jump to yet.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2620
2621 //=============================================================================
2622 #ifndef PRODUCT
2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2624 {
2625 if (UseCompressedClassPointers) {
2626 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2627 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2628 } else {
2629 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2630 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2631 }
2632 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2633 }
2634 #endif
2635
// Unverified entry point: emit the inline cache check, passing
// InteriorEntryAlignment as the alignment argument.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2640
2641
2642 //=============================================================================
2643
// The vector calling convention is available whenever Vector API support
// is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2647
2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2649 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2650 }
2651
2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2653 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2654 }
2655
2656 #ifdef ASSERT
2657 static bool is_ndd_demotable(const MachNode* mdef) {
2658 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2659 }
2660 #endif
4594 }
4595 __ post_call_nop();
4596 %}
4597
  // Dynamic Java call through the inline cache; post_call_nop() follows
  // the call as in the other call enc_classes.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4602
  // Code emitted after every call: optional stack-depth verification, plus
  // fixups for methods returning an inline type as multiple fields.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          // toReg = (rax != 0) ? 1 : 0
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            // Marker lives in a stack slot: spill the computed value there.
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4648
4649 %}
4650
4651 //----------FRAME--------------------------------------------------------------
4652 // Definition of frame structure and management information.
4653 //
4654 // S T A C K L A Y O U T Allocators stack-slot number
4655 // | (to get allocators register number
4656 // G Owned by | | v add OptoReg::stack0())
4657 // r CALLER | |
4658 // o | +--------+ pad to even-align allocators stack-slot
4659 // w V | pad0 | numbers; owned by CALLER
4660 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4661 // h ^ | in | 5
4662 // | | args | 4 Holes in incoming args owned by SELF
4663 // | | | | 3
4664 // | | +--------+
4665 // V | | old out| Empty on Intel, window on Sparc
4666 // | old |preserve| Must be even aligned.
5805 %}
5806 %}
5807
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is known non-negative (lower type bound
  // of the ConvI2L'd value >= 0), so scaling it as an address index is safe.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5824
// Indirect Narrow Oop Operand
// Decodes a narrow oop directly in the addressing mode: heap base in R12
// plus the narrow oop shifted left by 3 (requires an 8-byte decode shift).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5840
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5858
5859 // Indirect Memory Operand
5860 operand indirectNarrow(rRegN reg)
6330 %}
6331
// Replaces legVec during post-selection cleanup. See above.
// VecZ operand constrained to the legacy vector register set.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6340
6341 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6343 // instruction definitions by not requiring the AD writer to specify separate
6344 // instructions for every form of operand when the instruction accepts
6345 // multiple operand types with the same basic encoding and format. The classic
6346 // case of this is memory operands.
6347
// All addressing forms an instruction may accept through a generic
// 'memory' operand, including the compressed-oop (narrow) variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6354
6355 //----------PIPELINE-----------------------------------------------------------
6356 // Rules which define the behavior of the target architectures pipeline.
6357 pipeline %{
6358
6359 //----------ATTRIBUTES---------------------------------------------------------
  attributes %{
    variable_size_instructions; // Variable size instructions
    max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
    instruction_unit_size = 1; // An instruction is 1 byte long
    instruction_fetch_unit_size = 16; // The processor fetches one line
    instruction_fetch_units = 1; // of 16 bytes
  %}
6367
6368 //----------RESOURCES----------------------------------------------------------
6369 // Resources are the functional units available to the machine
6370
8971 format %{ "MEMBAR-storestore (empty encoding)" %}
8972 ins_encode( );
8973 ins_pipe(empty);
8974 %}
8975
8976 //----------Move Instructions--------------------------------------------------
8977
// Reinterpret a long as a pointer: same 64-bit value, different type.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8990
// Reinterpret an int as a narrow pointer.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // The encoding emits a 32-bit movl; the format previously advertised
  // "movq", which did not match the emitted instruction.
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9003
// Reinterpret a narrow oop register as a long.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  // NOTE(review): src is a narrow (32-bit) value but movptr copies the full
  // 64-bit register — assumes the upper 32 bits are already zero; confirm.
  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9016
// Reinterpret a pointer as a long: same 64-bit value, different type.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9029
9030 // Convert oop into int for vectors alignment masking
9031 instruct convP2I(rRegI dst, rRegP src)
9032 %{
9033 match(Set dst (ConvL2I (CastP2X src)));
9034
9035 format %{ "movl $dst, $src\t# ptr -> int" %}
9036 ins_encode %{
15284 effect(DEF dst, USE src);
15285 ins_cost(100);
15286 format %{ "movd $dst,$src\t# MoveI2F" %}
15287 ins_encode %{
15288 __ movdl($dst$$XMMRegister, $src$$Register);
15289 %}
15290 ins_pipe( pipe_slow );
15291 %}
15292
// Bitwise move of a 64-bit GPR into an XMM register (no conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15303
15304
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Debug-only pseudo-assembly; the real code comes from
  // MacroAssembler::clear_mem() below and varies with UseFastStosb /
  // UseXMMForObjInit. cnt counts 8-byte words (see the shlq rcx,3 below).
  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // large=false, word_copy=false
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15365
// Word-copy-only variant of rep_stos: the value must be stored in whole
// words, so the byte-granular rep-stosb fast path is not offered.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Debug-only pseudo-assembly; real code from MacroAssembler::clear_mem().
  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // large=false, word_copy=true
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15421
15422 // Small non-constant length ClearArray for AVX512 targets.
15423 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15424 Universe dummy, rFlagsReg cr)
15425 %{
15426 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15427 match(Set dummy (ClearArray (Binary cnt base) val));
15428 ins_cost(125);
15429 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15430
15431 format %{ $$template
15432 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15433 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15434 $$emit$$"jg LARGE\n\t"
15435 $$emit$$"dec rcx\n\t"
15436 $$emit$$"js DONE\t# Zero length\n\t"
15437 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15438 $$emit$$"dec rcx\n\t"
15439 $$emit$$"jge LOOP\n\t"
15440 $$emit$$"jmp DONE\n\t"
15441 $$emit$$"# LARGE:\n\t"
15442 if (UseFastStosb) {
15443 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15444 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15445 } else if (UseXMMForObjInit) {
15446 $$emit$$"mov rdi,rax\n\t"
15447 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15448 $$emit$$"jmpq L_zero_64_bytes\n\t"
15449 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15457 $$emit$$"jl L_tail\n\t"
15458 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15459 $$emit$$"add 0x20,rax\n\t"
15460 $$emit$$"sub 0x4,rcx\n\t"
15461 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15462 $$emit$$"add 0x4,rcx\n\t"
15463 $$emit$$"jle L_end\n\t"
15464 $$emit$$"dec rcx\n\t"
15465 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15466 $$emit$$"vmovq xmm0,(rax)\n\t"
15467 $$emit$$"add 0x8,rax\n\t"
15468 $$emit$$"dec rcx\n\t"
15469 $$emit$$"jge L_sloop\n\t"
15470 $$emit$$"# L_end:\n\t"
15471 } else {
15472 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15473 }
15474 $$emit$$"# DONE"
15475 %}
15476 ins_encode %{
15477 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15478 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15479 %}
15480 ins_pipe(pipe_slow);
15481 %}
15482
15483 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15484 Universe dummy, rFlagsReg cr)
15485 %{
15486 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15487 match(Set dummy (ClearArray (Binary cnt base) val));
15488 ins_cost(125);
15489 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15490
15491 format %{ $$template
15492 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15493 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15494 $$emit$$"jg LARGE\n\t"
15495 $$emit$$"dec rcx\n\t"
15496 $$emit$$"js DONE\t# Zero length\n\t"
15497 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15498 $$emit$$"dec rcx\n\t"
15499 $$emit$$"jge LOOP\n\t"
15500 $$emit$$"jmp DONE\n\t"
15501 $$emit$$"# LARGE:\n\t"
15502 if (UseFastStosb) {
15503 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15504 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15505 } else if (UseXMMForObjInit) {
15506 $$emit$$"mov rdi,rax\n\t"
15507 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15508 $$emit$$"jmpq L_zero_64_bytes\n\t"
15509 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15517 $$emit$$"jl L_tail\n\t"
15518 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15519 $$emit$$"add 0x20,rax\n\t"
15520 $$emit$$"sub 0x4,rcx\n\t"
15521 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15522 $$emit$$"add 0x4,rcx\n\t"
15523 $$emit$$"jle L_end\n\t"
15524 $$emit$$"dec rcx\n\t"
15525 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15526 $$emit$$"vmovq xmm0,(rax)\n\t"
15527 $$emit$$"add 0x8,rax\n\t"
15528 $$emit$$"dec rcx\n\t"
15529 $$emit$$"jge L_sloop\n\t"
15530 $$emit$$"# L_end:\n\t"
15531 } else {
15532 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15533 }
15534 $$emit$$"# DONE"
15535 %}
15536 ins_encode %{
15537 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15538 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15539 %}
15540 ins_pipe(pipe_slow);
15541 %}
15542
// Large non-constant length ClearArray for non-AVX512 targets.
// Unconditionally takes the bulk path (no short-length pre-check).
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Debug-only pseudo-assembly; real code from MacroAssembler::clear_mem().
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large=true, word_copy=false
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15592
// Large word-copy-only variant: value stored in whole words, so the
// byte-granular rep-stosb path is not offered.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Debug-only pseudo-assembly; real code from MacroAssembler::clear_mem().
  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large=true, word_copy=true
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15638
// Large non-constant length ClearArray for AVX512 targets.
// Same as rep_stos_large but with an opmask temp for the AVX-512 path.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Debug-only pseudo-assembly; real code from MacroAssembler::clear_mem().
  // NOTE(review): the "xorq rax, rax" lines look left over from before an
  // explicit 'val' input was passed to clear_mem — confirm they still match
  // the emitted code.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large=true, word_copy=false, with opmask temp
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15689
// Large word-copy-only variant for AVX512 targets (opmask temp required).
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Debug-only pseudo-assembly; real code from MacroAssembler::clear_mem().
  // NOTE(review): the "xorq rax, rax" lines look left over from before an
  // explicit 'val' input was passed to clear_mem — confirm they still match
  // the emitted code.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large=true, word_copy=true, with opmask temp
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15739
// Small constant length ClearArray for AVX512 targets.
// Constant-count form: the count is an immediate, so clear_mem can emit a
// straight-line masked-store sequence (hence the lower cost and the need
// for AVX512VL with 32-byte vectors).
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15754
// byte[]-vs-byte[] (LL encoding) string compare for targets without
// AVX512VL/BW; passes knoreg since no opmask register is used.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15770
17608 effect(USE meth);
17609
17610 ins_cost(300);
17611 format %{ "call_leaf,runtime " %}
17612 ins_encode(clear_avx, Java_To_Runtime(meth));
17613 ins_pipe(pipe_slow);
17614 %}
17615
// Call runtime without safepoint and with vector arguments
// Note: unlike the other leaf-call forms, no clear_avx here — vector
// argument registers must stay live across the call (consistent with
// MachCallRuntimeNode::ret_addr_offset skipping clear_avx_size for
// Op_CallLeafVector).
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17627
// Call runtime without safepoint
// entry point is null, target holds the address to call
// Indirect variant: only selected when the call has no static entry point,
// so the target address is taken from a register.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17643
// Call runtime without safepoint
// Direct variant: selected when a static entry point exists; clears AVX
// state (vzeroupper) before calling into the runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17656
17657 // Return Instruction
17658 // Remove the return address & jump to it.
17659 // Notice: We always emit a nop after a ret to make sure there is room
17660 // for safepoint patching
17661 instruct Ret()
17662 %{
17663 match(Return);
17664
17665 format %{ "ret" %}
17666 ins_encode %{
17667 __ ret(0);
|