1632 }
1633
1634 // Return-address offsets: every kind of call node reports the number of
1635 // bytes between the start of its call sequence and the point where the
1636 // return address will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 // 5 bytes from start of call to where return address points, plus
1640 // whatever clear_avx_size() reports.
1641 return 5 + clear_avx_size();
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 // 15 bytes from start of call to where return address points; the
1647 // size reported by clear_avx_size() is added on top of that.
1648 return 15 + clear_avx_size();
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 // movq r10,#addr; callq (r10) -- 13 bytes in total.
1653 const int call_seq_size = 13;
1654 // CallLeafVector is the one opcode that does not add clear_avx_size().
1655 const bool is_leaf_vector = (this->ideal_Opcode() == Op_CallLeafVector);
1656 return is_leaf_vector ? call_seq_size : call_seq_size + clear_avx_size();
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // Padding needed in front of a static Java call so that the bytes after the
1663 // opcode are 4-byte aligned and cannot span a cache line (keeps it patchable).
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 // Step past the vzeroupper and the one-byte call opcode first.
1667 const int operand_offset = current_offset + clear_avx_size() + 1;
1668 return align_up(operand_offset, alignment_required()) - operand_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1870 st->print("\n\t");
1871 st->print("# stack alignment check");
1872 #endif
1873 }
1874 if (C->stub_function() != nullptr) {
1875 st->print("\n\t");
1876 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1877 st->print("\n\t");
1878 st->print("je fast_entry\t");
1879 st->print("\n\t");
1880 st->print("call #nmethod_entry_barrier_stub\t");
1881 st->print("\n\tfast_entry:");
1882 }
1883 st->cr();
1884 }
1885 #endif
1886
// Emits the method prolog: an optional class-initialization barrier, then the
// verified entry sequence, and finally records the frame-complete offset and
// (when a constant base node exists) the constant table base offset.
1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1888 Compile* C = ra_->C;
1889
1890 int framesize = C->output()->frame_size_in_bytes();
1891 int bangsize = C->output()->bang_size_in_bytes();
1892
1893 if (C->clinit_barrier_on_entry()) {
1894 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1895 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1896
// clinit_barrier() receives L_skip_barrier as its fast-path label; when that
// path is not taken, execution reaches the jump to the wrong-method stub.
1897 Label L_skip_barrier;
1898 Register klass = rscratch1;
1899
1900 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1901 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1902
1903 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1904
1905 __ bind(L_skip_barrier);
1906 }
1907
// A bang size of 0 is passed when need_stack_bang() says no bang is required;
// the last argument is true for stub compilations (stub_function() != nullptr).
1908 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1909
// The code offset right after the verified entry is recorded as frame-complete.
1910 C->output()->set_frame_complete(__ offset());
1911
1912 if (C->has_mach_constant_base_node()) {
1913 // NOTE: We set the table base offset here because users might be
1914 // emitted before MachConstantBaseNode.
1915 ConstantTable& constant_table = C->output()->constant_table();
1916 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1917 }
1918 }
1919
// Size of the prolog: not computed by hand here, simply delegated to
// MachNode::size() with the same register allocator.
1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1921 {
1922 return MachNode::size(ra_); // too many variables; just compute it
1923 // the hard way
1924 }
1925
// Reloc value reported for the prolog node; always 0.
// NOTE(review): the inline remark calls this "a large enough number" although
// the value is 0 -- confirm the remark is still accurate.
1926 int MachPrologNode::reloc() const
1927 {
1928 return 0; // a large enough number
1929 }
1930
1931 //=============================================================================
1932 #ifndef PRODUCT
1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1934 {
1935 Compile* C = ra_->C;
1936 if (generate_vzeroupper(C)) {
1937 st->print("vzeroupper");
1938 st->cr(); st->print("\t");
1939 }
1940
1941 int framesize = C->output()->frame_size_in_bytes();
1942 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1943 // Remove word for return adr already pushed
1944 // and RBP
1952 st->print_cr("popq rbp");
1953 if (do_polling() && C->is_method_compilation()) {
1954 st->print("\t");
1955 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1956 "ja #safepoint_stub\t"
1957 "# Safepoint: poll for GC");
1958 }
1959 }
1960 #endif
1961
// Emits the method epilog: optional vzeroupper, frame teardown (addq rsp,
// popq rbp), optional reserved-stack check, and the return safepoint poll.
1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1963 {
1964 Compile* C = ra_->C;
1965
1966 if (generate_vzeroupper(C)) {
1967 // Clear upper bits of YMM registers when current compiled code uses
1968 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1969 __ vzeroupper();
1970 }
1971
1972 int framesize = C->output()->frame_size_in_bytes();
1973 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1974 // Remove word for return adr already pushed
1975 // and RBP
1976 framesize -= 2*wordSize;
1977
1978 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1979
// No addq is emitted when nothing is left to pop besides rbp and the return address.
1980 if (framesize) {
1981 __ addq(rsp, framesize);
1982 }
1983
1984 __ popq(rbp);
1985
1986 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1987 __ reserved_stack_check();
1988 }
1989
1990 if (do_polling() && C->is_method_compilation()) {
// While only measuring size (in_scratch_emit_size) no stub is created or
// registered; the poll then targets a local dummy label instead.
1991 Label dummy_label;
1992 Label* code_stub = &dummy_label;
1993 if (!C->output()->in_scratch_emit_size()) {
1994 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1995 C->output()->add_stub(stub);
1996 code_stub = &stub->entry();
1997 }
1998 __ relocate(relocInfo::poll_return_type);
1999 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
2000 }
2001 }
2002
// Size of the epilog: not computed by hand here, simply delegated to
// MachNode::size() with the same register allocator.
2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
2004 {
2005 return MachNode::size(ra_); // too many variables; just compute it
2006 // the hard way
2007 }
2008
// Reloc value reported for the epilog node; always 2.
2009 int MachEpilogNode::reloc() const
2010 {
2011 return 2; // a large enough number
2012 }
2013
// The epilog has no pipeline description of its own; it returns whatever
// MachNode::pipeline_class() provides.
2014 const Pipeline* MachEpilogNode::pipeline() const
2015 {
2016 return MachNode::pipeline_class();
2017 }
2018
2019 //=============================================================================
2020
// Classification tags.
// NOTE(review): the meanings below are inferred from the enumerator names
// only -- confirm against the code that uses this enum.
2021 enum RC {
2022 rc_bad, // first enumerator (value 0); presumably "no valid class" -- confirm
2023 rc_int, // presumably general-purpose (integer) registers -- confirm
2024 rc_kreg, // presumably k (mask) registers -- confirm
2025 rc_float, // presumably floating-point/vector registers -- confirm
2026 rc_stack // presumably stack slots -- confirm
2027 };
2028
2590 #endif
2591
2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2593 {
2594 // Load into this node's register the address rsp + offset, where the offset
2595 // belongs to the first element of input 0's register mask.
2596 const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2597 __ lea(as_Register(ra_->get_encode(this)), Address(rsp, box_offset));
2598 }
2599
2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2601 {
2602 const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2603 // Offsets below 0x80 give the smaller size (5 bytes instead of 8 with REX).
2604 const uint rex_size = (box_offset < 0x80) ? 5 : 8;
2605 // Register encodings above 15 use REX2, which costs one byte more than REX.
2606 const uint rex2_extra = (ra_->get_encode(this) > 15) ? 1 : 0;
2607 return rex_size + rex2_extra;
2608 }
2609
2610 //=============================================================================
2611 #ifndef PRODUCT
// Debug-only listing for the entry-point check: prints a fixed three-line
// description (klass load, inline cache compare, jump to the ic-miss stub).
// The text is static; it is not derived from the code emit() produces.
2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2613 {
2614 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2615 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2616 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2617 }
2618 #endif
2619
// Emits the entry-point check by calling ic_check() with InteriorEntryAlignment.
2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2621 {
2622 __ ic_check(InteriorEntryAlignment);
2623 }
2624
// Size of the entry-point check: delegated to MachNode::size() rather than
// computed by hand.
2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2626 {
2627 return MachNode::size(ra_); // too many variables; just compute it
2628 // the hard way
2629 }
2630
2631
2632 //=============================================================================
2633
// Reports vector calling convention support; the answer is exactly the value
// of the EnableVectorSupport flag.
2634 bool Matcher::supports_vector_calling_convention(void) {
2635 return EnableVectorSupport;
2636 }
2637
// True when mdef's flags have the Flag_ndd_demotable_opr1 bit set.
2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2639 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2640 }
2641
// True when mdef's flags have the Flag_ndd_demotable_opr2 bit set.
2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2643 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2644 }
2645
// Only compiled when ASSERT is defined.
2646 #ifdef ASSERT
// True when either of the two ndd-demotable flag bits is set on mdef.
2647 static bool is_ndd_demotable(const MachNode* mdef) {
2648 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2649 }
2650 #endif
4584 }
4585 __ post_call_nop();
4586 %}
4587
// Encoding for a dynamic Java call: an inline-cache call to the method's
// address (with the resolved method index), followed by the post-call nop.
4588 enc_class Java_Dynamic_Call(method meth) %{
4589 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4590 __ post_call_nop();
4591 %}
4592
// Encoding appended after calls. It emits code only when VerifyStackAtCalls is
// set: the stack slot at the computed offset is compared with the cookie
// 0xbadb100d, and an int3 is executed if they differ.
4593 enc_class call_epilog %{
4594 if (VerifyStackAtCalls) {
4595 // Check that stack depth is unchanged: find majik cookie on stack
4596 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4597 Label L;
4598 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4599 __ jccb(Assembler::equal, L);
4600 // Die if stack mismatch
4601 __ int3();
4602 __ bind(L);
4603 }
4604 %}
4605
4606 %}
4607
4608 //----------FRAME--------------------------------------------------------------
4609 // Definition of frame structure and management information.
4610 //
4611 // S T A C K L A Y O U T Allocators stack-slot number
4612 // | (to get allocators register number
4613 // G Owned by | | v add OptoReg::stack0())
4614 // r CALLER | |
4615 // o | +--------+ pad to even-align allocators stack-slot
4616 // w V | pad0 | numbers; owned by CALLER
4617 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4618 // h ^ | in | 5
4619 // | | args | 4 Holes in incoming args owned by SELF
4620 // | | | | 3
4621 // | | +--------+
4622 // V | | old out| Empty on Intel, window on Sparc
4623 // | old |preserve| Must be even aligned.
5762 %}
5763 %}
5764
5765 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Address form: $reg + ($idx << $scale) + $off, with a 32-bit int index that is
// converted to long (ConvI2L) in the matched tree.
5766 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5767 %{
5768 constraint(ALLOC_IN_RC(ptr_reg));
// Only applies when the long type of the node reached via in(2)->in(3)->in(1)
// has a lower bound (_lo) >= 0 -- presumably the ConvI2L of the index, hence
// the "Pos" in the operand name; confirm the input numbering.
5769 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5770 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5771
5772 op_cost(10);
5773 format %{"[$reg + $off + $idx << $scale]" %}
5774 interface(MEMORY_INTER) %{
5775 base($reg);
5776 index($idx);
5777 scale($scale);
5778 disp($off);
5779 %}
5780 %}
5781
5782 // Indirect Narrow Oop Plus Offset Operand
5783 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5784 // we can't free r12 even with CompressedOops::base() == nullptr.
// The narrow oop register is used directly as the index (scaled by 8) with R12
// as the fixed base, so the DecodeN in the match rule needs no separate code.
5785 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
// Only valid with compressed oops enabled and a shift equal to Address::times_8,
// which matches the hard-coded scale(0x3) below.
5786 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5787 constraint(ALLOC_IN_RC(ptr_reg));
5788 match(AddP (DecodeN reg) off);
5789
5790 op_cost(10);
5791 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5792 interface(MEMORY_INTER) %{
5793 base(0xc); // R12
5794 index($reg);
5795 scale(0x3);
5796 disp($off);
5797 %}
5798 %}
5799
5800 // Indirect Memory Operand
5801 operand indirectNarrow(rRegN reg)
6271 %}
6272
6273 // Replaces legVec during post-selection cleanup. See above.
// Register operand matching VecZ, allocated from the vectorz_reg_legacy class;
// it has an empty format string.
6274 operand legVecZ() %{
6275 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6276 match(VecZ);
6277
6278 format %{ %}
6279 interface(REG_INTER);
6280 %}
6281
6282 //----------OPERAND CLASSES----------------------------------------------------
6283 // Operand Classes are groups of operands that are used to simplify
6284 // instruction definitions by not requiring the AD writer to specify separate
6285 // instructions for every form of operand when the instruction accepts
6286 // multiple operand types with the same basic encoding and format. The classic
6287 // case of this is memory operands.
6288
// The "memory" operand class: every addressing-mode operand listed here can be
// used wherever an instruction declares a "memory" operand. The list covers the
// plain forms, the compressed-oop form, and the *Narrow variants.
6289 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6290 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6291 indCompressedOopOffset,
6292 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6293 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6294 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6295
6296 //----------PIPELINE-----------------------------------------------------------
6297 // Rules which define the behavior of the target architectures pipeline.
6298 pipeline %{
6299
6300 //----------ATTRIBUTES---------------------------------------------------------
6301 attributes %{
6302 variable_size_instructions; // Instructions vary in size
6303 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6304 instruction_unit_size = 1; // Instruction sizes are counted in 1-byte units
6305 instruction_fetch_unit_size = 16; // The processor fetches one line
6306 instruction_fetch_units = 1; // of 16 bytes
6307 %}
6308
6309 //----------RESOURCES----------------------------------------------------------
6310 // Resources are the functional units available to the machine
6311
8901 format %{ "MEMBAR-storestore (empty encoding)" %}
8902 ins_encode( );
8903 ins_pipe(empty);
8904 %}
8905
8906 //----------Move Instructions--------------------------------------------------
8907
// Long-to-pointer cast (CastX2P). A register move is emitted only when dst and
// src were assigned different registers; otherwise no code is generated.
8908 instruct castX2P(rRegP dst, rRegL src)
8909 %{
8910 match(Set dst (CastX2P src));
8911
8912 format %{ "movq $dst, $src\t# long->ptr" %}
8913 ins_encode %{
8914 if ($dst$$reg != $src$$reg) {
8915 __ movptr($dst$$Register, $src$$Register);
8916 }
8917 %}
8918 ins_pipe(ialu_reg_reg); // XXX
8919 %}
8920
// Pointer-to-long cast (CastP2X). A register move is emitted only when dst and
// src were assigned different registers; otherwise no code is generated.
8921 instruct castP2X(rRegL dst, rRegP src)
8922 %{
8923 match(Set dst (CastP2X src));
8924
8925 format %{ "movq $dst, $src\t# ptr -> long" %}
8926 ins_encode %{
8927 if ($dst$$reg != $src$$reg) {
8928 __ movptr($dst$$Register, $src$$Register);
8929 }
8930 %}
8931 ins_pipe(ialu_reg_reg); // XXX
8932 %}
8933
8934 // Convert oop into int for vectors alignment masking
8935 instruct convP2I(rRegI dst, rRegP src)
8936 %{
8937 match(Set dst (ConvL2I (CastP2X src)));
8938
8939 format %{ "movl $dst, $src\t# ptr -> int" %}
8940 ins_encode %{
14639 effect(DEF dst, USE src);
14640 ins_cost(100);
14641 format %{ "movd $dst,$src\t# MoveI2F" %}
14642 ins_encode %{
14643 __ movdl($dst$$XMMRegister, $src$$Register);
14644 %}
14645 ins_pipe( pipe_slow );
14646 %}
14647
// MoveL2D between registers: copies a long general-purpose register into a
// double (XMM) register with movdq.
// NOTE(review): the format string prints "movd" while the encoding calls
// movdq() -- confirm whether the listing text should say movq/movdq.
14648 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14649 match(Set dst (MoveL2D src));
14650 effect(DEF dst, USE src);
14651 ins_cost(100);
14652 format %{ "movd $dst,$src\t# MoveL2D" %}
14653 ins_encode %{
14654 __ movdq($dst$$XMMRegister, $src$$Register);
14655 %}
14656 ins_pipe( pipe_slow );
14657 %}
14658
14659 // Fast clearing of an array
14660 // Small non-constant length ClearArray for non-AVX512 targets.
14661 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14662 Universe dummy, rFlagsReg cr)
14663 %{
14664 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14665 match(Set dummy (ClearArray cnt base));
14666 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14667
14668 format %{ $$template
14669 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14670 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14671 $$emit$$"jg LARGE\n\t"
14672 $$emit$$"dec rcx\n\t"
14673 $$emit$$"js DONE\t# Zero length\n\t"
14674 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
14675 $$emit$$"dec rcx\n\t"
14676 $$emit$$"jge LOOP\n\t"
14677 $$emit$$"jmp DONE\n\t"
14678 $$emit$$"# LARGE:\n\t"
14679 if (UseFastStosb) {
14680 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14681 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
14682 } else if (UseXMMForObjInit) {
14683 $$emit$$"mov rdi,rax\n\t"
14684 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14685 $$emit$$"jmpq L_zero_64_bytes\n\t"
14686 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14694 $$emit$$"jl L_tail\n\t"
14695 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14696 $$emit$$"add 0x20,rax\n\t"
14697 $$emit$$"sub 0x4,rcx\n\t"
14698 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14699 $$emit$$"add 0x4,rcx\n\t"
14700 $$emit$$"jle L_end\n\t"
14701 $$emit$$"dec rcx\n\t"
14702 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14703 $$emit$$"vmovq xmm0,(rax)\n\t"
14704 $$emit$$"add 0x8,rax\n\t"
14705 $$emit$$"dec rcx\n\t"
14706 $$emit$$"jge L_sloop\n\t"
14707 $$emit$$"# L_end:\n\t"
14708 } else {
14709 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
14710 }
14711 $$emit$$"# DONE"
14712 %}
14713 ins_encode %{
14714 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14715 $tmp$$XMMRegister, false, knoreg);
14716 %}
14717 ins_pipe(pipe_slow);
14718 %}
14719
14720 // Small non-constant length ClearArray for AVX512 targets.
14721 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
14722 Universe dummy, rFlagsReg cr)
14723 %{
14724 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
14725 match(Set dummy (ClearArray cnt base));
14726 ins_cost(125);
14727 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
14728
14729 format %{ $$template
14730 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14731 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14732 $$emit$$"jg LARGE\n\t"
14733 $$emit$$"dec rcx\n\t"
14734 $$emit$$"js DONE\t# Zero length\n\t"
14735 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
14736 $$emit$$"dec rcx\n\t"
14737 $$emit$$"jge LOOP\n\t"
14738 $$emit$$"jmp DONE\n\t"
14739 $$emit$$"# LARGE:\n\t"
14740 if (UseFastStosb) {
14741 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14742 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
14743 } else if (UseXMMForObjInit) {
14744 $$emit$$"mov rdi,rax\n\t"
14745 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14746 $$emit$$"jmpq L_zero_64_bytes\n\t"
14747 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14755 $$emit$$"jl L_tail\n\t"
14756 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14757 $$emit$$"add 0x20,rax\n\t"
14758 $$emit$$"sub 0x4,rcx\n\t"
14759 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14760 $$emit$$"add 0x4,rcx\n\t"
14761 $$emit$$"jle L_end\n\t"
14762 $$emit$$"dec rcx\n\t"
14763 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14764 $$emit$$"vmovq xmm0,(rax)\n\t"
14765 $$emit$$"add 0x8,rax\n\t"
14766 $$emit$$"dec rcx\n\t"
14767 $$emit$$"jge L_sloop\n\t"
14768 $$emit$$"# L_end:\n\t"
14769 } else {
14770 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
14771 }
14772 $$emit$$"# DONE"
14773 %}
14774 ins_encode %{
14775 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14776 $tmp$$XMMRegister, false, $ktmp$$KRegister);
14777 %}
14778 ins_pipe(pipe_slow);
14779 %}
14780
14781 // Large non-constant length ClearArray for non-AVX512 targets.
// Selected when UseAVX <= 2 and the ClearArray node is marked large. The count
// is pinned to rcx, the base to rdi and the zero value to rax; cnt and base are
// consumed (USE_KILL), tmp is a scratch XMM register, rax and flags are killed.
14782 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14783 Universe dummy, rFlagsReg cr)
14784 %{
14785 predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
14786 match(Set dummy (ClearArray cnt base));
14787 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14788
14789 format %{ $$template
14790 if (UseFastStosb) {
14791 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14792 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14793 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
14794 } else if (UseXMMForObjInit) {
14795 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
14796 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14797 $$emit$$"jmpq L_zero_64_bytes\n\t"
14798 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14799 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14800 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14801 $$emit$$"add 0x40,rax\n\t"
14802 $$emit$$"# L_zero_64_bytes:\n\t"
14803 $$emit$$"sub 0x8,rcx\n\t"
14804 $$emit$$"jge L_loop\n\t"
14805 $$emit$$"add 0x4,rcx\n\t"
14806 $$emit$$"jl L_tail\n\t"
14807 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14808 $$emit$$"add 0x20,rax\n\t"
14809 $$emit$$"sub 0x4,rcx\n\t"
14810 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14811 $$emit$$"add 0x4,rcx\n\t"
14812 $$emit$$"jle L_end\n\t"
14813 $$emit$$"dec rcx\n\t"
14814 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14815 $$emit$$"vmovq xmm0,(rax)\n\t"
14816 $$emit$$"add 0x8,rax\n\t"
14817 $$emit$$"dec rcx\n\t"
14818 $$emit$$"jge L_sloop\n\t"
14819 $$emit$$"# L_end:\n\t"
14820 } else {
14821 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14822 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
14823 }
14824 %}
// The format text above is only a listing; the real code comes from clear_mem().
// It is called with `true` (the small-array rules pass `false`) and with knoreg
// in place of a mask register, since this rule has no kReg operand.
14825 ins_encode %{
14826 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14827 $tmp$$XMMRegister, true, knoreg);
14828 %}
14829 ins_pipe(pipe_slow);
14830 %}
14831
14832 // Large non-constant length ClearArray for AVX512 targets.
// Selected when UseAVX > 2 and the ClearArray node is marked large. Same fixed
// registers as the non-AVX512 rule (rcx count, rdi base, rax zero), plus a
// legacy-class XMM temp and a k-register temp that is handed to clear_mem().
14833 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
14834 Universe dummy, rFlagsReg cr)
14835 %{
14836 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
14837 match(Set dummy (ClearArray cnt base));
14838 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
14839
14840 format %{ $$template
14841 if (UseFastStosb) {
14842 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14843 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14844 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
14845 } else if (UseXMMForObjInit) {
14846 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
14847 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14848 $$emit$$"jmpq L_zero_64_bytes\n\t"
14849 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14850 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14851 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14852 $$emit$$"add 0x40,rax\n\t"
14853 $$emit$$"# L_zero_64_bytes:\n\t"
14854 $$emit$$"sub 0x8,rcx\n\t"
14855 $$emit$$"jge L_loop\n\t"
14856 $$emit$$"add 0x4,rcx\n\t"
14857 $$emit$$"jl L_tail\n\t"
14858 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14859 $$emit$$"add 0x20,rax\n\t"
14860 $$emit$$"sub 0x4,rcx\n\t"
14861 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14862 $$emit$$"add 0x4,rcx\n\t"
14863 $$emit$$"jle L_end\n\t"
14864 $$emit$$"dec rcx\n\t"
14865 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14866 $$emit$$"vmovq xmm0,(rax)\n\t"
14867 $$emit$$"add 0x8,rax\n\t"
14868 $$emit$$"dec rcx\n\t"
14869 $$emit$$"jge L_sloop\n\t"
14870 $$emit$$"# L_end:\n\t"
14871 } else {
14872 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14873 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
14874 }
14875 %}
// The format text above is only a listing; the real code comes from clear_mem(),
// called with `true` (the small-array rules pass `false`) and the ktmp register.
14876 ins_encode %{
14877 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14878 $tmp$$XMMRegister, true, $ktmp$$KRegister);
14879 %}
14880 ins_pipe(pipe_slow);
14881 %}
14882
14883 // Small constant length ClearArray for AVX512 targets.
// Here the count is an immediate (immL) and the base may be any pointer register.
// Requires a non-large ClearArray node, MaxVectorSize >= 32 and AVX512VL support.
// The zero register, XMM temp and k-register temp are all plain TEMPs.
14884 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
14885 %{
14886 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
14887 match(Set dummy (ClearArray cnt base));
14888 ins_cost(100);
14889 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
14890 format %{ "clear_mem_imm $base , $cnt \n\t" %}
14891 ins_encode %{
14892 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
14893 %}
14894 ins_pipe(pipe_slow);
14895 %}
14896
// String compare for the LL encoding, used when AVX512VL+BW is not supported.
// Inputs are pinned to rdi/rcx (str1/cnt1) and rsi/rdx (str2/cnt2), the result
// to rax; all four inputs are consumed (USE_KILL) and tmp1 is an XMM scratch.
14897 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14898 rax_RegI result, legRegD tmp1, rFlagsReg cr)
14899 %{
14900 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
14901 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14902 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14903
14904 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
// knoreg is passed because this rule has no k-register operand.
14905 ins_encode %{
14906 __ string_compare($str1$$Register, $str2$$Register,
14907 $cnt1$$Register, $cnt2$$Register, $result$$Register,
14908 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
14909 %}
14910 ins_pipe( pipe_slow );
14911 %}
14912
16750 effect(USE meth);
16751
16752 ins_cost(300);
16753 format %{ "call_leaf,runtime " %}
16754 ins_encode(clear_avx, Java_To_Runtime(meth));
16755 ins_pipe(pipe_slow);
16756 %}
16757
16758 // Call runtime without safepoint and with vector arguments
// Unlike the other leaf-call rules, the encoding has no clear_avx step;
// MachCallRuntimeNode::ret_addr_offset() likewise skips clear_avx_size() for
// Op_CallLeafVector, so the two stay in agreement.
16759 instruct CallLeafDirectVector(method meth)
16760 %{
16761 match(CallLeafVector);
16762 effect(USE meth);
16763
16764 ins_cost(300);
16765 format %{ "call_leaf,vector " %}
16766 ins_encode(Java_To_Runtime(meth));
16767 ins_pipe(pipe_slow);
16768 %}
16769
16770 // Call runtime without safepoint
// Matches CallLeafNoFP; the encoding is clear_avx followed by Java_To_Runtime.
16771 instruct CallLeafNoFPDirect(method meth)
16772 %{
16773 match(CallLeafNoFP);
16774 effect(USE meth);
16775
16776 ins_cost(300);
16777 format %{ "call_leaf_nofp,runtime " %}
16778 ins_encode(clear_avx, Java_To_Runtime(meth));
16779 ins_pipe(pipe_slow);
16780 %}
16781
16782 // Return Instruction
16783 // Remove the return address & jump to it.
16784 // Notice: We always emit a nop after a ret to make sure there is room
16785 // for safepoint patching
16786 instruct Ret()
16787 %{
16788 match(Return);
16789
16790 format %{ "ret" %}
16791 ins_encode %{
16792 __ ret(0);
|
1632 }
1633
1634 // Return-address offsets: every kind of call node reports the number of
1635 // bytes between the start of its call sequence and the point where the
1636 // return address will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 // 5 bytes from start of call to where return address points, plus
1640 // whatever clear_avx_size() reports.
1641 return 5 + clear_avx_size();
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 // 15 bytes from start of call to where return address points; the
1647 // size reported by clear_avx_size() is added on top of that.
1648 return 15 + clear_avx_size();
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 // No entry point: CallLeafNoFPInDirect, which is just callq (register).
1653 if (_entry_point == nullptr) {
1654 return 3;
1655 }
1656 // movq r10,#addr; callq (r10) -- 13 bytes in total.
1657 const int call_seq_size = 13;
1658 // CallLeafVector is the one opcode that does not add clear_avx_size().
1659 const bool is_leaf_vector = (this->ideal_Opcode() == Op_CallLeafVector);
1660 return is_leaf_vector ? call_seq_size : call_seq_size + clear_avx_size();
1661 }
1662 //
1663 // Compute padding required for nodes which need alignment
1664 //
1665
1666 // Padding needed in front of a static Java call so that the bytes after the
1667 // opcode are 4-byte aligned and cannot span a cache line (keeps it patchable).
1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1669 {
1670 // Step past the vzeroupper and the one-byte call opcode first.
1671 const int operand_offset = current_offset + clear_avx_size() + 1;
1672 return align_up(operand_offset, alignment_required()) - operand_offset;
1673 }
1674
1675 // The address of the call instruction needs to be 4-byte aligned to
1874 st->print("\n\t");
1875 st->print("# stack alignment check");
1876 #endif
1877 }
1878 if (C->stub_function() != nullptr) {
1879 st->print("\n\t");
1880 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1881 st->print("\n\t");
1882 st->print("je fast_entry\t");
1883 st->print("\n\t");
1884 st->print("call #nmethod_entry_barrier_stub\t");
1885 st->print("\n\tfast_entry:");
1886 }
1887 st->cr();
1888 }
1889 #endif
1890
// Emits the method prolog: the verified entry sequence, the entry barrier for
// non-stub compilations, the binding of the verified-entry label, and then the
// frame-complete offset and constant table base offset bookkeeping.
1891 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1892 Compile* C = ra_->C;
1893
1894 __ verified_entry(C);
1895
// The entry barrier is skipped for stub compilations.
1896 if (ra_->C->stub_function() == nullptr) {
1897 __ entry_barrier();
1898 }
1899
// The label is bound only on the real emission pass, not while sizing code
// in the scratch buffer.
1900 if (!Compile::current()->output()->in_scratch_emit_size()) {
1901 __ bind(*_verified_entry);
1902 }
1903
1904 C->output()->set_frame_complete(__ offset());
1905
1906 if (C->has_mach_constant_base_node()) {
1907 // NOTE: We set the table base offset here because users might be
1908 // emitted before MachConstantBaseNode.
1909 ConstantTable& constant_table = C->output()->constant_table();
1910 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1911 }
1912 }
1913
1914
// Reloc value reported for the prolog node; always 0.
// NOTE(review): the inline remark calls this "a large enough number" although
// the value is 0 -- confirm the remark is still accurate.
1915 int MachPrologNode::reloc() const
1916 {
1917 return 0; // a large enough number
1918 }
1919
1920 //=============================================================================
1921 #ifndef PRODUCT
1922 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1923 {
1924 Compile* C = ra_->C;
1925 if (generate_vzeroupper(C)) {
1926 st->print("vzeroupper");
1927 st->cr(); st->print("\t");
1928 }
1929
1930 int framesize = C->output()->frame_size_in_bytes();
1931 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1932 // Remove word for return adr already pushed
1933 // and RBP
1941 st->print_cr("popq rbp");
1942 if (do_polling() && C->is_method_compilation()) {
1943 st->print("\t");
1944 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1945 "ja #safepoint_stub\t"
1946 "# Safepoint: poll for GC");
1947 }
1948 }
1949 #endif
1950
// Emits the method epilog: optional vzeroupper, frame removal through
// remove_frame(), optional reserved-stack check, and the return safepoint poll.
1951 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1952 {
1953 Compile* C = ra_->C;
1954
1955 if (generate_vzeroupper(C)) {
1956 // Clear upper bits of YMM registers when current compiled code uses
1957 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1958 __ vzeroupper();
1959 }
1960
1961 // Subtract two words to account for return address and rbp
1962 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
// The second argument forwards whether this compilation needs stack repair.
1963 __ remove_frame(initial_framesize, C->needs_stack_repair());
1964
1965 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1966 __ reserved_stack_check();
1967 }
1968
1969 if (do_polling() && C->is_method_compilation()) {
// While only measuring size (in_scratch_emit_size) no stub is created or
// registered; the poll then targets a local dummy label instead.
1970 Label dummy_label;
1971 Label* code_stub = &dummy_label;
1972 if (!C->output()->in_scratch_emit_size()) {
1973 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1974 C->output()->add_stub(stub);
1975 code_stub = &stub->entry();
1976 }
1977 __ relocate(relocInfo::poll_return_type);
1978 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1979 }
1980 }
1981
// Reloc value reported for the epilog node; always 2.
1982 int MachEpilogNode::reloc() const
1983 {
1984 return 2; // a large enough number
1985 }
1986
// The epilog has no pipeline description of its own; it returns whatever
// MachNode::pipeline_class() provides.
1987 const Pipeline* MachEpilogNode::pipeline() const
1988 {
1989 return MachNode::pipeline_class();
1990 }
1991
1992 //=============================================================================
1993
// Classification tags.
// NOTE(review): the meanings below are inferred from the enumerator names
// only -- confirm against the code that uses this enum.
1994 enum RC {
1995 rc_bad, // first enumerator (value 0); presumably "no valid class" -- confirm
1996 rc_int, // presumably general-purpose (integer) registers -- confirm
1997 rc_kreg, // presumably k (mask) registers -- confirm
1998 rc_float, // presumably floating-point/vector registers -- confirm
1999 rc_stack // presumably stack slots -- confirm
2000 };
2001
2563 #endif
2564
2565 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2566 {
2567 // Load into this node's register the address rsp + offset, where the offset
2568 // belongs to the first element of input 0's register mask.
2569 const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2570 __ lea(as_Register(ra_->get_encode(this)), Address(rsp, box_offset));
2571 }
2572
2573 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2574 {
2575 const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2576 // Offsets below 0x80 give the smaller size (5 bytes instead of 8 with REX).
2577 const uint rex_size = (box_offset < 0x80) ? 5 : 8;
2578 // Register encodings above 15 use REX2, which costs one byte more than REX.
2579 const uint rex2_extra = (ra_->get_encode(this) > 15) ? 1 : 0;
2580 return rex_size + rex2_extra;
2581 }
2582
2583 //=============================================================================
2584 #ifndef PRODUCT
// Debug-only listing: prints just the node name, not the emitted instructions.
2585 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2586 {
2587 st->print_cr("MachVEPNode");
2588 }
2589 #endif
2590
// Emits one entry point that handles inline-type arguments. When _verified is
// false only the inline cache check is emitted. Otherwise the code optionally
// runs the entry barrier in a temporary frame, unpacks the inline-type
// arguments, sets up the frame with the resulting stack increment, and jumps to
// the verified entry label. Non-stub compilations end with 4-byte alignment.
2591 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2592 {
// NOTE(review): cbuf is not referenced anywhere else in this function.
2593 CodeBuffer* cbuf = masm->code();
2594 if (!_verified) {
2595 __ ic_check(1);
2596 } else {
2597 if (ra_->C->stub_function() == nullptr) {
2598 // Emit the entry barrier in a temporary frame before unpacking because
2599 // it can deopt, which would require packing the scalarized args again.
2600 __ verified_entry(ra_->C, 0);
2601 __ entry_barrier();
2602 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2603 __ remove_frame(initial_framesize, false);
2604 }
2605 // Unpack inline type args passed as oop and then jump to
2606 // the verified entry point (skipping the unverified entry).
2607 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2608 // Emit code for verified entry and save increment for stack repair on return
2609 __ verified_entry(ra_->C, sp_inc);
// During scratch sizing the jump targets a local dummy label instead of
// *_verified_entry.
2610 if (Compile::current()->output()->in_scratch_emit_size()) {
2611 Label dummy_verified_entry;
2612 __ jmp(dummy_verified_entry);
2613 } else {
2614 __ jmp(*_verified_entry);
2615 }
2616 }
2617 if (ra_->C->stub_function() == nullptr) {
2618 // Pad so that the next call to MachVEPNode::emit() starts out with the
2619 // correct alignment. This is needed by entry_barrier() to align the
2620 // compare. But unfortunately we need to align all 4 MachVEPNodes because
2621 // entry point offsets are computed using scratch_emit_size(), so starting
2622 // alignment must match the alignment of the scratch buffer, otherwise the sizes
2623 // will be off.
2624 __ align(4);
2625 }
2626 }
2627
2628 //=============================================================================
2629 #ifndef PRODUCT
2630 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2631 {
2632 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2633 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2634 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2635 }
2636 #endif
2637
2638 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2639 {
2640 __ ic_check(InteriorEntryAlignment);
2641 }
2642
2643
2644 //=============================================================================
2645
2646 bool Matcher::supports_vector_calling_convention(void) {
2647 return EnableVectorSupport;
2648 }
2649
2650 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2651 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2652 }
2653
2654 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2655 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2656 }
2657
#ifdef ASSERT
// Debug-build helper: true when mdef is flagged as NDD-demotable through
// either its first or its second operand.
static bool is_ndd_demotable(const MachNode* mdef) {
  if (is_ndd_demotable_opr1(mdef)) {
    return true;
  }
  return is_ndd_demotable_opr2(mdef);
}
#endif
4596 }
4597 __ post_call_nop();
4598 %}
4599
// Encoding for a dynamic Java call: emits an inline-cache call to $meth,
// passing the resolved method index for this call site, and then calls
// post_call_nop().
4600 enc_class Java_Dynamic_Call(method meth) %{
4601 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4602 __ post_call_nop();
4603 %}
4604
4605 enc_class call_epilog %{
4606 if (VerifyStackAtCalls) {
4607 // Check that stack depth is unchanged: find majik cookie on stack
4608 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4609 Label L;
4610 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4611 __ jccb(Assembler::equal, L);
4612 // Die if stack mismatch
4613 __ int3();
4614 __ bind(L);
4615 }
4616 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4617 // The last return value is not set by the callee but used to pass the null marker to compiled code.
4618 // Search for the corresponding projection, get the register and emit code that initializes it.
4619 uint con = (tf()->range_cc()->cnt() - 1);
4620 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4621 ProjNode* proj = fast_out(i)->as_Proj();
4622 if (proj->_con == con) {
4623 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4624 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4625 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4626 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4627 __ testq(rax, rax);
4628 __ setb(Assembler::notZero, toReg);
4629 __ movzbl(toReg, toReg);
4630 if (reg->is_stack()) {
4631 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4632 __ movq(Address(rsp, st_off), toReg);
4633 }
4634 break;
4635 }
4636 }
4637 if (return_value_is_used()) {
4638 // An inline type is returned as fields in multiple registers.
4639 // Rax either contains an oop if the inline type is buffered or a pointer
4640 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4641 // if the lowest bit is set to allow C2 to use the oop after null checking.
4642 // rax &= (rax & 1) - 1
4643 __ movptr(rscratch1, rax);
4644 __ andptr(rscratch1, 0x1);
4645 __ subptr(rscratch1, 0x1);
4646 __ andptr(rax, rscratch1);
4647 }
4648 }
4649 %}
4650
4651 %}
4652
4653 //----------FRAME--------------------------------------------------------------
4654 // Definition of frame structure and management information.
4655 //
4656 // S T A C K L A Y O U T Allocators stack-slot number
4657 // | (to get allocators register number
4658 // G Owned by | | v add OptoReg::stack0())
4659 // r CALLER | |
4660 // o | +--------+ pad to even-align allocators stack-slot
4661 // w V | pad0 | numbers; owned by CALLER
4662 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4663 // h ^ | in | 5
4664 // | | args | 4 Holes in incoming args owned by SELF
4665 // | | | | 3
4666 // | | +--------+
4667 // V | | old out| Empty on Intel, window on Sparc
4668 // | old |preserve| Must be even aligned.
5807 %}
5808 %}
5809
5810 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches reg + (ConvI2L(idx) << scale) + off as a single memory operand.
// The predicate follows n->in(2)->in(3)->in(1) (presumably the ConvI2L node
// feeding the shift - confirm against the AddP input layout) and accepts the
// operand only when that node's long type has a lower bound (_lo) >= 0.
5811 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5812 %{
5813 constraint(ALLOC_IN_RC(ptr_reg));
5814 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5815 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5816
5817 op_cost(10);
5818 format %{"[$reg + $off + $idx << $scale]" %}
5819 interface(MEMORY_INTER) %{
5820 base($reg);
5821 index($idx);
5822 scale($scale);
5823 disp($off);
5824 %}
5825 %}
5826
5827 // Indirect Narrow Oop Operand
// Folds a DecodeN into the address: base is fixed to encoding 0xc (R12), the
// narrow oop register is the index and the scale is 3. Only available with
// UseCompressedOops and a compressed-oop shift equal to Address::times_8.
5828 operand indCompressedOop(rRegN reg) %{
5829 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5830 constraint(ALLOC_IN_RC(ptr_reg));
5831 match(DecodeN reg);
5832
5833 op_cost(10);
5834 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5835 interface(MEMORY_INTER) %{
5836 base(0xc); // R12
5837 index($reg);
5838 scale(0x3);
5839 disp(0x0);
5840 %}
5841 %}
5842
5843 // Indirect Narrow Oop Plus Offset Operand
5844 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
5845 // so we can't free r12 even with CompressedOops::base() == nullptr.
// Same addressing as indCompressedOop (base R12, index = narrow oop, scale 3)
// with the 32-bit immediate $off as displacement.
5846 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5847 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5848 constraint(ALLOC_IN_RC(ptr_reg));
5849 match(AddP (DecodeN reg) off);
5850
5851 op_cost(10);
5852 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5853 interface(MEMORY_INTER) %{
5854 base(0xc); // R12
5855 index($reg);
5856 scale(0x3);
5857 disp($off);
5858 %}
5859 %}
5860
5861 // Indirect Memory Operand
5862 operand indirectNarrow(rRegN reg)
6332 %}
6333
6334 // Replaces legVec during post-selection cleanup. See above.
// Register operand matching VecZ, allocated from the vectorz_reg_legacy
// register class; it prints nothing in formats.
6335 operand legVecZ() %{
6336 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6337 match(VecZ);
6338
6339 format %{ %}
6340 interface(REG_INTER);
6341 %}
6342
6343 //----------OPERAND CLASSES----------------------------------------------------
6344 // Operand Classes are groups of operands that are used to simplify
6345 // instruction definitions by not requiring the AD writer to specify separate
6346 // instructions for every form of operand when the instruction accepts
6347 // multiple operand types with the same basic encoding and format. The classic
6348 // case of this is memory operands.
6349
// The "memory" operand class: an instruction declared with a memory operand
// accepts any of the addressing-mode operands listed here, including the
// compressed-oop forms and the *Narrow variants.
6350 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6351 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6352 indCompressedOop, indCompressedOopOffset,
6353 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6354 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6355 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6356
6357 //----------PIPELINE-----------------------------------------------------------
6358 // Rules which define the behavior of the target architectures pipeline.
6359 pipeline %{
6360
6361 //----------ATTRIBUTES---------------------------------------------------------
// Pipeline-wide attributes describing the target's instruction stream.
6362 attributes %{
6363 variable_size_instructions; // Instructions are variable-sized
6364 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6365 instruction_unit_size = 1; // Instruction sizes are multiples of 1 byte
6366 instruction_fetch_unit_size = 16; // The processor fetches one line
6367 instruction_fetch_units = 1; // of 16 bytes
6368 %}
6369
6370 //----------RESOURCES----------------------------------------------------------
6371 // Resources are the functional units available to the machine
6372
8962 format %{ "MEMBAR-storestore (empty encoding)" %}
8963 ins_encode( );
8964 ins_pipe(empty);
8965 %}
8966
8967 //----------Move Instructions--------------------------------------------------
8968
8969 instruct castX2P(rRegP dst, rRegL src)
8970 %{
8971 match(Set dst (CastX2P src));
8972
8973 format %{ "movq $dst, $src\t# long->ptr" %}
8974 ins_encode %{
8975 if ($dst$$reg != $src$$reg) {
8976 __ movptr($dst$$Register, $src$$Register);
8977 }
8978 %}
8979 ins_pipe(ialu_reg_reg); // XXX
8980 %}
8981
8982 instruct castI2N(rRegN dst, rRegI src)
8983 %{
8984 match(Set dst (CastI2N src));
8985
8986 format %{ "movq $dst, $src\t# int -> narrow ptr" %}
8987 ins_encode %{
8988 if ($dst$$reg != $src$$reg) {
8989 __ movl($dst$$Register, $src$$Register);
8990 }
8991 %}
8992 ins_pipe(ialu_reg_reg); // XXX
8993 %}
8994
8995 instruct castN2X(rRegL dst, rRegN src)
8996 %{
8997 match(Set dst (CastP2X src));
8998
8999 format %{ "movq $dst, $src\t# ptr -> long" %}
9000 ins_encode %{
9001 if ($dst$$reg != $src$$reg) {
9002 __ movptr($dst$$Register, $src$$Register);
9003 }
9004 %}
9005 ins_pipe(ialu_reg_reg); // XXX
9006 %}
9007
9008 instruct castP2X(rRegL dst, rRegP src)
9009 %{
9010 match(Set dst (CastP2X src));
9011
9012 format %{ "movq $dst, $src\t# ptr -> long" %}
9013 ins_encode %{
9014 if ($dst$$reg != $src$$reg) {
9015 __ movptr($dst$$Register, $src$$Register);
9016 }
9017 %}
9018 ins_pipe(ialu_reg_reg); // XXX
9019 %}
9020
9021 // Convert oop into int for vectors alignment masking
9022 instruct convP2I(rRegI dst, rRegP src)
9023 %{
9024 match(Set dst (ConvL2I (CastP2X src)));
9025
9026 format %{ "movl $dst, $src\t# ptr -> int" %}
9027 ins_encode %{
14726 effect(DEF dst, USE src);
14727 ins_cost(100);
14728 format %{ "movd $dst,$src\t# MoveI2F" %}
14729 ins_encode %{
14730 __ movdl($dst$$XMMRegister, $src$$Register);
14731 %}
14732 ins_pipe( pipe_slow );
14733 %}
14734
// MoveL2D, register to register: moves the long in general-purpose register
// $src into XMM register $dst via the assembler's movdq().
// NOTE(review): the format prints "movd" while the encoding calls movdq().
14735 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14736 match(Set dst (MoveL2D src));
14737 effect(DEF dst, USE src);
14738 ins_cost(100);
14739 format %{ "movd $dst,$src\t# MoveL2D" %}
14740 ins_encode %{
14741 __ movdq($dst$$XMMRegister, $src$$Register);
14742 %}
14743 ins_pipe( pipe_slow );
14744 %}
14745
14746
14747 // Fast clearing of an array
14748 // Small non-constant length ClearArray for non-AVX512 targets.
// Selected when the ClearArray node is neither is_large() nor word_copy_only()
// and UseAVX <= 2. cnt, base and val use the rcx/rdi/rax operand classes and
// are all USE_KILL; tmp is an XMM temporary and the flags are killed.
// The format template only prints pseudo-assembly (its shape depends on
// UseFastStosb / UseXMMForObjInit); the code is emitted by clear_mem(), whose
// two boolean arguments (false, false) mirror the predicate above.
14749 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
14750 Universe dummy, rFlagsReg cr)
14751 %{
14752 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
14753 match(Set dummy (ClearArray (Binary cnt base) val));
14754 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
14755
14756 format %{ $$template
14757 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14758 $$emit$$"jg LARGE\n\t"
14759 $$emit$$"dec rcx\n\t"
14760 $$emit$$"js DONE\t# Zero length\n\t"
14761 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
14762 $$emit$$"dec rcx\n\t"
14763 $$emit$$"jge LOOP\n\t"
14764 $$emit$$"jmp DONE\n\t"
14765 $$emit$$"# LARGE:\n\t"
14766 if (UseFastStosb) {
14767 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14768 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
14769 } else if (UseXMMForObjInit) {
14770 $$emit$$"movdq $tmp, $val\n\t"
14771 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
14772 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
14773 $$emit$$"jmpq L_zero_64_bytes\n\t"
14774 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14775 $$emit$$"vmovdqu $tmp,(rax)\n\t"
14776 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
14777 $$emit$$"add 0x40,rax\n\t"
14778 $$emit$$"# L_zero_64_bytes:\n\t"
14779 $$emit$$"sub 0x8,rcx\n\t"
14780 $$emit$$"jge L_loop\n\t"
14781 $$emit$$"add 0x4,rcx\n\t"
14782 $$emit$$"jl L_tail\n\t"
14783 $$emit$$"vmovdqu $tmp,(rax)\n\t"
14784 $$emit$$"add 0x20,rax\n\t"
14785 $$emit$$"sub 0x4,rcx\n\t"
14786 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14787 $$emit$$"add 0x4,rcx\n\t"
14788 $$emit$$"jle L_end\n\t"
14789 $$emit$$"dec rcx\n\t"
14790 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14791 $$emit$$"vmovq xmm0,(rax)\n\t"
14792 $$emit$$"add 0x8,rax\n\t"
14793 $$emit$$"dec rcx\n\t"
14794 $$emit$$"jge L_sloop\n\t"
14795 $$emit$$"# L_end:\n\t"
14796 } else {
14797 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
14798 }
14799 $$emit$$"# DONE"
14800 %}
14801 ins_encode %{
14802 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14803 $tmp$$XMMRegister, false, false);
14804 %}
14805 ins_pipe(pipe_slow);
14806 %}
14807
// Small non-constant length ClearArray for non-AVX512 targets, word-copy-only
// flavour: selected when the node is not is_large() but is word_copy_only()
// and UseAVX <= 2. Operands and effects are the same as for rep_stos; the
// printed template has no "rep stosb" alternative. The code is emitted by
// clear_mem() with the boolean arguments (false, true).
14808 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
14809 Universe dummy, rFlagsReg cr)
14810 %{
14811 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
14812 match(Set dummy (ClearArray (Binary cnt base) val));
14813 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
14814
14815 format %{ $$template
14816 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14817 $$emit$$"jg LARGE\n\t"
14818 $$emit$$"dec rcx\n\t"
14819 $$emit$$"js DONE\t# Zero length\n\t"
14820 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
14821 $$emit$$"dec rcx\n\t"
14822 $$emit$$"jge LOOP\n\t"
14823 $$emit$$"jmp DONE\n\t"
14824 $$emit$$"# LARGE:\n\t"
14825 if (UseXMMForObjInit) {
14826 $$emit$$"movdq $tmp, $val\n\t"
14827 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
14828 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
14829 $$emit$$"jmpq L_zero_64_bytes\n\t"
14830 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14831 $$emit$$"vmovdqu $tmp,(rax)\n\t"
14832 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
14833 $$emit$$"add 0x40,rax\n\t"
14834 $$emit$$"# L_zero_64_bytes:\n\t"
14835 $$emit$$"sub 0x8,rcx\n\t"
14836 $$emit$$"jge L_loop\n\t"
14837 $$emit$$"add 0x4,rcx\n\t"
14838 $$emit$$"jl L_tail\n\t"
14839 $$emit$$"vmovdqu $tmp,(rax)\n\t"
14840 $$emit$$"add 0x20,rax\n\t"
14841 $$emit$$"sub 0x4,rcx\n\t"
14842 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14843 $$emit$$"add 0x4,rcx\n\t"
14844 $$emit$$"jle L_end\n\t"
14845 $$emit$$"dec rcx\n\t"
14846 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14847 $$emit$$"vmovq xmm0,(rax)\n\t"
14848 $$emit$$"add 0x8,rax\n\t"
14849 $$emit$$"dec rcx\n\t"
14850 $$emit$$"jge L_sloop\n\t"
14851 $$emit$$"# L_end:\n\t"
14852 } else {
14853 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
14854 }
14855 $$emit$$"# DONE"
14856 %}
14857 ins_encode %{
14858 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14859 $tmp$$XMMRegister, false, true);
14860 %}
14861 ins_pipe(pipe_slow);
14862 %}
14863
14864 // Small non-constant length ClearArray for AVX512 targets.
// Selected when the node is neither is_large() nor word_copy_only() and
// UseAVX > 2. Compared with rep_stos it takes a legRegD temporary plus an
// opmask temporary (ktmp), which is passed to clear_mem() as an extra argument,
// and it carries an explicit ins_cost of 125.
// NOTE(review): the template prints "xorq rax, rax" and "mov rdi,rax" although
// val (rax) is an input operand here; the non-EVEX templates print neither, so
// this text may be stale - confirm against clear_mem().
14865 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
14866 Universe dummy, rFlagsReg cr)
14867 %{
14868 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
14869 match(Set dummy (ClearArray (Binary cnt base) val));
14870 ins_cost(125);
14871 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
14872
14873 format %{ $$template
14874 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14875 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14876 $$emit$$"jg LARGE\n\t"
14877 $$emit$$"dec rcx\n\t"
14878 $$emit$$"js DONE\t# Zero length\n\t"
14879 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
14880 $$emit$$"dec rcx\n\t"
14881 $$emit$$"jge LOOP\n\t"
14882 $$emit$$"jmp DONE\n\t"
14883 $$emit$$"# LARGE:\n\t"
14884 if (UseFastStosb) {
14885 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14886 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
14887 } else if (UseXMMForObjInit) {
14888 $$emit$$"mov rdi,rax\n\t"
14889 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14890 $$emit$$"jmpq L_zero_64_bytes\n\t"
14891 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14899 $$emit$$"jl L_tail\n\t"
14900 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14901 $$emit$$"add 0x20,rax\n\t"
14902 $$emit$$"sub 0x4,rcx\n\t"
14903 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14904 $$emit$$"add 0x4,rcx\n\t"
14905 $$emit$$"jle L_end\n\t"
14906 $$emit$$"dec rcx\n\t"
14907 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14908 $$emit$$"vmovq xmm0,(rax)\n\t"
14909 $$emit$$"add 0x8,rax\n\t"
14910 $$emit$$"dec rcx\n\t"
14911 $$emit$$"jge L_sloop\n\t"
14912 $$emit$$"# L_end:\n\t"
14913 } else {
14914 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
14915 }
14916 $$emit$$"# DONE"
14917 %}
14918 ins_encode %{
14919 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14920 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
14921 %}
14922 ins_pipe(pipe_slow);
14923 %}
14924
// Small non-constant length ClearArray for AVX512 targets, word-copy-only
// flavour: selected when the node is not is_large() but is word_copy_only()
// and UseAVX > 2. Operands, cost and effects match rep_stos_evex; the code is
// emitted by clear_mem() with the boolean arguments (false, true) and ktmp.
// NOTE(review): unlike the non-EVEX word-copy template, this one still prints a
// UseFastStosb "rep stosb" alternative and an "xorq rax, rax" line - the text
// may be stale; confirm against clear_mem().
14925 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
14926 Universe dummy, rFlagsReg cr)
14927 %{
14928 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
14929 match(Set dummy (ClearArray (Binary cnt base) val));
14930 ins_cost(125);
14931 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
14932
14933 format %{ $$template
14934 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14935 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14936 $$emit$$"jg LARGE\n\t"
14937 $$emit$$"dec rcx\n\t"
14938 $$emit$$"js DONE\t# Zero length\n\t"
14939 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
14940 $$emit$$"dec rcx\n\t"
14941 $$emit$$"jge LOOP\n\t"
14942 $$emit$$"jmp DONE\n\t"
14943 $$emit$$"# LARGE:\n\t"
14944 if (UseFastStosb) {
14945 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14946 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
14947 } else if (UseXMMForObjInit) {
14948 $$emit$$"mov rdi,rax\n\t"
14949 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14950 $$emit$$"jmpq L_zero_64_bytes\n\t"
14951 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14959 $$emit$$"jl L_tail\n\t"
14960 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14961 $$emit$$"add 0x20,rax\n\t"
14962 $$emit$$"sub 0x4,rcx\n\t"
14963 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14964 $$emit$$"add 0x4,rcx\n\t"
14965 $$emit$$"jle L_end\n\t"
14966 $$emit$$"dec rcx\n\t"
14967 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14968 $$emit$$"vmovq xmm0,(rax)\n\t"
14969 $$emit$$"add 0x8,rax\n\t"
14970 $$emit$$"dec rcx\n\t"
14971 $$emit$$"jge L_sloop\n\t"
14972 $$emit$$"# L_end:\n\t"
14973 } else {
14974 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
14975 }
14976 $$emit$$"# DONE"
14977 %}
14978 ins_encode %{
14979 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14980 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
14981 %}
14982 ins_pipe(pipe_slow);
14983 %}
14984
14985 // Large non-constant length ClearArray for non-AVX512 targets.
// Selected when the node is is_large() but not word_copy_only() and
// UseAVX <= 2. The printed template has no short-length loop, only the bulk
// strategies. cnt, base and val (rcx/rdi/rax operand classes) are USE_KILL,
// tmp is an XMM temporary, flags are killed. The code is emitted by clear_mem()
// with the boolean arguments (true, false).
14986 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
14987 Universe dummy, rFlagsReg cr)
14988 %{
14989 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
14990 match(Set dummy (ClearArray (Binary cnt base) val));
14991 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
14992
14993 format %{ $$template
14994 if (UseFastStosb) {
14995 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14996 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
14997 } else if (UseXMMForObjInit) {
14998 $$emit$$"movdq $tmp, $val\n\t"
14999 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15000 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15001 $$emit$$"jmpq L_zero_64_bytes\n\t"
15002 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15003 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15004 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15005 $$emit$$"add 0x40,rax\n\t"
15006 $$emit$$"# L_zero_64_bytes:\n\t"
15007 $$emit$$"sub 0x8,rcx\n\t"
15008 $$emit$$"jge L_loop\n\t"
15009 $$emit$$"add 0x4,rcx\n\t"
15010 $$emit$$"jl L_tail\n\t"
15011 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15012 $$emit$$"add 0x20,rax\n\t"
15013 $$emit$$"sub 0x4,rcx\n\t"
15014 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15015 $$emit$$"add 0x4,rcx\n\t"
15016 $$emit$$"jle L_end\n\t"
15017 $$emit$$"dec rcx\n\t"
15018 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15019 $$emit$$"vmovq xmm0,(rax)\n\t"
15020 $$emit$$"add 0x8,rax\n\t"
15021 $$emit$$"dec rcx\n\t"
15022 $$emit$$"jge L_sloop\n\t"
15023 $$emit$$"# L_end:\n\t"
15024 } else {
15025 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15026 }
15027 %}
15028 ins_encode %{
15029 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15030 $tmp$$XMMRegister, true, false);
15031 %}
15032 ins_pipe(pipe_slow);
15033 %}
15034
// Large non-constant length ClearArray for non-AVX512 targets, word-copy-only
// flavour: selected when the node is both is_large() and word_copy_only() and
// UseAVX <= 2. Operands and effects match rep_stos_large; the printed template
// has no "rep stosb" alternative. The code is emitted by clear_mem() with the
// boolean arguments (true, true).
15035 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15036 Universe dummy, rFlagsReg cr)
15037 %{
15038 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15039 match(Set dummy (ClearArray (Binary cnt base) val));
15040 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15041
15042 format %{ $$template
15043 if (UseXMMForObjInit) {
15044 $$emit$$"movdq $tmp, $val\n\t"
15045 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15046 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15047 $$emit$$"jmpq L_zero_64_bytes\n\t"
15048 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15049 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15050 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15051 $$emit$$"add 0x40,rax\n\t"
15052 $$emit$$"# L_zero_64_bytes:\n\t"
15053 $$emit$$"sub 0x8,rcx\n\t"
15054 $$emit$$"jge L_loop\n\t"
15055 $$emit$$"add 0x4,rcx\n\t"
15056 $$emit$$"jl L_tail\n\t"
15057 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15058 $$emit$$"add 0x20,rax\n\t"
15059 $$emit$$"sub 0x4,rcx\n\t"
15060 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15061 $$emit$$"add 0x4,rcx\n\t"
15062 $$emit$$"jle L_end\n\t"
15063 $$emit$$"dec rcx\n\t"
15064 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15065 $$emit$$"vmovq xmm0,(rax)\n\t"
15066 $$emit$$"add 0x8,rax\n\t"
15067 $$emit$$"dec rcx\n\t"
15068 $$emit$$"jge L_sloop\n\t"
15069 $$emit$$"# L_end:\n\t"
15070 } else {
15071 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15072 }
15073 %}
15074 ins_encode %{
15075 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15076 $tmp$$XMMRegister, true, true);
15077 %}
15078 ins_pipe(pipe_slow);
15079 %}
15080
15081 // Large non-constant length ClearArray for AVX512 targets.
// Selected when the node is is_large() but not word_copy_only() and
// UseAVX > 2. Compared with rep_stos_large it takes a legRegD temporary plus an
// opmask temporary (ktmp) that is passed to clear_mem() as an extra argument;
// the boolean arguments are (true, false).
// NOTE(review): the template prints "xorq rax, rax" / "mov rdi,rax" although
// val (rax) is an input operand here; the non-EVEX templates print neither, so
// this text may be stale - confirm against clear_mem().
15082 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15083 Universe dummy, rFlagsReg cr)
15084 %{
15085 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15086 match(Set dummy (ClearArray (Binary cnt base) val));
15087 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15088
15089 format %{ $$template
15090 if (UseFastStosb) {
15091 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15092 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15093 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15094 } else if (UseXMMForObjInit) {
15095 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15096 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15097 $$emit$$"jmpq L_zero_64_bytes\n\t"
15098 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15099 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15100 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15101 $$emit$$"add 0x40,rax\n\t"
15102 $$emit$$"# L_zero_64_bytes:\n\t"
15103 $$emit$$"sub 0x8,rcx\n\t"
15104 $$emit$$"jge L_loop\n\t"
15105 $$emit$$"add 0x4,rcx\n\t"
15106 $$emit$$"jl L_tail\n\t"
15107 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15108 $$emit$$"add 0x20,rax\n\t"
15109 $$emit$$"sub 0x4,rcx\n\t"
15110 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15111 $$emit$$"add 0x4,rcx\n\t"
15112 $$emit$$"jle L_end\n\t"
15113 $$emit$$"dec rcx\n\t"
15114 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15115 $$emit$$"vmovq xmm0,(rax)\n\t"
15116 $$emit$$"add 0x8,rax\n\t"
15117 $$emit$$"dec rcx\n\t"
15118 $$emit$$"jge L_sloop\n\t"
15119 $$emit$$"# L_end:\n\t"
15120 } else {
15121 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15122 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15123 }
15124 %}
15125 ins_encode %{
15126 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15127 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15128 %}
15129 ins_pipe(pipe_slow);
15130 %}
15131
// Large non-constant length ClearArray for AVX512 targets, word-copy-only
// flavour: selected when the node is both is_large() and word_copy_only() and
// UseAVX > 2. Operands and effects match rep_stos_large_evex; the code is
// emitted by clear_mem() with the boolean arguments (true, true) and ktmp.
// NOTE(review): unlike the non-EVEX word-copy template, this one still prints a
// UseFastStosb "rep stosb" alternative and "xorq rax, rax" lines - the text may
// be stale; confirm against clear_mem().
15132 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15133 Universe dummy, rFlagsReg cr)
15134 %{
15135 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15136 match(Set dummy (ClearArray (Binary cnt base) val));
15137 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15138
15139 format %{ $$template
15140 if (UseFastStosb) {
15141 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15142 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15143 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15144 } else if (UseXMMForObjInit) {
15145 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15146 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15147 $$emit$$"jmpq L_zero_64_bytes\n\t"
15148 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15149 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15150 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15151 $$emit$$"add 0x40,rax\n\t"
15152 $$emit$$"# L_zero_64_bytes:\n\t"
15153 $$emit$$"sub 0x8,rcx\n\t"
15154 $$emit$$"jge L_loop\n\t"
15155 $$emit$$"add 0x4,rcx\n\t"
15156 $$emit$$"jl L_tail\n\t"
15157 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15158 $$emit$$"add 0x20,rax\n\t"
15159 $$emit$$"sub 0x4,rcx\n\t"
15160 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15161 $$emit$$"add 0x4,rcx\n\t"
15162 $$emit$$"jle L_end\n\t"
15163 $$emit$$"dec rcx\n\t"
15164 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15165 $$emit$$"vmovq xmm0,(rax)\n\t"
15166 $$emit$$"add 0x8,rax\n\t"
15167 $$emit$$"dec rcx\n\t"
15168 $$emit$$"jge L_sloop\n\t"
15169 $$emit$$"# L_end:\n\t"
15170 } else {
15171 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15172 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15173 }
15174 %}
15175 ins_encode %{
15176 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15177 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15178 %}
15179 ins_pipe(pipe_slow);
15180 %}
15181
15182 // Small constant length ClearArray for AVX512 targets.
// The count is an immediate (immL) and the base may be any pointer register.
// Selected when the node is neither is_large() nor word_copy_only(),
// MaxVectorSize >= 32 and AVX512VL is supported. Its ins_cost of 100 is lower
// than the 125 declared by the register-count EVEX rules. The code is emitted
// by the clear_mem() overload taking the constant count, with tmp and ktmp as
// temporaries.
15183 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15184 %{
15185 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15186 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15187 match(Set dummy (ClearArray (Binary cnt base) val));
15188 ins_cost(100);
15189 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15190 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15191 ins_encode %{
15192 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15193 %}
15194 ins_pipe(pipe_slow);
15195 %}
15196
// StrComp for the LL encoding (both operands byte[]), used when
// VM_Version::supports_avx512vlbw() is false. The strings and their counts use
// the rdi/rcx/rsi/rdx operand classes and are all USE_KILL; the result uses
// the rax operand class. tmp1 is an XMM temporary, flags are killed, and
// knoreg is passed as the mask-register argument of string_compare().
15197 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15198 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15199 %{
15200 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15201 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15202 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15203
15204 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15205 ins_encode %{
15206 __ string_compare($str1$$Register, $str2$$Register,
15207 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15208 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15209 %}
15210 ins_pipe( pipe_slow );
15211 %}
15212
17050 effect(USE meth);
17051
17052 ins_cost(300);
17053 format %{ "call_leaf,runtime " %}
17054 ins_encode(clear_avx, Java_To_Runtime(meth));
17055 ins_pipe(pipe_slow);
17056 %}
17057
17058 // Call runtime without safepoint and with vector arguments
// Unlike CallLeafNoFPDirect, the encoding here has no clear_avx step before
// Java_To_Runtime; MachCallRuntimeNode::ret_addr_offset() likewise skips
// clear_avx_size() for Op_CallLeafVector.
17059 instruct CallLeafDirectVector(method meth)
17060 %{
17061 match(CallLeafVector);
17062 effect(USE meth);
17063
17064 ins_cost(300);
17065 format %{ "call_leaf,vector " %}
17066 ins_encode(Java_To_Runtime(meth));
17067 ins_pipe(pipe_slow);
17068 %}
17069
17070 // Call runtime without safepoint
17071 // entry point is null, target holds the address to call
// Selected when the call node's entry_point() is nullptr; the encoding is a
// single indirect call through the $target register.
17072 instruct CallLeafNoFPInDirect(rRegP target)
17073 %{
17074 predicate(n->as_Call()->entry_point() == nullptr);
17075 match(CallLeafNoFP target);
17076
17077 ins_cost(300);
17078 format %{ "call_leaf_nofp,runtime indirect " %}
17079 ins_encode %{
17080 __ call($target$$Register);
17081 %}
17082
17083 ins_pipe(pipe_slow);
17084 %}
17085
17086 // Call runtime without safepoint
// Selected when the call node's entry_point() is non-null (the nullptr case is
// handled by CallLeafNoFPInDirect). Encoded as clear_avx followed by
// Java_To_Runtime on the method operand.
17087 instruct CallLeafNoFPDirect(method meth)
17088 %{
17089 predicate(n->as_Call()->entry_point() != nullptr);
17090 match(CallLeafNoFP);
17091 effect(USE meth);
17092
17093 ins_cost(300);
17094 format %{ "call_leaf_nofp,runtime " %}
17095 ins_encode(clear_avx, Java_To_Runtime(meth));
17096 ins_pipe(pipe_slow);
17097 %}
17098
17099 // Return Instruction
17100 // Remove the return address & jump to it.
17101 // Notice: We always emit a nop after a ret to make sure there is room
17102 // for safepoint patching
17103 instruct Ret()
17104 %{
17105 match(Return);
17106
17107 format %{ "ret" %}
17108 ins_encode %{
17109 __ ret(0);
|