1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
// Emit the method prologue: an optional class-initialization barrier
// (when clinit_barrier_on_entry() requires one), then the verified entry
// with stack bang and frame setup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Load the holder klass and take the fast path when the barrier check
    // passes; otherwise dispatch through the wrong-method stub.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Bang size of 0 suppresses the stack bang; the last argument flags
  // stub compilations (stub_function() != nullptr).
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  // The frame is fully constructed at this code offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1912
// Size of the prologue depends on too many inputs (barriers, bang, frame
// size) to compute statically; defer to the generic measure-by-emitting path.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1918
// Upper-bound estimate of relocation entries the prologue will emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1936 // Remove word for return adr already pushed
1937 // and RBP
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
// Emit the method epilogue: optional vzeroupper, frame teardown
// (addq rsp / popq rbp), reserved-stack check, and the return safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // The dummy label keeps the sizing pass (in_scratch_emit_size) from
    // allocating a real stub while still emitting identically-sized code.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1995
// Epilogue length varies (vzeroupper, poll, reserved-stack check);
// measure it rather than computing it statically.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2001
// Upper-bound estimate of relocation entries the epilogue will emit
// (e.g. the poll_return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
2006
// Use the generic pipeline class for the epilogue pseudo-instruction.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2011
2012 //=============================================================================
2013
// Register class of a spill/copy location.
enum RC {
  rc_bad,   // not a register
  rc_int,   // general-purpose register
  rc_kreg,  // AVX-512 opmask (k) register
  rc_float, // XMM/vector register
  rc_stack  // stack slot
};
2021
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
// Emit the unverified entry point: the inline-cache check, with the
// verified entry aligned to InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2622
// The ic_check sequence's size depends on alignment and pointer
// compression; measure it instead of computing it.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2628
2629
2630 //=============================================================================
2631
// Vector calling convention is available whenever the Vector API support
// flag is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2635
2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2637 assert(EnableVectorSupport, "sanity");
2638 int lo = XMM0_num;
2639 int hi = XMM0b_num;
2640 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2641 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2642 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2643 return OptoRegPair(hi, lo);
2644 }
2645
2646 // Is this branch offset short enough that a short branch can be used?
2647 //
2648 // NOTE: If the platform does not provide any short branch variants, then
4505 }
4506 __ post_call_nop();
4507 %}
4508
  // Inline-cache call to a Java method, followed by the standard
  // post-call nop (as in the other call encodings in this file).
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4513
  // Emitted after calls when VerifyStackAtCalls is enabled: check that the
  // 0xbadb100d cookie is still at the expected stack offset, i.e. the
  // callee did not misbalance rsp; break into the debugger otherwise.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
4526
4527 %}
4528
4529 //----------FRAME--------------------------------------------------------------
4530 // Definition of frame structure and management information.
4531 //
4532 // S T A C K L A Y O U T Allocators stack-slot number
4533 // | (to get allocators register number
4534 // G Owned by | | v add OptoReg::stack0())
4535 // r CALLER | |
4536 // o | +--------+ pad to even-align allocators stack-slot
4537 // w V | pad0 | numbers; owned by CALLER
4538 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4539 // h ^ | in | 5
4540 // | | args | 4 Holes in incoming args owned by SELF
4541 // | | | | 3
4542 // | | +--------+
4543 // V | | old out| Empty on Intel, window on Sparc
4544 // | old |preserve| Must be even aligned.
5667 %}
5668 %}
5669
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is known non-negative (type low bound
  // >= 0), as required for this ConvI2L-based addressing form.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5686
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when oops are compressed with shift 3, so the narrow oop
  // can serve as a scaled index off the heap-base register R12.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5704
5705 // Indirect Memory Operand
5706 operand indirectNarrow(rRegN reg)
6143 %}
6144
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector constrained to the legacy (non-extended) register set.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6153
6154 //----------OPERAND CLASSES----------------------------------------------------
6155 // Operand Classes are groups of operands that are used as to simplify
6156 // instruction definitions by not requiring the AD writer to specify separate
6157 // instructions for every form of operand when the instruction accepts
6158 // multiple operand types with the same basic encoding and format. The classic
6159 // case of this is memory operands.
6160
// All addressing-mode operands accepted by instructions declaring a
// generic "memory" operand, covering plain and narrow-oop based forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6167
6168 //----------PIPELINE-----------------------------------------------------------
6169 // Rules which define the behavior of the target architectures pipeline.
6170 pipeline %{
6171
6172 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6180
6181 //----------RESOURCES----------------------------------------------------------
6182 // Resources are the functional units available to the machine
6183
8741 format %{ "MEMBAR-storestore (empty encoding)" %}
8742 ins_encode( );
8743 ins_pipe(empty);
8744 %}
8745
8746 //----------Move Instructions--------------------------------------------------
8747
// Reinterpret a machine word (long) as a pointer; at most a
// register-to-register move.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Skip the move entirely when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8760
// Reinterpret a pointer as a machine word (long); at most a
// register-to-register move.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Skip the move entirely when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8773
8774 // Convert oop into int for vectors alignment masking
8775 instruct convP2I(rRegI dst, rRegP src)
8776 %{
8777 match(Set dst (ConvL2I (CastP2X src)));
8778
8779 format %{ "movl $dst, $src\t# ptr -> int" %}
8780 ins_encode %{
14964 effect(DEF dst, USE src);
14965 ins_cost(100);
14966 format %{ "movd $dst,$src\t# MoveI2F" %}
14967 ins_encode %{
14968 __ movdl($dst$$XMMRegister, $src$$Register);
14969 %}
14970 ins_pipe( pipe_slow );
14971 %}
14972
// Move the raw bits of a long GPR into a double XMM register
// (bitwise transfer via movd/movdq, no numeric conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14983
14984 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
14986 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14987 Universe dummy, rFlagsReg cr)
14988 %{
14989 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14990 match(Set dummy (ClearArray cnt base));
14991 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14992
14993 format %{ $$template
14994 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14995 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14996 $$emit$$"jg LARGE\n\t"
14997 $$emit$$"dec rcx\n\t"
14998 $$emit$$"js DONE\t# Zero length\n\t"
14999 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15000 $$emit$$"dec rcx\n\t"
15001 $$emit$$"jge LOOP\n\t"
15002 $$emit$$"jmp DONE\n\t"
15003 $$emit$$"# LARGE:\n\t"
15004 if (UseFastStosb) {
15005 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15006 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15007 } else if (UseXMMForObjInit) {
15008 $$emit$$"mov rdi,rax\n\t"
15009 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15010 $$emit$$"jmpq L_zero_64_bytes\n\t"
15011 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15019 $$emit$$"jl L_tail\n\t"
15020 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15021 $$emit$$"add 0x20,rax\n\t"
15022 $$emit$$"sub 0x4,rcx\n\t"
15023 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15024 $$emit$$"add 0x4,rcx\n\t"
15025 $$emit$$"jle L_end\n\t"
15026 $$emit$$"dec rcx\n\t"
15027 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15028 $$emit$$"vmovq xmm0,(rax)\n\t"
15029 $$emit$$"add 0x8,rax\n\t"
15030 $$emit$$"dec rcx\n\t"
15031 $$emit$$"jge L_sloop\n\t"
15032 $$emit$$"# L_end:\n\t"
15033 } else {
15034 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15035 }
15036 $$emit$$"# DONE"
15037 %}
15038 ins_encode %{
15039 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15040 $tmp$$XMMRegister, false, knoreg);
15041 %}
15042 ins_pipe(pipe_slow);
15043 %}
15044
15045 // Small non-constant length ClearArray for AVX512 targets.
15046 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15047 Universe dummy, rFlagsReg cr)
15048 %{
15049 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15050 match(Set dummy (ClearArray cnt base));
15051 ins_cost(125);
15052 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15053
15054 format %{ $$template
15055 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15056 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15057 $$emit$$"jg LARGE\n\t"
15058 $$emit$$"dec rcx\n\t"
15059 $$emit$$"js DONE\t# Zero length\n\t"
15060 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15061 $$emit$$"dec rcx\n\t"
15062 $$emit$$"jge LOOP\n\t"
15063 $$emit$$"jmp DONE\n\t"
15064 $$emit$$"# LARGE:\n\t"
15065 if (UseFastStosb) {
15066 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15067 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15068 } else if (UseXMMForObjInit) {
15069 $$emit$$"mov rdi,rax\n\t"
15070 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15071 $$emit$$"jmpq L_zero_64_bytes\n\t"
15072 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15080 $$emit$$"jl L_tail\n\t"
15081 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15082 $$emit$$"add 0x20,rax\n\t"
15083 $$emit$$"sub 0x4,rcx\n\t"
15084 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15085 $$emit$$"add 0x4,rcx\n\t"
15086 $$emit$$"jle L_end\n\t"
15087 $$emit$$"dec rcx\n\t"
15088 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15089 $$emit$$"vmovq xmm0,(rax)\n\t"
15090 $$emit$$"add 0x8,rax\n\t"
15091 $$emit$$"dec rcx\n\t"
15092 $$emit$$"jge L_sloop\n\t"
15093 $$emit$$"# L_end:\n\t"
15094 } else {
15095 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15096 }
15097 $$emit$$"# DONE"
15098 %}
15099 ins_encode %{
15100 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15101 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15102 %}
15103 ins_pipe(pipe_slow);
15104 %}
15105
// Large non-constant length ClearArray for non-AVX512 targets.
// Three strategies, chosen at format/emit time: rep stosb (fast-string
// hardware), a 64-byte YMM store loop, or plain rep stosq.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // true = is_large; knoreg = no opmask register (non-AVX512 path).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15156
// Large non-constant length ClearArray for AVX512 targets.
// Same strategies as rep_stos_large, but passes an opmask temp register
// so clear_mem can use masked EVEX stores.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // true = is_large; the opmask temp enables the AVX512 masked path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15207
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // cnt is a compile-time constant here, so clear_mem can emit a fixed
  // sequence; requires AVX512VL and at least 32-byte vectors.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15221
// Compare two Latin-1 (byte[]) strings; LL encoding, non-AVX512VLBW
// targets only (the predicate excludes avx512vlbw, which presumably
// matches a separate rule — not visible here).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register on this non-AVX512 path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15237
17042 effect(USE meth);
17043
17044 ins_cost(300);
17045 format %{ "call_leaf,runtime " %}
17046 ins_encode(clear_avx, Java_To_Runtime(meth));
17047 ins_pipe(pipe_slow);
17048 %}
17049
// Call runtime without safepoint and with vector arguments
// Note: unlike the other leaf-call rules, this encoding deliberately
// omits clear_avx, since the vector arguments must survive to the callee.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17061
// Call runtime without safepoint
// Leaf call with no FP state to preserve; still clears AVX upper state
// before the runtime transition.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17073
17074 // Return Instruction
17075 // Remove the return address & jump to it.
17076 // Notice: We always emit a nop after a ret to make sure there is room
17077 // for safepoint patching
17078 instruct Ret()
17079 %{
17080 match(Return);
17081
17082 format %{ "ret" %}
17083 ins_encode %{
17084 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1868 st->print("\n\t");
1869 st->print("# stack alignment check");
1870 #endif
1871 }
1872 if (C->stub_function() != nullptr) {
1873 st->print("\n\t");
1874 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1875 st->print("\n\t");
1876 st->print("je fast_entry\t");
1877 st->print("\n\t");
1878 st->print("call #nmethod_entry_barrier_stub\t");
1879 st->print("\n\tfast_entry:");
1880 }
1881 st->cr();
1882 }
1883 #endif
1884
// Emit the method prologue: verified entry (frame setup is delegated to
// the macro assembler here), nmethod entry barrier for regular methods,
// and binding of the verified-entry label.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Runtime stubs (stub_function() != nullptr) get no entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // NOTE(review): the label is only bound during the real emission pass,
  // not the scratch sizing pass — presumably to avoid binding the shared
  // label twice; confirm against the sizing machinery.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  // The frame is fully constructed at this code offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1907
1908
// Upper-bound estimate of relocation entries the prologue will emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1913
1914 //=============================================================================
1915 #ifndef PRODUCT
1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1917 {
1918 Compile* C = ra_->C;
1919 if (generate_vzeroupper(C)) {
1920 st->print("vzeroupper");
1921 st->cr(); st->print("\t");
1922 }
1923
1924 int framesize = C->output()->frame_size_in_bytes();
1925 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1926 // Remove word for return adr already pushed
1927 // and RBP
1935 st->print_cr("popq rbp");
1936 if (do_polling() && C->is_method_compilation()) {
1937 st->print("\t");
1938 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1939 "ja #safepoint_stub\t"
1940 "# Safepoint: poll for GC");
1941 }
1942 }
1943 #endif
1944
// Emit the method epilogue: optional vzeroupper, frame removal (with
// stack-repair support), reserved-stack check, and the return safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // The dummy label keeps the sizing pass (in_scratch_emit_size) from
    // allocating a real stub while still emitting identically-sized code.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1975
// Upper-bound estimate of relocation entries the epilogue will emit
// (e.g. the poll_return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1980
// Use the generic pipeline class for the epilogue pseudo-instruction.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1985
1986 //=============================================================================
1987
// Register class of a spill/copy location.
enum RC {
  rc_bad,   // not a register
  rc_int,   // general-purpose register
  rc_kreg,  // AVX-512 opmask (k) register
  rc_float, // XMM/vector register
  rc_stack  // stack slot
};
1995
2557 #endif
2558
2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2560 {
2561 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2562 int reg = ra_->get_encode(this);
2563
2564 __ lea(as_Register(reg), Address(rsp, offset));
2565 }
2566
2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2568 {
2569 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2570 if (ra_->get_encode(this) > 15) {
2571 return (offset < 0x80) ? 6 : 9; // REX2
2572 } else {
2573 return (offset < 0x80) ? 5 : 8; // REX
2574 }
2575 }
2576
2577 //=============================================================================
2578 #ifndef PRODUCT
// Debug listing of the inline-type (value) entry point; details are
// emitted by MachVEPNode::emit, so only the node name is printed.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2583 #endif
2584
// Emit the inline-type entry point. The unverified variant emits the
// inline-cache check; the verified variant unpacks inline-type arguments
// (buffered oops -> scalarized form) and jumps to the verified entry.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    // A temporary frame is built (and torn down again) around the entry
    // barrier for non-stub compilations.
    if (ra_->C->stub_function() == nullptr) {
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    // During the scratch sizing pass, jump to a local dummy label instead
    // of the shared verified-entry label (which is not bound there).
    if (Compile::current()->output()->in_scratch_emit_size()) {
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2619
2620 //=============================================================================
2621 #ifndef PRODUCT
2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2623 {
2624 if (UseCompressedClassPointers) {
2625 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2626 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2627 } else {
2628 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2629 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2630 }
2631 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2632 }
2633 #endif
2634
// Emit the inline cache check, aligned to InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2639
2640
2641 //=============================================================================
2642
// The vector calling convention is available whenever the VectorSupport
// intrinsics are enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2646
2647 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2648 assert(EnableVectorSupport, "sanity");
2649 int lo = XMM0_num;
2650 int hi = XMM0b_num;
2651 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2652 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2653 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2654 return OptoRegPair(hi, lo);
2655 }
2656
2657 // Is this branch offset short enough that a short branch can be used?
2658 //
2659 // NOTE: If the platform does not provide any short branch variants, then
4516 }
4517 __ post_call_nop();
4518 %}
4519
  // Dynamic (inline cache) Java call, followed by the post-call nop.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4524
  // Code emitted after a call: optional stack-depth verification, plus
  // fix-up of the null marker and of rax for methods that return an
  // inline type as multiple fields.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            // Marker was allocated to a stack slot; spill it there.
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4570
4571 %}
4572
4573 //----------FRAME--------------------------------------------------------------
4574 // Definition of frame structure and management information.
4575 //
4576 // S T A C K L A Y O U T Allocators stack-slot number
4577 // | (to get allocators register number
4578 // G Owned by | | v add OptoReg::stack0())
4579 // r CALLER | |
4580 // o | +--------+ pad to even-align allocators stack-slot
4581 // w V | pad0 | numbers; owned by CALLER
4582 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4583 // h ^ | in | 5
4584 // | | args | 4 Holes in incoming args owned by SELF
4585 // | | | | 3
4586 // | | +--------+
4587 // V | | old out| Empty on Intel, window on Sparc
4588 // | old |preserve| Must be even aligned.
5711 %}
5712 %}
5713
5714 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the index is known non-negative (its long type has
  // _lo >= 0), so the ConvI2L cannot change the effective address.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5730
5731 // Indirect Narrow Oop Operand
// Indirect Narrow Oop Operand
// Addresses a compressed oop directly: heap base in R12 plus the narrow
// oop shifted left by 3.
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5746
5747 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a
// base, so we can't free r12 even with CompressedOops::base() == nullptr.
// Same as indCompressedOop, but with an additional 32-bit displacement.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5764
5765 // Indirect Memory Operand
5766 operand indirectNarrow(rRegN reg)
6203 %}
6204
// Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  // 512-bit vector constrained to the legacy vector register class.
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6213
6214 //----------OPERAND CLASSES----------------------------------------------------
6215 // Operand Classes are groups of operands that are used as to simplify
6216 // instruction definitions by not requiring the AD writer to specify separate
6217 // instructions for every form of operand when the instruction accepts
6218 // multiple operand types with the same basic encoding and format. The classic
6219 // case of this is memory operands.
6220
// All addressing-mode operands accepted by instructions that take the
// generic 'memory' operand, including the narrow-oop variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6227
6228 //----------PIPELINE-----------------------------------------------------------
6229 // Rules which define the behavior of the target architectures pipeline.
6230 pipeline %{
6231
6232 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6240
6241 //----------RESOURCES----------------------------------------------------------
6242 // Resources are the functional units available to the machine
6243
8801 format %{ "MEMBAR-storestore (empty encoding)" %}
8802 ins_encode( );
8803 ins_pipe(empty);
8804 %}
8805
8806 //----------Move Instructions--------------------------------------------------
8807
// Reinterpret a long as a pointer (bitwise, no conversion).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elided entirely when allocated to the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8820
// Reinterpret an int as a narrow pointer (bitwise, no conversion).
// Fix: the format string previously said "movq" although the encoding
// emits a 32-bit movl, which made PrintOptoAssembly output misleading.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    // 32-bit move matches the narrow-pointer width; elided if same register.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8833
// Reinterpret a narrow pointer's bits as a long.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elided entirely when allocated to the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8846
// Reinterpret a pointer as a long (bitwise, no conversion).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elided entirely when allocated to the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8859
8860 // Convert oop into int for vectors alignment masking
8861 instruct convP2I(rRegI dst, rRegP src)
8862 %{
8863 match(Set dst (ConvL2I (CastP2X src)));
8864
8865 format %{ "movl $dst, $src\t# ptr -> int" %}
8866 ins_encode %{
15050 effect(DEF dst, USE src);
15051 ins_cost(100);
15052 format %{ "movd $dst,$src\t# MoveI2F" %}
15053 ins_encode %{
15054 __ movdl($dst$$XMMRegister, $src$$Register);
15055 %}
15056 ins_pipe( pipe_slow );
15057 %}
15058
// Move a long's raw bits into a double register (bitwise, no conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15069
15070
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Flags: is_large = false, word_copy_only = false (matches predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15131
// Small non-constant length ClearArray, word-copy-only variant
// (no rep-stosb byte fill), for non-AVX512 targets.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Flags: is_large = false, word_copy_only = true (matches predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15187
// Small non-constant length ClearArray for AVX512 targets.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Flags: is_large = false, word_copy_only = false; ktmp enables the
    // AVX-512 masked-store path inside clear_mem.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15248
// Small non-constant length ClearArray, word-copy-only variant, for
// AVX512 targets.
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Flags: is_large = false, word_copy_only = true (matches predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15308
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Flags: is_large = true, word_copy_only = false (matches predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15358
// Large non-constant length ClearArray, word-copy-only variant, for
// non-AVX512 targets.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Flags: is_large = true, word_copy_only = true (matches predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15404
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Flags: is_large = true, word_copy_only = false (matches predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15455
// Large non-constant length ClearArray, word-copy-only variant, for
// AVX512 targets.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Flags: is_large = true, word_copy_only = true (matches predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15505
// Small constant length ClearArray for AVX512 targets.
// The element count is a compile-time constant, so clear_mem can emit a
// fully unrolled sequence.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15520
// Compare two Latin-1 (byte[]) strings; result in rax.
// Non-AVX512vlbw variant: passes knoreg, so no opmask register is used.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15536
17341 effect(USE meth);
17342
17343 ins_cost(300);
17344 format %{ "call_leaf,runtime " %}
17345 ins_encode(clear_avx, Java_To_Runtime(meth));
17346 ins_pipe(pipe_slow);
17347 %}
17348
// Call runtime without safepoint and with vector arguments
// Note: no clear_avx here — vector arguments must stay live across the
// call; MachCallRuntimeNode::ret_addr_offset() likewise skips
// clear_avx_size() for Op_CallLeafVector.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17360
// Call runtime without safepoint
// entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Indirect variant: only used when the call has no static entry point.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17376
// Call runtime without safepoint
// Direct variant: used when the call has a static entry point; clears
// the upper AVX state (clear_avx) before entering the runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17389
17390 // Return Instruction
17391 // Remove the return address & jump to it.
17392 // Notice: We always emit a nop after a ret to make sure there is room
17393 // for safepoint patching
17394 instruct Ret()
17395 %{
17396 match(Return);
17397
17398 format %{ "ret" %}
17399 ins_encode %{
17400 __ ret(0);
|