1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
// Emit the method prolog: optional class-initialization barrier, then the
// verified entry (frame setup + stack bang), then mark the frame complete.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Fast path: holder class already initialized; otherwise fall through
    // to the wrong-method stub, which re-resolves the call.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Frame setup; bang size of 0 suppresses the stack bang when not needed.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
// Conservative count of relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1936 // Remove word for return adr already pushed
1937 // and RBP
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
// Emit the method epilog: optional vzeroupper, frame teardown, reserved
// stack check, and the return safepoint poll. The final ret itself is
// emitted by the Ret instruction.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // During the scratch sizing pass no real stub exists, so the poll
    // branches to a dummy label; real emission registers a shared stub.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
// Conservative count of relocation entries the epilog may emit
// (the return poll emits a poll_return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
2006
// Use the default pipeline description for the epilog.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2011
2012 //=============================================================================
2013
// Register-class kinds used by the spill-copy machinery below.
enum RC {
  rc_bad   = 0,  // not a spillable location
  rc_int   = 1,  // general-purpose register
  rc_kreg  = 2,  // opmask (k) register
  rc_float = 3,  // xmm register
  rc_stack = 4   // stack slot
};
2021
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
// Emit the inline-cache receiver check; InteriorEntryAlignment is passed
// through to ic_check (alignment of the post-check entry — see
// MacroAssembler::ic_check).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
// The vector calling convention is available exactly when Vector API
// support is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2635
2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2637 assert(EnableVectorSupport, "sanity");
2638 int lo = XMM0_num;
2639 int hi = XMM0b_num;
2640 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2641 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2642 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2643 return OptoRegPair(hi, lo);
2644 }
2645
2646 // Is this branch offset short enough that a short branch can be used?
2647 //
2648 // NOTE: If the platform does not provide any short branch variants, then
4512 }
4513 __ post_call_nop();
4514 %}
4515
  // Dynamic (inline-cache) Java call: ic_call to $meth with the resolved
  // method index, followed by a post-call nop (see
  // MacroAssembler::post_call_nop for its purpose).
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4520
  // Post-call verification: when VerifyStackAtCalls is on, check that the
  // magic cookie placed below the frame is still present after the call;
  // trap (int3) on a stack-depth mismatch.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
4533
4534 %}
4535
4536 //----------FRAME--------------------------------------------------------------
4537 // Definition of frame structure and management information.
4538 //
4539 // S T A C K L A Y O U T Allocators stack-slot number
4540 // | (to get allocators register number
4541 // G Owned by | | v add OptoReg::stack0())
4542 // r CALLER | |
4543 // o | +--------+ pad to even-align allocators stack-slot
4544 // w V | pad0 | numbers; owned by CALLER
4545 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4546 // h ^ | in | 5
4547 // | | args | 4 Holes in incoming args owned by SELF
4548 // | | | | 3
4549 // | | +--------+
4550 // V | | old out| Empty on Intel, window on Sparc
4551 // | old |preserve| Must be even aligned.
5674 %}
5675 %}
5676
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Walk the matched subtree (AddP -> LShiftL -> ConvI2L input) and require
  // the int index to be provably non-negative.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5693
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Requires an 8x oop shift so the decode is exactly [R12 + reg << 3 + off].
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5711
5712 // Indirect Memory Operand
5713 operand indirectNarrow(rRegN reg)
6150 %}
6151
// Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  // 512-bit vector restricted to the legacy register file
  // (see vectorz_reg_legacy).
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6160
6161 //----------OPERAND CLASSES----------------------------------------------------
6162 // Operand Classes are groups of operands that are used as to simplify
6163 // instruction definitions by not requiring the AD writer to specify separate
6164 // instructions for every form of operand when the instruction accepts
6165 // multiple operand types with the same basic encoding and format. The classic
6166 // case of this is memory operands.
6167
// All memory addressing modes accepted by memory-form instructions,
// covering both regular and narrow-oop (compressed) base variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6174
6175 //----------PIPELINE-----------------------------------------------------------
6176 // Rules which define the behavior of the target architectures pipeline.
6177 pipeline %{
6178
6179 //----------ATTRIBUTES---------------------------------------------------------
  attributes %{
    variable_size_instructions;        // x86 instructions are variable-sized
    max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
    instruction_unit_size = 1;         // An instruction is 1 bytes long
    instruction_fetch_unit_size = 16;  // The processor fetches one line
    instruction_fetch_units = 1;       // of 16 bytes
  %}
6187
6188 //----------RESOURCES----------------------------------------------------------
6189 // Resources are the functional units available to the machine
6190
8748 format %{ "MEMBAR-storestore (empty encoding)" %}
8749 ins_encode( );
8750 ins_pipe(empty);
8751 %}
8752
8753 //----------Move Instructions--------------------------------------------------
8754
// Reinterpret a raw long value as a pointer (CastX2P); no value change.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned src and dst the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8767
// Reinterpret a pointer as a raw long value (CastP2X); no value change.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned src and dst the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8780
8781 // Convert oop into int for vectors alignment masking
8782 instruct convP2I(rRegI dst, rRegP src)
8783 %{
8784 match(Set dst (ConvL2I (CastP2X src)));
8785
8786 format %{ "movl $dst, $src\t# ptr -> int" %}
8787 ins_encode %{
14971 effect(DEF dst, USE src);
14972 ins_cost(100);
14973 format %{ "movd $dst,$src\t# MoveI2F" %}
14974 ins_encode %{
14975 __ movdl($dst$$XMMRegister, $src$$Register);
14976 %}
14977 ins_pipe( pipe_slow );
14978 %}
14979
// Move the raw bit pattern of a long GPR into an xmm register (MoveL2D);
// no numeric conversion is performed.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14990
14991 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
14993 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14994 Universe dummy, rFlagsReg cr)
14995 %{
14996 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14997 match(Set dummy (ClearArray cnt base));
14998 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14999
15000 format %{ $$template
15001 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15002 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15003 $$emit$$"jg LARGE\n\t"
15004 $$emit$$"dec rcx\n\t"
15005 $$emit$$"js DONE\t# Zero length\n\t"
15006 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15007 $$emit$$"dec rcx\n\t"
15008 $$emit$$"jge LOOP\n\t"
15009 $$emit$$"jmp DONE\n\t"
15010 $$emit$$"# LARGE:\n\t"
15011 if (UseFastStosb) {
15012 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15013 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15014 } else if (UseXMMForObjInit) {
15015 $$emit$$"mov rdi,rax\n\t"
15016 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15017 $$emit$$"jmpq L_zero_64_bytes\n\t"
15018 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15026 $$emit$$"jl L_tail\n\t"
15027 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15028 $$emit$$"add 0x20,rax\n\t"
15029 $$emit$$"sub 0x4,rcx\n\t"
15030 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15031 $$emit$$"add 0x4,rcx\n\t"
15032 $$emit$$"jle L_end\n\t"
15033 $$emit$$"dec rcx\n\t"
15034 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15035 $$emit$$"vmovq xmm0,(rax)\n\t"
15036 $$emit$$"add 0x8,rax\n\t"
15037 $$emit$$"dec rcx\n\t"
15038 $$emit$$"jge L_sloop\n\t"
15039 $$emit$$"# L_end:\n\t"
15040 } else {
15041 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15042 }
15043 $$emit$$"# DONE"
15044 %}
15045 ins_encode %{
15046 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15047 $tmp$$XMMRegister, false, knoreg);
15048 %}
15049 ins_pipe(pipe_slow);
15050 %}
15051
15052 // Small non-constant length ClearArray for AVX512 targets.
15053 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15054 Universe dummy, rFlagsReg cr)
15055 %{
15056 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15057 match(Set dummy (ClearArray cnt base));
15058 ins_cost(125);
15059 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15060
15061 format %{ $$template
15062 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15063 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15064 $$emit$$"jg LARGE\n\t"
15065 $$emit$$"dec rcx\n\t"
15066 $$emit$$"js DONE\t# Zero length\n\t"
15067 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15068 $$emit$$"dec rcx\n\t"
15069 $$emit$$"jge LOOP\n\t"
15070 $$emit$$"jmp DONE\n\t"
15071 $$emit$$"# LARGE:\n\t"
15072 if (UseFastStosb) {
15073 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15074 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15075 } else if (UseXMMForObjInit) {
15076 $$emit$$"mov rdi,rax\n\t"
15077 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15078 $$emit$$"jmpq L_zero_64_bytes\n\t"
15079 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15087 $$emit$$"jl L_tail\n\t"
15088 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15089 $$emit$$"add 0x20,rax\n\t"
15090 $$emit$$"sub 0x4,rcx\n\t"
15091 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15092 $$emit$$"add 0x4,rcx\n\t"
15093 $$emit$$"jle L_end\n\t"
15094 $$emit$$"dec rcx\n\t"
15095 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15096 $$emit$$"vmovq xmm0,(rax)\n\t"
15097 $$emit$$"add 0x8,rax\n\t"
15098 $$emit$$"dec rcx\n\t"
15099 $$emit$$"jge L_sloop\n\t"
15100 $$emit$$"# L_end:\n\t"
15101 } else {
15102 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15103 }
15104 $$emit$$"# DONE"
15105 %}
15106 ins_encode %{
15107 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15108 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15109 %}
15110 ins_pipe(pipe_slow);
15111 %}
15112
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  // Selected only without AVX-512 and when the array is known large; the
  // small-length fast path is skipped entirely.
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the large-array path (the small variants pass false);
    // knoreg: no opmask register on this non-AVX512 path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15163
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  // Selected only with AVX-512 and when the array is known large; an
  // opmask temp (ktmp) is available for masked stores in clear_mem.
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the large-array path (the small variants pass false).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15214
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Length is a compile-time constant ($cnt$$constant), so clear_mem can
    // emit a fully unrolled sequence.
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15228
// Latin-1 vs Latin-1 (LL) string compare for targets without AVX512VL+BW.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register on this non-AVX512 path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15244
17049 effect(USE meth);
17050
17051 ins_cost(300);
17052 format %{ "call_leaf,runtime " %}
17053 ins_encode(clear_avx, Java_To_Runtime(meth));
17054 ins_pipe(pipe_slow);
17055 %}
17056
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Note: unlike the other leaf calls, no clear_avx here — presumably the
  // vector arguments must survive up to the call (matches the
  // Op_CallLeafVector special case in MachCallRuntimeNode::ret_addr_offset).
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17068
// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // clear_avx: optional vzeroupper before the call (avoids AVX<->SSE
  // transition penalties — see MachEpilogNode::emit).
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17080
17081 // Return Instruction
17082 // Remove the return address & jump to it.
17083 // Notice: We always emit a nop after a ret to make sure there is room
17084 // for safepoint patching
17085 instruct Ret()
17086 %{
17087 match(Return);
17088
17089 format %{ "ret" %}
17090 ins_encode %{
17091 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
// Byte distance from the start of the call sequence to the return address:
// 5-byte direct call plus the optional vzeroupper emitted before it.
int MachCallStaticJavaNode::ret_addr_offset()
{
  int offset = 5; // 5 bytes from start of call to where return address points
  offset += clear_avx_size();
  return offset;
}
1643
// Byte distance from the start of the dynamic-call sequence to the return
// address, including the optional vzeroupper emitted before it.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  int offset = 15; // 15 bytes from start of call to where return address points
  offset += clear_avx_size();
  return offset;
}
1650
// Byte distance from the start of the runtime-call sequence to the return
// address. Indirect leaf calls (no static entry point) are a bare 3-byte
// register call; direct ones load the address into r10 first.
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // Vector leaf calls are emitted without a preceding vzeroupper.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  // Pad so the patched word following the opcode is 4-byte aligned.
  return align_up(current_offset, alignment_required()) - current_offset;
}
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1868 st->print("\n\t");
1869 st->print("# stack alignment check");
1870 #endif
1871 }
1872 if (C->stub_function() != nullptr) {
1873 st->print("\n\t");
1874 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1875 st->print("\n\t");
1876 st->print("je fast_entry\t");
1877 st->print("\n\t");
1878 st->print("call #nmethod_entry_barrier_stub\t");
1879 st->print("\n\tfast_entry:");
1880 }
1881 st->cr();
1882 }
1883 #endif
1884
// Emit the method prolog: frame setup via verified_entry, an nmethod entry
// barrier for non-stub compilations, then mark the frame complete.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Stub compilations do not get an entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // The verified-entry label must not be bound during the scratch
  // sizing pass.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1907
1908
// Conservative count of relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1913
1914 //=============================================================================
1915 #ifndef PRODUCT
1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1917 {
1918 Compile* C = ra_->C;
1919 if (generate_vzeroupper(C)) {
1920 st->print("vzeroupper");
1921 st->cr(); st->print("\t");
1922 }
1923
1924 int framesize = C->output()->frame_size_in_bytes();
1925 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1926 // Remove word for return adr already pushed
1927 // and RBP
1935 st->print_cr("popq rbp");
1936 if (do_polling() && C->is_method_compilation()) {
1937 st->print("\t");
1938 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1939 "ja #safepoint_stub\t"
1940 "# Safepoint: poll for GC");
1941 }
1942 }
1943 #endif
1944
// Emit the method epilog: optional vzeroupper, frame removal (with stack
// repair when needed), reserved stack check, and the return safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // During the scratch sizing pass no real stub exists, so the poll
    // branches to a dummy label; real emission registers a shared stub.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1975
// Conservative count of relocation entries the epilog may emit
// (the return poll emits a poll_return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1980
// Use the default pipeline description for the epilog.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1985
1986 //=============================================================================
1987
// Register-class kinds used by the spill-copy machinery below.
enum RC {
  rc_bad,    // not a spillable location
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // xmm register
  rc_stack   // stack slot
};
1995
2557 #endif
2558
// Materialize the stack address of this node's monitor box slot into the
// register the allocator assigned to it.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2566
// Encoded size of the lea emitted above: disp8 vs disp32 form, with a
// longer prefix when an extended (encoding > 15) register is assigned.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
2576
2577 //=============================================================================
2578 #ifndef PRODUCT
// Debug-only printout; the node has no fixed assembly template to show.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2583 #endif
2584
// Emit the value-type entry point: either the unverified inline-cache check,
// or (for the verified variant) unpack inline-type arguments and jump to the
// verified entry, padding to a 4-byte boundary for entry patching.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Temporary zero-increment frame so entry_barrier can run; it is
      // removed again before the argument unpacking below.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    // During the scratch sizing pass the real label may not exist yet.
    if (Compile::current()->output()->in_scratch_emit_size()) {
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2619
2620 //=============================================================================
2621 #ifndef PRODUCT
2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2623 {
2624 if (UseCompressedClassPointers) {
2625 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2626 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2627 } else {
2628 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2629 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2630 }
2631 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2632 }
2633 #endif
2634
// Emit the unverified entry point: an inline-cache check padded to
// InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2639
2640
2641 //=============================================================================
2642
// The vector calling convention is available exactly when Vector API
// support is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2646
2647 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2648 assert(EnableVectorSupport, "sanity");
2649 int lo = XMM0_num;
2650 int hi = XMM0b_num;
2651 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2652 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2653 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2654 return OptoRegPair(hi, lo);
2655 }
2656
2657 // Is this branch offset short enough that a short branch can be used?
2658 //
2659 // NOTE: If the platform does not provide any short branch variants, then
4523 }
4524 __ post_call_nop();
4525 %}
4526
  // Dynamic (inline-cache) Java call: emits an ic_call to the target
  // method, then a post-call nop so the return address is patchable.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4531
  // Code emitted after a Java call returns: optional stack-depth check
  // (VerifyStackAtCalls), and for methods returning an inline type as
  // multiple fields, initialization of the null-marker projection and
  // normalization of rax for the caller.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            // The marker's home is a stack slot: store it there.
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4577
4578 %}
4579
4580 //----------FRAME--------------------------------------------------------------
4581 // Definition of frame structure and management information.
4582 //
4583 // S T A C K L A Y O U T Allocators stack-slot number
4584 // | (to get allocators register number
4585 // G Owned by | | v add OptoReg::stack0())
4586 // r CALLER | |
4587 // o | +--------+ pad to even-align allocators stack-slot
4588 // w V | pad0 | numbers; owned by CALLER
4589 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4590 // h ^ | in | 5
4591 // | | args | 4 Holes in incoming args owned by SELF
4592 // | | | | 3
4593 // | | +--------+
4594 // V | | old out| Empty on Intel, window on Sparc
4595 // | old |preserve| Must be even aligned.
5718 %}
5719 %}
5720
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// The predicate requires the int index to be provably non-negative, so the
// zero-extension performed by the addressing mode agrees with ConvI2L.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5737
// Indirect Narrow Oop Operand
// Addresses through a compressed oop without decoding it first:
// [R12 + narrow_oop << 3], where R12 holds the heap base; only valid
// when the compressed-oops shift is exactly 3 (times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5753
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Same as indCompressedOop, plus a 32-bit displacement.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5771
5772 // Indirect Memory Operand
5773 operand indirectNarrow(rRegN reg)
6210 %}
6211
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector register restricted to the legacy register set.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6220
6221 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6223 // instruction definitions by not requiring the AD writer to specify separate
6224 // instructions for every form of operand when the instruction accepts
6225 // multiple operand types with the same basic encoding and format. The classic
6226 // case of this is memory operands.
6227
// All addressing modes accepted by instructions taking a generic memory
// operand: plain, offset, indexed, scaled, and the narrow-oop variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6234
6235 //----------PIPELINE-----------------------------------------------------------
6236 // Rules which define the behavior of the target architectures pipeline.
6237 pipeline %{
6238
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6247
6248 //----------RESOURCES----------------------------------------------------------
6249 // Resources are the functional units available to the machine
6250
8808 format %{ "MEMBAR-storestore (empty encoding)" %}
8809 ins_encode( );
8810 ins_pipe(empty);
8811 %}
8812
8813 //----------Move Instructions--------------------------------------------------
8814
// Reinterpret a long's bits as a pointer (no conversion).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when allocation put src and dst in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8827
// Reinterpret an int's bits as a narrow pointer (no conversion).
// Fix: format string said "movq" although the encoding emits a 32-bit
// movl, matching the narrow destination register.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    // Elide the move when allocation put src and dst in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8840
// Reinterpret a narrow (compressed) pointer's bits as a long.
// NOTE(review): matches CastP2X with a narrow-oop input; presumably the
// matcher distinguishes it from castP2X by the operand type -- confirm.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8853
// Reinterpret a pointer's bits as a long (no conversion).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8866
8867 // Convert oop into int for vectors alignment masking
8868 instruct convP2I(rRegI dst, rRegP src)
8869 %{
8870 match(Set dst (ConvL2I (CastP2X src)));
8871
8872 format %{ "movl $dst, $src\t# ptr -> int" %}
8873 ins_encode %{
15057 effect(DEF dst, USE src);
15058 ins_cost(100);
15059 format %{ "movd $dst,$src\t# MoveI2F" %}
15060 ins_encode %{
15061 __ movdl($dst$$XMMRegister, $src$$Register);
15062 %}
15063 ins_pipe( pipe_slow );
15064 %}
15065
// Bitwise move of a long GPR into a double XMM register (raw bits, no
// numeric conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15076
15077
15078 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small clear (not known-large), any fill granularity, pre-AVX512.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Trailing flags mirror the predicate: is_large=false, word_copy=false.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15138
// Same as rep_stos but restricted to word-granularity stores (no stosb),
// selected when the ClearArray node requires word copies only.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Trailing flags mirror the predicate: is_large=false, word_copy=true.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15194
// Small non-constant length ClearArray for AVX512 targets.
// AVX512 variant of rep_stos; takes an extra opmask temp register.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Trailing flags mirror the predicate: is_large=false, word_copy=false.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15255
// AVX512 variant of rep_stos_word_copy (word-granularity stores only).
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Trailing flags mirror the predicate: is_large=false, word_copy=true.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15315
// Large non-constant length ClearArray for non-AVX512 targets.
// Known-large clears skip the short-length fast path entirely.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing flags mirror the predicate: is_large=true, word_copy=false.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15365
// Large clear, word-granularity stores only (no stosb), pre-AVX512.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing flags mirror the predicate: is_large=true, word_copy=true.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15411
// Large non-constant length ClearArray for AVX512 targets.
// AVX512 variant of rep_stos_large; takes an extra opmask temp register.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing flags mirror the predicate: is_large=true, word_copy=false.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15462
// AVX512 variant of rep_stos_large_word_copy (word-granularity stores).
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing flags mirror the predicate: is_large=true, word_copy=true.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15512
// Small constant length ClearArray for AVX512 targets.
// The length is an immediate, so clear_mem can emit a fully unrolled
// masked-store sequence (requires AVX512VL and MaxVectorSize >= 32).
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15527
15528 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15529 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15530 %{
15531 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15532 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15533 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15534
15535 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15536 ins_encode %{
15537 __ string_compare($str1$$Register, $str2$$Register,
15538 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15539 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15540 %}
15541 ins_pipe( pipe_slow );
15542 %}
15543
17348 effect(USE meth);
17349
17350 ins_cost(300);
17351 format %{ "call_leaf,runtime " %}
17352 ins_encode(clear_avx, Java_To_Runtime(meth));
17353 ins_pipe(pipe_slow);
17354 %}
17355
// Call runtime without safepoint and with vector arguments
// Note: deliberately no clear_avx here -- vector arguments must survive
// in the upper lanes (MachCallRuntimeNode::ret_addr_offset likewise skips
// the vzeroupper size for Op_CallLeafVector).
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17367
// Call runtime without safepoint
// entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Selected only when the call target is not known at compile time.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17383
// Call runtime without safepoint
// Direct variant: entry point is known, clear_avx (vzeroupper) is emitted
// before the call, unlike the vector-call variant above.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17396
17397 // Return Instruction
17398 // Remove the return address & jump to it.
17399 // Notice: We always emit a nop after a ret to make sure there is room
17400 // for safepoint patching
17401 instruct Ret()
17402 %{
17403 match(Return);
17404
17405 format %{ "ret" %}
17406 ins_encode %{
17407 __ ret(0);
|