1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1879 st->print("\n\t");
1880 st->print("# stack alignment check");
1881 #endif
1882 }
1883 if (C->stub_function() != nullptr) {
1884 st->print("\n\t");
1885 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1886 st->print("\n\t");
1887 st->print("je fast_entry\t");
1888 st->print("\n\t");
1889 st->print("call #nmethod_entry_barrier_stub\t");
1890 st->print("\n\tfast_entry:");
1891 }
1892 st->cr();
1893 }
1894 #endif
1895
// Emit the method prologue: optional class-initialization barrier, then the
// verified entry (stack bang + frame setup), and record frame completion.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Fast path taken when the holder class is initialized; otherwise fall
    // through to the wrong-method stub so the call gets re-resolved.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Stack bang is suppressed (bang size 0) when the output phase says it is
  // not needed; the last argument marks stub compilations.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1928
// Prologue size depends on barriers, bang size and frame size; defer to the
// generic (emit-and-measure) implementation rather than computing it here.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1934
// Upper bound on the number of relocations the prologue may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1939
1940 //=============================================================================
1941 #ifndef PRODUCT
1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1943 {
1944 Compile* C = ra_->C;
1945 if (generate_vzeroupper(C)) {
1946 st->print("vzeroupper");
1947 st->cr(); st->print("\t");
1948 }
1949
1950 int framesize = C->output()->frame_size_in_bytes();
1951 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1952 // Remove word for return adr already pushed
1953 // and RBP
1961 st->print_cr("popq rbp");
1962 if (do_polling() && C->is_method_compilation()) {
1963 st->print("\t");
1964 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1965 "ja #safepoint_stub\t"
1966 "# Safepoint: poll for GC");
1967 }
1968 }
1969 #endif
1970
// Emit the method epilogue: optional vzeroupper, frame teardown, reserved
// stack check and the return-polling sequence for safepoints.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Register a safepoint-poll stub with the output phase; during scratch
    // (size-probing) emission a dummy label is used instead.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
2011
// Epilogue size varies with frame size and polling; defer to the generic
// emit-and-measure implementation.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2017
// Upper bound on the number of relocations the epilogue may emit (the
// return poll contributes a poll_return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
2022
// Use the default pipeline class for scheduling the epilogue.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2027
2028 //=============================================================================
2029
// Register classes distinguished by the spill/copy code below.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM/vector register
  rc_stack   // stack slot
};
2037
2599 #endif
2600
// Materialize the address of this node's stack slot (the lock box) into
// the allocated register via lea.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2608
2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2610 {
2611 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2612 if (ra_->get_encode(this) > 15) {
2613 return (offset < 0x80) ? 6 : 9; // REX2
2614 } else {
2615 return (offset < 0x80) ? 5 : 8; // REX
2616 }
2617 }
2618
2619 //=============================================================================
2620 #ifndef PRODUCT
// Debug listing of the unverified entry point's inline-cache check.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
}
2627 #endif
2628
// Emit the unverified entry point: inline-cache check, padded so the
// following verified entry is aligned to InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2633
// Size depends on alignment padding inside ic_check; defer to the generic
// emit-and-measure implementation.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2639
2640
2641 //=============================================================================
2642
// Vector calling convention is available whenever the Vector API support
// flag is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2646
// True if this node's first operand allows demoting an NDD (new data
// destination) form to a legacy two-operand encoding.
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
2650
// True if this node's second operand allows demoting an NDD form to a
// legacy two-operand encoding.
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
2654
2655 #ifdef ASSERT
// Debug-only: true if either operand of the node is NDD-demotable.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
2659 #endif
4593 }
4594 __ post_call_nop();
4595 %}
4596
// Dynamic (inline-cache) Java call; the post-call nop keeps the return
// address patchable/identifiable.
enc_class Java_Dynamic_Call(method meth) %{
  __ ic_call((address)$meth$$method, resolved_method_index(masm));
  __ post_call_nop();
%}
4601
// After-call check: with -XX:+VerifyStackAtCalls, verify the stack depth is
// unchanged by looking for the magic cookie planted below old SP.
enc_class call_epilog %{
  if (VerifyStackAtCalls) {
    // Check that stack depth is unchanged: find majik cookie on stack
    int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
    Label L;
    __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
    __ jccb(Assembler::equal, L);
    // Die if stack mismatch
    __ int3();
    __ bind(L);
  }
%}
4614
4615 %}
4616
4617 //----------FRAME--------------------------------------------------------------
4618 // Definition of frame structure and management information.
4619 //
4620 // S T A C K L A Y O U T Allocators stack-slot number
4621 // | (to get allocators register number
4622 // G Owned by | | v add OptoReg::stack0())
4623 // r CALLER | |
4624 // o | +--------+ pad to even-align allocators stack-slot
4625 // w V | pad0 | numbers; owned by CALLER
4626 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4627 // h ^ | in | 5
4628 // | | args | 4 Holes in incoming args owned by SELF
4629 // | | | | 3
4630 // | | +--------+
4631 // V | | old out| Empty on Intel, window on Sparc
4632 // | old |preserve| Must be even aligned.
5771 %}
5772 %}
5773
5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Match only when the int index is known non-negative, so widening it to
  // 64 bits cannot change the computed address.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5790
5791 // Indirect Narrow Oop Plus Offset Operand
5792 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5793 // we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Addressing mode r12 + (narrow oop << 3) + off; r12 holds the heap base
  // (see base(0xc) below). Requires an oop shift of 3 (times_8).
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5808
5809 // Indirect Memory Operand
5810 operand indirectNarrow(rRegN reg)
6280 %}
6281
6282 // Replaces legVec during post-selection cleanup. See above.
// ZMM-width vector operand restricted to the legacy vector register class.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6290
6291 //----------OPERAND CLASSES----------------------------------------------------
6292 // Operand Classes are groups of operands that are used as to simplify
6293 // instruction definitions by not requiring the AD writer to specify separate
6294 // instructions for every form of operand when the instruction accepts
6295 // multiple operand types with the same basic encoding and format. The classic
6296 // case of this is memory operands.
6297
// All memory addressing modes, both plain-pointer and narrow-oop based, so
// one instruction definition can match any of them.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6304
6305 //----------PIPELINE-----------------------------------------------------------
6306 // Rules which define the behavior of the target architectures pipeline.
6307 pipeline %{
6308
6309 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6317
6318 //----------RESOURCES----------------------------------------------------------
6319 // Resources are the functional units available to the machine
6320
8915 format %{ "MEMBAR-storestore (empty encoding)" %}
8916 ins_encode( );
8917 ins_pipe(empty);
8918 %}
8919
8920 //----------Move Instructions--------------------------------------------------
8921
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Pure register move; elide it when src and dst were allocated to the
    // same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8934
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Pure register move; elide it when src and dst were allocated to the
    // same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8947
8948 // Convert oop into int for vectors alignment masking
8949 instruct convP2I(rRegI dst, rRegP src)
8950 %{
8951 match(Set dst (ConvL2I (CastP2X src)));
8952
8953 format %{ "movl $dst, $src\t# ptr -> int" %}
8954 ins_encode %{
15202 effect(DEF dst, USE src);
15203 ins_cost(100);
15204 format %{ "movd $dst,$src\t# MoveI2F" %}
15205 ins_encode %{
15206 __ movdl($dst$$XMMRegister, $src$$Register);
15207 %}
15208 ins_pipe( pipe_slow );
15209 %}
15210
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    // Raw 64-bit bit copy from a GPR into an XMM register (no conversion).
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15221
15222 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15224 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15225 Universe dummy, rFlagsReg cr)
15226 %{
15227 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15228 match(Set dummy (ClearArray cnt base));
15229 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15230
15231 format %{ $$template
15232 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15233 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15234 $$emit$$"jg LARGE\n\t"
15235 $$emit$$"dec rcx\n\t"
15236 $$emit$$"js DONE\t# Zero length\n\t"
15237 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15238 $$emit$$"dec rcx\n\t"
15239 $$emit$$"jge LOOP\n\t"
15240 $$emit$$"jmp DONE\n\t"
15241 $$emit$$"# LARGE:\n\t"
15242 if (UseFastStosb) {
15243 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15244 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15245 } else if (UseXMMForObjInit) {
15246 $$emit$$"mov rdi,rax\n\t"
15247 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15248 $$emit$$"jmpq L_zero_64_bytes\n\t"
15249 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15257 $$emit$$"jl L_tail\n\t"
15258 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15259 $$emit$$"add 0x20,rax\n\t"
15260 $$emit$$"sub 0x4,rcx\n\t"
15261 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15262 $$emit$$"add 0x4,rcx\n\t"
15263 $$emit$$"jle L_end\n\t"
15264 $$emit$$"dec rcx\n\t"
15265 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15266 $$emit$$"vmovq xmm0,(rax)\n\t"
15267 $$emit$$"add 0x8,rax\n\t"
15268 $$emit$$"dec rcx\n\t"
15269 $$emit$$"jge L_sloop\n\t"
15270 $$emit$$"# L_end:\n\t"
15271 } else {
15272 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15273 }
15274 $$emit$$"# DONE"
15275 %}
15276 ins_encode %{
15277 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15278 $tmp$$XMMRegister, false, knoreg);
15279 %}
15280 ins_pipe(pipe_slow);
15281 %}
15282
15283 // Small non-constant length ClearArray for AVX512 targets.
15284 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15285 Universe dummy, rFlagsReg cr)
15286 %{
15287 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15288 match(Set dummy (ClearArray cnt base));
15289 ins_cost(125);
15290 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15291
15292 format %{ $$template
15293 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15294 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15295 $$emit$$"jg LARGE\n\t"
15296 $$emit$$"dec rcx\n\t"
15297 $$emit$$"js DONE\t# Zero length\n\t"
15298 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15299 $$emit$$"dec rcx\n\t"
15300 $$emit$$"jge LOOP\n\t"
15301 $$emit$$"jmp DONE\n\t"
15302 $$emit$$"# LARGE:\n\t"
15303 if (UseFastStosb) {
15304 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15305 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15306 } else if (UseXMMForObjInit) {
15307 $$emit$$"mov rdi,rax\n\t"
15308 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15309 $$emit$$"jmpq L_zero_64_bytes\n\t"
15310 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15318 $$emit$$"jl L_tail\n\t"
15319 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15320 $$emit$$"add 0x20,rax\n\t"
15321 $$emit$$"sub 0x4,rcx\n\t"
15322 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15323 $$emit$$"add 0x4,rcx\n\t"
15324 $$emit$$"jle L_end\n\t"
15325 $$emit$$"dec rcx\n\t"
15326 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15327 $$emit$$"vmovq xmm0,(rax)\n\t"
15328 $$emit$$"add 0x8,rax\n\t"
15329 $$emit$$"dec rcx\n\t"
15330 $$emit$$"jge L_sloop\n\t"
15331 $$emit$$"# L_end:\n\t"
15332 } else {
15333 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15334 }
15335 $$emit$$"# DONE"
15336 %}
15337 ins_encode %{
15338 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15339 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15340 %}
15341 ins_pipe(pipe_slow);
15342 %}
15343
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // Debug format: textual listing of the code produced by clear_mem() below.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large == true: emit the large-clear path; no opmask register needed.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15394
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // Debug format: textual listing of the code produced by clear_mem() below.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large == true; $ktmp supplies the opmask register for the EVEX path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15445
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // cnt is a compile-time constant here ($cnt$$constant below), unlike the
  // register-count variants above.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15459
// Latin-1 (byte[]) string compare; result in rax, all string/count inputs
// are killed. Non-AVX512VLBW variant.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15475
17313 effect(USE meth);
17314
17315 ins_cost(300);
17316 format %{ "call_leaf,runtime " %}
17317 ins_encode(clear_avx, Java_To_Runtime(meth));
17318 ins_pipe(pipe_slow);
17319 %}
17320
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // No clear_avx here: vector arguments live in the YMM/ZMM registers a
  // vzeroupper would clobber.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17332
// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17344
17345 // Return Instruction
17346 // Remove the return address & jump to it.
17347 // Notice: We always emit a nop after a ret to make sure there is room
17348 // for safepoint patching
17349 instruct Ret()
17350 %{
17351 match(Return);
17352
17353 format %{ "ret" %}
17354 ins_encode %{
17355 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect: plain indirect call, no movq of the target.
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // CallLeafVector must preserve YMM/ZMM upper halves, so it skips the
  // vzeroupper (and hence its size).
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1884 st->print("\n\t");
1885 st->print("# stack alignment check");
1886 #endif
1887 }
1888 if (C->stub_function() != nullptr) {
1889 st->print("\n\t");
1890 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1891 st->print("\n\t");
1892 st->print("je fast_entry\t");
1893 st->print("\n\t");
1894 st->print("call #nmethod_entry_barrier_stub\t");
1895 st->print("\n\tfast_entry:");
1896 }
1897 st->cr();
1898 }
1899 #endif
1900
// Emit the method prologue: frame setup via verified_entry, nmethod entry
// barrier for non-stubs, and binding of the verified-entry label.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Stub compilations do not get an entry barrier.
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the label MachVEPNode jumps to after unpacking inline-type args
    // (skipping the unverified entry); not bound while probing sizes.
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1923
1924
// Upper bound on the number of relocations the prologue may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1929
1930 //=============================================================================
1931 #ifndef PRODUCT
1932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1933 {
1934 Compile* C = ra_->C;
1935 if (generate_vzeroupper(C)) {
1936 st->print("vzeroupper");
1937 st->cr(); st->print("\t");
1938 }
1939
1940 int framesize = C->output()->frame_size_in_bytes();
1941 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1942 // Remove word for return adr already pushed
1943 // and RBP
1951 st->print_cr("popq rbp");
1952 if (do_polling() && C->is_method_compilation()) {
1953 st->print("\t");
1954 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1955 "ja #safepoint_stub\t"
1956 "# Safepoint: poll for GC");
1957 }
1958 }
1959 #endif
1960
// Emit the method epilogue: optional vzeroupper, frame removal (with stack
// repair when needed), reserved stack check and return-polling sequence.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Register a safepoint-poll stub with the output phase; during scratch
    // (size-probing) emission a dummy label is used instead.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1991
// Upper bound on the number of relocations the epilogue may emit.
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1996
// Use the default pipeline class for scheduling the epilogue.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2001
2002 //=============================================================================
2003
// Register classes distinguished by the spill/copy code below.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM/vector register
  rc_stack   // stack slot
};
2011
2573 #endif
2574
// Materialize the address of this node's stack slot (the lock box) into
// the allocated register via lea.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2582
2583 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2584 {
2585 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2586 if (ra_->get_encode(this) > 15) {
2587 return (offset < 0x80) ? 6 : 9; // REX2
2588 } else {
2589 return (offset < 0x80) ? 5 : 8; // REX
2590 }
2591 }
2592
2593 //=============================================================================
2594 #ifndef PRODUCT
// Debug listing for the value-type entry point; details are emitted at
// assembly time, so only the node name is printed.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2599 #endif
2600
// Emit a value-type entry point: either the unverified inline-cache check,
// or the verified variant that unpacks inline-type arguments and jumps to
// the real verified entry bound in MachPrologNode::emit.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Temporary zero-increment frame so the entry barrier can run, torn
      // down again before unpacking.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2635
2636 //=============================================================================
#ifndef PRODUCT
// Debug-only pretty-printer for the unverified entry point's
// inline cache check sequence.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
}
#endif
2645
// Unverified entry point: emit the inline-cache check, aligned to
// InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2650
2651
2652 //=============================================================================
2653
// The vector calling convention is available whenever vector support
// (Vector API intrinsics) is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2657
2658 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2659 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2660 }
2661
2662 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2663 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2664 }
2665
#ifdef ASSERT
// Debug-only: true when either operand of 'mdef' is NDD-demotable.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
#endif
4604 }
4605 __ post_call_nop();
4606 %}
4607
  // Dynamic (inline-cache) Java call, followed by the post-call nop marker.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4612
  // Code appended after every Java call: optionally verifies that the stack
  // depth is unchanged, and — for calls returning an inline type as multiple
  // fields — sets the null-marker projection and normalizes rax.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          // Stack-bound projections go through rscratch1 before the store below.
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4658
4659 %}
4660
4661 //----------FRAME--------------------------------------------------------------
4662 // Definition of frame structure and management information.
4663 //
4664 // S T A C K L A Y O U T Allocators stack-slot number
4665 // | (to get allocators register number
4666 // G Owned by | | v add OptoReg::stack0())
4667 // r CALLER | |
4668 // o | +--------+ pad to even-align allocators stack-slot
4669 // w V | pad0 | numbers; owned by CALLER
4670 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4671 // h ^ | in | 5
4672 // | | args | 4 Holes in incoming args owned by SELF
4673 // | | | | 3
4674 // | | +--------+
4675 // V | | old out| Empty on Intel, window on Sparc
4676 // | old |preserve| Must be even aligned.
5815 %}
5816 %}
5817
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// The predicate requires the int index (input of ConvI2L) to be provably
// non-negative — hence "Pos" in the name.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5834
// Indirect Narrow Oop Operand
// Addresses through a compressed oop: base is the heap-base register R12,
// index is the narrow oop scaled by 8 (predicate requires shift == times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5850
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Same addressing as indCompressedOop above, plus a 32-bit displacement.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5868
5869 // Indirect Memory Operand
5870 operand indirectNarrow(rRegN reg)
6340 %}
6341
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector constrained to the legacy register file.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6350
6351 //----------OPERAND CLASSES----------------------------------------------------
6352 // Operand Classes are groups of operands that are used as to simplify
6353 // instruction definitions by not requiring the AD writer to specify separate
6354 // instructions for every form of operand when the instruction accepts
6355 // multiple operand types with the same basic encoding and format. The classic
6356 // case of this is memory operands.
6357
// All addressing forms accepted by instructions taking a generic "memory"
// operand, including the narrow-oop (compressed) variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6364
6365 //----------PIPELINE-----------------------------------------------------------
6366 // Rules which define the behavior of the target architectures pipeline.
6367 pipeline %{
6368
6369 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // x86 instructions are variable length
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6377
6378 //----------RESOURCES----------------------------------------------------------
6379 // Resources are the functional units available to the machine
6380
8975 format %{ "MEMBAR-storestore (empty encoding)" %}
8976 ins_encode( );
8977 ins_pipe(empty);
8978 %}
8979
8980 //----------Move Instructions--------------------------------------------------
8981
// Reinterpret a raw long as a pointer. Just a register move, elided when
// source and destination registers already coincide.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8994
// Reinterpret an int as a narrow pointer. A 32-bit register move (movl),
// elided when source and destination registers already coincide.
// Fix: the format previously printed "movq" although the encoding emits movl,
// which made PrintOptoAssembly output misdescribe the instruction.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9007
// Reinterpret a narrow pointer as a long (matches CastP2X with a narrow
// input). Just a register move, elided when the registers coincide.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9020
// Reinterpret a pointer as a raw long. Just a register move, elided when
// source and destination registers already coincide.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9033
9034 // Convert oop into int for vectors alignment masking
9035 instruct convP2I(rRegI dst, rRegP src)
9036 %{
9037 match(Set dst (ConvL2I (CastP2X src)));
9038
9039 format %{ "movl $dst, $src\t# ptr -> int" %}
9040 ins_encode %{
15288 effect(DEF dst, USE src);
15289 ins_cost(100);
15290 format %{ "movd $dst,$src\t# MoveI2F" %}
15291 ins_encode %{
15292 __ movdl($dst$$XMMRegister, $src$$Register);
15293 %}
15294 ins_pipe( pipe_slow );
15295 %}
15296
// Bit-copy a long (GPR) into a double (XMM) register — no numeric conversion.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15307
15308
15309 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // cnt is in 8-byte words (see the (rdi,rcx,8) addressing and the
  // "convert doublewords to bytes" shift below); val is the 64-bit
  // pattern stored into each word.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // large = false, word_copy = false
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15369
// Word-copy-only variant of rep_stos: note the format has no "rep stosb"
// fast path — stores stay word-sized.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // large = false, word_copy = true
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15425
15426 // Small non-constant length ClearArray for AVX512 targets.
15427 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15428 Universe dummy, rFlagsReg cr)
15429 %{
15430 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15431 match(Set dummy (ClearArray (Binary cnt base) val));
15432 ins_cost(125);
15433 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15434
15435 format %{ $$template
15436 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15437 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15438 $$emit$$"jg LARGE\n\t"
15439 $$emit$$"dec rcx\n\t"
15440 $$emit$$"js DONE\t# Zero length\n\t"
15441 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15442 $$emit$$"dec rcx\n\t"
15443 $$emit$$"jge LOOP\n\t"
15444 $$emit$$"jmp DONE\n\t"
15445 $$emit$$"# LARGE:\n\t"
15446 if (UseFastStosb) {
15447 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15448 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15449 } else if (UseXMMForObjInit) {
15450 $$emit$$"mov rdi,rax\n\t"
15451 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15452 $$emit$$"jmpq L_zero_64_bytes\n\t"
15453 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15461 $$emit$$"jl L_tail\n\t"
15462 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15463 $$emit$$"add 0x20,rax\n\t"
15464 $$emit$$"sub 0x4,rcx\n\t"
15465 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15466 $$emit$$"add 0x4,rcx\n\t"
15467 $$emit$$"jle L_end\n\t"
15468 $$emit$$"dec rcx\n\t"
15469 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15470 $$emit$$"vmovq xmm0,(rax)\n\t"
15471 $$emit$$"add 0x8,rax\n\t"
15472 $$emit$$"dec rcx\n\t"
15473 $$emit$$"jge L_sloop\n\t"
15474 $$emit$$"# L_end:\n\t"
15475 } else {
15476 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15477 }
15478 $$emit$$"# DONE"
15479 %}
15480 ins_encode %{
15481 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15482 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15483 %}
15484 ins_pipe(pipe_slow);
15485 %}
15486
15487 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15488 Universe dummy, rFlagsReg cr)
15489 %{
15490 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15491 match(Set dummy (ClearArray (Binary cnt base) val));
15492 ins_cost(125);
15493 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15494
15495 format %{ $$template
15496 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15497 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15498 $$emit$$"jg LARGE\n\t"
15499 $$emit$$"dec rcx\n\t"
15500 $$emit$$"js DONE\t# Zero length\n\t"
15501 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15502 $$emit$$"dec rcx\n\t"
15503 $$emit$$"jge LOOP\n\t"
15504 $$emit$$"jmp DONE\n\t"
15505 $$emit$$"# LARGE:\n\t"
15506 if (UseFastStosb) {
15507 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15508 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15509 } else if (UseXMMForObjInit) {
15510 $$emit$$"mov rdi,rax\n\t"
15511 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15512 $$emit$$"jmpq L_zero_64_bytes\n\t"
15513 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15521 $$emit$$"jl L_tail\n\t"
15522 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15523 $$emit$$"add 0x20,rax\n\t"
15524 $$emit$$"sub 0x4,rcx\n\t"
15525 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15526 $$emit$$"add 0x4,rcx\n\t"
15527 $$emit$$"jle L_end\n\t"
15528 $$emit$$"dec rcx\n\t"
15529 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15530 $$emit$$"vmovq xmm0,(rax)\n\t"
15531 $$emit$$"add 0x8,rax\n\t"
15532 $$emit$$"dec rcx\n\t"
15533 $$emit$$"jge L_sloop\n\t"
15534 $$emit$$"# L_end:\n\t"
15535 } else {
15536 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15537 }
15538 $$emit$$"# DONE"
15539 %}
15540 ins_encode %{
15541 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15542 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15543 %}
15544 ins_pipe(pipe_slow);
15545 %}
15546
// Large non-constant length ClearArray for non-AVX512 targets.
// Same as rep_stos but without the short-length inline loop: the node is
// known large, so we go straight to the bulk store.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large = true, word_copy = false
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15596
// Large, word-copy-only variant: no "rep stosb" fast path, stores stay
// word-sized.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large = true, word_copy = true
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15642
// Large non-constant length ClearArray for AVX512 targets.
// AVX512 variant: uses an opmask temp (ktmp) in the clear_mem call.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large = true, word_copy = false
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15693
// Large, word-copy-only ClearArray for AVX512 targets (opmask temp ktmp).
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large = true, word_copy = true
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15743
// Small constant length ClearArray for AVX512 targets.
// cnt is a compile-time constant here (immL), so clear_mem gets the raw
// constant instead of a count register.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15758
// String compare over byte[] (LL encoding), non-avx512vlbw path.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register on this (non-AVX512vlbw) path
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15774
17612 effect(USE meth);
17613
17614 ins_cost(300);
17615 format %{ "call_leaf,runtime " %}
17616 ins_encode(clear_avx, Java_To_Runtime(meth));
17617 ins_pipe(pipe_slow);
17618 %}
17619
// Call runtime without safepoint and with vector arguments
// Note: unlike the other leaf calls, there is no clear_avx here;
// MachCallRuntimeNode::ret_addr_offset() correspondingly omits
// clear_avx_size() for Op_CallLeafVector.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17631
// Call runtime without safepoint
// entry point is null, target holds the address to call
// (indirect call through a register).
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17647
// Call runtime without safepoint
// (direct call to a known entry point; clears AVX upper state first).
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17660
17661 // Return Instruction
17662 // Remove the return address & jump to it.
17663 // Notice: We always emit a nop after a ret to make sure there is room
17664 // for safepoint patching
17665 instruct Ret()
17666 %{
17667 match(Return);
17668
17669 format %{ "ret" %}
17670 ins_encode %{
17671 __ ret(0);
|