1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  // Pad so the 4-byte displacement following the opcode is aligned and
  // therefore patchable without spanning a cache line.
  return align_up(current_offset, alignment_required()) - current_offset;
}
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1879 st->print("\n\t");
1880 st->print("# stack alignment check");
1881 #endif
1882 }
1883 if (C->stub_function() != nullptr) {
1884 st->print("\n\t");
1885 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1886 st->print("\n\t");
1887 st->print("je fast_entry\t");
1888 st->print("\n\t");
1889 st->print("call #nmethod_entry_barrier_stub\t");
1890 st->print("\n\tfast_entry:");
1891 }
1892 st->cr();
1893 }
1894 #endif
1895
// Emit the method prolog: optional class-initialization barrier, frame
// setup with stack banging, and constant-table base bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Fast class-initialization check: if the holder class is still being
    // initialized, divert to the handle-wrong-method stub.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Build the frame; bang the stack only when the frame needs it.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1928
// Prolog size depends on too many runtime flags to predict statically;
// fall back to emitting into a scratch buffer and measuring.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1934
// Upper bound on the number of relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1939
1940 //=============================================================================
1941 #ifndef PRODUCT
1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1943 {
1944 Compile* C = ra_->C;
1945 if (generate_vzeroupper(C)) {
1946 st->print("vzeroupper");
1947 st->cr(); st->print("\t");
1948 }
1949
1950 int framesize = C->output()->frame_size_in_bytes();
1951 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1952 // Remove word for return adr already pushed
1953 // and RBP
1961 st->print_cr("popq rbp");
1962 if (do_polling() && C->is_method_compilation()) {
1963 st->print("\t");
1964 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1965 "ja #safepoint_stub\t"
1966 "# Safepoint: poll for GC");
1967 }
1968 }
1969 #endif
1970
// Emit the method epilog: optional vzeroupper, frame teardown, reserved
// stack check, and the return-point safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    // NOTE(review): presumably restores the reserved stack area for
    // @ReservedStackAccess methods — confirm against reserved_stack_check().
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-point safepoint poll. The slow path branches to an out-of-line
    // C2SafepointPollStub; stub creation is skipped while only sizing code.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
2011
// Epilog size varies with flags and frame shape; measure by emitting.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2017
// Upper bound on the number of relocation entries the epilog may emit.
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
2022
// Use the generic MachNode pipeline description for the epilog.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2027
2028 //=============================================================================
2029
// Coarse register classes used to classify where a value lives
// (by name: invalid, general-purpose reg, opmask reg, XMM/FP reg, stack slot).
enum RC {
  rc_bad,
  rc_int,
  rc_kreg,
  rc_float,
  rc_stack
};
2037
2599 #endif
2600
// Materialize the address of this node's stack slot into its assigned
// register: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2608
// Size in bytes of the lea emitted by BoxLockNode::emit: a disp8 (< 0x80)
// or disp32 displacement, with a one-byte-longer REX2 prefix when the
// destination encoding is above 15 (extended registers).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
2618
2619 //=============================================================================
2620 #ifndef PRODUCT
// Pretty-print the unverified entry point (inline-cache check) for -XX:+PrintAssembly.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
}
2627 #endif
2628
// Emit the unverified entry point: receiver-klass inline-cache check,
// aligned to InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2633
// UEP size depends on alignment padding; measure by emitting.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2639
2640
2641 //=============================================================================
2642
// Vector calling convention is available whenever the Vector API support
// flag is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2646
// True if this node carries the platform flag marking its first operand as
// NDD-demotable (eligible for demotion to a legacy two-operand encoding).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
2650
// True if this node carries the platform flag marking its second operand as
// NDD-demotable (eligible for demotion to a legacy two-operand encoding).
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
2654
#ifdef ASSERT
// Debug-only: true if either operand of this node is NDD-demotable.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
#endif
4593 }
4594 __ post_call_nop();
4595 %}
4596
  // Dynamic (inline-cache) Java call; the resolved-method index is recorded
  // for the IC, and a nop is placed after the call for patching support.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4601
  // After-call check: with VerifyStackAtCalls, confirm the magic cookie
  // (0xbadb100d) is still at its slot below old_SP; trap with int3 if the
  // stack depth changed across the call.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
4614
4615 %}
4616
4617 //----------FRAME--------------------------------------------------------------
4618 // Definition of frame structure and management information.
4619 //
4620 // S T A C K L A Y O U T Allocators stack-slot number
4621 // | (to get allocators register number
4622 // G Owned by | | v add OptoReg::stack0())
4623 // r CALLER | |
4624 // o | +--------+ pad to even-align allocators stack-slot
4625 // w V | pad0 | numbers; owned by CALLER
4626 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4627 // h ^ | in | 5
4628 // | | args | 4 Holes in incoming args owned by SELF
4629 // | | | | 3
4630 // | | +--------+
4631 // V | | old out| Empty on Intel, window on Sparc
4632 // | old |preserve| Must be even aligned.
5771 %}
5772 %}
5773
5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is provably non-negative (type low bound >= 0).
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5790
5791 // Indirect Narrow Oop Plus Offset Operand
5792 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5793 // we can't free r12 even with CompressedOops::base() == nullptr.
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Matches only when decoding is base(R12) + narrow_oop << 3, i.e.
  // the compressed-oops shift is times_8.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12 (heap-base register)
    index($reg);
    scale(0x3); // << 3, matches the predicate's times_8 shift
    disp($off);
  %}
%}
5808
5809 // Indirect Memory Operand
5810 operand indirectNarrow(rRegN reg)
6280 %}
6281
6282 // Replaces legVec during post-selection cleanup. See above.
// Replaces legVec during post-selection cleanup. See above.
// ZMM-sized vector restricted to the legacy (non-extended) register set.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6290
6291 //----------OPERAND CLASSES----------------------------------------------------
6292 // Operand Classes are groups of operands that are used as to simplify
6293 // instruction definitions by not requiring the AD writer to specify separate
6294 // instructions for every form of operand when the instruction accepts
6295 // multiple operand types with the same basic encoding and format. The classic
6296 // case of this is memory operands.
6297
// All addressing-mode operands accepted by generic memory-using
// instructions, including the *Narrow forms for compressed oops.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6304
6305 //----------PIPELINE-----------------------------------------------------------
6306 // Rules which define the behavior of the target architectures pipeline.
6307 pipeline %{
6308
6309 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized on x86
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction unit is 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6317
6318 //----------RESOURCES----------------------------------------------------------
6319 // Resources are the functional units available to the machine
6320
8915 format %{ "MEMBAR-storestore (empty encoding)" %}
8916 ins_encode( );
8917 ins_pipe(empty);
8918 %}
8919
8920 //----------Move Instructions--------------------------------------------------
8921
// Reinterpret a long as a pointer (no-op when allocated to the same register).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when src and dst landed in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8934
// Reinterpret a pointer as a long (no-op when allocated to the same register).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when src and dst landed in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8947
8948 // Convert oop into int for vectors alignment masking
8949 instruct convP2I(rRegI dst, rRegP src)
8950 %{
8951 match(Set dst (ConvL2I (CastP2X src)));
8952
8953 format %{ "movl $dst, $src\t# ptr -> int" %}
8954 ins_encode %{
15206 effect(DEF dst, USE src);
15207 ins_cost(100);
15208 format %{ "movd $dst,$src\t# MoveI2F" %}
15209 ins_encode %{
15210 __ movdl($dst$$XMMRegister, $src$$Register);
15211 %}
15212 ins_pipe( pipe_slow );
15213 %}
15214
// Bitwise move of a 64-bit GPR into an XMM register (long bits -> double).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15225
15226 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15228 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15229 Universe dummy, rFlagsReg cr)
15230 %{
15231 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15232 match(Set dummy (ClearArray cnt base));
15233 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15234
15235 format %{ $$template
15236 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15237 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15238 $$emit$$"jg LARGE\n\t"
15239 $$emit$$"dec rcx\n\t"
15240 $$emit$$"js DONE\t# Zero length\n\t"
15241 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15242 $$emit$$"dec rcx\n\t"
15243 $$emit$$"jge LOOP\n\t"
15244 $$emit$$"jmp DONE\n\t"
15245 $$emit$$"# LARGE:\n\t"
15246 if (UseFastStosb) {
15247 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15248 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15249 } else if (UseXMMForObjInit) {
15250 $$emit$$"mov rdi,rax\n\t"
15251 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15252 $$emit$$"jmpq L_zero_64_bytes\n\t"
15253 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15261 $$emit$$"jl L_tail\n\t"
15262 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15263 $$emit$$"add 0x20,rax\n\t"
15264 $$emit$$"sub 0x4,rcx\n\t"
15265 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15266 $$emit$$"add 0x4,rcx\n\t"
15267 $$emit$$"jle L_end\n\t"
15268 $$emit$$"dec rcx\n\t"
15269 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15270 $$emit$$"vmovq xmm0,(rax)\n\t"
15271 $$emit$$"add 0x8,rax\n\t"
15272 $$emit$$"dec rcx\n\t"
15273 $$emit$$"jge L_sloop\n\t"
15274 $$emit$$"# L_end:\n\t"
15275 } else {
15276 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15277 }
15278 $$emit$$"# DONE"
15279 %}
15280 ins_encode %{
15281 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15282 $tmp$$XMMRegister, false, knoreg);
15283 %}
15284 ins_pipe(pipe_slow);
15285 %}
15286
15287 // Small non-constant length ClearArray for AVX512 targets.
15288 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15289 Universe dummy, rFlagsReg cr)
15290 %{
15291 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15292 match(Set dummy (ClearArray cnt base));
15293 ins_cost(125);
15294 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15295
15296 format %{ $$template
15297 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15298 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15299 $$emit$$"jg LARGE\n\t"
15300 $$emit$$"dec rcx\n\t"
15301 $$emit$$"js DONE\t# Zero length\n\t"
15302 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15303 $$emit$$"dec rcx\n\t"
15304 $$emit$$"jge LOOP\n\t"
15305 $$emit$$"jmp DONE\n\t"
15306 $$emit$$"# LARGE:\n\t"
15307 if (UseFastStosb) {
15308 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15309 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15310 } else if (UseXMMForObjInit) {
15311 $$emit$$"mov rdi,rax\n\t"
15312 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15313 $$emit$$"jmpq L_zero_64_bytes\n\t"
15314 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15322 $$emit$$"jl L_tail\n\t"
15323 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15324 $$emit$$"add 0x20,rax\n\t"
15325 $$emit$$"sub 0x4,rcx\n\t"
15326 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15327 $$emit$$"add 0x4,rcx\n\t"
15328 $$emit$$"jle L_end\n\t"
15329 $$emit$$"dec rcx\n\t"
15330 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15331 $$emit$$"vmovq xmm0,(rax)\n\t"
15332 $$emit$$"add 0x8,rax\n\t"
15333 $$emit$$"dec rcx\n\t"
15334 $$emit$$"jge L_sloop\n\t"
15335 $$emit$$"# L_end:\n\t"
15336 } else {
15337 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15338 }
15339 $$emit$$"# DONE"
15340 %}
15341 ins_encode %{
15342 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15343 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15344 %}
15345 ins_pipe(pipe_slow);
15346 %}
15347
15348 // Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large == true: take the bulk-clear path; knoreg: no opmask temp
    // below AVX-512.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15398
15399 // Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // large == true: take the bulk-clear path; AVX-512 targets pass an
    // opmask temp ($ktmp) for masked tail stores.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15449
15450 // Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Constant-length clears: requires AVX512VL (masked stores) and >= 32-byte vectors.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // The length is a compile-time constant here ($cnt$$constant).
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15463
// String compare for two Latin-1 (byte) encoded strings (StrIntrinsicNode::LL);
// excluded on AVX512VLBW targets (presumably covered by a separate rule — confirm).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15479
17317 effect(USE meth);
17318
17319 ins_cost(300);
17320 format %{ "call_leaf,runtime " %}
17321 ins_encode(clear_avx, Java_To_Runtime(meth));
17322 ins_pipe(pipe_slow);
17323 %}
17324
17325 // Call runtime without safepoint and with vector arguments
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Note: no clear_avx here (unlike the other leaf calls) —
  // MachCallRuntimeNode::ret_addr_offset skips it for CallLeafVector too.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17336
17337 // Call runtime without safepoint
// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17348
17349 // Return Instruction
17350 // Remove the return address & jump to it.
17351 // Notice: We always emit a nop after a ret to make sure there is room
17352 // for safepoint patching
17353 instruct Ret()
17354 %{
17355 match(Return);
17356
17357 format %{ "ret" %}
17358 ins_encode %{
17359 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
// Byte distance from the start of the runtime-call sequence to the point
// the return address will reference.
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // CallLeafVector encodings omit clear_avx, so no vzeroupper bytes are
  // counted for them.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  // Pad so the 4-byte displacement following the opcode is aligned and
  // therefore patchable without spanning a cache line.
  return align_up(current_offset, alignment_required()) - current_offset;
}
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1884 st->print("\n\t");
1885 st->print("# stack alignment check");
1886 #endif
1887 }
1888 if (C->stub_function() != nullptr) {
1889 st->print("\n\t");
1890 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1891 st->print("\n\t");
1892 st->print("je fast_entry\t");
1893 st->print("\n\t");
1894 st->print("call #nmethod_entry_barrier_stub\t");
1895 st->print("\n\tfast_entry:");
1896 }
1897 st->cr();
1898 }
1899 #endif
1900
// Emit the method prolog: frame setup, nmethod entry barrier, and the
// verified-entry label targeted by MachVEPNode::emit after inline-type
// argument unpacking.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Stubs don't need an nmethod entry barrier.
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the label MachVEPNode::emit jumps to; skipped while sizing code.
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1923
1924
// Upper bound on the number of relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1929
1930 //=============================================================================
1931 #ifndef PRODUCT
1932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1933 {
1934 Compile* C = ra_->C;
1935 if (generate_vzeroupper(C)) {
1936 st->print("vzeroupper");
1937 st->cr(); st->print("\t");
1938 }
1939
1940 int framesize = C->output()->frame_size_in_bytes();
1941 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1942 // Remove word for return adr already pushed
1943 // and RBP
1951 st->print_cr("popq rbp");
1952 if (do_polling() && C->is_method_compilation()) {
1953 st->print("\t");
1954 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1955 "ja #safepoint_stub\t"
1956 "# Safepoint: poll for GC");
1957 }
1958 }
1959 #endif
1960
// Emit the method epilog: optional vzeroupper, frame removal (with stack
// repair for scalarized inline-type calls), reserved stack check, and the
// return-point safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    // NOTE(review): presumably restores the reserved stack area for
    // @ReservedStackAccess methods — confirm against reserved_stack_check().
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-point safepoint poll. The slow path branches to an out-of-line
    // C2SafepointPollStub; stub creation is skipped while only sizing code.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1991
// Upper bound on the number of relocation entries the epilog may emit.
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1996
// Use the generic MachNode pipeline description for the epilog.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2001
2002 //=============================================================================
2003
// Coarse register classes used to classify where a value lives
// (by name: invalid, general-purpose reg, opmask reg, XMM/FP reg, stack slot).
enum RC {
  rc_bad,
  rc_int,
  rc_kreg,
  rc_float,
  rc_stack
};
2011
2573 #endif
2574
// Materialize the address of this node's stack slot into its assigned
// register: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2582
// Size in bytes of the lea emitted by BoxLockNode::emit: a disp8 (< 0x80)
// or disp32 displacement, with a one-byte-longer REX2 prefix when the
// destination encoding is above 15 (extended registers).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
2592
2593 //=============================================================================
2594 #ifndef PRODUCT
// Placeholder pretty-print for the inline-type value entry point.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2599 #endif
2600
// Inline-type value entry point: either emits the inline-cache check
// (unverified case), or unpacks scalarized inline-type arguments and jumps
// to the verified entry bound in MachPrologNode::emit.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    if (ra_->C->stub_function() == nullptr) {
      // Emit the entry barrier in a temporary frame before unpacking because
      // it can deopt, which would require packing the scalarized args again.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      // Tear the temporary frame down again (return address + rbp words).
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // While sizing, the real label may not exist yet; jump to a dummy.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2636
2637 //=============================================================================
#ifndef PRODUCT
// Debug printing (PrintOptoAssembly) for the unverified entry point:
// shows the inline cache check sequence.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
}
#endif
2646
// Unverified entry point: emit the inline cache check, padded per
// InteriorEntryAlignment (see the format above for the emitted sequence).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2651
2652
2653 //=============================================================================
2654
// Vector calling convention support is gated on the EnableVectorSupport flag
// (Vector API intrinsics).
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2658
// True if the matcher tagged this node with Flag_ndd_demotable_opr1 —
// presumably an APX NDD (new data destination) instruction whose destination
// can be merged with source operand 1 into a legacy two-operand form.
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
2662
// Same as is_ndd_demotable_opr1, but for merging with source operand 2
// (Flag_ndd_demotable_opr2).
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
2666
#ifdef ASSERT
// Debug-only: node is demotable through either source operand.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
#endif
4605 }
4606 __ post_call_nop();
4607 %}
4608
  // Dynamically-dispatched Java call: an inline-cache call to the target
  // method, followed by the post-call nop emitted after all Java calls here.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4613
  // Code emitted right after a Java call returns: optional stack-depth
  // verification, plus fix-up of inline-type (value class) return values.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          // If the marker was allocated to a stack slot, build it in rscratch1 first.
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg); // zero-extend the single-byte setb result
          if (reg->is_stack()) {
            // Spill the marker to its stack slot.
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4659
4660 %}
4661
4662 //----------FRAME--------------------------------------------------------------
4663 // Definition of frame structure and management information.
4664 //
4665 // S T A C K L A Y O U T Allocators stack-slot number
4666 // | (to get allocators register number
4667 // G Owned by | | v add OptoReg::stack0())
4668 // r CALLER | |
4669 // o | +--------+ pad to even-align allocators stack-slot
4670 // w V | pad0 | numbers; owned by CALLER
4671 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4672 // h ^ | in | 5
4673 // | | args | 4 Holes in incoming args owned by SELF
4674 // | | | | 3
4675 // | | +--------+
4676 // V | | old out| Empty on Intel, window on Sparc
4677 // | old |preserve| Must be even aligned.
5816 %}
5817 %}
5818
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// The predicate requires the pre-conversion int index to be provably
// non-negative (type lower bound >= 0), so the ConvI2L needs no sign fix-up.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5835
// Indirect Narrow Oop Operand
// Addresses a compressed oop as [r12_heapbase + narrow_oop << 3]; only valid
// when the compressed-oop shift is 3 (times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5851
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Same as indCompressedOop, with a 32-bit displacement added.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5869
5870 // Indirect Memory Operand
5871 operand indirectNarrow(rRegN reg)
6341 %}
6342
// Replaces legVec during post-selection cleanup. See above.
// 512-bit vector constrained to the legacy (non-extended) register set.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6351
6352 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6354 // instruction definitions by not requiring the AD writer to specify separate
6355 // instructions for every form of operand when the instruction accepts
6356 // multiple operand types with the same basic encoding and format. The classic
6357 // case of this is memory operands.
6358
// All supported memory addressing modes, including the narrow-oop
// (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6365
6366 //----------PIPELINE-----------------------------------------------------------
6367 // Rules which define the behavior of the target architectures pipeline.
6368 pipeline %{
6369
6370 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 encodings are 1-15 bytes)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6378
6379 //----------RESOURCES----------------------------------------------------------
6380 // Resources are the functional units available to the machine
6381
8976 format %{ "MEMBAR-storestore (empty encoding)" %}
8977 ins_encode( );
8978 ins_pipe(empty);
8979 %}
8980
8981 //----------Move Instructions--------------------------------------------------
8982
// Reinterpret a long as a pointer; the register move is elided when source
// and destination were allocated to the same register.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8995
8996 instruct castI2N(rRegN dst, rRegI src)
8997 %{
8998 match(Set dst (CastI2N src));
8999
9000 format %{ "movq $dst, $src\t# int -> narrow ptr" %}
9001 ins_encode %{
9002 if ($dst$$reg != $src$$reg) {
9003 __ movl($dst$$Register, $src$$Register);
9004 }
9005 %}
9006 ins_pipe(ialu_reg_reg); // XXX
9007 %}
9008
// Reinterpret a narrow (compressed) value as a long; matches CastP2X applied
// to a narrow register. Move elided when the allocation coincides.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9021
// Reinterpret a pointer as a long; move elided when the allocation coincides.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9034
9035 // Convert oop into int for vectors alignment masking
9036 instruct convP2I(rRegI dst, rRegP src)
9037 %{
9038 match(Set dst (ConvL2I (CastP2X src)));
9039
9040 format %{ "movl $dst, $src\t# ptr -> int" %}
9041 ins_encode %{
15293 effect(DEF dst, USE src);
15294 ins_cost(100);
15295 format %{ "movd $dst,$src\t# MoveI2F" %}
15296 ins_encode %{
15297 __ movdl($dst$$XMMRegister, $src$$Register);
15298 %}
15299 ins_pipe( pipe_slow );
15300 %}
15301
// Bitwise move of a long's raw bits into a double (XMM) register,
// via a GPR->XMM movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15312
15313
15314 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Small (non-large) ClearArray, fill value in rax; pre-AVX-512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  // cnt, base and val are consumed; tmp is XMM scratch; flags clobbered.
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Pseudo-assembly shown by -XX:+PrintOptoAssembly; actual code comes
  // from MacroAssembler::clear_mem() in the encoder below.
  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xmm tmp, is_large = false, word_copy = false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15374
// Small ClearArray, word_copy_only() variant: the 64-bit value in val must be
// stored word-by-word (note: no byte-granular rep stosb branch here),
// pre-AVX-512 targets.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Pseudo-assembly for -XX:+PrintOptoAssembly only.
  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xmm tmp, is_large = false, word_copy = true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15430
15431 // Small non-constant length ClearArray for AVX512 targets.
// Small ClearArray, zero-fill, AVX-512 targets (UseAVX > 2); uses an opmask
// scratch register ktmp in addition to the XMM scratch.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Pseudo-assembly for -XX:+PrintOptoAssembly only.
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xmm tmp, is_large = false, word_copy = false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15491
// Small ClearArray, word_copy_only() variant for AVX-512 targets.
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Pseudo-assembly for -XX:+PrintOptoAssembly only.
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xmm tmp, is_large = false, word_copy = true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15551
15552 // Large non-constant length ClearArray for non-AVX512 targets.
// Large ClearArray (is_large()), zero-fill, pre-AVX-512 targets;
// skips the short-length fast path entirely.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Pseudo-assembly for -XX:+PrintOptoAssembly only.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xmm tmp, is_large = true, word_copy = false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15601
// Large ClearArray, word_copy_only() variant, pre-AVX-512 targets
// (no byte-granular rep stosb branch).
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Pseudo-assembly for -XX:+PrintOptoAssembly only.
  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xmm tmp, is_large = true, word_copy = true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15647
15648 // Large non-constant length ClearArray for AVX512 targets.
// Large ClearArray, zero-fill, AVX-512 targets; uses an opmask scratch ktmp.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Pseudo-assembly for -XX:+PrintOptoAssembly only.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xmm tmp, is_large = true, word_copy = false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15698
// Large ClearArray, word_copy_only() variant for AVX-512 targets.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Pseudo-assembly for -XX:+PrintOptoAssembly only.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xmm tmp, is_large = true, word_copy = true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15748
15749 // Small constant length ClearArray for AVX512 targets.
// Small ClearArray with a compile-time constant length (immL cnt); requires
// 256-bit vectors and AVX512VL, and uses an opmask scratch register.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Constant-length overload: cnt is an immediate, not a register.
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15763
// Compare two Latin-1 (byte[]) strings; fallback for CPUs without
// AVX512VL/BW (passes knoreg, i.e. no opmask register is used).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15779
17617 effect(USE meth);
17618
17619 ins_cost(300);
17620 format %{ "call_leaf,runtime " %}
17621 ins_encode(clear_avx, Java_To_Runtime(meth));
17622 ins_pipe(pipe_slow);
17623 %}
17624
17625 // Call runtime without safepoint and with vector arguments
// Call runtime without safepoint and with vector arguments.
// Note: unlike the plain leaf call, no clear_avx is emitted here — vector
// arguments must stay live across the call (MachCallRuntimeNode::
// ret_addr_offset likewise skips the vzeroupper size for Op_CallLeafVector).
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17636
17637 // Call runtime without safepoint
17638 // entry point is null, target holds the address to call
// Call runtime without safepoint
// entry point is null, target holds the address to call
// (register-indirect form used when the callee is not known statically).
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17652
17653 // Call runtime without safepoint
// Call runtime without safepoint
// (direct form: entry point known statically; AVX state is cleared first).
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17665
17666 // Return Instruction
17667 // Remove the return address & jump to it.
17668 // Notice: We always emit a nop after a ret to make sure there is room
17669 // for safepoint patching
17670 instruct Ret()
17671 %{
17672 match(Return);
17673
17674 format %{ "ret" %}
17675 ins_encode %{
17676 __ ret(0);
|