1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1879 st->print("\n\t");
1880 st->print("# stack alignment check");
1881 #endif
1882 }
1883 if (C->stub_function() != nullptr) {
1884 st->print("\n\t");
1885 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1886 st->print("\n\t");
1887 st->print("je fast_entry\t");
1888 st->print("\n\t");
1889 st->print("call #nmethod_entry_barrier_stub\t");
1890 st->print("\n\tfast_entry:");
1891 }
1892 st->cr();
1893 }
1894 #endif
1895
// Emit the method prolog: optional fast class-initialization barrier,
// frame setup (with stack bang when required), and constant-table base
// bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // If the holder class is still initializing in another thread, divert
    // to the wrong-method stub so the call is re-resolved instead of
    // entering this nmethod.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Set up the frame; bang the stack first only when the frame is large
  // enough to need it. The last argument flags stub compilations
  // (semantics inside verified_entry — see MacroAssembler).
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1928
// Prolog length varies (stack bang, clinit barrier, vzeroupper), so defer
// to the generic emit-and-measure path instead of computing it here.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1934
// Upper bound on relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1939
1940 //=============================================================================
1941 #ifndef PRODUCT
1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1943 {
1944 Compile* C = ra_->C;
1945 if (generate_vzeroupper(C)) {
1946 st->print("vzeroupper");
1947 st->cr(); st->print("\t");
1948 }
1949
1950 int framesize = C->output()->frame_size_in_bytes();
1951 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1952 // Remove word for return adr already pushed
1953 // and RBP
1961 st->print_cr("popq rbp");
1962 if (do_polling() && C->is_method_compilation()) {
1963 st->print("\t");
1964 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1965 "ja #safepoint_stub\t"
1966 "# Safepoint: poll for GC");
1967 }
1968 }
1969 #endif
1970
// Emit the method epilog: optional vzeroupper, frame teardown, reserved
// stack check, and the return-poll safepoint check.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-poll safepoint: per the format() text this compares rsp
    // against the polling word in r15_thread and branches to an
    // out-of-line stub when a safepoint is pending.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Only create the real stub when actually emitting (not while
      // measuring size in the scratch buffer).
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
2011
// Epilog length varies (vzeroupper, poll, reserved-stack check); use the
// generic emit-and-measure path.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2017
// Upper bound on relocation entries the epilog may emit (e.g. the
// return poll relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
2022
// The epilog needs no special scheduling; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2027
2028 //=============================================================================
2029
// Register-class kinds used by the spill/copy code below.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // floating-point/vector register
  rc_stack   // stack slot
};
2037
2599 #endif
2600
// Materialize the address of this node's stack slot (the monitor box)
// into its allocated register.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2608
2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2610 {
2611 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2612 if (ra_->get_encode(this) > 15) {
2613 return (offset < 0x80) ? 6 : 9; // REX2
2614 } else {
2615 return (offset < 0x80) ? 5 : 8; // REX
2616 }
2617 }
2618
2619 //=============================================================================
2620 #ifndef PRODUCT
// Pretty-print the unverified entry point: the inline-cache klass check
// that jumps to the ic-miss stub on mismatch.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
}
2627 #endif
2628
// Emit the inline-cache check. The alignment argument presumably pads so
// the code following the check is InteriorEntryAlignment-aligned — see
// MacroAssembler::ic_check.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2633
// UEP length depends on ic_check's padding; use the generic
// emit-and-measure path.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2639
2640
2641 //=============================================================================
2642
// Vector calling convention availability is gated solely by the
// EnableVectorSupport flag.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2646
// True if the matcher flagged operand 1 of this node as NDD-demotable
// (NDD: presumably APX "new data destination" form demotable to a legacy
// two-operand encoding — confirm against the flag's definition).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
2650
// True if the matcher flagged operand 2 of this node as NDD-demotable
// (same convention as the opr1 variant).
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
2654
2655 #ifdef ASSERT
// Debug-only helper: node is demotable through either operand.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
2659 #endif
4593 }
4594 __ post_call_nop();
4595 %}
4596
// Virtual/dynamic Java call through an inline cache; the trailing nop
// presumably reserves patch space after the call — confirm against
// MacroAssembler::post_call_nop.
enc_class Java_Dynamic_Call(method meth) %{
  __ ic_call((address)$meth$$method, resolved_method_index(masm));
  __ post_call_nop();
%}
4601
// Debug-only post-call check: verify the callee left SP where expected by
// looking for the 0xbadb100d cookie above the frame; trap (int3) if absent.
enc_class call_epilog %{
  if (VerifyStackAtCalls) {
    // Check that stack depth is unchanged: find majik cookie on stack
    int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
    Label L;
    __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
    __ jccb(Assembler::equal, L);
    // Die if stack mismatch
    __ int3();
    __ bind(L);
  }
%}
4614
4615 %}
4616
4617 //----------FRAME--------------------------------------------------------------
4618 // Definition of frame structure and management information.
4619 //
4620 // S T A C K L A Y O U T Allocators stack-slot number
4621 // | (to get allocators register number
4622 // G Owned by | | v add OptoReg::stack0())
4623 // r CALLER | |
4624 // o | +--------+ pad to even-align allocators stack-slot
4625 // w V | pad0 | numbers; owned by CALLER
4626 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4627 // h ^ | in | 5
4628 // | | args | 4 Holes in incoming args owned by SELF
4629 // | | | | 3
4630 // | | +--------+
4631 // V | | old out| Empty on Intel, window on Sparc
4632 // | old |preserve| Must be even aligned.
5771 %}
5772 %}
5773
5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index's long type has a non-negative lower
  // bound, i.e. the index is provably positive.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5790
5791 // Indirect Narrow Oop Plus Offset Operand
5792 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5793 // we can't free r12 even with CompressedOops::base() == nullptr.
// Matches [r12_heapbase + narrow_oop << 3 + off]; only valid when the
// compressed-oop shift is exactly 3 (times_8).
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5808
5809 // Indirect Memory Operand
5810 operand indirectNarrow(rRegN reg)
6280 %}
6281
6282 // Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  // 512-bit vector constrained to the legacy register class.
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6290
6291 //----------OPERAND CLASSES----------------------------------------------------
6292 // Operand Classes are groups of operands that are used as to simplify
6293 // instruction definitions by not requiring the AD writer to specify separate
6294 // instructions for every form of operand when the instruction accepts
6295 // multiple operand types with the same basic encoding and format. The classic
6296 // case of this is memory operands.
6297
// All addressing forms accepted by instructions that take a generic
// "memory" operand, covering both regular and narrow-oop variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6304
6305 //----------PIPELINE-----------------------------------------------------------
6306 // Rules which define the behavior of the target architectures pipeline.
6307 pipeline %{
6308
6309 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6317
6318 //----------RESOURCES----------------------------------------------------------
6319 // Resources are the functional units available to the machine
6320
8915 format %{ "MEMBAR-storestore (empty encoding)" %}
8916 ins_encode( );
8917 ins_pipe(empty);
8918 %}
8919
8920 //----------Move Instructions--------------------------------------------------
8921
// Reinterpret a long as a pointer (no conversion, just a register move).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // No-op when the allocator assigned src and dst the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8934
// Reinterpret a pointer as a long (no conversion, just a register move).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // No-op when the allocator assigned src and dst the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8947
8948 // Convert oop into int for vectors alignment masking
8949 instruct convP2I(rRegI dst, rRegP src)
8950 %{
8951 match(Set dst (ConvL2I (CastP2X src)));
8952
8953 format %{ "movl $dst, $src\t# ptr -> int" %}
8954 ins_encode %{
15206 effect(DEF dst, USE src);
15207 ins_cost(100);
15208 format %{ "movd $dst,$src\t# MoveI2F" %}
15209 ins_encode %{
15210 __ movdl($dst$$XMMRegister, $src$$Register);
15211 %}
15212 ins_pipe( pipe_slow );
15213 %}
15214
// Bitwise move of a long's bits into a double (GPR -> XMM, no conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15225
15226 // Fast clearing of an array
15227 // Small non-constant length ClearArray for non-AVX512 targets.
15228 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15229 Universe dummy, rFlagsReg cr)
15230 %{
15231 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15232 match(Set dummy (ClearArray cnt base));
15233 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15234
15235 format %{ $$template
15236 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15237 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15238 $$emit$$"jg LARGE\n\t"
15239 $$emit$$"dec rcx\n\t"
15240 $$emit$$"js DONE\t# Zero length\n\t"
15241 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15242 $$emit$$"dec rcx\n\t"
15243 $$emit$$"jge LOOP\n\t"
15244 $$emit$$"jmp DONE\n\t"
15245 $$emit$$"# LARGE:\n\t"
15246 if (UseFastStosb) {
15247 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15248 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15249 } else if (UseXMMForObjInit) {
15250 $$emit$$"mov rdi,rax\n\t"
15251 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15252 $$emit$$"jmpq L_zero_64_bytes\n\t"
15253 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15261 $$emit$$"jl L_tail\n\t"
15262 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15263 $$emit$$"add 0x20,rax\n\t"
15264 $$emit$$"sub 0x4,rcx\n\t"
15265 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15266 $$emit$$"add 0x4,rcx\n\t"
15267 $$emit$$"jle L_end\n\t"
15268 $$emit$$"dec rcx\n\t"
15269 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15270 $$emit$$"vmovq xmm0,(rax)\n\t"
15271 $$emit$$"add 0x8,rax\n\t"
15272 $$emit$$"dec rcx\n\t"
15273 $$emit$$"jge L_sloop\n\t"
15274 $$emit$$"# L_end:\n\t"
15275 } else {
15276 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15277 }
15278 $$emit$$"# DONE"
15279 %}
15280 ins_encode %{
15281 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15282 $tmp$$XMMRegister, false, knoreg);
15283 %}
15284 ins_pipe(pipe_slow);
15285 %}
15286
15287 // Small non-constant length ClearArray for AVX512 targets.
15288 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15289 Universe dummy, rFlagsReg cr)
15290 %{
15291 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15292 match(Set dummy (ClearArray cnt base));
15293 ins_cost(125);
15294 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15295
15296 format %{ $$template
15297 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15298 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15299 $$emit$$"jg LARGE\n\t"
15300 $$emit$$"dec rcx\n\t"
15301 $$emit$$"js DONE\t# Zero length\n\t"
15302 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15303 $$emit$$"dec rcx\n\t"
15304 $$emit$$"jge LOOP\n\t"
15305 $$emit$$"jmp DONE\n\t"
15306 $$emit$$"# LARGE:\n\t"
15307 if (UseFastStosb) {
15308 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15309 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15310 } else if (UseXMMForObjInit) {
15311 $$emit$$"mov rdi,rax\n\t"
15312 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15313 $$emit$$"jmpq L_zero_64_bytes\n\t"
15314 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15322 $$emit$$"jl L_tail\n\t"
15323 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15324 $$emit$$"add 0x20,rax\n\t"
15325 $$emit$$"sub 0x4,rcx\n\t"
15326 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15327 $$emit$$"add 0x4,rcx\n\t"
15328 $$emit$$"jle L_end\n\t"
15329 $$emit$$"dec rcx\n\t"
15330 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15331 $$emit$$"vmovq xmm0,(rax)\n\t"
15332 $$emit$$"add 0x8,rax\n\t"
15333 $$emit$$"dec rcx\n\t"
15334 $$emit$$"jge L_sloop\n\t"
15335 $$emit$$"# L_end:\n\t"
15336 } else {
15337 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15338 }
15339 $$emit$$"# DONE"
15340 %}
15341 ins_encode %{
15342 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15343 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15344 %}
15345 ins_pipe(pipe_slow);
15346 %}
15347
15348 // Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects clear_mem's large-array path; knoreg: no opmask
    // register on this (pre-AVX512) variant.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15398
15399 // Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects clear_mem's large-array path; ktmp supplies the
    // AVX512 opmask register (predicate requires UseAVX > 2).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15449
15450 // Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // cnt is a compile-time constant here; clear_mem gets the immediate
  // instead of a register. Requires AVX512VL and MaxVectorSize >= 32.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15463
// Compare two Latin1 (byte[]) strings; LL encoding = both operands are
// byte arrays. This rule is for targets without AVX512VLBW.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register on the non-AVX512VLBW path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15479
17317 effect(USE meth);
17318
17319 ins_cost(300);
17320 format %{ "call_leaf,runtime " %}
17321 ins_encode(clear_avx, Java_To_Runtime(meth));
17322 ins_pipe(pipe_slow);
17323 %}
17324
17325 // Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Note: no clear_avx before the call (unlike the other leaf-call rules)
  // — consistent with ret_addr_offset() skipping clear_avx_size() for
  // Op_CallLeafVector; presumably vector arguments must survive in the
  // upper register bits.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17336
17337 // Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // clear_avx emits the vzeroupper (when needed) ahead of the call.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17348
17349 // Return Instruction
17350 // Remove the return address & jump to it.
17351 // Notice: We always emit a nop after a ret to make sure there is room
17352 // for safepoint patching
17353 instruct Ret()
17354 %{
17355 match(Return);
17356
17357 format %{ "ret" %}
17358 ins_encode %{
17359 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662 //
1663 // Compute padding required for nodes which need alignment
1664 //
1665
1666 // The address of the call instruction needs to be 4-byte aligned to
1667 // ensure that it does not span a cache line so that it can be patched.
1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1669 {
1670 current_offset += clear_avx_size(); // skip vzeroupper
1671 current_offset += 1; // skip call opcode byte
1672 return align_up(current_offset, alignment_required()) - current_offset;
1673 }
1674
1675 // The address of the call instruction needs to be 4-byte aligned to
1883 st->print("\n\t");
1884 st->print("# stack alignment check");
1885 #endif
1886 }
1887 if (C->stub_function() != nullptr) {
1888 st->print("\n\t");
1889 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1890 st->print("\n\t");
1891 st->print("je fast_entry\t");
1892 st->print("\n\t");
1893 st->print("call #nmethod_entry_barrier_stub\t");
1894 st->print("\n\tfast_entry:");
1895 }
1896 st->cr();
1897 }
1898 #endif
1899
// Emit the method prolog: frame setup, nmethod entry barrier for normal
// methods, and constant-table base bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Set up the verified-entry frame (see MacroAssembler::verified_entry).
  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Only regular method compilations get the nmethod entry barrier.
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the label that MachVEPNode jumps to after unpacking
    // inline-type arguments (skipped while measuring size).
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1922
1923
// Upper bound on relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1928
1929 //=============================================================================
1930 #ifndef PRODUCT
1931 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1932 {
1933 Compile* C = ra_->C;
1934 if (generate_vzeroupper(C)) {
1935 st->print("vzeroupper");
1936 st->cr(); st->print("\t");
1937 }
1938
1939 int framesize = C->output()->frame_size_in_bytes();
1940 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1941 // Remove word for return adr already pushed
1942 // and RBP
1950 st->print_cr("popq rbp");
1951 if (do_polling() && C->is_method_compilation()) {
1952 st->print("\t");
1953 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1954 "ja #safepoint_stub\t"
1955 "# Safepoint: poll for GC");
1956 }
1957 }
1958 #endif
1959
// Emit the method epilog: optional vzeroupper, frame removal (with stack
// repair when needed), reserved stack check, and the return-poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-poll safepoint check; the real stub is only created when
    // actually emitting (not while measuring size in the scratch buffer).
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1990
// Upper bound on relocation entries the epilog may emit (e.g. the
// return poll relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1995
// The epilog needs no special scheduling; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2000
2001 //=============================================================================
2002
// Register-class kinds used by the spill/copy code below.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // floating-point/vector register
  rc_stack   // stack slot
};
2010
2572 #endif
2573
// Materialize the address of this node's stack slot (the monitor box)
// into its allocated register.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2581
2582 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2583 {
2584 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2585 if (ra_->get_encode(this) > 15) {
2586 return (offset < 0x80) ? 6 : 9; // REX2
2587 } else {
2588 return (offset < 0x80) ? 5 : 8; // REX
2589 }
2590 }
2591
2592 //=============================================================================
2593 #ifndef PRODUCT
// Placeholder disassembly text for the inline-type verified entry point.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2598 #endif
2599
// Emit the (un)verified entry point for methods taking scalarized inline
// types: either the plain inline-cache check, or the argument-unpacking
// sequence that then jumps to the regular verified entry.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    // Unverified entry: only the inline-cache check.
    __ ic_check(1);
  } else {
    if (ra_->C->stub_function() == nullptr) {
      // Emit the entry barrier in a temporary frame before unpacking because
      // it can deopt, which would require packing the scalarized args again.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // While measuring size the real label is unavailable; use a dummy.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2635
2636 //=============================================================================
2637 #ifndef PRODUCT
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  // Debug listing of the unverified entry point: load the receiver's
  // compressed klass, compare it against the inline cache's speculated
  // klass, and jump to the IC miss stub on mismatch.
  st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
}
2644 #endif
2645
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  // Inline-cache check, padded so the following code starts at
  // InteriorEntryAlignment.
  __ ic_check(InteriorEntryAlignment);
}
2650
2651
2652 //=============================================================================
2653
// On x86_64 the vector calling convention is available exactly when the
// Vector API runtime support flag is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2657
2658 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2659 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2660 }
2661
2662 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2663 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2664 }
2665
2666 #ifdef ASSERT
2667 static bool is_ndd_demotable(const MachNode* mdef) {
2668 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2669 }
2670 #endif
4604 }
4605 __ post_call_nop();
4606 %}
4607
  // Inline-cache dispatched Java call: emit the IC call sequence for the
  // target method, then a post-call nop so the return address site can be
  // identified/patched safely.
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4612
  // Code emitted right after a call returns: optional stack-depth check and,
  // for calls returning an inline type as fields, initialization of the null
  // marker projection and normalization of rax.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          // If the projection lives on the stack, materialize the marker in
          // rscratch1 first and spill it below.
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4658
4659 %}
4660
4661 //----------FRAME--------------------------------------------------------------
4662 // Definition of frame structure and management information.
4663 //
4664 // S T A C K L A Y O U T Allocators stack-slot number
4665 // | (to get allocators register number
4666 // G Owned by | | v add OptoReg::stack0())
4667 // r CALLER | |
4668 // o | +--------+ pad to even-align allocators stack-slot
4669 // w V | pad0 | numbers; owned by CALLER
4670 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4671 // h ^ | in | 5
4672 // | | args | 4 Holes in incoming args owned by SELF
4673 // | | | | 3
4674 // | | +--------+
4675 // V | | old out| Empty on Intel, window on Sparc
4676 // | old |preserve| Must be even aligned.
5815 %}
5816 %}
5817
5818 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is provably non-negative: the predicate
  // digs through the matched AddP/LShiftL subtree to the ConvI2L input and
  // requires its long type's low bound to be >= 0, so sign extension of the
  // index cannot change the address.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5834
5835 // Indirect Narrow Oop Operand
operand indCompressedOop(rRegN reg) %{
  // Usable only when compressed oops decode with an 8-byte shift: the narrow
  // oop then serves directly as a scaled index off the heap base register R12.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5850
5851 // Indirect Narrow Oop Plus Offset Operand
5852 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5853 // we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Same addressing as indCompressedOop plus a 32-bit displacement; requires
  // the 8-byte compressed-oop shift so the narrow oop is the scaled index.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5868
5869 // Indirect Memory Operand
5870 operand indirectNarrow(rRegN reg)
6340 %}
6341
6342 // Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  // 512-bit vector operand restricted to the legacy (non-extended)
  // register set.
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6350
6351 //----------OPERAND CLASSES----------------------------------------------------
6352 // Operand Classes are groups of operands that are used as to simplify
6353 // instruction definitions by not requiring the AD writer to specify separate
6354 // instructions for every form of operand when the instruction accepts
6355 // multiple operand types with the same basic encoding and format. The classic
6356 // case of this is memory operands.
6357
// All addressing modes accepted by instructions that take a generic "memory"
// operand, covering both regular and narrow-oop (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6364
6365 //----------PIPELINE-----------------------------------------------------------
6366 // Rules which define the behavior of the target architectures pipeline.
6367 pipeline %{
6368
6369 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized (x86 encoding)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6377
6378 //----------RESOURCES----------------------------------------------------------
6379 // Resources are the functional units available to the machine
6380
8975 format %{ "MEMBAR-storestore (empty encoding)" %}
8976 ins_encode( );
8977 ins_pipe(empty);
8978 %}
8979
8980 //----------Move Instructions--------------------------------------------------
8981
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Reinterpret the long as a pointer; a move is only needed when the
    // register allocator picked different registers for src and dst.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8994
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // The encoding emits a 32-bit register move (movl), so the debug format
  // describes movl as well (it previously misdescribed the instruction as
  // the 64-bit movq).
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    // Move only when allocation placed src and dst in different registers.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9007
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Expose the raw narrow-oop bits as a long; a move is only needed when
    // src and dst were allocated to different registers.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9020
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Expose the raw pointer bits as a long; a move is only needed when
    // src and dst were allocated to different registers.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9033
9034 // Convert oop into int for vectors alignment masking
9035 instruct convP2I(rRegI dst, rRegP src)
9036 %{
9037 match(Set dst (ConvL2I (CastP2X src)));
9038
9039 format %{ "movl $dst, $src\t# ptr -> int" %}
9040 ins_encode %{
15292 effect(DEF dst, USE src);
15293 ins_cost(100);
15294 format %{ "movd $dst,$src\t# MoveI2F" %}
15295 ins_encode %{
15296 __ movdl($dst$$XMMRegister, $src$$Register);
15297 %}
15298 ins_pipe( pipe_slow );
15299 %}
15300
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  // Bit-move of a 64-bit GPR into an XMM register. The encoding emits
  // movdq (the 64-bit GPR->XMM form), so the debug format says movdq as
  // well; it previously said movd, which is the 32-bit form.
  format %{ "movdq $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15311
15312
15313 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small (non-large), value-fill ClearArray on targets with UseAVX <= 2.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15373
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  // Small ClearArray, word-copy-only variant (rep stosb is not allowed),
  // for targets with UseAVX <= 2.
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15429
15430 // Small non-constant length ClearArray for AVX512 targets.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  // Small ClearArray on AVX512 (UseAVX > 2) targets; ktmp is used by
  // clear_mem for masked stores.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15490
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  // Small ClearArray, word-copy-only variant, on AVX512 (UseAVX > 2) targets.
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15550
15551 // Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  // Large ClearArray (no short-length fast path) on UseAVX <= 2 targets.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15600
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  // Large ClearArray, word-copy-only variant (no rep stosb path), on
  // UseAVX <= 2 targets.
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15646
15647 // Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  // Large ClearArray on AVX512 (UseAVX > 2) targets; ktmp is used by
  // clear_mem for masked stores.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15697
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  // Large ClearArray, word-copy-only variant, on AVX512 (UseAVX > 2) targets.
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15747
15748 // Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Constant-length small ClearArray; requires AVX512VL and vectors of at
  // least 32 bytes so clear_mem can use (masked) vector stores.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // cnt is an immediate here ($cnt$$constant), unlike the register variants.
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15762
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  // Latin1-vs-Latin1 (LL) string compare for targets without AVX512VLBW.
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register needed on the non-AVX512VLBW path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15778
17616 effect(USE meth);
17617
17618 ins_cost(300);
17619 format %{ "call_leaf,runtime " %}
17620 ins_encode(clear_avx, Java_To_Runtime(meth));
17621 ins_pipe(pipe_slow);
17622 %}
17623
17624 // Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Unlike the plain leaf call above, no clear_avx (vzeroupper) is emitted:
  // vector arguments/results must stay live across the call (consistent with
  // MachCallRuntimeNode::ret_addr_offset skipping clear_avx_size() for
  // Op_CallLeafVector).
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17635
17636 // Call runtime without safepoint
17637 // entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Selected only when the call node has no static entry point; the target
  // address is supplied in a register instead.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17651
17652 // Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  // Direct counterpart of CallLeafNoFPInDirect: requires a known entry point
  // and clears AVX state (vzeroupper) before calling into the runtime.
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17664
17665 // Return Instruction
17666 // Remove the return address & jump to it.
17667 // Notice: We always emit a nop after a ret to make sure there is room
17668 // for safepoint patching
17669 instruct Ret()
17670 %{
17671 match(Return);
17672
17673 format %{ "ret" %}
17674 ins_encode %{
17675 __ ret(0);
|