1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1870 st->print("\n\t");
1871 st->print("# stack alignment check");
1872 #endif
1873 }
1874 if (C->stub_function() != nullptr) {
1875 st->print("\n\t");
1876 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1877 st->print("\n\t");
1878 st->print("je fast_entry\t");
1879 st->print("\n\t");
1880 st->print("call #nmethod_entry_barrier_stub\t");
1881 st->print("\n\tfast_entry:");
1882 }
1883 st->cr();
1884 }
1885 #endif
1886
1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1888 Compile* C = ra_->C;
1889
1890 int framesize = C->output()->frame_size_in_bytes();
1891 int bangsize = C->output()->bang_size_in_bytes();
1892
1893 if (C->clinit_barrier_on_entry()) {
1894 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1895 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1896
1897 Label L_skip_barrier;
1898 Register klass = rscratch1;
1899
1900 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1901 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1902
1903 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1904
1905 __ bind(L_skip_barrier);
1906 }
1907
1908 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1909
1910 C->output()->set_frame_complete(__ offset());
1911
1912 if (C->has_mach_constant_base_node()) {
1913 // NOTE: We set the table base offset here because users might be
1914 // emitted before MachConstantBaseNode.
1915 ConstantTable& constant_table = C->output()->constant_table();
1916 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1917 }
1918 }
1919
1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1921 {
1922 return MachNode::size(ra_); // too many variables; just compute it
1923 // the hard way
1924 }
1925
1926 int MachPrologNode::reloc() const
1927 {
1928 return 0; // a large enough number
1929 }
1930
1931 //=============================================================================
1932 #ifndef PRODUCT
1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1934 {
1935 Compile* C = ra_->C;
1936 if (generate_vzeroupper(C)) {
1937 st->print("vzeroupper");
1938 st->cr(); st->print("\t");
1939 }
1940
1941 int framesize = C->output()->frame_size_in_bytes();
1942 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1943 // Remove word for return adr already pushed
1944 // and RBP
1952 st->print_cr("popq rbp");
1953 if (do_polling() && C->is_method_compilation()) {
1954 st->print("\t");
1955 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1956 "ja #safepoint_stub\t"
1957 "# Safepoint: poll for GC");
1958 }
1959 }
1960 #endif
1961
1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1963 {
1964 Compile* C = ra_->C;
1965
1966 if (generate_vzeroupper(C)) {
1967 // Clear upper bits of YMM registers when current compiled code uses
1968 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1969 __ vzeroupper();
1970 }
1971
1972 int framesize = C->output()->frame_size_in_bytes();
1973 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1974 // Remove word for return adr already pushed
1975 // and RBP
1976 framesize -= 2*wordSize;
1977
1978 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1979
1980 if (framesize) {
1981 __ addq(rsp, framesize);
1982 }
1983
1984 __ popq(rbp);
1985
1986 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1987 __ reserved_stack_check();
1988 }
1989
1990 if (do_polling() && C->is_method_compilation()) {
1991 Label dummy_label;
1992 Label* code_stub = &dummy_label;
1993 if (!C->output()->in_scratch_emit_size()) {
1994 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1995 C->output()->add_stub(stub);
1996 code_stub = &stub->entry();
1997 }
1998 __ relocate(relocInfo::poll_return_type);
1999 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
2000 }
2001 }
2002
2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
2004 {
2005 return MachNode::size(ra_); // too many variables; just compute it
2006 // the hard way
2007 }
2008
2009 int MachEpilogNode::reloc() const
2010 {
2011 return 2; // a large enough number
2012 }
2013
2014 const Pipeline* MachEpilogNode::pipeline() const
2015 {
2016 return MachNode::pipeline_class();
2017 }
2018
2019 //=============================================================================
2020
// Tags for the kind of location a value lives in.
// NOTE(review): presumably used to classify source/destination of spill
// copies — confirm against the users of this enum.
enum RC {
  rc_bad   = 0,
  rc_int   = 1,
  rc_kreg  = 2,
  rc_float = 3,
  rc_stack = 4
};
2028
2590 #endif
2591
2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2593 {
2594 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2595 int reg = ra_->get_encode(this);
2596
2597 __ lea(as_Register(reg), Address(rsp, offset));
2598 }
2599
2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2601 {
2602 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2603 if (ra_->get_encode(this) > 15) {
2604 return (offset < 0x80) ? 6 : 9; // REX2
2605 } else {
2606 return (offset < 0x80) ? 5 : 8; // REX
2607 }
2608 }
2609
2610 //=============================================================================
2611 #ifndef PRODUCT
2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2613 {
2614 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2615 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2616 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2617 }
2618 #endif
2619
2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2621 {
2622 __ ic_check(InteriorEntryAlignment);
2623 }
2624
2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2626 {
2627 return MachNode::size(ra_); // too many variables; just compute it
2628 // the hard way
2629 }
2630
2631
2632 //=============================================================================
2633
2634 bool Matcher::supports_vector_calling_convention(void) {
2635 return EnableVectorSupport;
2636 }
2637
2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2639 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2640 }
2641
2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2643 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2644 }
2645
2646 #ifdef ASSERT
2647 static bool is_ndd_demotable(const MachNode* mdef) {
2648 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2649 }
2650 #endif
4584 }
4585 __ post_call_nop();
4586 %}
4587
// Encoding for a dynamic Java call: emits ic_call() to the target method with
// the resolved method index, followed by post_call_nop().
4588 enc_class Java_Dynamic_Call(method meth) %{
4589 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4590 __ post_call_nop();
4591 %}
4592
// Encoding emitted after a call. Generates code only when VerifyStackAtCalls
// is set: the stack word at the computed offset from rsp is compared with the
// cookie 0xbadb100d and an int3 trap is executed on mismatch.
4593 enc_class call_epilog %{
4594 if (VerifyStackAtCalls) {
4595 // Check that stack depth is unchanged: find majik cookie on stack
// Offset of the slot three words below _old_SP, relative to rsp.
4596 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4597 Label L;
4598 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4599 __ jccb(Assembler::equal, L);
4600 // Die if stack mismatch
4601 __ int3();
4602 __ bind(L);
4603 }
4604 %}
4605
4606 %}
4607
4608 //----------FRAME--------------------------------------------------------------
4609 // Definition of frame structure and management information.
4610 //
4611 // S T A C K L A Y O U T Allocators stack-slot number
4612 // | (to get allocators register number
4613 // G Owned by | | v add OptoReg::stack0())
4614 // r CALLER | |
4615 // o | +--------+ pad to even-align allocators stack-slot
4616 // w V | pad0 | numbers; owned by CALLER
4617 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4618 // h ^ | in | 5
4619 // | | args | 4 Holes in incoming args owned by SELF
4620 // | | | | 3
4621 // | | +--------+
4622 // V | | old out| Empty on Intel, window on Sparc
4623 // | old |preserve| Must be even aligned.
5762 %}
5763 %}
5764
5765 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches reg + (ConvI2L(idx) << scale) + off and exposes it as a memory
// operand with base, index, scale and displacement.
5766 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5767 %{
5768 constraint(ALLOC_IN_RC(ptr_reg));
// Only applies when the long type reached via in(2)->in(3)->in(1) has a
// non-negative lower bound (presumably the ConvI2L of idx — confirm).
5769 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5770 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5771
5772 op_cost(10);
5773 format %{"[$reg + $off + $idx << $scale]" %}
5774 interface(MEMORY_INTER) %{
5775 base($reg);
5776 index($idx);
5777 scale($scale);
5778 disp($off);
5779 %}
5780 %}
5781
5782 // Indirect Narrow Oop Plus Offset Operand
5783 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5784 // we can't free r12 even with CompressedOops::base() == nullptr.
// Memory operand for a decoded narrow oop plus offset: R12 is the base, the
// narrow oop register is the index scaled by 8, and off is the displacement.
5785 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
// Usable only with compressed oops whose shift equals Address::times_8,
// matching the fixed scale(0x3) below.
5786 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5787 constraint(ALLOC_IN_RC(ptr_reg));
5788 match(AddP (DecodeN reg) off);
5789
5790 op_cost(10);
5791 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5792 interface(MEMORY_INTER) %{
5793 base(0xc); // R12
5794 index($reg);
5795 scale(0x3);
5796 disp($off);
5797 %}
5798 %}
5799
5800 // Indirect Memory Operand
5801 operand indirectNarrow(rRegN reg)
6271 %}
6272
6273 // Replaces legVec during post-selection cleanup. See above.
// Register operand for VecZ values, allocated from the vectorz_reg_legacy
// register class; its format string is empty.
6274 operand legVecZ() %{
6275 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6276 match(VecZ);
6277
6278 format %{ %}
6279 interface(REG_INTER);
6280 %}
6281
6282 //----------OPERAND CLASSES----------------------------------------------------
6283 // Operand Classes are groups of operands that are used to simplify
6284 // instruction definitions by not requiring the AD writer to specify separate
6285 // instructions for every form of operand when the instruction accepts
6286 // multiple operand types with the same basic encoding and format. The classic
6287 // case of this is memory operands.
6288
// The "memory" operand class: every addressing-mode operand listed here can
// be used wherever an instruction declares a memory operand. The first lines
// list the regular forms, followed by indCompressedOopOffset and the *Narrow
// forms.
6289 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6290 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6291 indCompressedOopOffset,
6292 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6293 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6294 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6295
6296 //----------PIPELINE-----------------------------------------------------------
6297 // Rules which define the behavior of the target architectures pipeline.
6298 pipeline %{
6299
6300 //----------ATTRIBUTES---------------------------------------------------------
// Global pipeline attributes for this target.
6301 attributes %{
6302 variable_size_instructions; // Instructions are variable size
6303 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6304 instruction_unit_size = 1; // Instruction size is counted in units of 1 byte
6305 instruction_fetch_unit_size = 16; // The processor fetches one line
6306 instruction_fetch_units = 1; // of 16 bytes
6307 %}
6308
6309 //----------RESOURCES----------------------------------------------------------
6310 // Resources are the functional units available to the machine
6311
8906 format %{ "MEMBAR-storestore (empty encoding)" %}
8907 ins_encode( );
8908 ins_pipe(empty);
8909 %}
8910
8911 //----------Move Instructions--------------------------------------------------
8912
// CastX2P: reinterpret a long register value as a pointer.
8913 instruct castX2P(rRegP dst, rRegL src)
8914 %{
8915 match(Set dst (CastX2P src));
8916
8917 format %{ "movq $dst, $src\t# long->ptr" %}
8918 ins_encode %{
// No instruction is emitted when source and destination are the same register.
8919 if ($dst$$reg != $src$$reg) {
8920 __ movptr($dst$$Register, $src$$Register);
8921 }
8922 %}
8923 ins_pipe(ialu_reg_reg); // XXX
8924 %}
8925
// CastP2X: reinterpret a pointer register value as a long.
8926 instruct castP2X(rRegL dst, rRegP src)
8927 %{
8928 match(Set dst (CastP2X src));
8929
8930 format %{ "movq $dst, $src\t# ptr -> long" %}
8931 ins_encode %{
// No instruction is emitted when source and destination are the same register.
8932 if ($dst$$reg != $src$$reg) {
8933 __ movptr($dst$$Register, $src$$Register);
8934 }
8935 %}
8936 ins_pipe(ialu_reg_reg); // XXX
8937 %}
8938
8939 // Convert oop into int for vectors alignment masking
8940 instruct convP2I(rRegI dst, rRegP src)
8941 %{
8942 match(Set dst (ConvL2I (CastP2X src)));
8943
8944 format %{ "movl $dst, $src\t# ptr -> int" %}
8945 ins_encode %{
15199 effect(DEF dst, USE src);
15200 ins_cost(100);
15201 format %{ "movd $dst,$src\t# MoveI2F" %}
15202 ins_encode %{
15203 __ movdl($dst$$XMMRegister, $src$$Register);
15204 %}
15205 ins_pipe( pipe_slow );
15206 %}
15207
// MoveL2D between registers: moves a long general-purpose register into a
// double (XMM) register using movdq.
15208 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15209 match(Set dst (MoveL2D src));
15210 effect(DEF dst, USE src);
15211 ins_cost(100);
15212 format %{ "movd $dst,$src\t# MoveL2D" %}
15213 ins_encode %{
15214 __ movdq($dst$$XMMRegister, $src$$Register);
15215 %}
15216 ins_pipe( pipe_slow );
15217 %}
15218
15219 // Fast clearing of an array
15220 // Small non-constant length ClearArray for non-AVX512 targets.
15221 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15222 Universe dummy, rFlagsReg cr)
15223 %{
15224 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15225 match(Set dummy (ClearArray cnt base));
15226 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15227
15228 format %{ $$template
15229 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15230 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15231 $$emit$$"jg LARGE\n\t"
15232 $$emit$$"dec rcx\n\t"
15233 $$emit$$"js DONE\t# Zero length\n\t"
15234 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15235 $$emit$$"dec rcx\n\t"
15236 $$emit$$"jge LOOP\n\t"
15237 $$emit$$"jmp DONE\n\t"
15238 $$emit$$"# LARGE:\n\t"
15239 if (UseFastStosb) {
15240 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15241 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15242 } else if (UseXMMForObjInit) {
15243 $$emit$$"mov rdi,rax\n\t"
15244 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15245 $$emit$$"jmpq L_zero_64_bytes\n\t"
15246 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15254 $$emit$$"jl L_tail\n\t"
15255 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15256 $$emit$$"add 0x20,rax\n\t"
15257 $$emit$$"sub 0x4,rcx\n\t"
15258 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15259 $$emit$$"add 0x4,rcx\n\t"
15260 $$emit$$"jle L_end\n\t"
15261 $$emit$$"dec rcx\n\t"
15262 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15263 $$emit$$"vmovq xmm0,(rax)\n\t"
15264 $$emit$$"add 0x8,rax\n\t"
15265 $$emit$$"dec rcx\n\t"
15266 $$emit$$"jge L_sloop\n\t"
15267 $$emit$$"# L_end:\n\t"
15268 } else {
15269 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15270 }
15271 $$emit$$"# DONE"
15272 %}
15273 ins_encode %{
15274 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15275 $tmp$$XMMRegister, false, knoreg);
15276 %}
15277 ins_pipe(pipe_slow);
15278 %}
15279
15280 // Small non-constant length ClearArray for AVX512 targets.
15281 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15282 Universe dummy, rFlagsReg cr)
15283 %{
15284 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15285 match(Set dummy (ClearArray cnt base));
15286 ins_cost(125);
15287 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15288
15289 format %{ $$template
15290 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15291 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15292 $$emit$$"jg LARGE\n\t"
15293 $$emit$$"dec rcx\n\t"
15294 $$emit$$"js DONE\t# Zero length\n\t"
15295 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15296 $$emit$$"dec rcx\n\t"
15297 $$emit$$"jge LOOP\n\t"
15298 $$emit$$"jmp DONE\n\t"
15299 $$emit$$"# LARGE:\n\t"
15300 if (UseFastStosb) {
15301 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15302 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15303 } else if (UseXMMForObjInit) {
15304 $$emit$$"mov rdi,rax\n\t"
15305 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15306 $$emit$$"jmpq L_zero_64_bytes\n\t"
15307 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15315 $$emit$$"jl L_tail\n\t"
15316 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15317 $$emit$$"add 0x20,rax\n\t"
15318 $$emit$$"sub 0x4,rcx\n\t"
15319 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15320 $$emit$$"add 0x4,rcx\n\t"
15321 $$emit$$"jle L_end\n\t"
15322 $$emit$$"dec rcx\n\t"
15323 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15324 $$emit$$"vmovq xmm0,(rax)\n\t"
15325 $$emit$$"add 0x8,rax\n\t"
15326 $$emit$$"dec rcx\n\t"
15327 $$emit$$"jge L_sloop\n\t"
15328 $$emit$$"# L_end:\n\t"
15329 } else {
15330 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15331 }
15332 $$emit$$"# DONE"
15333 %}
15334 ins_encode %{
15335 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15336 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15337 %}
15338 ins_pipe(pipe_slow);
15339 %}
15340
15341 // Large non-constant length ClearArray for non-AVX512 targets.
// Selected when the ClearArray node is_large() and UseAVX <= 2. cnt (rcx) and
// base (rdi) are consumed and killed, rax and the flags are killed, and tmp is
// a scratch XMM register. The format text mirrors the three code shapes chosen
// by UseFastStosb / UseXMMForObjInit; the actual code comes from clear_mem(),
// called with `true` (presumably its is_large argument — confirm) and knoreg
// in place of a mask register.
15342 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15343 Universe dummy, rFlagsReg cr)
15344 %{
15345 predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15346 match(Set dummy (ClearArray cnt base));
15347 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15348
15349 format %{ $$template
15350 if (UseFastStosb) {
15351 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15352 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15353 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15354 } else if (UseXMMForObjInit) {
15355 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15356 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15357 $$emit$$"jmpq L_zero_64_bytes\n\t"
15358 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15359 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15360 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15361 $$emit$$"add 0x40,rax\n\t"
15362 $$emit$$"# L_zero_64_bytes:\n\t"
15363 $$emit$$"sub 0x8,rcx\n\t"
15364 $$emit$$"jge L_loop\n\t"
15365 $$emit$$"add 0x4,rcx\n\t"
15366 $$emit$$"jl L_tail\n\t"
15367 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15368 $$emit$$"add 0x20,rax\n\t"
15369 $$emit$$"sub 0x4,rcx\n\t"
15370 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15371 $$emit$$"add 0x4,rcx\n\t"
15372 $$emit$$"jle L_end\n\t"
15373 $$emit$$"dec rcx\n\t"
15374 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15375 $$emit$$"vmovq xmm0,(rax)\n\t"
15376 $$emit$$"add 0x8,rax\n\t"
15377 $$emit$$"dec rcx\n\t"
15378 $$emit$$"jge L_sloop\n\t"
15379 $$emit$$"# L_end:\n\t"
15380 } else {
15381 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15382 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15383 }
15384 %}
15385 ins_encode %{
15386 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15387 $tmp$$XMMRegister, true, knoreg);
15388 %}
15389 ins_pipe(pipe_slow);
15390 %}
15391
15392 // Large non-constant length ClearArray for AVX512 targets.
// Selected when the ClearArray node is_large() and UseAVX > 2. Same operands
// and effects as rep_stos_large, plus a temporary mask register ktmp that is
// passed to clear_mem(); tmp is taken from the legRegD class. The format text
// mirrors the three code shapes chosen by UseFastStosb / UseXMMForObjInit.
15393 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15394 Universe dummy, rFlagsReg cr)
15395 %{
15396 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15397 match(Set dummy (ClearArray cnt base));
15398 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15399
15400 format %{ $$template
15401 if (UseFastStosb) {
15402 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15403 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15404 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15405 } else if (UseXMMForObjInit) {
15406 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15407 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15408 $$emit$$"jmpq L_zero_64_bytes\n\t"
15409 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15410 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15411 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15412 $$emit$$"add 0x40,rax\n\t"
15413 $$emit$$"# L_zero_64_bytes:\n\t"
15414 $$emit$$"sub 0x8,rcx\n\t"
15415 $$emit$$"jge L_loop\n\t"
15416 $$emit$$"add 0x4,rcx\n\t"
15417 $$emit$$"jl L_tail\n\t"
15418 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15419 $$emit$$"add 0x20,rax\n\t"
15420 $$emit$$"sub 0x4,rcx\n\t"
15421 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15422 $$emit$$"add 0x4,rcx\n\t"
15423 $$emit$$"jle L_end\n\t"
15424 $$emit$$"dec rcx\n\t"
15425 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15426 $$emit$$"vmovq xmm0,(rax)\n\t"
15427 $$emit$$"add 0x8,rax\n\t"
15428 $$emit$$"dec rcx\n\t"
15429 $$emit$$"jge L_sloop\n\t"
15430 $$emit$$"# L_end:\n\t"
15431 } else {
15432 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15433 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15434 }
15435 %}
15436 ins_encode %{
15437 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15438 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15439 %}
15440 ins_pipe(pipe_slow);
15441 %}
15442
15443 // Small constant length ClearArray for AVX512 targets.
// Selected when the ClearArray node is not is_large(), MaxVectorSize >= 32 and
// the CPU supports AVX512VL. The count is an immediate and is passed to
// clear_mem() as a constant; tmp, zero and ktmp are scratch registers and the
// flags are killed.
15444 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15445 %{
15446 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15447 match(Set dummy (ClearArray cnt base));
15448 ins_cost(100);
15449 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15450 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15451 ins_encode %{
15452 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15453 %}
15454 ins_pipe(pipe_slow);
15455 %}
15456
// StrComp for the LL encoding on CPUs without AVX512VL+BW support. Both
// string pointers and both counts are consumed and killed, tmp1 is a scratch
// XMM register, the flags are killed, and the result is produced in rax.
// string_compare() is called with knoreg in place of a mask register.
15457 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15458 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15459 %{
15460 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15461 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15462 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15463
15464 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15465 ins_encode %{
15466 __ string_compare($str1$$Register, $str2$$Register,
15467 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15468 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15469 %}
15470 ins_pipe( pipe_slow );
15471 %}
15472
17310 effect(USE meth);
17311
17312 ins_cost(300);
17313 format %{ "call_leaf,runtime " %}
17314 ins_encode(clear_avx, Java_To_Runtime(meth));
17315 ins_pipe(pipe_slow);
17316 %}
17317
17318 // Call runtime without safepoint and with vector arguments
// Unlike the other leaf-call rules shown in this file, the encoding consists
// of Java_To_Runtime only and does not include clear_avx.
17319 instruct CallLeafDirectVector(method meth)
17320 %{
17321 match(CallLeafVector);
17322 effect(USE meth);
17323
17324 ins_cost(300);
17325 format %{ "call_leaf,vector " %}
17326 ins_encode(Java_To_Runtime(meth));
17327 ins_pipe(pipe_slow);
17328 %}
17329
17330 // Call runtime without safepoint
// Matches CallLeafNoFP; the encoding is clear_avx followed by Java_To_Runtime.
17331 instruct CallLeafNoFPDirect(method meth)
17332 %{
17333 match(CallLeafNoFP);
17334 effect(USE meth);
17335
17336 ins_cost(300);
17337 format %{ "call_leaf_nofp,runtime " %}
17338 ins_encode(clear_avx, Java_To_Runtime(meth));
17339 ins_pipe(pipe_slow);
17340 %}
17341
17342 // Return Instruction
17343 // Remove the return address & jump to it.
17344 // Notice: We always emit a nop after a ret to make sure there is room
17345 // for safepoint patching
17346 instruct Ret()
17347 %{
17348 match(Return);
17349
17350 format %{ "ret" %}
17351 ins_encode %{
17352 __ ret(0);
|
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662 //
1663 // Compute padding required for nodes which need alignment
1664 //
1665
1666 // The address of the call instruction needs to be 4-byte aligned to
1667 // ensure that it does not span a cache line so that it can be patched.
1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1669 {
1670 current_offset += clear_avx_size(); // skip vzeroupper
1671 current_offset += 1; // skip call opcode byte
1672 return align_up(current_offset, alignment_required()) - current_offset;
1673 }
1674
1675 // The address of the call instruction needs to be 4-byte aligned to
1874 st->print("\n\t");
1875 st->print("# stack alignment check");
1876 #endif
1877 }
1878 if (C->stub_function() != nullptr) {
1879 st->print("\n\t");
1880 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1881 st->print("\n\t");
1882 st->print("je fast_entry\t");
1883 st->print("\n\t");
1884 st->print("call #nmethod_entry_barrier_stub\t");
1885 st->print("\n\tfast_entry:");
1886 }
1887 st->cr();
1888 }
1889 #endif
1890
1891 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1892 Compile* C = ra_->C;
1893
1894 __ verified_entry(C);
1895
1896 if (ra_->C->stub_function() == nullptr) {
1897 __ entry_barrier();
1898 }
1899
1900 if (!Compile::current()->output()->in_scratch_emit_size()) {
1901 __ bind(*_verified_entry);
1902 }
1903
1904 C->output()->set_frame_complete(__ offset());
1905
1906 if (C->has_mach_constant_base_node()) {
1907 // NOTE: We set the table base offset here because users might be
1908 // emitted before MachConstantBaseNode.
1909 ConstantTable& constant_table = C->output()->constant_table();
1910 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1911 }
1912 }
1913
1914
1915 int MachPrologNode::reloc() const
1916 {
1917 return 0; // a large enough number
1918 }
1919
1920 //=============================================================================
1921 #ifndef PRODUCT
1922 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1923 {
1924 Compile* C = ra_->C;
1925 if (generate_vzeroupper(C)) {
1926 st->print("vzeroupper");
1927 st->cr(); st->print("\t");
1928 }
1929
1930 int framesize = C->output()->frame_size_in_bytes();
1931 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1932 // Remove word for return adr already pushed
1933 // and RBP
1941 st->print_cr("popq rbp");
1942 if (do_polling() && C->is_method_compilation()) {
1943 st->print("\t");
1944 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1945 "ja #safepoint_stub\t"
1946 "# Safepoint: poll for GC");
1947 }
1948 }
1949 #endif
1950
1951 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1952 {
1953 Compile* C = ra_->C;
1954
1955 if (generate_vzeroupper(C)) {
1956 // Clear upper bits of YMM registers when current compiled code uses
1957 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1958 __ vzeroupper();
1959 }
1960
1961 // Subtract two words to account for return address and rbp
1962 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1963 __ remove_frame(initial_framesize, C->needs_stack_repair());
1964
1965 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1966 __ reserved_stack_check();
1967 }
1968
1969 if (do_polling() && C->is_method_compilation()) {
1970 Label dummy_label;
1971 Label* code_stub = &dummy_label;
1972 if (!C->output()->in_scratch_emit_size()) {
1973 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1974 C->output()->add_stub(stub);
1975 code_stub = &stub->entry();
1976 }
1977 __ relocate(relocInfo::poll_return_type);
1978 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1979 }
1980 }
1981
1982 int MachEpilogNode::reloc() const
1983 {
1984 return 2; // a large enough number
1985 }
1986
1987 const Pipeline* MachEpilogNode::pipeline() const
1988 {
1989 return MachNode::pipeline_class();
1990 }
1991
1992 //=============================================================================
1993
// Tags for the kind of location a value lives in.
// NOTE(review): presumably used to classify source/destination of spill
// copies — confirm against the users of this enum.
enum RC {
  rc_bad   = 0,
  rc_int   = 1,
  rc_kreg  = 2,
  rc_float = 3,
  rc_stack = 4
};
2001
2563 #endif
2564
2565 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2566 {
2567 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2568 int reg = ra_->get_encode(this);
2569
2570 __ lea(as_Register(reg), Address(rsp, offset));
2571 }
2572
2573 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2574 {
2575 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2576 if (ra_->get_encode(this) > 15) {
2577 return (offset < 0x80) ? 6 : 9; // REX2
2578 } else {
2579 return (offset < 0x80) ? 5 : 8; // REX
2580 }
2581 }
2582
2583 //=============================================================================
2584 #ifndef PRODUCT
2585 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2586 {
2587 st->print_cr("MachVEPNode");
2588 }
2589 #endif
2590
2591 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2592 {
2593 CodeBuffer* cbuf = masm->code();
2594 if (!_verified) {
2595 __ ic_check(1);
2596 } else {
2597 if (ra_->C->stub_function() == nullptr) {
2598 // Emit the entry barrier in a temporary frame before unpacking because
2599 // it can deopt, which would require packing the scalarized args again.
2600 __ verified_entry(ra_->C, 0);
2601 __ entry_barrier();
2602 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2603 __ remove_frame(initial_framesize, false);
2604 }
2605 // Unpack inline type args passed as oop and then jump to
2606 // the verified entry point (skipping the unverified entry).
2607 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2608 // Emit code for verified entry and save increment for stack repair on return
2609 __ verified_entry(ra_->C, sp_inc);
2610 if (Compile::current()->output()->in_scratch_emit_size()) {
2611 Label dummy_verified_entry;
2612 __ jmp(dummy_verified_entry);
2613 } else {
2614 __ jmp(*_verified_entry);
2615 }
2616 }
2617 if (ra_->C->stub_function() == nullptr) {
2618 // Pad so that the next call to MachVEPNode::emit() starts out with the
2619 // correct alignment. This is needed by entry_barrier() to align the
2620 // compare. But unfortunately we need to align all 4 MachVEPNodes because
2621 // entry point offsets are computed using scratch_emit_size(), so starting
2622 // alignment must match the alignment of the scratch buffer, otherwise the sizes
2623 // will be off.
2624 __ align(4);
2625 }
2626 }
2627
2628 //=============================================================================
2629 #ifndef PRODUCT
2630 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2631 {
2632 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2633 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2634 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2635 }
2636 #endif
2637
2638 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2639 {
2640 __ ic_check(InteriorEntryAlignment);
2641 }
2642
2643
2644 //=============================================================================
2645
2646 bool Matcher::supports_vector_calling_convention(void) {
2647 return EnableVectorSupport;
2648 }
2649
2650 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2651 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2652 }
2653
2654 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2655 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2656 }
2657
#ifdef ASSERT
// Debug-build helper: true when mdef carries either of the two NDD-demotable
// flags. The second flag is only examined when the first one is not set.
static bool is_ndd_demotable(const MachNode* mdef) {
  if (is_ndd_demotable_opr1(mdef)) {
    return true;
  }
  return is_ndd_demotable_opr2(mdef);
}
#endif
4596 }
4597 __ post_call_nop();
4598 %}
4599
// Encoding for a dynamically dispatched Java call: emits the call through
// ic_call() with the target method address and the resolved method index,
// followed by a post-call nop.
4600 enc_class Java_Dynamic_Call(method meth) %{
4601 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4602 __ post_call_nop();
4603 %}
4604
// Encoding emitted after a call. It has two independent parts:
//  1. With VerifyStackAtCalls, the stack slot computed below is compared
//     against 0xbadb100d and an int3 is executed when it does not match.
//  2. For calls that return an inline type as fields (excluding method handle
//     intrinsics and unloaded return types), the null-marker projection is
//     initialized from rax and, if the return value is used, rax is zeroed
//     when its lowest bit is set.
4605 enc_class call_epilog %{
4606 if (VerifyStackAtCalls) {
4607 // Check that stack depth is unchanged: find magic cookie on stack
4608 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4609 Label L;
4610 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4611 __ jccb(Assembler::equal, L);
4612 // Die if stack mismatch
4613 __ int3();
4614 __ bind(L);
4615 }
4616 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4617 // The last return value is not set by the callee but used to pass the null marker to compiled code.
4618 // Search for the corresponding projection, get the register and emit code that initializes it.
// 'con' is the index of that last return value in the calling-convention range.
4619 uint con = (tf()->range_cc()->cnt() - 1);
4620 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4621 ProjNode* proj = fast_out(i)->as_Proj();
4622 if (proj->_con == con) {
4623 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4624 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4625 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
// If the projection was assigned a stack slot, compute the marker in
// rscratch1 and store it to that slot afterwards.
4626 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4627 __ testq(rax, rax);
4628 __ setb(Assembler::notZero, toReg);
4629 __ movzbl(toReg, toReg);
4630 if (reg->is_stack()) {
4631 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4632 __ movq(Address(rsp, st_off), toReg);
4633 }
// Only the first matching projection is handled.
4634 break;
4635 }
4636 }
4637 if (return_value_is_used()) {
4638 // An inline type is returned as fields in multiple registers.
4639 // Rax either contains an oop if the inline type is buffered or a pointer
4640 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4641 // if the lowest bit is set to allow C2 to use the oop after null checking.
4642 // rax &= (rax & 1) - 1
4643 __ movptr(rscratch1, rax);
4644 __ andptr(rscratch1, 0x1);
4645 __ subptr(rscratch1, 0x1);
4646 __ andptr(rax, rscratch1);
4647 }
4648 }
4649 %}
4650
4651 %}
4652
4653 //----------FRAME--------------------------------------------------------------
4654 // Definition of frame structure and management information.
4655 //
4656 // S T A C K L A Y O U T Allocators stack-slot number
4657 // | (to get allocators register number
4658 // G Owned by | | v add OptoReg::stack0())
4659 // r CALLER | |
4660 // o | +--------+ pad to even-align allocators stack-slot
4661 // w V | pad0 | numbers; owned by CALLER
4662 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4663 // h ^ | in | 5
4664 // | | args | 4 Holes in incoming args owned by SELF
4665 // | | | | 3
4666 // | | +--------+
4667 // V | | old out| Empty on Intel, window on Sparc
4668 // | old |preserve| Must be even aligned.
5807 %}
5808 %}
5809
5810 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Memory operand matching base + (ConvI2L(idx) << scale) + off.
// The predicate walks from the matched node to the node that the match rule
// shows as ConvI2L and accepts the operand only when the lower bound (_lo) of
// its long type is >= 0, i.e. the index is known to be non-negative.
5811 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5812 %{
5813 constraint(ALLOC_IN_RC(ptr_reg));
5814 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5815 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5816
5817 op_cost(10);
5818 format %{"[$reg + $off + $idx << $scale]" %}
5819 interface(MEMORY_INTER) %{
5820 base($reg);
5821 index($idx);
5822 scale($scale);
5823 disp($off);
5824 %}
5825 %}
5826
5827 // Indirect Narrow Oop Operand
// Memory operand that folds a DecodeN into the address: the narrow oop
// register becomes the index with a fixed scale of 3 and R12 (encoding 0xc)
// as the base, with no displacement. Only available when compressed oops are
// in use and the compressed-oop shift equals Address::times_8.
5828 operand indCompressedOop(rRegN reg) %{
5829 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5830 constraint(ALLOC_IN_RC(ptr_reg));
5831 match(DecodeN reg);
5832
5833 op_cost(10);
5834 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5835 interface(MEMORY_INTER) %{
5836 base(0xc); // R12
5837 index($reg);
5838 scale(0x3);
5839 disp(0x0);
5840 %}
5841 %}
5842
5843 // Indirect Narrow Oop Plus Offset Operand
5844 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
5845 // so we can't free r12 even with CompressedOops::base() == nullptr.
// Memory operand that folds (DecodeN reg) + off into the address: R12
// (encoding 0xc) is the base, the narrow oop register is the index with a
// fixed scale of 3, and $off is the displacement. Same predicate as
// indCompressedOop: compressed oops with a shift equal to Address::times_8.
5846 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5847 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5848 constraint(ALLOC_IN_RC(ptr_reg));
5849 match(AddP (DecodeN reg) off);
5850
5851 op_cost(10);
5852 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5853 interface(MEMORY_INTER) %{
5854 base(0xc); // R12
5855 index($reg);
5856 scale(0x3);
5857 disp($off);
5858 %}
5859 %}
5860
5861 // Indirect Memory Operand
5862 operand indirectNarrow(rRegN reg)
6332 %}
6333
6334 // Replaces legVec during post-selection cleanup. See above.
// Register operand for VecZ values allocated from the vectorz_reg_legacy
// register class. It has an empty format and uses the plain register interface.
6335 operand legVecZ() %{
6336 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6337 match(VecZ);
6338
6339 format %{ %}
6340 interface(REG_INTER);
6341 %}
6342
6343 //----------OPERAND CLASSES----------------------------------------------------
6344 // Operand Classes are groups of operands that are used to simplify
6345 // instruction definitions by not requiring the AD writer to specify separate
6346 // instructions for every form of operand when the instruction accepts
6347 // multiple operand types with the same basic encoding and format. The classic
6348 // case of this is memory operands.
6349
// The "memory" operand class: every addressing-mode operand listed here can be
// used wherever an instruction declares a memory operand. It groups the plain
// forms, the two compressed-oop forms, and the *Narrow variants.
6350 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6351 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6352 indCompressedOop, indCompressedOopOffset,
6353 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6354 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6355 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6356
6357 //----------PIPELINE-----------------------------------------------------------
6358 // Rules which define the behavior of the target architectures pipeline.
6359 pipeline %{
6360
6361 //----------ATTRIBUTES---------------------------------------------------------
// Global pipeline attributes for this architecture description.
6362 attributes %{
6363 variable_size_instructions; // Instructions are variable-sized, not fixed-size
6364 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6365 instruction_unit_size = 1; // Instruction sizes are counted in units of 1 byte
6366 instruction_fetch_unit_size = 16; // The processor fetches one line
6367 instruction_fetch_units = 1; // of 16 bytes
6368 %}
6369
6370 //----------RESOURCES----------------------------------------------------------
6371 // Resources are the functional units available to the machine
6372
8967 format %{ "MEMBAR-storestore (empty encoding)" %}
8968 ins_encode( );
8969 ins_pipe(empty);
8970 %}
8971
8972 //----------Move Instructions--------------------------------------------------
8973
// CastX2P: reinterpret a long register as a pointer. A register-to-register
// movptr is emitted only when dst and src were assigned different registers;
// otherwise no code is emitted.
8974 instruct castX2P(rRegP dst, rRegL src)
8975 %{
8976 match(Set dst (CastX2P src));
8977
8978 format %{ "movq $dst, $src\t# long->ptr" %}
8979 ins_encode %{
8980 if ($dst$$reg != $src$$reg) {
8981 __ movptr($dst$$Register, $src$$Register);
8982 }
8983 %}
8984 ins_pipe(ialu_reg_reg); // XXX
8985 %}
8986
8987 instruct castI2N(rRegN dst, rRegI src)
8988 %{
8989 match(Set dst (CastI2N src));
8990
8991 format %{ "movq $dst, $src\t# int -> narrow ptr" %}
8992 ins_encode %{
8993 if ($dst$$reg != $src$$reg) {
8994 __ movl($dst$$Register, $src$$Register);
8995 }
8996 %}
8997 ins_pipe(ialu_reg_reg); // XXX
8998 %}
8999
// CastP2X applied to a narrow-pointer register (rRegN source): copies the
// register into a long register with movptr, skipping the move when dst and
// src were assigned the same register.
// NOTE(review): the format annotation below reads "ptr -> long", identical to
// castP2X, so the two rules cannot be told apart in the listing; consider
// "narrow ptr -> long" - confirm before changing.
9000 instruct castN2X(rRegL dst, rRegN src)
9001 %{
9002 match(Set dst (CastP2X src));
9003
9004 format %{ "movq $dst, $src\t# ptr -> long" %}
9005 ins_encode %{
9006 if ($dst$$reg != $src$$reg) {
9007 __ movptr($dst$$Register, $src$$Register);
9008 }
9009 %}
9010 ins_pipe(ialu_reg_reg); // XXX
9011 %}
9012
// CastP2X: reinterpret a pointer register as a long. A register-to-register
// movptr is emitted only when dst and src were assigned different registers;
// otherwise no code is emitted.
9013 instruct castP2X(rRegL dst, rRegP src)
9014 %{
9015 match(Set dst (CastP2X src));
9016
9017 format %{ "movq $dst, $src\t# ptr -> long" %}
9018 ins_encode %{
9019 if ($dst$$reg != $src$$reg) {
9020 __ movptr($dst$$Register, $src$$Register);
9021 }
9022 %}
9023 ins_pipe(ialu_reg_reg); // XXX
9024 %}
9025
9026 // Convert oop into int for vectors alignment masking
9027 instruct convP2I(rRegI dst, rRegP src)
9028 %{
9029 match(Set dst (ConvL2I (CastP2X src)));
9030
9031 format %{ "movl $dst, $src\t# ptr -> int" %}
9032 ins_encode %{
15286 effect(DEF dst, USE src);
15287 ins_cost(100);
15288 format %{ "movd $dst,$src\t# MoveI2F" %}
15289 ins_encode %{
15290 __ movdl($dst$$XMMRegister, $src$$Register);
15291 %}
15292 ins_pipe( pipe_slow );
15293 %}
15294
// MoveL2D between registers: moves the long general-purpose register $src
// into the XMM register $dst using the assembler's movdq(). dst is only
// defined and src only used, per the effect list.
15295 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15296 match(Set dst (MoveL2D src));
15297 effect(DEF dst, USE src);
15298 ins_cost(100);
15299 format %{ "movd $dst,$src\t# MoveL2D" %}
15300 ins_encode %{
15301 __ movdq($dst$$XMMRegister, $src$$Register);
15302 %}
15303 ins_pipe( pipe_slow );
15304 %}
15305
15306
15307 // Fast clearing of an array
15308 // Small non-constant length ClearArray for non-AVX512 targets.
// ClearArray rule selected when the node is neither is_large() nor
// word_copy_only() and UseAVX <= 2. cnt, base and val use the
// rcx_RegL / rdi_RegP / rax_RegL operand classes and are all USE_KILLed; tmp
// is an XMM temporary and the flags are killed.
// The format template below only produces the textual listing; the code is
// generated by the clear_mem() call in ins_encode, whose two boolean
// arguments (false, false) mirror the is_large()/word_copy_only() predicate.
15309 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15310 Universe dummy, rFlagsReg cr)
15311 %{
15312 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15313 match(Set dummy (ClearArray (Binary cnt base) val));
15314 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15315
15316 format %{ $$template
15317 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15318 $$emit$$"jg LARGE\n\t"
15319 $$emit$$"dec rcx\n\t"
15320 $$emit$$"js DONE\t# Zero length\n\t"
15321 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15322 $$emit$$"dec rcx\n\t"
15323 $$emit$$"jge LOOP\n\t"
15324 $$emit$$"jmp DONE\n\t"
15325 $$emit$$"# LARGE:\n\t"
15326 if (UseFastStosb) {
15327 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15328 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15329 } else if (UseXMMForObjInit) {
15330 $$emit$$"movdq $tmp, $val\n\t"
15331 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15332 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15333 $$emit$$"jmpq L_zero_64_bytes\n\t"
15334 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15335 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15336 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15337 $$emit$$"add 0x40,rax\n\t"
15338 $$emit$$"# L_zero_64_bytes:\n\t"
15339 $$emit$$"sub 0x8,rcx\n\t"
15340 $$emit$$"jge L_loop\n\t"
15341 $$emit$$"add 0x4,rcx\n\t"
15342 $$emit$$"jl L_tail\n\t"
15343 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15344 $$emit$$"add 0x20,rax\n\t"
15345 $$emit$$"sub 0x4,rcx\n\t"
15346 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15347 $$emit$$"add 0x4,rcx\n\t"
15348 $$emit$$"jle L_end\n\t"
15349 $$emit$$"dec rcx\n\t"
15350 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15351 $$emit$$"vmovq xmm0,(rax)\n\t"
15352 $$emit$$"add 0x8,rax\n\t"
15353 $$emit$$"dec rcx\n\t"
15354 $$emit$$"jge L_sloop\n\t"
15355 $$emit$$"# L_end:\n\t"
15356 } else {
15357 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15358 }
15359 $$emit$$"# DONE"
15360 %}
15361 ins_encode %{
15362 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15363 $tmp$$XMMRegister, false, false);
15364 %}
15365 ins_pipe(pipe_slow);
15366 %}
15367
// ClearArray rule selected when the node is not is_large() but is
// word_copy_only(), and UseAVX <= 2. Same operands and effects as rep_stos.
// Unlike rep_stos, the listing has no UseFastStosb ("rep stosb") branch.
// The code is generated by clear_mem() with the boolean arguments
// (false, true), mirroring the is_large()/word_copy_only() predicate.
15368 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15369 Universe dummy, rFlagsReg cr)
15370 %{
15371 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15372 match(Set dummy (ClearArray (Binary cnt base) val));
15373 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15374
15375 format %{ $$template
15376 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15377 $$emit$$"jg LARGE\n\t"
15378 $$emit$$"dec rcx\n\t"
15379 $$emit$$"js DONE\t# Zero length\n\t"
15380 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15381 $$emit$$"dec rcx\n\t"
15382 $$emit$$"jge LOOP\n\t"
15383 $$emit$$"jmp DONE\n\t"
15384 $$emit$$"# LARGE:\n\t"
15385 if (UseXMMForObjInit) {
15386 $$emit$$"movdq $tmp, $val\n\t"
15387 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15388 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15389 $$emit$$"jmpq L_zero_64_bytes\n\t"
15390 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15391 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15392 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15393 $$emit$$"add 0x40,rax\n\t"
15394 $$emit$$"# L_zero_64_bytes:\n\t"
15395 $$emit$$"sub 0x8,rcx\n\t"
15396 $$emit$$"jge L_loop\n\t"
15397 $$emit$$"add 0x4,rcx\n\t"
15398 $$emit$$"jl L_tail\n\t"
15399 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15400 $$emit$$"add 0x20,rax\n\t"
15401 $$emit$$"sub 0x4,rcx\n\t"
15402 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15403 $$emit$$"add 0x4,rcx\n\t"
15404 $$emit$$"jle L_end\n\t"
15405 $$emit$$"dec rcx\n\t"
15406 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15407 $$emit$$"vmovq xmm0,(rax)\n\t"
15408 $$emit$$"add 0x8,rax\n\t"
15409 $$emit$$"dec rcx\n\t"
15410 $$emit$$"jge L_sloop\n\t"
15411 $$emit$$"# L_end:\n\t"
15412 } else {
15413 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15414 }
15415 $$emit$$"# DONE"
15416 %}
15417 ins_encode %{
15418 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15419 $tmp$$XMMRegister, false, true);
15420 %}
15421 ins_pipe(pipe_slow);
15422 %}
15423
15424 // Small non-constant length ClearArray for AVX512 targets.
// ClearArray rule selected when the node is neither is_large() nor
// word_copy_only() and UseAVX > 2. Compared with rep_stos it takes a legRegD
// temporary plus an opmask temporary (ktmp), declares ins_cost(125), and
// passes $ktmp as an extra argument to clear_mem(); the boolean arguments
// are (false, false).
// NOTE(review): the listing starts with "xorq rax, rax" although val (rax) is
// an input that is handed to clear_mem() - the listing text may be stale;
// confirm against clear_mem().
15425 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15426 Universe dummy, rFlagsReg cr)
15427 %{
15428 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15429 match(Set dummy (ClearArray (Binary cnt base) val));
15430 ins_cost(125);
15431 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15432
15433 format %{ $$template
15434 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15435 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15436 $$emit$$"jg LARGE\n\t"
15437 $$emit$$"dec rcx\n\t"
15438 $$emit$$"js DONE\t# Zero length\n\t"
15439 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15440 $$emit$$"dec rcx\n\t"
15441 $$emit$$"jge LOOP\n\t"
15442 $$emit$$"jmp DONE\n\t"
15443 $$emit$$"# LARGE:\n\t"
15444 if (UseFastStosb) {
15445 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15446 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15447 } else if (UseXMMForObjInit) {
15448 $$emit$$"mov rdi,rax\n\t"
15449 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15450 $$emit$$"jmpq L_zero_64_bytes\n\t"
15451 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15459 $$emit$$"jl L_tail\n\t"
15460 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15461 $$emit$$"add 0x20,rax\n\t"
15462 $$emit$$"sub 0x4,rcx\n\t"
15463 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15464 $$emit$$"add 0x4,rcx\n\t"
15465 $$emit$$"jle L_end\n\t"
15466 $$emit$$"dec rcx\n\t"
15467 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15468 $$emit$$"vmovq xmm0,(rax)\n\t"
15469 $$emit$$"add 0x8,rax\n\t"
15470 $$emit$$"dec rcx\n\t"
15471 $$emit$$"jge L_sloop\n\t"
15472 $$emit$$"# L_end:\n\t"
15473 } else {
15474 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15475 }
15476 $$emit$$"# DONE"
15477 %}
15478 ins_encode %{
15479 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15480 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15481 %}
15482 ins_pipe(pipe_slow);
15483 %}
15484
// ClearArray rule selected when the node is not is_large() but is
// word_copy_only(), and UseAVX > 2. Same operands, cost and effects as
// rep_stos_evex; clear_mem() is called with the boolean arguments
// (false, true) and $ktmp.
// NOTE(review): unlike the non-EVEX word-copy rule, this listing still has a
// UseFastStosb ("rep stosb") branch and a leading "xorq rax, rax" - the
// listing text may be stale; confirm against clear_mem().
15485 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15486 Universe dummy, rFlagsReg cr)
15487 %{
15488 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15489 match(Set dummy (ClearArray (Binary cnt base) val));
15490 ins_cost(125);
15491 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15492
15493 format %{ $$template
15494 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15495 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15496 $$emit$$"jg LARGE\n\t"
15497 $$emit$$"dec rcx\n\t"
15498 $$emit$$"js DONE\t# Zero length\n\t"
15499 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15500 $$emit$$"dec rcx\n\t"
15501 $$emit$$"jge LOOP\n\t"
15502 $$emit$$"jmp DONE\n\t"
15503 $$emit$$"# LARGE:\n\t"
15504 if (UseFastStosb) {
15505 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15506 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15507 } else if (UseXMMForObjInit) {
15508 $$emit$$"mov rdi,rax\n\t"
15509 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15510 $$emit$$"jmpq L_zero_64_bytes\n\t"
15511 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15519 $$emit$$"jl L_tail\n\t"
15520 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15521 $$emit$$"add 0x20,rax\n\t"
15522 $$emit$$"sub 0x4,rcx\n\t"
15523 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15524 $$emit$$"add 0x4,rcx\n\t"
15525 $$emit$$"jle L_end\n\t"
15526 $$emit$$"dec rcx\n\t"
15527 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15528 $$emit$$"vmovq xmm0,(rax)\n\t"
15529 $$emit$$"add 0x8,rax\n\t"
15530 $$emit$$"dec rcx\n\t"
15531 $$emit$$"jge L_sloop\n\t"
15532 $$emit$$"# L_end:\n\t"
15533 } else {
15534 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15535 }
15536 $$emit$$"# DONE"
15537 %}
15538 ins_encode %{
15539 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15540 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15541 %}
15542 ins_pipe(pipe_slow);
15543 %}
15544
15545 // Large non-constant length ClearArray for non-AVX512 targets.
// ClearArray rule selected when the node is is_large(), not word_copy_only(),
// and UseAVX <= 2. Operands and effects match rep_stos, but the listing has
// no short-length loop in front of the bulk clearing sequence.
// The code is generated by clear_mem() with the boolean arguments
// (true, false), mirroring the is_large()/word_copy_only() predicate.
15546 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15547 Universe dummy, rFlagsReg cr)
15548 %{
15549 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15550 match(Set dummy (ClearArray (Binary cnt base) val));
15551 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15552
15553 format %{ $$template
15554 if (UseFastStosb) {
15555 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15556 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15557 } else if (UseXMMForObjInit) {
15558 $$emit$$"movdq $tmp, $val\n\t"
15559 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15560 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15561 $$emit$$"jmpq L_zero_64_bytes\n\t"
15562 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15563 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15564 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15565 $$emit$$"add 0x40,rax\n\t"
15566 $$emit$$"# L_zero_64_bytes:\n\t"
15567 $$emit$$"sub 0x8,rcx\n\t"
15568 $$emit$$"jge L_loop\n\t"
15569 $$emit$$"add 0x4,rcx\n\t"
15570 $$emit$$"jl L_tail\n\t"
15571 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15572 $$emit$$"add 0x20,rax\n\t"
15573 $$emit$$"sub 0x4,rcx\n\t"
15574 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15575 $$emit$$"add 0x4,rcx\n\t"
15576 $$emit$$"jle L_end\n\t"
15577 $$emit$$"dec rcx\n\t"
15578 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15579 $$emit$$"vmovq xmm0,(rax)\n\t"
15580 $$emit$$"add 0x8,rax\n\t"
15581 $$emit$$"dec rcx\n\t"
15582 $$emit$$"jge L_sloop\n\t"
15583 $$emit$$"# L_end:\n\t"
15584 } else {
15585 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15586 }
15587 %}
15588 ins_encode %{
15589 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15590 $tmp$$XMMRegister, true, false);
15591 %}
15592 ins_pipe(pipe_slow);
15593 %}
15594
// ClearArray rule selected when the node is both is_large() and
// word_copy_only(), and UseAVX <= 2. Same operands and effects as
// rep_stos_large; the listing has no UseFastStosb ("rep stosb") branch.
// The code is generated by clear_mem() with the boolean arguments
// (true, true), mirroring the is_large()/word_copy_only() predicate.
15595 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15596 Universe dummy, rFlagsReg cr)
15597 %{
15598 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15599 match(Set dummy (ClearArray (Binary cnt base) val));
15600 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15601
15602 format %{ $$template
15603 if (UseXMMForObjInit) {
15604 $$emit$$"movdq $tmp, $val\n\t"
15605 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15606 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15607 $$emit$$"jmpq L_zero_64_bytes\n\t"
15608 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15609 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15610 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15611 $$emit$$"add 0x40,rax\n\t"
15612 $$emit$$"# L_zero_64_bytes:\n\t"
15613 $$emit$$"sub 0x8,rcx\n\t"
15614 $$emit$$"jge L_loop\n\t"
15615 $$emit$$"add 0x4,rcx\n\t"
15616 $$emit$$"jl L_tail\n\t"
15617 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15618 $$emit$$"add 0x20,rax\n\t"
15619 $$emit$$"sub 0x4,rcx\n\t"
15620 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15621 $$emit$$"add 0x4,rcx\n\t"
15622 $$emit$$"jle L_end\n\t"
15623 $$emit$$"dec rcx\n\t"
15624 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15625 $$emit$$"vmovq xmm0,(rax)\n\t"
15626 $$emit$$"add 0x8,rax\n\t"
15627 $$emit$$"dec rcx\n\t"
15628 $$emit$$"jge L_sloop\n\t"
15629 $$emit$$"# L_end:\n\t"
15630 } else {
15631 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15632 }
15633 %}
15634 ins_encode %{
15635 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15636 $tmp$$XMMRegister, true, true);
15637 %}
15638 ins_pipe(pipe_slow);
15639 %}
15640
15641 // Large non-constant length ClearArray for AVX512 targets.
// ClearArray rule selected when the node is is_large(), not word_copy_only(),
// and UseAVX > 2. It takes a legRegD temporary plus an opmask temporary
// (ktmp) and passes $ktmp as an extra argument to clear_mem(); the boolean
// arguments are (true, false).
// NOTE(review): the listing prints "xorq rax, rax" / "vpxor ymm0,ymm0,ymm0"
// although val (rax) is an input handed to clear_mem() - the listing text may
// be stale; confirm against clear_mem().
15642 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15643 Universe dummy, rFlagsReg cr)
15644 %{
15645 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15646 match(Set dummy (ClearArray (Binary cnt base) val));
15647 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15648
15649 format %{ $$template
15650 if (UseFastStosb) {
15651 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15652 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15653 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15654 } else if (UseXMMForObjInit) {
15655 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15656 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15657 $$emit$$"jmpq L_zero_64_bytes\n\t"
15658 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15659 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15660 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15661 $$emit$$"add 0x40,rax\n\t"
15662 $$emit$$"# L_zero_64_bytes:\n\t"
15663 $$emit$$"sub 0x8,rcx\n\t"
15664 $$emit$$"jge L_loop\n\t"
15665 $$emit$$"add 0x4,rcx\n\t"
15666 $$emit$$"jl L_tail\n\t"
15667 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15668 $$emit$$"add 0x20,rax\n\t"
15669 $$emit$$"sub 0x4,rcx\n\t"
15670 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15671 $$emit$$"add 0x4,rcx\n\t"
15672 $$emit$$"jle L_end\n\t"
15673 $$emit$$"dec rcx\n\t"
15674 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15675 $$emit$$"vmovq xmm0,(rax)\n\t"
15676 $$emit$$"add 0x8,rax\n\t"
15677 $$emit$$"dec rcx\n\t"
15678 $$emit$$"jge L_sloop\n\t"
15679 $$emit$$"# L_end:\n\t"
15680 } else {
15681 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15682 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15683 }
15684 %}
15685 ins_encode %{
15686 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15687 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15688 %}
15689 ins_pipe(pipe_slow);
15690 %}
15691
// ClearArray rule selected when the node is both is_large() and
// word_copy_only(), and UseAVX > 2. Same operands and effects as
// rep_stos_large_evex; clear_mem() is called with the boolean arguments
// (true, true) and $ktmp.
// NOTE(review): unlike the non-EVEX word-copy rule, this listing still has a
// UseFastStosb ("rep stosb") branch and prints "xorq rax, rax" - the listing
// text may be stale; confirm against clear_mem().
15692 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15693 Universe dummy, rFlagsReg cr)
15694 %{
15695 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15696 match(Set dummy (ClearArray (Binary cnt base) val));
15697 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15698
15699 format %{ $$template
15700 if (UseFastStosb) {
15701 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15702 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15703 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15704 } else if (UseXMMForObjInit) {
15705 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15706 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15707 $$emit$$"jmpq L_zero_64_bytes\n\t"
15708 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15709 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15710 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15711 $$emit$$"add 0x40,rax\n\t"
15712 $$emit$$"# L_zero_64_bytes:\n\t"
15713 $$emit$$"sub 0x8,rcx\n\t"
15714 $$emit$$"jge L_loop\n\t"
15715 $$emit$$"add 0x4,rcx\n\t"
15716 $$emit$$"jl L_tail\n\t"
15717 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15718 $$emit$$"add 0x20,rax\n\t"
15719 $$emit$$"sub 0x4,rcx\n\t"
15720 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15721 $$emit$$"add 0x4,rcx\n\t"
15722 $$emit$$"jle L_end\n\t"
15723 $$emit$$"dec rcx\n\t"
15724 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15725 $$emit$$"vmovq xmm0,(rax)\n\t"
15726 $$emit$$"add 0x8,rax\n\t"
15727 $$emit$$"dec rcx\n\t"
15728 $$emit$$"jge L_sloop\n\t"
15729 $$emit$$"# L_end:\n\t"
15730 } else {
15731 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15732 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15733 }
15734 %}
15735 ins_encode %{
15736 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15737 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15738 %}
15739 ins_pipe(pipe_slow);
15740 %}
15741
15742 // Small constant length ClearArray for AVX512 targets.
// ClearArray rule for a constant count (immL cnt). Selected when the node is
// neither is_large() nor word_copy_only(), MaxVectorSize >= 32 and AVX512VL
// is supported. base may be any pointer register; val stays in rax and is
// USE_KILLed. The count is passed to clear_mem() as a compile-time constant
// ($cnt$$constant) together with the XMM and opmask temporaries.
15743 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15744 %{
15745 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15746 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15747 match(Set dummy (ClearArray (Binary cnt base) val));
15748 ins_cost(100);
15749 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15750 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15751 ins_encode %{
15752 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15753 %}
15754 ins_pipe(pipe_slow);
15755 %}
15756
// StrComp rule for the LL encoding, used when AVX512VL+BW is not supported.
// Both string pointers and both counts are USE_KILLed, tmp1 is an XMM
// temporary and the flags are killed; the result is produced in rax.
// string_compare() is called with knoreg as its last argument.
15757 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15758 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15759 %{
15760 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15761 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15762 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15763
15764 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15765 ins_encode %{
15766 __ string_compare($str1$$Register, $str2$$Register,
15767 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15768 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15769 %}
15770 ins_pipe( pipe_slow );
15771 %}
15772
17610 effect(USE meth);
17611
17612 ins_cost(300);
17613 format %{ "call_leaf,runtime " %}
17614 ins_encode(clear_avx, Java_To_Runtime(meth));
17615 ins_pipe(pipe_slow);
17616 %}
17617
17618 // Call runtime without safepoint and with vector arguments
// CallLeafVector rule. The encoding is Java_To_Runtime only: unlike the other
// direct leaf-call rules in this file it does not prepend clear_avx, which
// matches MachCallRuntimeNode::ret_addr_offset() skipping clear_avx_size()
// for Op_CallLeafVector.
17619 instruct CallLeafDirectVector(method meth)
17620 %{
17621 match(CallLeafVector);
17622 effect(USE meth);
17623
17624 ins_cost(300);
17625 format %{ "call_leaf,vector " %}
17626 ins_encode(Java_To_Runtime(meth));
17627 ins_pipe(pipe_slow);
17628 %}
17629
17630 // Call runtime without safepoint
17631 // entry point is null, target holds the address to call
// CallLeafNoFP rule for calls whose entry point is null: the call target is
// taken from the $target register and a single register-indirect call is
// emitted (no clear_avx, no Java_To_Runtime sequence).
// NOTE(review): the MachCallRuntimeNode::ret_addr_offset() visible in this
// file assumes the 13-byte "movq r10,#addr; callq (r10)" sequence plus
// clear_avx_size(); confirm the return address offset is handled correctly
// for this shorter encoding.
17632 instruct CallLeafNoFPInDirect(rRegP target)
17633 %{
17634 predicate(n->as_Call()->entry_point() == nullptr);
17635 match(CallLeafNoFP target);
17636
17637 ins_cost(300);
17638 format %{ "call_leaf_nofp,runtime indirect " %}
17639 ins_encode %{
17640 __ call($target$$Register);
17641 %}
17642
17643 ins_pipe(pipe_slow);
17644 %}
17645
17646 // Call runtime without safepoint
// CallLeafNoFP rule for calls with a known (non-null) entry point: encoded as
// clear_avx followed by Java_To_Runtime for $meth.
17647 instruct CallLeafNoFPDirect(method meth)
17648 %{
17649 predicate(n->as_Call()->entry_point() != nullptr);
17650 match(CallLeafNoFP);
17651 effect(USE meth);
17652
17653 ins_cost(300);
17654 format %{ "call_leaf_nofp,runtime " %}
17655 ins_encode(clear_avx, Java_To_Runtime(meth));
17656 ins_pipe(pipe_slow);
17657 %}
17658
17659 // Return Instruction
17660 // Remove the return address & jump to it.
17661 // Notice: We always emit a nop after a ret to make sure there is room
17662 // for safepoint patching
17663 instruct Ret()
17664 %{
17665 match(Return);
17666
17667 format %{ "ret" %}
17668 ins_encode %{
17669 __ ret(0);
|