}

// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset()
{
  int offset = 5; // 5 bytes from start of call to where return address points
  offset += clear_avx_size();
  return offset;
}
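
// Sketch of the byte accounting (not emitted here): the direct call encodes as
//   E8 xx xx xx xx        call rel32   (1 opcode byte + 4-byte displacement)
// so the return address lands 5 bytes past the call's first byte. When
// clear_avx emits vzeroupper (C5 F8 77, 3 bytes) ahead of the call,
// clear_avx_size() accounts for those extra bytes.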

int MachCallDynamicJavaNode::ret_addr_offset()
{
  int offset = 15; // 15 bytes from start of call to where return address points
  offset += clear_avx_size();
  return offset;
}
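
// Where 15 comes from (a sketch, assuming the ic_call sequence emitted by
// Java_Dynamic_Call below):
//   48 B8 <imm64>         movq rax, #cached_metadata   (10 bytes)
//   E8 xx xx xx xx        call rel32                   (5 bytes)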

int MachCallRuntimeNode::ret_addr_offset() {
  int offset = 13; // movq r10,#addr; callq (r10)
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
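
// Byte accounting for the sequence named in the comment above:
//   49 BA <imm64>         movq r10, #addr   (10 bytes)
//   41 FF D2              callq r10         (3 bytes)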

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}
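
// Worked example (assuming clear_avx_size() == 3 and alignment_required() == 4):
// entering at current_offset == 3, the call's displacement would begin at
// 3 + 3 + 1 == 7, and align_up(7, 4) - 7 == 1, so one byte of padding is
// emitted ahead of the sequence to land the patched word on a 4-byte boundary.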

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 11; // skip movq instruction + call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// ...
    st->print("movq [rsp + #%d], rbp\t# Save rbp", framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("addq rbp, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check", framesize);
#ifdef ASSERT
    st->print("\n\t");
    st->print("# stack alignment check");
#endif
  }
  if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
    st->print("\n\t");
    st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
    st->print("\n\t");
    st->print("je fast_entry\t");
    st->print("\n\t");
    st->print("call #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif

void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
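
// Prolog shape (a sketch; verified_entry() does the actual emission): an
// optional class-initialization barrier for static methods whose holder may
// still be initializing, then the stack bang (if needed), push of rbp, and
// frame allocation.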

uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // ...

  if (do_polling() && C->is_method_compilation()) {
    st->print("\t");
    st->print_cr("cmpq rsp, poll_offset[r15_thread]\n\t"
                 "ja #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    MacroAssembler _masm(&cbuf);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}

const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}

//=============================================================================

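// Coarse register classes used by the spill-copy code below to pick a move
// strategy: general-purpose registers (rc_int), AVX-512 opmask registers
// (rc_kreg), XMM/vector registers (rc_float), and stack slots (rc_stack).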
enum RC {
  rc_bad,
  rc_int,
  rc_kreg,
  rc_float,
  rc_stack
};

// ...

              src_offset, dst_offset);
    break;
  case Op_VecZ:
    st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
              "vmovdqu xmm0, [rsp + #%d]\n\t"
              "vmovdqu [rsp + #%d], xmm0\n\t"
              "vmovdqu xmm0, [rsp - #64]",
              src_offset, dst_offset);
    break;
  default:
    ShouldNotReachHere();
  }
#endif
  }
}

uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != NULL || st != NULL, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register");

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  // ...
      st->print("kmovq %s, %s\t# spill",
                Matcher::regName[dst_first],
                Matcher::regName[src_first]);
#endif
      }
    }
    return 0;
  } else if (dst_first_rc == rc_float) {
    assert(false, "Illegal spill");
    return 0;
  }

  assert(0, " foo ");
  Unimplemented();
  return 0;
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation(NULL, ra_, false, st);
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("leaq %s, [rsp + #%d]\t# box lock",
            Matcher::regName[reg], offset);
}
#endif

void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  MacroAssembler masm(&cbuf);
  masm.lea(as_Register(reg), Address(rsp, offset));
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  return (offset < 0x80) ? 5 : 8; // REX.W lea: 5 bytes with a disp8, 8 with a disp32
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  if (UseCompressedClassPointers) {
    st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    st->print_cr("\tcmpq rax, rscratch1\t# Inline cache check");
  } else {
    st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
                 "# Inline cache check");
  }
  st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
  st->print_cr("\tnop\t# nops to align entry point");
}
#endif

void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  MacroAssembler masm(&cbuf);
  uint insts_size = cbuf.insts_size();
  if (UseCompressedClassPointers) {
    masm.load_klass(rscratch1, j_rarg0, rscratch2);
    masm.cmpptr(rax, rscratch1);
  } else {
    masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
  }

  masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
  if (OptoBreakpoint) {
    // Leave space for int3
    nops_cnt -= 1;
  }
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    masm.nop(nops_cnt);
  }
}
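
// Worked example: if the inline-cache check above occupies 10 bytes,
// 4 - (10 & 0x3) == 2 nop bytes are emitted (1 when OptoBreakpoint reserves a
// byte for int3), so the verified entry point that follows starts 4-byte
// aligned.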

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}


//=============================================================================

bool Matcher::supports_vector_calling_convention(void) {
  if (EnableVectorSupport && UseVectorStubs) {
    return true;
  }
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  assert(EnableVectorSupport && UseVectorStubs, "sanity");
  int lo = XMM0_num;
  int hi = XMM0b_num;
  if (ideal_reg == Op_VecX) hi = XMM0d_num;
  else if (ideal_reg == Op_VecY) hi = XMM0h_num;
  else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
  return OptoRegPair(hi, lo);
}

// Is this branch offset short enough that a short branch can be used?

// ...

    __ bind(normal);
    __ cdqq();

    // idivq (note: must be emitted by the user of this rule)
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}

  enc_class enc_PartialSubtypeCheck()
  %{
    Register Rrdi = as_Register(RDI_enc); // result register
    Register Rrax = as_Register(RAX_enc); // super class
    Register Rrcx = as_Register(RCX_enc); // killed
    Register Rrsi = as_Register(RSI_enc); // sub class
    Label miss;
    const bool set_cond_codes = true;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Rrdi, Rrdi);
    }
    __ bind(miss);
  %}

  enc_class clear_avx %{
    debug_only(int off0 = cbuf.insts_size());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}
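
  // The assert above is what keeps clear_avx_size() honest: ret_addr_offset()
  // and compute_padding() both bake that size into their byte arithmetic, so
  // a size mismatch here would corrupt the computed return-address offsets.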

  // ...

      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      address mark = __ pc();
      int call_offset = __ offset();
      __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        cbuf.shared_stub_to_interp_for(_method, call_offset);
      } else {
        // Emit stubs for static call.
        address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
        if (stub == NULL) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    __ post_call_nop();
  %}

  enc_class Java_Dynamic_Call(method meth) %{
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
    __ post_call_nop();
  %}
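
  // ic_call() emits the inline-cache sequence sketched above for
  // MachCallDynamicJavaNode::ret_addr_offset(): a 10-byte movq of the cached
  // metadata into rax followed by a 5-byte call, 15 bytes in total.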

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.

// ...
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed Pointer Immediate
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Compressed Pointer Immediate
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
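
// immP31 covers non-relocatable pointer constants whose value fits in 31
// bits: such a value survives sign-extension from a 32-bit immediate
// unchanged, which is what the storeImmP instruction below relies on.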

// ...
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{ "[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
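
// The predicate reaches through the matched AddP chain down to the ConvI2L
// input and requires its type's low bound to be non-negative, so the 32-bit
// index is known to never be negative when used in the scaled addressing mode.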

// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free up r12 even with CompressedOops::base() == NULL.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{ "[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}

// Indirect Memory Operand
operand indirectNarrow(rRegN reg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));

// ...
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.

opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);

//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

// ...

  format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq $dst, $src\t# compressed NULL ptr" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
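
// The NULL case is unreachable here because a zero narrow constant matches
// immN0 and selects loadConN0 above, which zeroes the register (clobbering
// flags, hence its KILL cr effect) instead of materializing an immediate.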

instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps $dst, $dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// ...
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}

// Store Pointer
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
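
// When the compressed-oop heap base is NULL, r12 permanently holds zero, so
// storing r12 is the cheapest way to write a NULL/zero of any width without
// encoding an immediate; the storeImm*0 forms below all reuse this trick.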

// Store NULL Pointer, mark word, or other simple pointer constant.
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Compressed Pointer
instruct storeN(memory mem, rRegN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == NULL);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmN(memory mem, immN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      __ movl($mem$$Address, 0);
    } else {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Integer Immediate
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Long Immediate
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Short/Char Immediate
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw $mem, $src\t# short/char" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Byte Immediate
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store CMS card-mark Immediate
instruct storeImmCM0_reg(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreCM mem zero));

  ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmCM0(memory mem, immI_0 src)
%{
  match(Set mem (StoreCM mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# CMS card-mark byte 0" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Float
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss $mem, $src\t# float" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate Float value (it is faster than store from XMM register)
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl $mem, $src\t# float" %}
  ins_encode %{
    __ movl($mem$$Address, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Double
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd $mem, $src\t# double" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate double 0.0 (it is faster than store from XMM register)
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq $mem, $src\t# double 0." %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
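
// x86-64 has no 64-bit immediate store, but this works because immD0's bit
// pattern is all zeros: movq $mem, imm32 sign-extends 0 to the full 64 bits.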

instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// ...
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------

instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{

// ...
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst, $src\t# MoveI2F" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst, $src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Fast clearing of an array
// Small ClearArray non-AVX512.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

// Small ClearArray AVX512 non-constant length.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Large ClearArray non-AVX512.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Small ClearArray AVX512 constant length.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt\n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

// ...
  ins_pipe(ialu_cr_reg_mem);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
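
// The 0xFFFFFFFF immediate sign-extends to all 64 one-bits (testq only takes
// an imm32), so the encoding above tests every bit of the loaded pointer,
// matching the 0xffffffffffffffff mask shown in the format string.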

instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)

// ...

%{
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}

// ...

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}

// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
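
// Note the missing clear_avx in the encoding above: vector arguments must
// survive up to the call, which is also why MachCallRuntimeNode::
// ret_addr_offset() skips clear_avx_size() for Op_CallLeafVector.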

// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}

// Return Instruction
// Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    __ ret(0);
|
473 }
474
475 // !!!!! Special hack to get all types of calls to specify the byte offset
476 // from the start of the call to the point where the return address
477 // will point.
478 int MachCallStaticJavaNode::ret_addr_offset()
479 {
480 int offset = 5; // 5 bytes from start of call to where return address points
481 offset += clear_avx_size();
482 return offset;
483 }
484
485 int MachCallDynamicJavaNode::ret_addr_offset()
486 {
487 int offset = 15; // 15 bytes from start of call to where return address points
488 offset += clear_avx_size();
489 return offset;
490 }
491
492 int MachCallRuntimeNode::ret_addr_offset() {
493 if (_entry_point == nullptr) {
494 // CallLeafNoFPInDirect
495 return 3; // callq (register)
496 }
497 int offset = 13; // movq r10,#addr; callq (r10)
498 if (this->ideal_Opcode() != Op_CallLeafVector) {
499 offset += clear_avx_size();
500 }
501 return offset;
502 }
503
504 //
505 // Compute padding required for nodes which need alignment
506 //
507
508 // The address of the call instruction needs to be 4-byte aligned to
509 // ensure that it does not span a cache line so that it can be patched.
510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
511 {
512 current_offset += clear_avx_size(); // skip vzeroupper
513 current_offset += 1; // skip call opcode byte
514 return align_up(current_offset, alignment_required()) - current_offset;
515 }
516
517 // The address of the call instruction needs to be 4-byte aligned to
518 // ensure that it does not span a cache line so that it can be patched.
519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
520 {
521 current_offset += clear_avx_size(); // skip vzeroupper
522 current_offset += 11; // skip movq instruction + call opcode byte
523 return align_up(current_offset, alignment_required()) - current_offset;
694 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
695 if (PreserveFramePointer) {
696 st->print("\n\t");
697 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
698 if (framesize > 0) {
699 st->print("\n\t");
700 st->print("addq rbp, #%d", framesize);
701 }
702 }
703 }
704
705 if (VerifyStackAtCalls) {
706 st->print("\n\t");
707 framesize -= wordSize;
708 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
709 #ifdef ASSERT
710 st->print("\n\t");
711 st->print("# stack alignment check");
712 #endif
713 }
714 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
715 st->print("\n\t");
716 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
717 st->print("\n\t");
718 st->print("je fast_entry\t");
719 st->print("\n\t");
720 st->print("call #nmethod_entry_barrier_stub\t");
721 st->print("\n\tfast_entry:");
722 }
723 st->cr();
724 }
725 #endif
726
727 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
728 Compile* C = ra_->C;
729 C2_MacroAssembler _masm(&cbuf);
730
731 __ verified_entry(C);
732
733 if (ra_->C->stub_function() == nullptr) {
734 __ entry_barrier();
735 }
736
737 if (!Compile::current()->output()->in_scratch_emit_size()) {
738 __ bind(*_verified_entry);
739 }
740
741 C->output()->set_frame_complete(cbuf.insts_size());
742
743 if (C->has_mach_constant_base_node()) {
744 // NOTE: We set the table base offset here because users might be
745 // emitted before MachConstantBaseNode.
746 ConstantTable& constant_table = C->output()->constant_table();
747 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
748 }
749 }
750
751 int MachPrologNode::reloc() const
752 {
753 return 0; // a large enough number
754 }
755
756 //=============================================================================
757 #ifndef PRODUCT
758 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
759 {
760 Compile* C = ra_->C;
761 if (generate_vzeroupper(C)) {
762 st->print("vzeroupper");
763 st->cr(); st->print("\t");
764 }
765
766 int framesize = C->output()->frame_size_in_bytes();
767 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
768 // Remove word for return adr already pushed
769 // and RBP
770 framesize -= 2*wordSize;
778 if (do_polling() && C->is_method_compilation()) {
779 st->print("\t");
780 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
781 "ja #safepoint_stub\t"
782 "# Safepoint: poll for GC");
783 }
784 }
785 #endif
786
787 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
788 {
789 Compile* C = ra_->C;
790 MacroAssembler _masm(&cbuf);
791
792 if (generate_vzeroupper(C)) {
793 // Clear upper bits of YMM registers when current compiled code uses
794 // wide vectors to avoid AVX <-> SSE transition penalty during call.
795 __ vzeroupper();
796 }
797
798 // Subtract two words to account for return address and rbp
799 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
800 __ remove_frame(initial_framesize, C->needs_stack_repair());
801
802 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
803 __ reserved_stack_check();
804 }
805
806 if (do_polling() && C->is_method_compilation()) {
807 MacroAssembler _masm(&cbuf);
808 Label dummy_label;
809 Label* code_stub = &dummy_label;
810 if (!C->output()->in_scratch_emit_size()) {
811 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
812 C->output()->add_stub(stub);
813 code_stub = &stub->entry();
814 }
815 __ relocate(relocInfo::poll_return_type);
816 __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
817 }
818 }
819
820 int MachEpilogNode::reloc() const
821 {
822 return 2; // a large enough number
823 }
824
825 const Pipeline* MachEpilogNode::pipeline() const
826 {
827 return MachNode::pipeline_class();
828 }
829
830 //=============================================================================
831
832 enum RC {
833 rc_bad,
834 rc_int,
835 rc_kreg,
836 rc_float,
837 rc_stack
838 };
839
926 src_offset, dst_offset);
927 break;
928 case Op_VecZ:
929 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
930 "vmovdqu xmm0, [rsp + #%d]\n\t"
931 "vmovdqu [rsp + #%d], xmm0\n\t"
932 "vmovdqu xmm0, [rsp - #64]",
933 src_offset, dst_offset);
934 break;
935 default:
936 ShouldNotReachHere();
937 }
938 #endif
939 }
940 }
941
942 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
943 PhaseRegAlloc* ra_,
944 bool do_size,
945 outputStream* st) const {
946 assert(cbuf != nullptr || st != nullptr, "sanity");
947 // Get registers to move
948 OptoReg::Name src_second = ra_->get_reg_second(in(1));
949 OptoReg::Name src_first = ra_->get_reg_first(in(1));
950 OptoReg::Name dst_second = ra_->get_reg_second(this);
951 OptoReg::Name dst_first = ra_->get_reg_first(this);
952
953 enum RC src_second_rc = rc_class(src_second);
954 enum RC src_first_rc = rc_class(src_first);
955 enum RC dst_second_rc = rc_class(dst_second);
956 enum RC dst_first_rc = rc_class(dst_first);
957
958 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
959 "must move at least 1 register" );
960
961 if (src_first == dst_first && src_second == dst_second) {
962 // Self copy, no move
963 return 0;
964 }
965 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
966 uint ireg = ideal_reg();
967 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
968 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
969 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
970 // mem -> mem
971 int src_offset = ra_->reg2offset(src_first);
972 int dst_offset = ra_->reg2offset(dst_first);
973 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
974 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
975 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
976 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
977 int stack_offset = ra_->reg2offset(dst_first);
978 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
979 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
980 int stack_offset = ra_->reg2offset(src_first);
981 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
982 } else {
983 ShouldNotReachHere();
984 }
985 return 0;
1384 st->print("kmovq %s, %s\t# spill",
1385 Matcher::regName[dst_first],
1386 Matcher::regName[src_first]);
1387 #endif
1388 }
1389 }
1390 return 0;
1391 } else if (dst_first_rc == rc_float) {
1392 assert(false, "Illegal spill");
1393 return 0;
1394 }
1395 }
1396
1397 assert(0," foo ");
1398 Unimplemented();
1399 return 0;
1400 }
1401
1402 #ifndef PRODUCT
1403 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1404 implementation(nullptr, ra_, false, st);
1405 }
1406 #endif
1407
1408 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1409 implementation(&cbuf, ra_, false, nullptr);
1410 }
1411
1412 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1413 return MachNode::size(ra_);
1414 }
1415
1416 //=============================================================================
1417 #ifndef PRODUCT
1418 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1419 {
1420 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1421 int reg = ra_->get_reg_first(this);
1422 st->print("leaq %s, [rsp + #%d]\t# box lock",
1423 Matcher::regName[reg], offset);
1424 }
1425 #endif
1426
1427 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1428 {
1429 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1430 int reg = ra_->get_encode(this);
1431
1432 MacroAssembler masm(&cbuf);
1433 masm.lea(as_Register(reg), Address(rsp, offset));
1434 }
1435
1436 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1437 {
1438 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1439 return (offset < 0x80) ? 5 : 8; // REX
1440 }
1441
1442 //=============================================================================
1443 #ifndef PRODUCT
1444 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1445 {
1446 st->print_cr("MachVEPNode");
1447 }
1448 #endif
1449
1450 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1451 {
1452 C2_MacroAssembler _masm(&cbuf);
1453 uint insts_size = cbuf.insts_size();
1454 if (!_verified) {
1455 if (UseCompressedClassPointers) {
1456 __ load_klass(rscratch1, j_rarg0, rscratch2);
1457 __ cmpptr(rax, rscratch1);
1458 } else {
1459 __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1460 }
1461 __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1462 } else {
1463 // TODO 8284443 Avoid creation of temporary frame
1464 if (ra_->C->stub_function() == nullptr) {
1465 __ verified_entry(ra_->C, 0);
1466 __ entry_barrier();
1467 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
1468 __ remove_frame(initial_framesize, false);
1469 }
1470 // Unpack inline type args passed as oop and then jump to
1471 // the verified entry point (skipping the unverified entry).
1472 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
1473 // Emit code for verified entry and save increment for stack repair on return
1474 __ verified_entry(ra_->C, sp_inc);
1475 if (Compile::current()->output()->in_scratch_emit_size()) {
1476 Label dummy_verified_entry;
1477 __ jmp(dummy_verified_entry);
1478 } else {
1479 __ jmp(*_verified_entry);
1480 }
1481 }
1482   /* WARNING: these NOPs are critical so that the verified entry point is
1483      properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
1484 int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1485 nops_cnt &= 0x3; // Do not add nops if code is aligned.
1486 if (nops_cnt > 0) {
1487 __ nop(nops_cnt);
1488 }
1489 }
1490
1491 //=============================================================================
1492 #ifndef PRODUCT
1493 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1494 {
1495 if (UseCompressedClassPointers) {
1496 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1497 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1498     st->print_cr("\tcmpq rax, rscratch1\t# Inline cache check");
1499 } else {
1500 st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1501 "# Inline cache check");
1502 }
1503 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1504 st->print_cr("\tnop\t# nops to align entry point");
1505 }
1506 #endif
1507
1508 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1509 {
1510   MacroAssembler masm(&cbuf);
1511   uint insts_size = cbuf.insts_size();
1512   if (UseCompressedClassPointers) {
1513     masm.load_klass(rscratch1, j_rarg0, rscratch2);
1514 masm.cmpptr(rax, rscratch1);
1515 } else {
1516 masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1517 }
1518
1519 masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1520
1521   /* WARNING: these NOPs are critical so that the verified entry point is
1522      properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
1523 int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1524 if (OptoBreakpoint) {
1525 // Leave space for int3
1526 nops_cnt -= 1;
1527 }
1528 nops_cnt &= 0x3; // Do not add nops if code is aligned.
1529 if (nops_cnt > 0)
1530 masm.nop(nops_cnt);
1531 }
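// Illustrative restatement of the padding math used in both emit methods
// above (plain C++ sketch): round the bytes emitted so far up to the next
// 4-byte boundary, reserving one byte for int3 when OptoBreakpoint is set.
static int entry_alignment_nops(int emitted_bytes, bool opto_breakpoint) {
  int nops = 4 - (emitted_bytes & 0x3); // bytes to the next boundary (1..4)
  if (opto_breakpoint) {
    nops -= 1;                          // int3 will occupy one byte
  }
  return nops & 0x3;                    // already aligned: emit nothing
}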
1532
1533 //=============================================================================
1534
1535 bool Matcher::supports_vector_calling_convention(void) {
1536   return EnableVectorSupport && UseVectorStubs;
1540 }
1541
1542 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1543 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1544 int lo = XMM0_num;
1545 int hi = XMM0b_num;
1546 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1547 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1548 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1549 return OptoRegPair(hi, lo);
1550 }
1551
1552 // Is this branch offset short enough that a short branch can be used?
1782 __ bind(normal);
1783 __ cdqq();
1784
1785     // idivq; <done> below is the join point for the special case handled above
1787 __ idivq($div$$Register);
1788 __ bind(done);
1789 %}
1790
1791 enc_class enc_PartialSubtypeCheck()
1792 %{
1793 Register Rrdi = as_Register(RDI_enc); // result register
1794 Register Rrax = as_Register(RAX_enc); // super class
1795 Register Rrcx = as_Register(RCX_enc); // killed
1796 Register Rrsi = as_Register(RSI_enc); // sub class
1797 Label miss;
1798 const bool set_cond_codes = true;
1799
1800 MacroAssembler _masm(&cbuf);
1801     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
1802                                      nullptr, &miss,
1803                                      /*set_cond_codes:*/ set_cond_codes);
1804 if ($primary) {
1805 __ xorptr(Rrdi, Rrdi);
1806 }
1807 __ bind(miss);
1808 %}
1809
1810 enc_class clear_avx %{
1811 debug_only(int off0 = cbuf.insts_size());
1812 if (generate_vzeroupper(Compile::current())) {
1813       // Clear the upper bits of YMM registers when the current compiled code
1814       // uses wide vectors, to avoid the AVX <-> SSE transition penalty during
1815       // the call.
1816 MacroAssembler _masm(&cbuf);
1817 __ vzeroupper();
1818 }
1819 debug_only(int off1 = cbuf.insts_size());
1820 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
1821 %}
1822
1839 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
1840 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
1841 // The NOP here is purely to ensure that eliding a call to
1842 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
1843 __ addr_nop_5();
1844 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
1845 } else {
1846 int method_index = resolved_method_index(cbuf);
1847 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1848 : static_call_Relocation::spec(method_index);
1849 address mark = __ pc();
1850 int call_offset = __ offset();
1851 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
1852 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
1853 // Calls of the same statically bound method can share
1854 // a stub to the interpreter.
1855 cbuf.shared_stub_to_interp_for(_method, call_offset);
1856 } else {
1857 // Emit stubs for static call.
1858 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
1859 if (stub == nullptr) {
1860 ciEnv::current()->record_failure("CodeCache is full");
1861 return;
1862 }
1863 }
1864 }
1865 __ post_call_nop();
1866 %}
1867
1868 enc_class Java_Dynamic_Call(method meth) %{
1869 MacroAssembler _masm(&cbuf);
1870 __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1871 __ post_call_nop();
1872 %}
1873
1874 %}
1875
1876
1877
1878 //----------FRAME--------------------------------------------------------------
1879 // Definition of frame structure and management information.
2177 operand immI_64()
2178 %{
2179   predicate(n->get_int() == 64);
2180 match(ConI);
2181
2182 op_cost(0);
2183 format %{ %}
2184 interface(CONST_INTER);
2185 %}
2186
2187 // Pointer Immediate
2188 operand immP()
2189 %{
2190 match(ConP);
2191
2192 op_cost(10);
2193 format %{ %}
2194 interface(CONST_INTER);
2195 %}
2196
2197 // nullptr Pointer Immediate
2198 operand immP0()
2199 %{
2200 predicate(n->get_ptr() == 0);
2201 match(ConP);
2202
2203 op_cost(5);
2204 format %{ %}
2205 interface(CONST_INTER);
2206 %}
2207
2208 // Compressed Pointer Immediate
2209 operand immN() %{
2210 match(ConN);
2211
2212 op_cost(10);
2213 format %{ %}
2214 interface(CONST_INTER);
2215 %}
2216
2217 operand immNKlass() %{
2218 match(ConNKlass);
2219
2220 op_cost(10);
2221 format %{ %}
2222 interface(CONST_INTER);
2223 %}
2224
2225 // nullptr Pointer Immediate
2226 operand immN0() %{
2227 predicate(n->get_narrowcon() == 0);
2228 match(ConN);
2229
2230 op_cost(5);
2231 format %{ %}
2232 interface(CONST_INTER);
2233 %}
2234
2235 operand immP31()
2236 %{
2237 predicate(n->as_Type()->type()->reloc() == relocInfo::none
2238 && (n->get_ptr() >> 31) == 0);
2239 match(ConP);
2240
2241 op_cost(5);
2242 format %{ %}
2243 interface(CONST_INTER);
2244 %}
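// Illustrative check behind the immP31 predicate above (plain C++): a pointer
// constant qualifies only if it needs no relocation and its value fits in the
// low 31 bits, so it can be encoded as a non-negative 32-bit immediate.
#include <cstdint>
static bool fits_imm31(uintptr_t p) {
  return (p >> 31) == 0; // all high bits clear => safe as a positive imm32
}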
2245
3117 %}
3118 %}
3119
3120 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
3121 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3122 %{
3123 constraint(ALLOC_IN_RC(ptr_reg));
3124 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3125 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3126
3127 op_cost(10);
3128   format %{ "[$reg + $off + $idx << $scale]" %}
3129 interface(MEMORY_INTER) %{
3130 base($reg);
3131 index($idx);
3132 scale($scale);
3133 disp($off);
3134 %}
3135 %}
3136
3137 // Indirect Narrow Oop Operand
3138 operand indCompressedOop(rRegN reg) %{
3139 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
3140 constraint(ALLOC_IN_RC(ptr_reg));
3141 match(DecodeN reg);
3142
3143 op_cost(10);
3144   format %{ "[R12 + $reg << 3] (compressed oop addressing)" %}
3145 interface(MEMORY_INTER) %{
3146 base(0xc); // R12
3147 index($reg);
3148 scale(0x3);
3149 disp(0x0);
3150 %}
3151 %}
3152
3153 // Indirect Narrow Oop Plus Offset Operand
3154 // Note: the x86 architecture doesn't support "scale * index + offset" without
3155 // a base, so we can't free r12 even when CompressedOops::base() == nullptr.
3156 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3157 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
3158 constraint(ALLOC_IN_RC(ptr_reg));
3159 match(AddP (DecodeN reg) off);
3160
3161 op_cost(10);
3162   format %{ "[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3163 interface(MEMORY_INTER) %{
3164 base(0xc); // R12
3165 index($reg);
3166 scale(0x3);
3167 disp($off);
3168 %}
3169 %}
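// Hedged model of the compressed-oop addressing modes above (plain C++, not
// HotSpot API): with CompressedOops::shift() == 3 the effective address is
// R12_heapbase + (narrow_oop << 3) + displacement.
#include <cstdint>
static uint64_t compressed_oop_address(uint64_t r12_heapbase,
                                       uint32_t narrow_oop,
                                       int32_t  disp) {
  return r12_heapbase + ((uint64_t)narrow_oop << 3) + (int64_t)disp;
}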
3170
3171 // Indirect Memory Operand
3172 operand indirectNarrow(rRegN reg)
3173 %{
3174 predicate(CompressedOops::shift() == 0);
3175 constraint(ALLOC_IN_RC(ptr_reg));
3479 equal(0x4, "e");
3480 not_equal(0x5, "ne");
3481 less(0x2, "b");
3482 greater_equal(0x3, "ae");
3483 less_equal(0x6, "be");
3484 greater(0x7, "a");
3485 overflow(0x0, "o");
3486 no_overflow(0x1, "no");
3487 %}
3488 %}
3489
3490 //----------OPERAND CLASSES----------------------------------------------------
3491 // Operand Classes are groups of operands that are used to simplify
3492 // instruction definitions by not requiring the AD writer to specify separate
3493 // instructions for every form of operand when the instruction accepts
3494 // multiple operand types with the same basic encoding and format. The classic
3495 // case of this is memory operands.
3496
3497 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
3498 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
3499 indCompressedOop, indCompressedOopOffset,
3500 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
3501 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
3502 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3503
3504 //----------PIPELINE-----------------------------------------------------------
3505 // Rules which define the behavior of the target architectures pipeline.
3506 pipeline %{
3507
3508 //----------ATTRIBUTES---------------------------------------------------------
3509 attributes %{
3510     variable_size_instructions;        // Variable-sized instructions
3511     max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
3512     instruction_unit_size = 1;         // An instruction is 1 byte long
3513     instruction_fetch_unit_size = 16;  // The processor fetches one line
3514     instruction_fetch_units = 1;       // of 16 bytes
3515
3516 // List of nop instructions
3517 nops( MachNop );
3518 %}
3519
4916 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
4917 ins_encode %{
4918 __ movl($dst$$Register, $src$$constant);
4919 %}
4920 ins_pipe(ialu_reg);
4921 %}
4922
4923 instruct loadConF(regF dst, immF con) %{
4924 match(Set dst con);
4925 ins_cost(125);
4926 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
4927 ins_encode %{
4928 __ movflt($dst$$XMMRegister, $constantaddress($con));
4929 %}
4930 ins_pipe(pipe_slow);
4931 %}
4932
4933 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
4934 match(Set dst src);
4935 effect(KILL cr);
4936   format %{ "xorq $dst, $dst\t# compressed nullptr ptr" %}
4937 ins_encode %{
4938 __ xorq($dst$$Register, $dst$$Register);
4939 %}
4940 ins_pipe(ialu_reg);
4941 %}
4942
4943 instruct loadConN(rRegN dst, immN src) %{
4944 match(Set dst src);
4945
4946 ins_cost(125);
4947 format %{ "movl $dst, $src\t# compressed ptr" %}
4948 ins_encode %{
4949 address con = (address)$src$$constant;
4950 if (con == nullptr) {
4951 ShouldNotReachHere();
4952 } else {
4953 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
4954 }
4955 %}
4956 ins_pipe(ialu_reg_fat); // XXX
4957 %}
4958
4959 instruct loadConNKlass(rRegN dst, immNKlass src) %{
4960 match(Set dst src);
4961
4962 ins_cost(125);
4963 format %{ "movl $dst, $src\t# compressed klass ptr" %}
4964 ins_encode %{
4965 address con = (address)$src$$constant;
4966 if (con == nullptr) {
4967 ShouldNotReachHere();
4968 } else {
4969 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
4970 }
4971 %}
4972 ins_pipe(ialu_reg_fat); // XXX
4973 %}
4974
4975 instruct loadConF0(regF dst, immF0 src)
4976 %{
4977 match(Set dst src);
4978 ins_cost(100);
4979
4980 format %{ "xorps $dst, $dst\t# float 0.0" %}
4981 ins_encode %{
4982 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
4983 %}
4984 ins_pipe(pipe_slow);
4985 %}
4986
5172 %}
5173 ins_pipe(ialu_mem_reg); // XXX
5174 %}
5175
5176 // Store Pointer
5177 instruct storeP(memory mem, any_RegP src)
5178 %{
5179 predicate(n->as_Store()->barrier_data() == 0);
5180 match(Set mem (StoreP mem src));
5181
5182 ins_cost(125); // XXX
5183 format %{ "movq $mem, $src\t# ptr" %}
5184 ins_encode %{
5185 __ movq($mem$$Address, $src$$Register);
5186 %}
5187 ins_pipe(ialu_mem_reg);
5188 %}
5189
5190 instruct storeImmP0(memory mem, immP0 zero)
5191 %{
5192 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
5193 match(Set mem (StoreP mem zero));
5194
5195 ins_cost(125); // XXX
5196 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
5197 ins_encode %{
5198 __ movq($mem$$Address, r12);
5199 %}
5200 ins_pipe(ialu_mem_reg);
5201 %}
5202
5203 // Store nullptr Pointer, mark word, or other simple pointer constant.
5204 instruct storeImmP(memory mem, immP31 src)
5205 %{
5206 predicate(n->as_Store()->barrier_data() == 0);
5207 match(Set mem (StoreP mem src));
5208
5209 ins_cost(150); // XXX
5210 format %{ "movq $mem, $src\t# ptr" %}
5211 ins_encode %{
5212 __ movq($mem$$Address, $src$$constant);
5213 %}
5214 ins_pipe(ialu_mem_imm);
5215 %}
5216
5217 // Store Compressed Pointer
5218 instruct storeN(memory mem, rRegN src)
5219 %{
5220 match(Set mem (StoreN mem src));
5221
5222 ins_cost(125); // XXX
5223 format %{ "movl $mem, $src\t# compressed ptr" %}
5224 ins_encode %{
5225 __ movl($mem$$Address, $src$$Register);
5226 %}
5227 ins_pipe(ialu_mem_reg);
5228 %}
5229
5230 instruct storeNKlass(memory mem, rRegN src)
5231 %{
5232 match(Set mem (StoreNKlass mem src));
5233
5234 ins_cost(125); // XXX
5235 format %{ "movl $mem, $src\t# compressed klass ptr" %}
5236 ins_encode %{
5237 __ movl($mem$$Address, $src$$Register);
5238 %}
5239 ins_pipe(ialu_mem_reg);
5240 %}
5241
5242 instruct storeImmN0(memory mem, immN0 zero)
5243 %{
5244 predicate(CompressedOops::base() == nullptr);
5245 match(Set mem (StoreN mem zero));
5246
5247 ins_cost(125); // XXX
5248 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
5249 ins_encode %{
5250 __ movl($mem$$Address, r12);
5251 %}
5252 ins_pipe(ialu_mem_reg);
5253 %}
5254
5255 instruct storeImmN(memory mem, immN src)
5256 %{
5257 match(Set mem (StoreN mem src));
5258
5259 ins_cost(150); // XXX
5260 format %{ "movl $mem, $src\t# compressed ptr" %}
5261 ins_encode %{
5262 address con = (address)$src$$constant;
5263 if (con == nullptr) {
5264 __ movl($mem$$Address, 0);
5265 } else {
5266 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
5267 }
5268 %}
5269 ins_pipe(ialu_mem_imm);
5270 %}
5271
5272 instruct storeImmNKlass(memory mem, immNKlass src)
5273 %{
5274 match(Set mem (StoreNKlass mem src));
5275
5276 ins_cost(150); // XXX
5277 format %{ "movl $mem, $src\t# compressed klass ptr" %}
5278 ins_encode %{
5279 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
5280 %}
5281 ins_pipe(ialu_mem_imm);
5282 %}
5283
5284 // Store Integer Immediate
5285 instruct storeImmI0(memory mem, immI_0 zero)
5286 %{
5287 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
5288 match(Set mem (StoreI mem zero));
5289
5290 ins_cost(125); // XXX
5291 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
5292 ins_encode %{
5293 __ movl($mem$$Address, r12);
5294 %}
5295 ins_pipe(ialu_mem_reg);
5296 %}
5297
5298 instruct storeImmI(memory mem, immI src)
5299 %{
5300 match(Set mem (StoreI mem src));
5301
5302 ins_cost(150);
5303 format %{ "movl $mem, $src\t# int" %}
5304 ins_encode %{
5305 __ movl($mem$$Address, $src$$constant);
5306 %}
5307 ins_pipe(ialu_mem_imm);
5308 %}
5309
5310 // Store Long Immediate
5311 instruct storeImmL0(memory mem, immL0 zero)
5312 %{
5313 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
5314 match(Set mem (StoreL mem zero));
5315
5316 ins_cost(125); // XXX
5317 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
5318 ins_encode %{
5319 __ movq($mem$$Address, r12);
5320 %}
5321 ins_pipe(ialu_mem_reg);
5322 %}
5323
5324 instruct storeImmL(memory mem, immL32 src)
5325 %{
5326 match(Set mem (StoreL mem src));
5327
5328 ins_cost(150);
5329 format %{ "movq $mem, $src\t# long" %}
5330 ins_encode %{
5331 __ movq($mem$$Address, $src$$constant);
5332 %}
5333 ins_pipe(ialu_mem_imm);
5334 %}
5335
5336 // Store Short/Char Immediate
5337 instruct storeImmC0(memory mem, immI_0 zero)
5338 %{
5339 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
5340 match(Set mem (StoreC mem zero));
5341
5342 ins_cost(125); // XXX
5343 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
5344 ins_encode %{
5345 __ movw($mem$$Address, r12);
5346 %}
5347 ins_pipe(ialu_mem_reg);
5348 %}
5349
5350 instruct storeImmI16(memory mem, immI16 src)
5351 %{
5352 predicate(UseStoreImmI16);
5353 match(Set mem (StoreC mem src));
5354
5355 ins_cost(150);
5356 format %{ "movw $mem, $src\t# short/char" %}
5357 ins_encode %{
5358 __ movw($mem$$Address, $src$$constant);
5359 %}
5360 ins_pipe(ialu_mem_imm);
5361 %}
5362
5363 // Store Byte Immediate
5364 instruct storeImmB0(memory mem, immI_0 zero)
5365 %{
5366 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
5367 match(Set mem (StoreB mem zero));
5368
5369 ins_cost(125); // XXX
5370   format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
5371 ins_encode %{
5372 __ movb($mem$$Address, r12);
5373 %}
5374 ins_pipe(ialu_mem_reg);
5375 %}
5376
5377 instruct storeImmB(memory mem, immI8 src)
5378 %{
5379 match(Set mem (StoreB mem src));
5380
5381 ins_cost(150); // XXX
5382 format %{ "movb $mem, $src\t# byte" %}
5383 ins_encode %{
5384 __ movb($mem$$Address, $src$$constant);
5385 %}
5386 ins_pipe(ialu_mem_imm);
5387 %}
5388
5389 // Store CMS card-mark Immediate
5390 instruct storeImmCM0_reg(memory mem, immI_0 zero)
5391 %{
5392 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
5393 match(Set mem (StoreCM mem zero));
5394
5395 ins_cost(125); // XXX
5396 format %{ "movb $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
5397 ins_encode %{
5398 __ movb($mem$$Address, r12);
5399 %}
5400 ins_pipe(ialu_mem_reg);
5401 %}
5402
5403 instruct storeImmCM0(memory mem, immI_0 src)
5404 %{
5405 match(Set mem (StoreCM mem src));
5406
5407 ins_cost(150); // XXX
5408 format %{ "movb $mem, $src\t# CMS card-mark byte 0" %}
5409 ins_encode %{
5410 __ movb($mem$$Address, $src$$constant);
5411 %}
5412 ins_pipe(ialu_mem_imm);
5413 %}
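// Hedged sketch of what the StoreCM rules above accomplish (a generational GC
// post-barrier): the card covering the updated address is marked dirty so the
// collector rescans it. The shift and dirty value are illustrative defaults,
// not read from HotSpot.
#include <cstdint>
static void dirty_card_sketch(volatile uint8_t* card_table, uintptr_t addr) {
  const unsigned card_shift = 9;      // 512-byte cards, a common choice
  card_table[addr >> card_shift] = 0; // 0 == dirty in this model
}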
5414
5415 // Store Float
5416 instruct storeF(memory mem, regF src)
5417 %{
5418 match(Set mem (StoreF mem src));
5419
5420 ins_cost(95); // XXX
5421 format %{ "movss $mem, $src\t# float" %}
5422 ins_encode %{
5423 __ movflt($mem$$Address, $src$$XMMRegister);
5424 %}
5425 ins_pipe(pipe_slow); // XXX
5426 %}
5427
5428 // Store immediate float value (faster than storing from an XMM register)
5429 instruct storeF0(memory mem, immF0 zero)
5430 %{
5431 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
5432 match(Set mem (StoreF mem zero));
5433
5434 ins_cost(25); // XXX
5435 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
5436 ins_encode %{
5437 __ movl($mem$$Address, r12);
5438 %}
5439 ins_pipe(ialu_mem_reg);
5440 %}
5441
5442 instruct storeF_imm(memory mem, immF src)
5443 %{
5444 match(Set mem (StoreF mem src));
5445
5446 ins_cost(50);
5447 format %{ "movl $mem, $src\t# float" %}
5448 ins_encode %{
5449 __ movl($mem$$Address, jint_cast($src$$constant));
5450 %}
5451 ins_pipe(ialu_mem_imm);
5452 %}
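// Illustrative only: storeF_imm works because a float constant can be stored
// as its raw 32-bit pattern with an integer mov; jint_cast in the encoding
// above is HotSpot's form of this bit-level reinterpretation.
#include <cstdint>
#include <cstring>
static int32_t float_bits_sketch(float f) {
  int32_t bits;
  std::memcpy(&bits, &f, sizeof(bits)); // well-defined type pun
  return bits;
}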
5453
5454 // Store Double
5455 instruct storeD(memory mem, regD src)
5456 %{
5457 match(Set mem (StoreD mem src));
5458
5459 ins_cost(95); // XXX
5460 format %{ "movsd $mem, $src\t# double" %}
5461 ins_encode %{
5462 __ movdbl($mem$$Address, $src$$XMMRegister);
5463 %}
5464 ins_pipe(pipe_slow); // XXX
5465 %}
5466
5467 // Store immediate double 0.0 (faster than storing from an XMM register)
5468 instruct storeD0_imm(memory mem, immD0 src)
5469 %{
5470 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
5471 match(Set mem (StoreD mem src));
5472
5473 ins_cost(50);
5474 format %{ "movq $mem, $src\t# double 0." %}
5475 ins_encode %{
5476 __ movq($mem$$Address, $src$$constant);
5477 %}
5478 ins_pipe(ialu_mem_imm);
5479 %}
5480
5481 instruct storeD0(memory mem, immD0 zero)
5482 %{
5483 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
5484 match(Set mem (StoreD mem zero));
5485
5486 ins_cost(25); // XXX
5487 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
5488 ins_encode %{
5489 __ movq($mem$$Address, r12);
5490 %}
5491 ins_pipe(ialu_mem_reg);
5492 %}
5493
5494 instruct storeSSI(stackSlotI dst, rRegI src)
5495 %{
5496 match(Set dst src);
5497
5498 ins_cost(100);
5499 format %{ "movl $dst, $src\t# int stk" %}
5500 ins_encode %{
5501 __ movl($dst$$Address, $src$$Register);
5502 %}
5503 ins_pipe( ialu_mem_reg );
6010 format %{ "MEMBAR-storestore (empty encoding)" %}
6011 ins_encode( );
6012 ins_pipe(empty);
6013 %}
6014
6015 //----------Move Instructions--------------------------------------------------
6016
6017 instruct castX2P(rRegP dst, rRegL src)
6018 %{
6019 match(Set dst (CastX2P src));
6020
6021 format %{ "movq $dst, $src\t# long->ptr" %}
6022 ins_encode %{
6023 if ($dst$$reg != $src$$reg) {
6024 __ movptr($dst$$Register, $src$$Register);
6025 }
6026 %}
6027 ins_pipe(ialu_reg_reg); // XXX
6028 %}
6029
6030 instruct castN2X(rRegL dst, rRegN src)
6031 %{
6032 match(Set dst (CastP2X src));
6033
6034 format %{ "movq $dst, $src\t# ptr -> long" %}
6035 ins_encode %{
6036 if ($dst$$reg != $src$$reg) {
6037 __ movptr($dst$$Register, $src$$Register);
6038 }
6039 %}
6040 ins_pipe(ialu_reg_reg); // XXX
6041 %}
6042
6043 instruct castP2X(rRegL dst, rRegP src)
6044 %{
6045 match(Set dst (CastP2X src));
6046
6047 format %{ "movq $dst, $src\t# ptr -> long" %}
6048 ins_encode %{
6049 if ($dst$$reg != $src$$reg) {
6050 __ movptr($dst$$Register, $src$$Register);
6051 }
6052 %}
6053 ins_pipe(ialu_reg_reg); // XXX
6054 %}
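// Illustrative only (plain C++, not matcher code): CastP2X exposes a pointer
// as an integer so compiled code can, for example, mask its low bits for
// alignment checks, which the convP2I rule below supports for vectors.
#include <cstdint>
static bool is_aligned_16_sketch(const void* p) {
  return (reinterpret_cast<uintptr_t>(p) & 0xF) == 0; // only low bits matter
}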
6055
6056 // Convert oop into int for vector alignment masking
6057 instruct convP2I(rRegI dst, rRegP src)
6058 %{
6059 match(Set dst (ConvL2I (CastP2X src)));
6060
6061 format %{ "movl $dst, $src\t# ptr -> int" %}
6062 ins_encode %{
10564 effect(DEF dst, USE src);
10565 ins_cost(100);
10566 format %{ "movd $dst,$src\t# MoveI2F" %}
10567 ins_encode %{
10568 __ movdl($dst$$XMMRegister, $src$$Register);
10569 %}
10570 ins_pipe( pipe_slow );
10571 %}
10572
10573 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10574 match(Set dst (MoveL2D src));
10575 effect(DEF dst, USE src);
10576 ins_cost(100);
10577 format %{ "movd $dst,$src\t# MoveL2D" %}
10578 ins_encode %{
10579 __ movdq($dst$$XMMRegister, $src$$Register);
10580 %}
10581 ins_pipe( pipe_slow );
10582 %}
10583
10584
10585 // Fast clearing of an array
10586 // Small ClearArray non-AVX512.
10587 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10588 Universe dummy, rFlagsReg cr)
10589 %{
10590 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10591 match(Set dummy (ClearArray (Binary cnt base) val));
10592 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10593
10594 format %{ $$template
10595 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10596 $$emit$$"jg LARGE\n\t"
10597 $$emit$$"dec rcx\n\t"
10598 $$emit$$"js DONE\t# Zero length\n\t"
10599 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10600 $$emit$$"dec rcx\n\t"
10601 $$emit$$"jge LOOP\n\t"
10602 $$emit$$"jmp DONE\n\t"
10603 $$emit$$"# LARGE:\n\t"
10604 if (UseFastStosb) {
10605 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10606 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10607 } else if (UseXMMForObjInit) {
10608 $$emit$$"movdq $tmp, $val\n\t"
10609 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10610 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10611 $$emit$$"jmpq L_zero_64_bytes\n\t"
10612 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10613 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10614 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10615 $$emit$$"add 0x40,rax\n\t"
10616 $$emit$$"# L_zero_64_bytes:\n\t"
10617 $$emit$$"sub 0x8,rcx\n\t"
10618 $$emit$$"jge L_loop\n\t"
10619 $$emit$$"add 0x4,rcx\n\t"
10620 $$emit$$"jl L_tail\n\t"
10621 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10622 $$emit$$"add 0x20,rax\n\t"
10623 $$emit$$"sub 0x4,rcx\n\t"
10624 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10625 $$emit$$"add 0x4,rcx\n\t"
10626 $$emit$$"jle L_end\n\t"
10627 $$emit$$"dec rcx\n\t"
10628 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10629 $$emit$$"vmovq xmm0,(rax)\n\t"
10630 $$emit$$"add 0x8,rax\n\t"
10631 $$emit$$"dec rcx\n\t"
10632 $$emit$$"jge L_sloop\n\t"
10633 $$emit$$"# L_end:\n\t"
10634 } else {
10635 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10636 }
10637 $$emit$$"# DONE"
10638 %}
10639 ins_encode %{
10640 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10641 $tmp$$XMMRegister, false, false);
10642 %}
10643 ins_pipe(pipe_slow);
10644 %}
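// Hedged C++ model of the strategy in the format template above: counts at or
// below InitArrayShortSize use an inline backwards 8-byte store loop, larger
// counts fall back to a rep stosq-style forward fill. Word counts are 8-byte
// units; this is a sketch, not the actual clear_mem implementation.
#include <cstdint>
static void clear_words_sketch(uint64_t* base, int64_t words, uint64_t val,
                               int64_t short_limit) {
  if (words <= short_limit) {
    for (int64_t i = words - 1; i >= 0; i--) base[i] = val; // short inline loop
  } else {
    for (int64_t i = 0; i < words; i++) base[i] = val;      // bulk "rep stosq"
  }
}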
10645
10646 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10647 Universe dummy, rFlagsReg cr)
10648 %{
10649 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10650 match(Set dummy (ClearArray (Binary cnt base) val));
10651 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10652
10653 format %{ $$template
10654 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10655 $$emit$$"jg LARGE\n\t"
10656 $$emit$$"dec rcx\n\t"
10657 $$emit$$"js DONE\t# Zero length\n\t"
10658 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10659 $$emit$$"dec rcx\n\t"
10660 $$emit$$"jge LOOP\n\t"
10661 $$emit$$"jmp DONE\n\t"
10662 $$emit$$"# LARGE:\n\t"
10663 if (UseXMMForObjInit) {
10664 $$emit$$"movdq $tmp, $val\n\t"
10665 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10666 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10667 $$emit$$"jmpq L_zero_64_bytes\n\t"
10668 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10669 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10670 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10671 $$emit$$"add 0x40,rax\n\t"
10672 $$emit$$"# L_zero_64_bytes:\n\t"
10673 $$emit$$"sub 0x8,rcx\n\t"
10674 $$emit$$"jge L_loop\n\t"
10675 $$emit$$"add 0x4,rcx\n\t"
10676 $$emit$$"jl L_tail\n\t"
10677 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10678 $$emit$$"add 0x20,rax\n\t"
10679 $$emit$$"sub 0x4,rcx\n\t"
10680 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10681 $$emit$$"add 0x4,rcx\n\t"
10682 $$emit$$"jle L_end\n\t"
10683 $$emit$$"dec rcx\n\t"
10684 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10685 $$emit$$"vmovq xmm0,(rax)\n\t"
10686 $$emit$$"add 0x8,rax\n\t"
10687 $$emit$$"dec rcx\n\t"
10688 $$emit$$"jge L_sloop\n\t"
10689 $$emit$$"# L_end:\n\t"
10690 } else {
10691 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10692 }
10693 $$emit$$"# DONE"
10694 %}
10695 ins_encode %{
10696 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10697 $tmp$$XMMRegister, false, true);
10698 %}
10699 ins_pipe(pipe_slow);
10700 %}
10701
10702 // Small ClearArray AVX512 non-constant length.
10703 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10704 Universe dummy, rFlagsReg cr)
10705 %{
10706 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10707 match(Set dummy (ClearArray (Binary cnt base) val));
10708 ins_cost(125);
10709 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10710
10711 format %{ $$template
10712 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10713 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10714 $$emit$$"jg LARGE\n\t"
10715 $$emit$$"dec rcx\n\t"
10716 $$emit$$"js DONE\t# Zero length\n\t"
10717 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10718 $$emit$$"dec rcx\n\t"
10719 $$emit$$"jge LOOP\n\t"
10720 $$emit$$"jmp DONE\n\t"
10721 $$emit$$"# LARGE:\n\t"
10722 if (UseFastStosb) {
10723 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10724 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10725 } else if (UseXMMForObjInit) {
10726 $$emit$$"mov rdi,rax\n\t"
10727 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10728 $$emit$$"jmpq L_zero_64_bytes\n\t"
10729       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10730       $$emit$$"vmovdqu ymm0,(rax)\n\t"
10731       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10732       $$emit$$"add 0x40,rax\n\t"
10733       $$emit$$"# L_zero_64_bytes:\n\t"
10734       $$emit$$"sub 0x8,rcx\n\t"
10735       $$emit$$"jge L_loop\n\t"
10736       $$emit$$"add 0x4,rcx\n\t"
10737 $$emit$$"jl L_tail\n\t"
10738 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10739 $$emit$$"add 0x20,rax\n\t"
10740 $$emit$$"sub 0x4,rcx\n\t"
10741 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10742 $$emit$$"add 0x4,rcx\n\t"
10743 $$emit$$"jle L_end\n\t"
10744 $$emit$$"dec rcx\n\t"
10745 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10746 $$emit$$"vmovq xmm0,(rax)\n\t"
10747 $$emit$$"add 0x8,rax\n\t"
10748 $$emit$$"dec rcx\n\t"
10749 $$emit$$"jge L_sloop\n\t"
10750 $$emit$$"# L_end:\n\t"
10751 } else {
10752 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10753 }
10754 $$emit$$"# DONE"
10755 %}
10756 ins_encode %{
10757 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10758 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
10759 %}
10760 ins_pipe(pipe_slow);
10761 %}
10762
10763 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10764 Universe dummy, rFlagsReg cr)
10765 %{
10766 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10767 match(Set dummy (ClearArray (Binary cnt base) val));
10768 ins_cost(125);
10769 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10770
10771 format %{ $$template
10772 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10773 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10774 $$emit$$"jg LARGE\n\t"
10775 $$emit$$"dec rcx\n\t"
10776 $$emit$$"js DONE\t# Zero length\n\t"
10777 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10778 $$emit$$"dec rcx\n\t"
10779 $$emit$$"jge LOOP\n\t"
10780 $$emit$$"jmp DONE\n\t"
10781 $$emit$$"# LARGE:\n\t"
10782 if (UseFastStosb) {
10783 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10784 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10785 } else if (UseXMMForObjInit) {
10786 $$emit$$"mov rdi,rax\n\t"
10787 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10788 $$emit$$"jmpq L_zero_64_bytes\n\t"
10789       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10790       $$emit$$"vmovdqu ymm0,(rax)\n\t"
10791       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10792       $$emit$$"add 0x40,rax\n\t"
10793       $$emit$$"# L_zero_64_bytes:\n\t"
10794       $$emit$$"sub 0x8,rcx\n\t"
10795       $$emit$$"jge L_loop\n\t"
10796       $$emit$$"add 0x4,rcx\n\t"
10797 $$emit$$"jl L_tail\n\t"
10798 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10799 $$emit$$"add 0x20,rax\n\t"
10800 $$emit$$"sub 0x4,rcx\n\t"
10801 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10802 $$emit$$"add 0x4,rcx\n\t"
10803 $$emit$$"jle L_end\n\t"
10804 $$emit$$"dec rcx\n\t"
10805 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10806 $$emit$$"vmovq xmm0,(rax)\n\t"
10807 $$emit$$"add 0x8,rax\n\t"
10808 $$emit$$"dec rcx\n\t"
10809 $$emit$$"jge L_sloop\n\t"
10810 $$emit$$"# L_end:\n\t"
10811 } else {
10812 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10813 }
10814 $$emit$$"# DONE"
10815 %}
10816 ins_encode %{
10817 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10818 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
10819 %}
10820 ins_pipe(pipe_slow);
10821 %}
10822
10823 // Large ClearArray non-AVX512.
10824 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10825 Universe dummy, rFlagsReg cr)
10826 %{
10827 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10828 match(Set dummy (ClearArray (Binary cnt base) val));
10829 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10830
10831 format %{ $$template
10832 if (UseFastStosb) {
10833 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10834 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
10835 } else if (UseXMMForObjInit) {
10836 $$emit$$"movdq $tmp, $val\n\t"
10837 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10838 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10839 $$emit$$"jmpq L_zero_64_bytes\n\t"
10840 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10841 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10842 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10843 $$emit$$"add 0x40,rax\n\t"
10844 $$emit$$"# L_zero_64_bytes:\n\t"
10845 $$emit$$"sub 0x8,rcx\n\t"
10846 $$emit$$"jge L_loop\n\t"
10847 $$emit$$"add 0x4,rcx\n\t"
10848 $$emit$$"jl L_tail\n\t"
10849 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10850 $$emit$$"add 0x20,rax\n\t"
10851 $$emit$$"sub 0x4,rcx\n\t"
10852 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10853 $$emit$$"add 0x4,rcx\n\t"
10854 $$emit$$"jle L_end\n\t"
10855 $$emit$$"dec rcx\n\t"
10856 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10857 $$emit$$"vmovq xmm0,(rax)\n\t"
10858 $$emit$$"add 0x8,rax\n\t"
10859 $$emit$$"dec rcx\n\t"
10860 $$emit$$"jge L_sloop\n\t"
10861 $$emit$$"# L_end:\n\t"
10862 } else {
10863 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
10864 }
10865 %}
10866 ins_encode %{
10867 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10868 $tmp$$XMMRegister, true, false);
10869 %}
10870 ins_pipe(pipe_slow);
10871 %}
10872
10873 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10874 Universe dummy, rFlagsReg cr)
10875 %{
10876 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10877 match(Set dummy (ClearArray (Binary cnt base) val));
10878 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10879
10880 format %{ $$template
10881 if (UseXMMForObjInit) {
10882 $$emit$$"movdq $tmp, $val\n\t"
10883 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10884 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10885 $$emit$$"jmpq L_zero_64_bytes\n\t"
10886 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10887 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10888 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10889 $$emit$$"add 0x40,rax\n\t"
10890 $$emit$$"# L_zero_64_bytes:\n\t"
10891 $$emit$$"sub 0x8,rcx\n\t"
10892 $$emit$$"jge L_loop\n\t"
10893 $$emit$$"add 0x4,rcx\n\t"
10894 $$emit$$"jl L_tail\n\t"
10895 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10896 $$emit$$"add 0x20,rax\n\t"
10897 $$emit$$"sub 0x4,rcx\n\t"
10898 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10899 $$emit$$"add 0x4,rcx\n\t"
10900 $$emit$$"jle L_end\n\t"
10901 $$emit$$"dec rcx\n\t"
10902 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10903 $$emit$$"vmovq xmm0,(rax)\n\t"
10904 $$emit$$"add 0x8,rax\n\t"
10905 $$emit$$"dec rcx\n\t"
10906 $$emit$$"jge L_sloop\n\t"
10907 $$emit$$"# L_end:\n\t"
10908 } else {
10909 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
10910 }
10911 %}
10912 ins_encode %{
10913 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10914 $tmp$$XMMRegister, true, true);
10915 %}
10916 ins_pipe(pipe_slow);
10917 %}
10918
10919 // Large ClearArray AVX512.
10920 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10921 Universe dummy, rFlagsReg cr)
10922 %{
10923 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10924 match(Set dummy (ClearArray (Binary cnt base) val));
10925 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10926
10927 format %{ $$template
10928 if (UseFastStosb) {
10929 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10930 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10931 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
10932 } else if (UseXMMForObjInit) {
10933 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
10934 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10935 $$emit$$"jmpq L_zero_64_bytes\n\t"
10936 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10937 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10938 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10939 $$emit$$"add 0x40,rax\n\t"
10940 $$emit$$"# L_zero_64_bytes:\n\t"
10941 $$emit$$"sub 0x8,rcx\n\t"
10942 $$emit$$"jge L_loop\n\t"
10943 $$emit$$"add 0x4,rcx\n\t"
10944 $$emit$$"jl L_tail\n\t"
10945 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10946 $$emit$$"add 0x20,rax\n\t"
10947 $$emit$$"sub 0x4,rcx\n\t"
10948 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10949 $$emit$$"add 0x4,rcx\n\t"
10950 $$emit$$"jle L_end\n\t"
10951 $$emit$$"dec rcx\n\t"
10952 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10953 $$emit$$"vmovq xmm0,(rax)\n\t"
10954 $$emit$$"add 0x8,rax\n\t"
10955 $$emit$$"dec rcx\n\t"
10956 $$emit$$"jge L_sloop\n\t"
10957 $$emit$$"# L_end:\n\t"
10958 } else {
10959 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10960 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
10961 }
10962 %}
10963 ins_encode %{
10964 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10965 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
10966 %}
10967 ins_pipe(pipe_slow);
10968 %}
10969
10970 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10971 Universe dummy, rFlagsReg cr)
10972 %{
10973 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10974 match(Set dummy (ClearArray (Binary cnt base) val));
10975 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10976
10977 format %{ $$template
10978 if (UseFastStosb) {
10979 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10980 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10981 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
10982 } else if (UseXMMForObjInit) {
10983 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
10984 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10985 $$emit$$"jmpq L_zero_64_bytes\n\t"
10986 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10987 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10988 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10989 $$emit$$"add 0x40,rax\n\t"
10990 $$emit$$"# L_zero_64_bytes:\n\t"
10991 $$emit$$"sub 0x8,rcx\n\t"
10992 $$emit$$"jge L_loop\n\t"
10993 $$emit$$"add 0x4,rcx\n\t"
10994 $$emit$$"jl L_tail\n\t"
10995 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10996 $$emit$$"add 0x20,rax\n\t"
10997 $$emit$$"sub 0x4,rcx\n\t"
10998 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10999 $$emit$$"add 0x4,rcx\n\t"
11000 $$emit$$"jle L_end\n\t"
11001 $$emit$$"dec rcx\n\t"
11002 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11003 $$emit$$"vmovq xmm0,(rax)\n\t"
11004 $$emit$$"add 0x8,rax\n\t"
11005 $$emit$$"dec rcx\n\t"
11006 $$emit$$"jge L_sloop\n\t"
11007 $$emit$$"# L_end:\n\t"
11008 } else {
11009 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11010 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11011 }
11012 %}
11013 ins_encode %{
11014 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11015 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
11016 %}
11017 ins_pipe(pipe_slow);
11018 %}
11019
11020 // Small ClearArray AVX512 constant length.
11021 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
11022 %{
11023 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
11024 ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11025 match(Set dummy (ClearArray (Binary cnt base) val));
11026 ins_cost(100);
11027 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
11028   format %{ "clear_mem_imm $base, $cnt\n\t" %}
11029 ins_encode %{
11030 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11031 %}
11032 ins_pipe(pipe_slow);
11033 %}
11034
11035 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11036 rax_RegI result, legRegD tmp1, rFlagsReg cr)
11037 %{
11038 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11039 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11040 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11041
11042 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11043 ins_encode %{
11044 __ string_compare($str1$$Register, $str2$$Register,
11045 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11046 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11047 %}
11048 ins_pipe( pipe_slow );
11049 %}
11050
11919 ins_pipe(ialu_cr_reg_mem);
11920 %}
11921
11922 // This will generate a signed flags result, which is OK since any compare
11923 // against zero is only used as eq/neq.
11924 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11925 %{
11926 match(Set cr (CmpP src zero));
11927
11928 format %{ "testq $src, $src\t# ptr" %}
11929 ins_encode %{
11930 __ testq($src$$Register, $src$$Register);
11931 %}
11932 ins_pipe(ialu_cr_reg_imm);
11933 %}
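// Illustrative only: testq reg,reg computes reg & reg, so ZF is set exactly
// when the pointer is null, while SF/OF carry no pointer-ordering meaning.
// That is why the comment above restricts these rules to eq/neq against zero.
#include <cstdint>
static bool null_check_via_test(uint64_t reg_value) {
  return (reg_value & reg_value) == 0; // what ZF encodes after testq reg,reg
}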
11934
11935 // This will generate a signed flags result, which is OK since any compare
11936 // against zero is only used as eq/neq.
11937 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11938 %{
11939 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
11940 n->in(1)->as_Load()->barrier_data() == 0);
11941 match(Set cr (CmpP (LoadP op) zero));
11942
11943 ins_cost(500); // XXX
11944 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
11945 ins_encode %{
11946     __ testq($op$$Address, 0xFFFFFFFF); // imm32 sign-extends to 64-bit all-ones
11947 %}
11948 ins_pipe(ialu_cr_reg_imm);
11949 %}
11950
11951 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11952 %{
11953 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
11954 n->in(1)->as_Load()->barrier_data() == 0);
11955 match(Set cr (CmpP (LoadP mem) zero));
11956
11957 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
11958 ins_encode %{
11959 __ cmpq(r12, $mem$$Address);
11960 %}
11961 ins_pipe(ialu_cr_reg_mem);
11962 %}
11963
11964 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11965 %{
11966 match(Set cr (CmpN op1 op2));
11967
11968 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
11969 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11970 ins_pipe(ialu_cr_reg_reg);
11971 %}
11972
11973 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12016 %{
12017 match(Set cr (CmpN src (LoadNKlass mem)));
12018
12019 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
12020 ins_encode %{
12021 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
12022 %}
12023 ins_pipe(ialu_cr_reg_mem);
12024 %}
12025
12026 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12027 match(Set cr (CmpN src zero));
12028
12029 format %{ "testl $src, $src\t# compressed ptr" %}
12030 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12031 ins_pipe(ialu_cr_reg_imm);
12032 %}
12033
12034 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12035 %{
12036 predicate(CompressedOops::base() != nullptr);
12037 match(Set cr (CmpN (LoadN mem) zero));
12038
12039 ins_cost(500); // XXX
12040 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
12041 ins_encode %{
12042     __ testl($mem$$Address, 0xFFFFFFFF); // AND with all-ones: ZF iff the narrow oop is null
12043 %}
12044 ins_pipe(ialu_cr_reg_mem);
12045 %}
12046
12047 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12048 %{
12049 predicate(CompressedOops::base() == nullptr);
12050 match(Set cr (CmpN (LoadN mem) zero));
12051
12052 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12053 ins_encode %{
12054 __ cmpl(r12, $mem$$Address);
12055 %}
12056 ins_pipe(ialu_cr_reg_mem);
12057 %}
12058
12059 // Yanked all unsigned pointer compare operations.
12060 // Pointer compares are done with CmpP which is already unsigned.
12061
12062 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12063 %{
12064 match(Set cr (CmpL op1 op2));
12065
12066 format %{ "cmpq $op1, $op2" %}
12067 ins_encode %{
12068 __ cmpq($op1$$Register, $op2$$Register);
12069 %}
12780
12781 ins_cost(300);
12782 format %{ "call_leaf,runtime " %}
12783 ins_encode(clear_avx, Java_To_Runtime(meth));
12784 ins_pipe(pipe_slow);
12785 %}
12786
12787 // Call runtime without safepoint and with vector arguments
12788 instruct CallLeafDirectVector(method meth)
12789 %{
12790 match(CallLeafVector);
12791 effect(USE meth);
12792
12793 ins_cost(300);
12794 format %{ "call_leaf,vector " %}
12795 ins_encode(Java_To_Runtime(meth));
12796 ins_pipe(pipe_slow);
12797 %}
12798
12799 // Call runtime without safepoint.
12800 // The entry point is null; the target register holds the address to call.
12801 instruct CallLeafNoFPInDirect(rRegP target)
12802 %{
12803 predicate(n->as_Call()->entry_point() == nullptr);
12804 match(CallLeafNoFP target);
12805
12806 ins_cost(300);
12807 format %{ "call_leaf_nofp,runtime indirect " %}
12808 ins_encode %{
12809 __ call($target$$Register);
12810 %}
12811
12812 ins_pipe(pipe_slow);
12813 %}
12814
12815 instruct CallLeafNoFPDirect(method meth)
12816 %{
12817 predicate(n->as_Call()->entry_point() != nullptr);
12818 match(CallLeafNoFP);
12819 effect(USE meth);
12820
12821 ins_cost(300);
12822 format %{ "call_leaf_nofp,runtime " %}
12823 ins_encode(clear_avx, Java_To_Runtime(meth));
12824 ins_pipe(pipe_slow);
12825 %}
12826
12827 // Return Instruction
12828 // Remove the return address and jump to it.
12829 // Note: we always emit a nop after a ret, to make sure there is room
12830 // for safepoint patching.
12831 instruct Ret()
12832 %{
12833 match(Return);
12834
12835 format %{ "ret" %}
12836 ins_encode %{
12837 __ ret(0);
|