473 }
474
475 // !!!!! Special hack to get all types of calls to specify the byte offset
476 // from the start of the call to the point where the return address
477 // will point.
478 int MachCallStaticJavaNode::ret_addr_offset()
479 {
480 int offset = 5; // 5 bytes from start of call to where return address points
481 offset += clear_avx_size();
482 return offset;
483 }
484
485 int MachCallDynamicJavaNode::ret_addr_offset()
486 {
487 int offset = 15; // 15 bytes from start of call to where return address points
488 offset += clear_avx_size();
489 return offset;
490 }
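// For reference (a sketch, assuming the usual HotSpot x86-64 encodings):
// the static call is a plain `call rel32` (E8 + 4-byte displacement = 5
// bytes); the dynamic call is preceded by a 10-byte `movq rax, imm64`
// (REX.W B8 + 8-byte immediate) carrying the inline-cache data, giving
// 10 + 5 = 15 bytes.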
491
492 int MachCallRuntimeNode::ret_addr_offset() {
493 int offset = 13; // movq r10,#addr; callq (r10)
494 if (this->ideal_Opcode() != Op_CallLeafVector) {
495 offset += clear_avx_size();
496 }
497 return offset;
498 }
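// Byte breakdown of the 13 above (assuming standard encodings): movq r10,#addr
// is 49 BA + imm64 (10 bytes) and callq (r10) is 41 FF D2 (3 bytes).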
499 //
500 // Compute padding required for nodes which need alignment
501 //
502
503 // The address of the call instruction needs to be 4-byte aligned to
504 // ensure that it does not span a cache line so that it can be patched.
505 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
506 {
507 current_offset += clear_avx_size(); // skip vzeroupper
508 current_offset += 1; // skip call opcode byte
509 return align_up(current_offset, alignment_required()) - current_offset;
510 }
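// Illustrative arithmetic: with current_offset == 0x36 and vzeroupper emitted
// (clear_avx_size() == 3), the displacement would start at 0x36 + 3 + 1 = 0x3A;
// align_up(0x3A, 4) - 0x3A = 2 bytes of padding shift it to 0x3C, so the
// patched imm32 stays within a single 4-byte-aligned word.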
511
512 // The address of the call instruction needs to be 4-byte aligned to
513 // ensure that it does not span a cache line so that it can be patched.
514 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
515 {
516 current_offset += clear_avx_size(); // skip vzeroupper
517 current_offset += 11; // skip movq instruction + call opcode byte
518 return align_up(current_offset, alignment_required()) - current_offset;
870 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
871 if (PreserveFramePointer) {
872 st->print("\n\t");
873 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
874 if (framesize > 0) {
875 st->print("\n\t");
876 st->print("addq rbp, #%d", framesize);
877 }
878 }
879 }
880
881 if (VerifyStackAtCalls) {
882 st->print("\n\t");
883 framesize -= wordSize;
884 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
885 #ifdef ASSERT
886 st->print("\n\t");
887 st->print("# stack alignment check");
888 #endif
889 }
890 if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
891 st->print("\n\t");
892 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
893 st->print("\n\t");
894 st->print("je fast_entry\t");
895 st->print("\n\t");
896 st->print("call #nmethod_entry_barrier_stub\t");
897 st->print("\n\tfast_entry:");
898 }
899 st->cr();
900 }
901 #endif
902
903 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
904 Compile* C = ra_->C;
905 C2_MacroAssembler _masm(&cbuf);
906
907 int framesize = C->output()->frame_size_in_bytes();
908 int bangsize = C->output()->bang_size_in_bytes();
909
910 if (C->clinit_barrier_on_entry()) {
911 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
912 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
913
914 Label L_skip_barrier;
915 Register klass = rscratch1;
916
917 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
918 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
919
920 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
921
922 __ bind(L_skip_barrier);
923 }
924
925 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
926
927 C->output()->set_frame_complete(cbuf.insts_size());
928
929 if (C->has_mach_constant_base_node()) {
930 // NOTE: We set the table base offset here because code that uses the
931 // constant table might be emitted before MachConstantBaseNode.
932 ConstantTable& constant_table = C->output()->constant_table();
933 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
934 }
935 }
936
937 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
938 {
939 return MachNode::size(ra_); // too many variables; just compute it
940 // the hard way
941 }
942
943 int MachPrologNode::reloc() const
944 {
945 return 0; // a large enough number
946 }
947
948 //=============================================================================
949 #ifndef PRODUCT
950 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
951 {
952 Compile* C = ra_->C;
953 if (generate_vzeroupper(C)) {
954 st->print("vzeroupper");
955 st->cr(); st->print("\t");
956 }
957
958 int framesize = C->output()->frame_size_in_bytes();
959 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
960 // Remove two words for the return address (already pushed)
961 // and RBP
962 framesize -= 2*wordSize;
970 if (do_polling() && C->is_method_compilation()) {
971 st->print("\t");
972 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
973 "ja #safepoint_stub\t"
974 "# Safepoint: poll for GC");
975 }
976 }
977 #endif
978
979 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
980 {
981 Compile* C = ra_->C;
982 MacroAssembler _masm(&cbuf);
983
984 if (generate_vzeroupper(C)) {
985 // Clear upper bits of YMM registers when current compiled code uses
986 // wide vectors to avoid AVX <-> SSE transition penalty during call.
987 __ vzeroupper();
988 }
989
990 int framesize = C->output()->frame_size_in_bytes();
991 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
992 // Remove two words for the return address (already pushed)
993 // and RBP
994 framesize -= 2*wordSize;
995
996 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
997
998 if (framesize) {
999 emit_opcode(cbuf, Assembler::REX_W);
1000 if (framesize < 0x80) {
1001 emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1002 emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1003 emit_d8(cbuf, framesize);
1004 } else {
1005 emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1006 emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1007 emit_d32(cbuf, framesize);
1008 }
1009 }
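// The two forms above correspond to (assuming standard encodings):
//   48 83 C4 ib    addq rsp, imm8   (framesize < 0x80)
//   48 81 C4 id    addq rsp, imm32  (otherwise)
// e.g. a 16-byte frame pops with 48 83 C4 10, three bytes shorter than imm32.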
1010
1011 // popq rbp
1012 emit_opcode(cbuf, 0x58 | RBP_enc);
1013
1014 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1015 __ reserved_stack_check();
1016 }
1017
1018 if (do_polling() && C->is_method_compilation()) {
1019 MacroAssembler _masm(&cbuf);
1020 Label dummy_label;
1021 Label* code_stub = &dummy_label;
1022 if (!C->output()->in_scratch_emit_size()) {
1023 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1024 C->output()->add_stub(stub);
1025 code_stub = &stub->entry();
1026 }
1027 __ relocate(relocInfo::poll_return_type);
1028 __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
1029 }
1030 }
1031
1032 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1033 {
1034 return MachNode::size(ra_); // too many variables; just compute it
1035 // the hard way
1036 }
1037
1038 int MachEpilogNode::reloc() const
1039 {
1040 return 2; // a large enough number
1041 }
1042
1043 const Pipeline* MachEpilogNode::pipeline() const
1044 {
1045 return MachNode::pipeline_class();
1046 }
1047
1048 //=============================================================================
1049
1050 enum RC {
1051 rc_bad,
1052 rc_int,
1053 rc_kreg,
1054 rc_float,
1055 rc_stack
1056 };
1057
1144 src_offset, dst_offset);
1145 break;
1146 case Op_VecZ:
1147 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1148 "vmovdqu xmm0, [rsp + #%d]\n\t"
1149 "vmovdqu [rsp + #%d], xmm0\n\t"
1150 "vmovdqu xmm0, [rsp - #64]",
1151 src_offset, dst_offset);
1152 break;
1153 default:
1154 ShouldNotReachHere();
1155 }
1156 #endif
1157 }
1158 }
1159
1160 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1161 PhaseRegAlloc* ra_,
1162 bool do_size,
1163 outputStream* st) const {
1164 assert(cbuf != NULL || st != NULL, "sanity");
1165 // Get registers to move
1166 OptoReg::Name src_second = ra_->get_reg_second(in(1));
1167 OptoReg::Name src_first = ra_->get_reg_first(in(1));
1168 OptoReg::Name dst_second = ra_->get_reg_second(this);
1169 OptoReg::Name dst_first = ra_->get_reg_first(this);
1170
1171 enum RC src_second_rc = rc_class(src_second);
1172 enum RC src_first_rc = rc_class(src_first);
1173 enum RC dst_second_rc = rc_class(dst_second);
1174 enum RC dst_first_rc = rc_class(dst_first);
1175
1176 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1177 "must move at least 1 register" );
1178
1179 if (src_first == dst_first && src_second == dst_second) {
1180 // Self copy, no move
1181 return 0;
1182 }
1183 if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
1184 uint ireg = ideal_reg();
1185 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1186 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1187 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1188 // mem -> mem
1189 int src_offset = ra_->reg2offset(src_first);
1190 int dst_offset = ra_->reg2offset(dst_first);
1191 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1192 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1193 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
1194 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1195 int stack_offset = ra_->reg2offset(dst_first);
1196 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
1197 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1198 int stack_offset = ra_->reg2offset(src_first);
1199 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
1200 } else {
1201 ShouldNotReachHere();
1202 }
1203 return 0;
1602 st->print("kmovq %s, %s\t# spill",
1603 Matcher::regName[dst_first],
1604 Matcher::regName[src_first]);
1605 #endif
1606 }
1607 }
1608 return 0;
1609 } else if (dst_first_rc == rc_float) {
1610 assert(false, "Illegal spill");
1611 return 0;
1612 }
1613 }
1614
1615 assert(0," foo ");
1616 Unimplemented();
1617 return 0;
1618 }
1619
1620 #ifndef PRODUCT
1621 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1622 implementation(NULL, ra_, false, st);
1623 }
1624 #endif
1625
1626 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1627 implementation(&cbuf, ra_, false, NULL);
1628 }
1629
1630 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1631 return MachNode::size(ra_);
1632 }
1633
1634 //=============================================================================
1635 #ifndef PRODUCT
1636 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1637 {
1638 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1639 int reg = ra_->get_reg_first(this);
1640 st->print("leaq %s, [rsp + #%d]\t# box lock",
1641 Matcher::regName[reg], offset);
1642 }
1643 #endif
1644
1645 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1646 {
1647 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1648 int reg = ra_->get_encode(this);
1649 if (offset >= 0x80) {
1650 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1651 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1652 emit_rm(cbuf, 0x2, reg & 7, 0x04);
1653 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1654 emit_d32(cbuf, offset);
1655 } else {
1656 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1657 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1658 emit_rm(cbuf, 0x1, reg & 7, 0x04);
1659 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1660 emit_d8(cbuf, offset);
1661 }
1662 }
1663
1664 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1665 {
1666 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1667 return (offset < 0x80) ? 5 : 8; // REX
1668 }
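// Size sketch: REX + 8D + ModRM + SIB + disp8 = 5 bytes for offsets under
// 0x80, and REX + 8D + ModRM + SIB + disp32 = 8 bytes otherwise, matching
// the two LEA forms emitted above.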
1669
1670 //=============================================================================
1671 #ifndef PRODUCT
1672 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1673 {
1674 if (UseCompressedClassPointers) {
1675 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1676 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1677 st->print_cr("\tcmpq rax, rscratch1\t # Inline cache check");
1678 } else {
1679 st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1680 "# Inline cache check");
1681 }
1682 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1683 st->print_cr("\tnop\t# nops to align entry point");
1684 }
1685 #endif
1686
1687 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1688 {
1689 MacroAssembler masm(&cbuf);
1690 uint insts_size = cbuf.insts_size();
1691 if (UseCompressedClassPointers) {
1692 masm.load_klass(rscratch1, j_rarg0, rscratch2);
1693 masm.cmpptr(rax, rscratch1);
1694 } else {
1695 masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1696 }
1697
1698 masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1699
1700 /* WARNING: these NOPs are critical so that the verified entry point is
1701    properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
1702 int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1703 if (OptoBreakpoint) {
1704 // Leave space for int3
1705 nops_cnt -= 1;
1706 }
1707 nops_cnt &= 0x3; // Do not add nops if code is aligned.
1708 if (nops_cnt > 0)
1709 masm.nop(nops_cnt);
1710 }
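// Alignment arithmetic (illustrative): if the unverified-entry code size
// mod 4 is 2, nops_cnt = (4 - 2) & 0x3 = 2 nops; with OptoBreakpoint one
// byte is reserved for the int3, so a single nop is emitted instead.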
1711
1712 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1713 {
1714 return MachNode::size(ra_); // too many variables; just compute it
1715 // the hard way
1716 }
1717
1718
1719 //=============================================================================
1720
1721 bool Matcher::supports_vector_calling_convention(void) {
1722 if (EnableVectorSupport && UseVectorStubs) {
1723 return true;
1724 }
1725 return false;
1726 }
1727
1728 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1729 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1730 int lo = XMM0_num;
1731 int hi = XMM0b_num;
1732 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1733 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1734 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1735 return OptoRegPair(hi, lo);
1736 }
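// For example, a VecY (256-bit) value spans the eight 32-bit slots XMM0
// through XMM0h, so the returned pair is (XMM0h_num, XMM0_num); wider or
// narrower vectors only move the high bound.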
1737
1738 // Is this branch offset short enough that a short branch can be used?
2113 %}
2114
2115 enc_class enc_cmov(cmpOp cop)
2116 %{
2117 // CMOV
2118 $$$emit8$primary;
2119 emit_cc(cbuf, $secondary, $cop$$cmpcode);
2120 %}
2121
2122 enc_class enc_PartialSubtypeCheck()
2123 %{
2124 Register Rrdi = as_Register(RDI_enc); // result register
2125 Register Rrax = as_Register(RAX_enc); // super class
2126 Register Rrcx = as_Register(RCX_enc); // killed
2127 Register Rrsi = as_Register(RSI_enc); // sub class
2128 Label miss;
2129 const bool set_cond_codes = true;
2130
2131 MacroAssembler _masm(&cbuf);
2132 __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2133 NULL, &miss,
2134 /*set_cond_codes:*/ true);
2135 if ($primary) {
2136 __ xorptr(Rrdi, Rrdi);
2137 }
2138 __ bind(miss);
2139 %}
2140
2141 enc_class clear_avx %{
2142 debug_only(int off0 = cbuf.insts_size());
2143 if (generate_vzeroupper(Compile::current())) {
2145 // Clear upper bits of YMM registers when current compiled code uses
2146 // wide vectors to avoid AVX <-> SSE transition penalty during call.
2147 MacroAssembler _masm(&cbuf);
2148 __ vzeroupper();
2149 }
2150 debug_only(int off1 = cbuf.insts_size());
2151 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
2152 %}
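// For reference: vzeroupper has a fixed 3-byte VEX encoding (C5 F8 77),
// which is the size clear_avx_size() is expected to predict (per the assert
// above) whenever the instruction is generated.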
2153
2175 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
2176 // The NOP here is purely to ensure that eliding a call to
2177 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
2178 __ addr_nop_5();
2179 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
2180 } else {
2181 $$$emit8$primary;
2182 int method_index = resolved_method_index(cbuf);
2183 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
2184 : static_call_Relocation::spec(method_index);
2185 emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2186 rspec, RELOC_DISP32);
2187 address mark = cbuf.insts_mark();
2188 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
2189 // Calls of the same statically bound method can share
2190 // a stub to the interpreter.
2191 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
2192 } else {
2193 // Emit stubs for static call.
2194 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
2195 if (stub == NULL) {
2196 ciEnv::current()->record_failure("CodeCache is full");
2197 return;
2198 }
2199 }
2200 }
2201 _masm.clear_inst_mark();
2202 __ post_call_nop();
2203 %}
2204
2205 enc_class Java_Dynamic_Call(method meth) %{
2206 MacroAssembler _masm(&cbuf);
2207 __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
2208 __ post_call_nop();
2209 %}
2210
2211 enc_class reg_opc_imm(rRegI dst, immI8 shift)
2212 %{
2213 // SAL, SAR, SHR
2214 int dstenc = $dst$$reg;
2215 if (dstenc >= 8) {
3037 operand immI_64()
3038 %{
3039 predicate( n->get_int() == 64 );
3040 match(ConI);
3041
3042 op_cost(0);
3043 format %{ %}
3044 interface(CONST_INTER);
3045 %}
3046
3047 // Pointer Immediate
3048 operand immP()
3049 %{
3050 match(ConP);
3051
3052 op_cost(10);
3053 format %{ %}
3054 interface(CONST_INTER);
3055 %}
3056
3057 // NULL Pointer Immediate
3058 operand immP0()
3059 %{
3060 predicate(n->get_ptr() == 0);
3061 match(ConP);
3062
3063 op_cost(5);
3064 format %{ %}
3065 interface(CONST_INTER);
3066 %}
3067
3068 // Pointer Immediate
3069 operand immN() %{
3070 match(ConN);
3071
3072 op_cost(10);
3073 format %{ %}
3074 interface(CONST_INTER);
3075 %}
3076
3077 operand immNKlass() %{
3078 match(ConNKlass);
3079
3080 op_cost(10);
3081 format %{ %}
3082 interface(CONST_INTER);
3083 %}
3084
3085 // NULL Pointer Immediate
3086 operand immN0() %{
3087 predicate(n->get_narrowcon() == 0);
3088 match(ConN);
3089
3090 op_cost(5);
3091 format %{ %}
3092 interface(CONST_INTER);
3093 %}
3094
3095 operand immP31()
3096 %{
3097 predicate(n->as_Type()->type()->reloc() == relocInfo::none
3098 && (n->get_ptr() >> 31) == 0);
3099 match(ConP);
3100
3101 op_cost(5);
3102 format %{ %}
3103 interface(CONST_INTER);
3104 %}
3105
3977 %}
3978 %}
3979
3980 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
3981 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3982 %{
3983 constraint(ALLOC_IN_RC(ptr_reg));
3984 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3985 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3986
3987 op_cost(10);
3988 format %{"[$reg + $off + $idx << $scale]" %}
3989 interface(MEMORY_INTER) %{
3990 base($reg);
3991 index($idx);
3992 scale($scale);
3993 disp($off);
3994 %}
3995 %}
3996
3997 // Indirect Narrow Oop Plus Offset Operand
3998 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
3999 // so we can't free r12 even with CompressedOops::base() == NULL.
4000 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4001 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
4002 constraint(ALLOC_IN_RC(ptr_reg));
4003 match(AddP (DecodeN reg) off);
4004
4005 op_cost(10);
4006 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4007 interface(MEMORY_INTER) %{
4008 base(0xc); // R12
4009 index($reg);
4010 scale(0x3);
4011 disp($off);
4012 %}
4013 %}
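// Decode sketch (assuming a zero heap base, so R12 holds 0): the effective
// address is 0 + (narrow_oop << 3) + off; e.g. narrow oop 0x00100000 with
// off = 16 resolves to 0x800010.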
4014
4015 // Indirect Memory Operand
4016 operand indirectNarrow(rRegN reg)
4017 %{
4018 predicate(CompressedOops::shift() == 0);
4019 constraint(ALLOC_IN_RC(ptr_reg));
4323 equal(0x4, "e");
4324 not_equal(0x5, "ne");
4325 less(0x2, "b");
4326 greater_equal(0x3, "ae");
4327 less_equal(0x6, "be");
4328 greater(0x7, "a");
4329 overflow(0x0, "o");
4330 no_overflow(0x1, "no");
4331 %}
4332 %}
4333
4334 //----------OPERAND CLASSES----------------------------------------------------
4335 // Operand Classes are groups of operands that are used to simplify
4336 // instruction definitions by not requiring the AD writer to specify separate
4337 // instructions for every form of operand when the instruction accepts
4338 // multiple operand types with the same basic encoding and format. The classic
4339 // case of this is memory operands.
4340
4341 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4342 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
4343 indCompressedOopOffset,
4344 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4345 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4346 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
4347
4348 //----------PIPELINE-----------------------------------------------------------
4349 // Rules which define the behavior of the target architecture's pipeline.
4350 pipeline %{
4351
4352 //----------ATTRIBUTES---------------------------------------------------------
4353 attributes %{
4354 variable_size_instructions; // Variable-size instructions
4355 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
4356 instruction_unit_size = 1; // An instruction is 1 byte long
4357 instruction_fetch_unit_size = 16; // The processor fetches one line
4358 instruction_fetch_units = 1; // of 16 bytes
4359
4360 // List of nop instructions
4361 nops( MachNop );
4362 %}
4363
5823 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
5824 ins_encode %{
5825 __ movl($dst$$Register, $src$$constant);
5826 %}
5827 ins_pipe(ialu_reg);
5828 %}
5829
5830 instruct loadConF(regF dst, immF con) %{
5831 match(Set dst con);
5832 ins_cost(125);
5833 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5834 ins_encode %{
5835 __ movflt($dst$$XMMRegister, $constantaddress($con));
5836 %}
5837 ins_pipe(pipe_slow);
5838 %}
5839
5840 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
5841 match(Set dst src);
5842 effect(KILL cr);
5843 format %{ "xorq $dst, $dst\t# compressed NULL ptr" %}
5844 ins_encode %{
5845 __ xorq($dst$$Register, $dst$$Register);
5846 %}
5847 ins_pipe(ialu_reg);
5848 %}
5849
5850 instruct loadConN(rRegN dst, immN src) %{
5851 match(Set dst src);
5852
5853 ins_cost(125);
5854 format %{ "movl $dst, $src\t# compressed ptr" %}
5855 ins_encode %{
5856 address con = (address)$src$$constant;
5857 if (con == NULL) {
5858 ShouldNotReachHere();
5859 } else {
5860 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5861 }
5862 %}
5863 ins_pipe(ialu_reg_fat); // XXX
5864 %}
5865
5866 instruct loadConNKlass(rRegN dst, immNKlass src) %{
5867 match(Set dst src);
5868
5869 ins_cost(125);
5870 format %{ "movl $dst, $src\t# compressed klass ptr" %}
5871 ins_encode %{
5872 address con = (address)$src$$constant;
5873 if (con == NULL) {
5874 ShouldNotReachHere();
5875 } else {
5876 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
5877 }
5878 %}
5879 ins_pipe(ialu_reg_fat); // XXX
5880 %}
5881
5882 instruct loadConF0(regF dst, immF0 src)
5883 %{
5884 match(Set dst src);
5885 ins_cost(100);
5886
5887 format %{ "xorps $dst, $dst\t# float 0.0" %}
5888 ins_encode %{
5889 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5890 %}
5891 ins_pipe(pipe_slow);
5892 %}
5893
6076 %}
6077 ins_pipe(ialu_mem_reg); // XXX
6078 %}
6079
6080 // Store Pointer
6081 instruct storeP(memory mem, any_RegP src)
6082 %{
6083 predicate(n->as_Store()->barrier_data() == 0);
6084 match(Set mem (StoreP mem src));
6085
6086 ins_cost(125); // XXX
6087 format %{ "movq $mem, $src\t# ptr" %}
6088 ins_encode %{
6089 __ movq($mem$$Address, $src$$Register);
6090 %}
6091 ins_pipe(ialu_mem_reg);
6092 %}
6093
6094 instruct storeImmP0(memory mem, immP0 zero)
6095 %{
6096 predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
6097 match(Set mem (StoreP mem zero));
6098
6099 ins_cost(125); // XXX
6100 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
6101 ins_encode %{
6102 __ movq($mem$$Address, r12);
6103 %}
6104 ins_pipe(ialu_mem_reg);
6105 %}
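// Why R12 works here: with CompressedOops::base() == NULL the heap-base
// register R12 holds zero, so it doubles as a zero register; x86-64 has no
// store of a 64-bit immediate, so "movq $mem, r12" is both legal and shorter
// than materializing 0 another way. The same trick recurs in the storeImm*0
// rules below.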
6106
6107 // Store NULL Pointer, mark word, or other simple pointer constant.
6108 instruct storeImmP(memory mem, immP31 src)
6109 %{
6110 predicate(n->as_Store()->barrier_data() == 0);
6111 match(Set mem (StoreP mem src));
6112
6113 ins_cost(150); // XXX
6114 format %{ "movq $mem, $src\t# ptr" %}
6115 ins_encode %{
6116 __ movq($mem$$Address, $src$$constant);
6117 %}
6118 ins_pipe(ialu_mem_imm);
6119 %}
6120
6121 // Store Compressed Pointer
6122 instruct storeN(memory mem, rRegN src)
6123 %{
6124 match(Set mem (StoreN mem src));
6125
6126 ins_cost(125); // XXX
6127 format %{ "movl $mem, $src\t# compressed ptr" %}
6128 ins_encode %{
6129 __ movl($mem$$Address, $src$$Register);
6130 %}
6131 ins_pipe(ialu_mem_reg);
6132 %}
6133
6134 instruct storeNKlass(memory mem, rRegN src)
6135 %{
6136 match(Set mem (StoreNKlass mem src));
6137
6138 ins_cost(125); // XXX
6139 format %{ "movl $mem, $src\t# compressed klass ptr" %}
6140 ins_encode %{
6141 __ movl($mem$$Address, $src$$Register);
6142 %}
6143 ins_pipe(ialu_mem_reg);
6144 %}
6145
6146 instruct storeImmN0(memory mem, immN0 zero)
6147 %{
6148 predicate(CompressedOops::base() == NULL);
6149 match(Set mem (StoreN mem zero));
6150
6151 ins_cost(125); // XXX
6152 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6153 ins_encode %{
6154 __ movl($mem$$Address, r12);
6155 %}
6156 ins_pipe(ialu_mem_reg);
6157 %}
6158
6159 instruct storeImmN(memory mem, immN src)
6160 %{
6161 match(Set mem (StoreN mem src));
6162
6163 ins_cost(150); // XXX
6164 format %{ "movl $mem, $src\t# compressed ptr" %}
6165 ins_encode %{
6166 address con = (address)$src$$constant;
6167 if (con == NULL) {
6168 __ movl($mem$$Address, 0);
6169 } else {
6170 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6171 }
6172 %}
6173 ins_pipe(ialu_mem_imm);
6174 %}
6175
6176 instruct storeImmNKlass(memory mem, immNKlass src)
6177 %{
6178 match(Set mem (StoreNKlass mem src));
6179
6180 ins_cost(150); // XXX
6181 format %{ "movl $mem, $src\t# compressed klass ptr" %}
6182 ins_encode %{
6183 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
6184 %}
6185 ins_pipe(ialu_mem_imm);
6186 %}
6187
6188 // Store Integer Immediate
6189 instruct storeImmI0(memory mem, immI_0 zero)
6190 %{
6191 predicate(UseCompressedOops && (CompressedOops::base() == NULL));
6192 match(Set mem (StoreI mem zero));
6193
6194 ins_cost(125); // XXX
6195 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
6196 ins_encode %{
6197 __ movl($mem$$Address, r12);
6198 %}
6199 ins_pipe(ialu_mem_reg);
6200 %}
6201
6202 instruct storeImmI(memory mem, immI src)
6203 %{
6204 match(Set mem (StoreI mem src));
6205
6206 ins_cost(150);
6207 format %{ "movl $mem, $src\t# int" %}
6208 ins_encode %{
6209 __ movl($mem$$Address, $src$$constant);
6210 %}
6211 ins_pipe(ialu_mem_imm);
6212 %}
6213
6214 // Store Long Immediate
6215 instruct storeImmL0(memory mem, immL0 zero)
6216 %{
6217 predicate(UseCompressedOops && (CompressedOops::base() == NULL));
6218 match(Set mem (StoreL mem zero));
6219
6220 ins_cost(125); // XXX
6221 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
6222 ins_encode %{
6223 __ movq($mem$$Address, r12);
6224 %}
6225 ins_pipe(ialu_mem_reg);
6226 %}
6227
6228 instruct storeImmL(memory mem, immL32 src)
6229 %{
6230 match(Set mem (StoreL mem src));
6231
6232 ins_cost(150);
6233 format %{ "movq $mem, $src\t# long" %}
6234 ins_encode %{
6235 __ movq($mem$$Address, $src$$constant);
6236 %}
6237 ins_pipe(ialu_mem_imm);
6238 %}
6239
6240 // Store Short/Char Immediate
6241 instruct storeImmC0(memory mem, immI_0 zero)
6242 %{
6243 predicate(UseCompressedOops && (CompressedOops::base() == NULL));
6244 match(Set mem (StoreC mem zero));
6245
6246 ins_cost(125); // XXX
6247 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
6248 ins_encode %{
6249 __ movw($mem$$Address, r12);
6250 %}
6251 ins_pipe(ialu_mem_reg);
6252 %}
6253
6254 instruct storeImmI16(memory mem, immI16 src)
6255 %{
6256 predicate(UseStoreImmI16);
6257 match(Set mem (StoreC mem src));
6258
6259 ins_cost(150);
6260 format %{ "movw $mem, $src\t# short/char" %}
6261 ins_encode %{
6262 __ movw($mem$$Address, $src$$constant);
6263 %}
6264 ins_pipe(ialu_mem_imm);
6265 %}
6266
6267 // Store Byte Immediate
6268 instruct storeImmB0(memory mem, immI_0 zero)
6269 %{
6270 predicate(UseCompressedOops && (CompressedOops::base() == NULL));
6271 match(Set mem (StoreB mem zero));
6272
6273 ins_cost(125); // XXX
6274 format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
6275 ins_encode %{
6276 __ movb($mem$$Address, r12);
6277 %}
6278 ins_pipe(ialu_mem_reg);
6279 %}
6280
6281 instruct storeImmB(memory mem, immI8 src)
6282 %{
6283 match(Set mem (StoreB mem src));
6284
6285 ins_cost(150); // XXX
6286 format %{ "movb $mem, $src\t# byte" %}
6287 ins_encode %{
6288 __ movb($mem$$Address, $src$$constant);
6289 %}
6290 ins_pipe(ialu_mem_imm);
6291 %}
6292
6293 // Store CMS card-mark Immediate
6294 instruct storeImmCM0_reg(memory mem, immI_0 zero)
6295 %{
6296 predicate(UseCompressedOops && (CompressedOops::base() == NULL));
6297 match(Set mem (StoreCM mem zero));
6298
6299 ins_cost(125); // XXX
6300 format %{ "movb $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6301 ins_encode %{
6302 __ movb($mem$$Address, r12);
6303 %}
6304 ins_pipe(ialu_mem_reg);
6305 %}
6306
6307 instruct storeImmCM0(memory mem, immI_0 src)
6308 %{
6309 match(Set mem (StoreCM mem src));
6310
6311 ins_cost(150); // XXX
6312 format %{ "movb $mem, $src\t# CMS card-mark byte 0" %}
6313 ins_encode %{
6314 __ movb($mem$$Address, $src$$constant);
6315 %}
6316 ins_pipe(ialu_mem_imm);
6317 %}
6318
6319 // Store Float
6320 instruct storeF(memory mem, regF src)
6321 %{
6322 match(Set mem (StoreF mem src));
6323
6324 ins_cost(95); // XXX
6325 format %{ "movss $mem, $src\t# float" %}
6326 ins_encode %{
6327 __ movflt($mem$$Address, $src$$XMMRegister);
6328 %}
6329 ins_pipe(pipe_slow); // XXX
6330 %}
6331
6332 // Store immediate Float value (it is faster than store from XMM register)
6333 instruct storeF0(memory mem, immF0 zero)
6334 %{
6335 predicate(UseCompressedOops && (CompressedOops::base() == NULL));
6336 match(Set mem (StoreF mem zero));
6337
6338 ins_cost(25); // XXX
6339 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
6340 ins_encode %{
6341 __ movl($mem$$Address, r12);
6342 %}
6343 ins_pipe(ialu_mem_reg);
6344 %}
6345
6346 instruct storeF_imm(memory mem, immF src)
6347 %{
6348 match(Set mem (StoreF mem src));
6349
6350 ins_cost(50);
6351 format %{ "movl $mem, $src\t# float" %}
6352 ins_encode %{
6353 __ movl($mem$$Address, jint_cast($src$$constant));
6354 %}
6355 ins_pipe(ialu_mem_imm);
6356 %}
6357
6358 // Store Double
6359 instruct storeD(memory mem, regD src)
6360 %{
6361 match(Set mem (StoreD mem src));
6362
6363 ins_cost(95); // XXX
6364 format %{ "movsd $mem, $src\t# double" %}
6365 ins_encode %{
6366 __ movdbl($mem$$Address, $src$$XMMRegister);
6367 %}
6368 ins_pipe(pipe_slow); // XXX
6369 %}
6370
6371 // Store immediate double 0.0 (it is faster than store from XMM register)
6372 instruct storeD0_imm(memory mem, immD0 src)
6373 %{
6374 predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
6375 match(Set mem (StoreD mem src));
6376
6377 ins_cost(50);
6378 format %{ "movq $mem, $src\t# double 0." %}
6379 ins_encode %{
6380 __ movq($mem$$Address, $src$$constant);
6381 %}
6382 ins_pipe(ialu_mem_imm);
6383 %}
6384
6385 instruct storeD0(memory mem, immD0 zero)
6386 %{
6387 predicate(UseCompressedOops && (CompressedOops::base() == NULL));
6388 match(Set mem (StoreD mem zero));
6389
6390 ins_cost(25); // XXX
6391 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
6392 ins_encode %{
6393 __ movq($mem$$Address, r12);
6394 %}
6395 ins_pipe(ialu_mem_reg);
6396 %}
6397
6398 instruct storeSSI(stackSlotI dst, rRegI src)
6399 %{
6400 match(Set dst src);
6401
6402 ins_cost(100);
6403 format %{ "movl $dst, $src\t# int stk" %}
6404 opcode(0x89);
6405 ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6406 ins_pipe( ialu_mem_reg );
6407 %}
6911 format %{ "MEMBAR-storestore (empty encoding)" %}
6912 ins_encode( );
6913 ins_pipe(empty);
6914 %}
6915
6916 //----------Move Instructions--------------------------------------------------
6917
6918 instruct castX2P(rRegP dst, rRegL src)
6919 %{
6920 match(Set dst (CastX2P src));
6921
6922 format %{ "movq $dst, $src\t# long->ptr" %}
6923 ins_encode %{
6924 if ($dst$$reg != $src$$reg) {
6925 __ movptr($dst$$Register, $src$$Register);
6926 }
6927 %}
6928 ins_pipe(ialu_reg_reg); // XXX
6929 %}
6930
6931 instruct castP2X(rRegL dst, rRegP src)
6932 %{
6933 match(Set dst (CastP2X src));
6934
6935 format %{ "movq $dst, $src\t# ptr -> long" %}
6936 ins_encode %{
6937 if ($dst$$reg != $src$$reg) {
6938 __ movptr($dst$$Register, $src$$Register);
6939 }
6940 %}
6941 ins_pipe(ialu_reg_reg); // XXX
6942 %}
6943
6944 // Convert oop into int for vector alignment masking
6945 instruct convP2I(rRegI dst, rRegP src)
6946 %{
6947 match(Set dst (ConvL2I (CastP2X src)));
6948
6949 format %{ "movl $dst, $src\t# ptr -> int" %}
6950 ins_encode %{
11509 effect(DEF dst, USE src);
11510 ins_cost(100);
11511 format %{ "movd $dst,$src\t# MoveI2F" %}
11512 ins_encode %{
11513 __ movdl($dst$$XMMRegister, $src$$Register);
11514 %}
11515 ins_pipe( pipe_slow );
11516 %}
11517
11518 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11519 match(Set dst (MoveL2D src));
11520 effect(DEF dst, USE src);
11521 ins_cost(100);
11522 format %{ "movd $dst,$src\t# MoveL2D" %}
11523 ins_encode %{
11524 __ movdq($dst$$XMMRegister, $src$$Register);
11525 %}
11526 ins_pipe( pipe_slow );
11527 %}
11528
11529 // Fast clearing of an array
11530 // Small ClearArray non-AVX512.
11531 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11532 Universe dummy, rFlagsReg cr)
11533 %{
11534 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11535 match(Set dummy (ClearArray cnt base));
11536 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11537
11538 format %{ $$template
11539 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11540 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
11541 $$emit$$"jg LARGE\n\t"
11542 $$emit$$"dec rcx\n\t"
11543 $$emit$$"js DONE\t# Zero length\n\t"
11544 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
11545 $$emit$$"dec rcx\n\t"
11546 $$emit$$"jge LOOP\n\t"
11547 $$emit$$"jmp DONE\n\t"
11548 $$emit$$"# LARGE:\n\t"
11549 if (UseFastStosb) {
11550 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11551 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
11552 } else if (UseXMMForObjInit) {
11553 $$emit$$"mov rdi,rax\n\t"
11554 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
11555 $$emit$$"jmpq L_zero_64_bytes\n\t"
11556 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11564 $$emit$$"jl L_tail\n\t"
11565 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11566 $$emit$$"add 0x20,rax\n\t"
11567 $$emit$$"sub 0x4,rcx\n\t"
11568 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11569 $$emit$$"add 0x4,rcx\n\t"
11570 $$emit$$"jle L_end\n\t"
11571 $$emit$$"dec rcx\n\t"
11572 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11573 $$emit$$"vmovq xmm0,(rax)\n\t"
11574 $$emit$$"add 0x8,rax\n\t"
11575 $$emit$$"dec rcx\n\t"
11576 $$emit$$"jge L_sloop\n\t"
11577 $$emit$$"# L_end:\n\t"
11578 } else {
11579 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
11580 }
11581 $$emit$$"# DONE"
11582 %}
11583 ins_encode %{
11584 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11585 $tmp$$XMMRegister, false, knoreg);
11586 %}
11587 ins_pipe(pipe_slow);
11588 %}
11589
11590 // Small ClearArray AVX512 non-constant length.
11591 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11592 Universe dummy, rFlagsReg cr)
11593 %{
11594 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11595 match(Set dummy (ClearArray cnt base));
11596 ins_cost(125);
11597 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11598
11599 format %{ $$template
11600 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11601 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
11602 $$emit$$"jg LARGE\n\t"
11603 $$emit$$"dec rcx\n\t"
11604 $$emit$$"js DONE\t# Zero length\n\t"
11605 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
11606 $$emit$$"dec rcx\n\t"
11607 $$emit$$"jge LOOP\n\t"
11608 $$emit$$"jmp DONE\n\t"
11609 $$emit$$"# LARGE:\n\t"
11610 if (UseFastStosb) {
11611 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11612 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
11613 } else if (UseXMMForObjInit) {
11614 $$emit$$"mov rdi,rax\n\t"
11615 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
11616 $$emit$$"jmpq L_zero_64_bytes\n\t"
11617 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11625 $$emit$$"jl L_tail\n\t"
11626 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11627 $$emit$$"add 0x20,rax\n\t"
11628 $$emit$$"sub 0x4,rcx\n\t"
11629 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11630 $$emit$$"add 0x4,rcx\n\t"
11631 $$emit$$"jle L_end\n\t"
11632 $$emit$$"dec rcx\n\t"
11633 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11634 $$emit$$"vmovq xmm0,(rax)\n\t"
11635 $$emit$$"add 0x8,rax\n\t"
11636 $$emit$$"dec rcx\n\t"
11637 $$emit$$"jge L_sloop\n\t"
11638 $$emit$$"# L_end:\n\t"
11639 } else {
11640 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
11641 }
11642 $$emit$$"# DONE"
11643 %}
11644 ins_encode %{
11645 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11646 $tmp$$XMMRegister, false, $ktmp$$KRegister);
11647 %}
11648 ins_pipe(pipe_slow);
11649 %}
11650
11651 // Large ClearArray non-AVX512.
11652 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11653 Universe dummy, rFlagsReg cr)
11654 %{
11655 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11656 match(Set dummy (ClearArray cnt base));
11657 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11658
11659 format %{ $$template
11660 if (UseFastStosb) {
11661 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11662 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11663 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
11664 } else if (UseXMMForObjInit) {
11665 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
11666 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
11667 $$emit$$"jmpq L_zero_64_bytes\n\t"
11668 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11669 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11670 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11671 $$emit$$"add 0x40,rax\n\t"
11672 $$emit$$"# L_zero_64_bytes:\n\t"
11673 $$emit$$"sub 0x8,rcx\n\t"
11674 $$emit$$"jge L_loop\n\t"
11675 $$emit$$"add 0x4,rcx\n\t"
11676 $$emit$$"jl L_tail\n\t"
11677 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11678 $$emit$$"add 0x20,rax\n\t"
11679 $$emit$$"sub 0x4,rcx\n\t"
11680 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11681 $$emit$$"add 0x4,rcx\n\t"
11682 $$emit$$"jle L_end\n\t"
11683 $$emit$$"dec rcx\n\t"
11684 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11685 $$emit$$"vmovq xmm0,(rax)\n\t"
11686 $$emit$$"add 0x8,rax\n\t"
11687 $$emit$$"dec rcx\n\t"
11688 $$emit$$"jge L_sloop\n\t"
11689 $$emit$$"# L_end:\n\t"
11690 } else {
11691 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11692 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11693 }
11694 %}
11695 ins_encode %{
11696 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11697 $tmp$$XMMRegister, true, knoreg);
11698 %}
11699 ins_pipe(pipe_slow);
11700 %}
11701
11702 // Large ClearArray AVX512.
11703 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11704 Universe dummy, rFlagsReg cr)
11705 %{
11706 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11707 match(Set dummy (ClearArray cnt base));
11708 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11709
11710 format %{ $$template
11711 if (UseFastStosb) {
11712 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11713 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11714 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
11715 } else if (UseXMMForObjInit) {
11716 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
11717 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
11718 $$emit$$"jmpq L_zero_64_bytes\n\t"
11719 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11720 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11721 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11722 $$emit$$"add 0x40,rax\n\t"
11723 $$emit$$"# L_zero_64_bytes:\n\t"
11724 $$emit$$"sub 0x8,rcx\n\t"
11725 $$emit$$"jge L_loop\n\t"
11726 $$emit$$"add 0x4,rcx\n\t"
11727 $$emit$$"jl L_tail\n\t"
11728 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11729 $$emit$$"add 0x20,rax\n\t"
11730 $$emit$$"sub 0x4,rcx\n\t"
11731 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11732 $$emit$$"add 0x4,rcx\n\t"
11733 $$emit$$"jle L_end\n\t"
11734 $$emit$$"dec rcx\n\t"
11735 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11736 $$emit$$"vmovq xmm0,(rax)\n\t"
11737 $$emit$$"add 0x8,rax\n\t"
11738 $$emit$$"dec rcx\n\t"
11739 $$emit$$"jge L_sloop\n\t"
11740 $$emit$$"# L_end:\n\t"
11741 } else {
11742 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11743 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11744 }
11745 %}
11746 ins_encode %{
11747 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11748 $tmp$$XMMRegister, true, $ktmp$$KRegister);
11749 %}
11750 ins_pipe(pipe_slow);
11751 %}
11752
11753 // Small ClearArray AVX512 constant length.
11754 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
11755 %{
11756 predicate(!((ClearArrayNode*)n)->is_large() &&
11757 ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11758 match(Set dummy (ClearArray cnt base));
11759 ins_cost(100);
11760 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11761 format %{ "clear_mem_imm $base, $cnt\n\t" %}
11762 ins_encode %{
11763 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11764 %}
11765 ins_pipe(pipe_slow);
11766 %}
11767
11768 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11769 rax_RegI result, legRegD tmp1, rFlagsReg cr)
11770 %{
11771 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11772 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11773 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11774
11775 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11776 ins_encode %{
11777 __ string_compare($str1$$Register, $str2$$Register,
11778 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11779 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11780 %}
11781 ins_pipe( pipe_slow );
11782 %}
11783
12674 ins_pipe(ialu_cr_reg_mem);
12675 %}
12676
12677 // This will generate a signed flags result. This should be OK since
12678 // any compare to a zero should be eq/neq.
12679 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12680 %{
12681 match(Set cr (CmpP src zero));
12682
12683 format %{ "testq $src, $src\t# ptr" %}
12684 ins_encode %{
12685 __ testq($src$$Register, $src$$Register);
12686 %}
12687 ins_pipe(ialu_cr_reg_imm);
12688 %}
12689
12690 // This will generate a signed flags result. This should be OK since
12691 // any compare to a zero should be eq/neq.
12692 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12693 %{
12694 predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
12695 n->in(1)->as_Load()->barrier_data() == 0);
12696 match(Set cr (CmpP (LoadP op) zero));
12697
12698 ins_cost(500); // XXX
12699 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
12700 ins_encode %{
12701 __ testq($op$$Address, 0xFFFFFFFF);
12702 %}
12703 ins_pipe(ialu_cr_reg_imm);
12704 %}
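// Note on the mask: TEST r/m64, imm32 sign-extends its immediate, so the
// 0xFFFFFFFF passed to testq() denotes the full 64-bit all-ones mask shown
// in the format string.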
12705
12706 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12707 %{
12708 predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
12709 n->in(1)->as_Load()->barrier_data() == 0);
12710 match(Set cr (CmpP (LoadP mem) zero));
12711
12712 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
12713 ins_encode %{
12714 __ cmpq(r12, $mem$$Address);
12715 %}
12716 ins_pipe(ialu_cr_reg_mem);
12717 %}
12718
12719 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12720 %{
12721 match(Set cr (CmpN op1 op2));
12722
12723 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
12724 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12725 ins_pipe(ialu_cr_reg_reg);
12726 %}
12727
12728 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12771 %{
12772 match(Set cr (CmpN src (LoadNKlass mem)));
12773
12774 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
12775 ins_encode %{
12776 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
12777 %}
12778 ins_pipe(ialu_cr_reg_mem);
12779 %}
12780
12781 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12782 match(Set cr (CmpN src zero));
12783
12784 format %{ "testl $src, $src\t# compressed ptr" %}
12785 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12786 ins_pipe(ialu_cr_reg_imm);
12787 %}
12788
12789 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12790 %{
12791 predicate(CompressedOops::base() != NULL);
12792 match(Set cr (CmpN (LoadN mem) zero));
12793
12794 ins_cost(500); // XXX
12795 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
12796 ins_encode %{
12797 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
12798 %}
12799 ins_pipe(ialu_cr_reg_mem);
12800 %}
12801
12802 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12803 %{
12804 predicate(CompressedOops::base() == NULL);
12805 match(Set cr (CmpN (LoadN mem) zero));
12806
12807 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12808 ins_encode %{
12809 __ cmpl(r12, $mem$$Address);
12810 %}
12811 ins_pipe(ialu_cr_reg_mem);
12812 %}
12813
12814 // Yanked all unsigned pointer compare operations.
12815 // Pointer compares are done with CmpP which is already unsigned.
12816
12817 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12818 %{
12819 match(Set cr (CmpL op1 op2));
12820
12821 format %{ "cmpq $op1, $op2" %}
12822 ins_encode %{
12823 __ cmpq($op1$$Register, $op2$$Register);
12824 %}
13535
13536 ins_cost(300);
13537 format %{ "call_leaf,runtime " %}
13538 ins_encode(clear_avx, Java_To_Runtime(meth));
13539 ins_pipe(pipe_slow);
13540 %}
13541
13542 // Call runtime without safepoint and with vector arguments
13543 instruct CallLeafDirectVector(method meth)
13544 %{
13545 match(CallLeafVector);
13546 effect(USE meth);
13547
13548 ins_cost(300);
13549 format %{ "call_leaf,vector " %}
13550 ins_encode(Java_To_Runtime(meth));
13551 ins_pipe(pipe_slow);
13552 %}
13553
13554 // Call runtime without safepoint
13555 instruct CallLeafNoFPDirect(method meth)
13556 %{
13557 match(CallLeafNoFP);
13558 effect(USE meth);
13559
13560 ins_cost(300);
13561 format %{ "call_leaf_nofp,runtime " %}
13562 ins_encode(clear_avx, Java_To_Runtime(meth));
13563 ins_pipe(pipe_slow);
13564 %}
13565
13566 // Return Instruction
13567 // Remove the return address & jump to it.
13568 // Notice: We always emit a nop after a ret to make sure there is room
13569 // for safepoint patching
13570 instruct Ret()
13571 %{
13572 match(Return);
13573
13574 format %{ "ret" %}
13575 ins_encode %{
13576 __ ret(0);
473 }
474
475 // !!!!! Special hack to get all types of calls to specify the byte offset
476 // from the start of the call to the point where the return address
477 // will point.
478 int MachCallStaticJavaNode::ret_addr_offset()
479 {
480 int offset = 5; // 5 bytes from start of call to where return address points
481 offset += clear_avx_size();
482 return offset;
483 }
484
485 int MachCallDynamicJavaNode::ret_addr_offset()
486 {
487 int offset = 15; // 15 bytes from start of call to where return address points
488 offset += clear_avx_size();
489 return offset;
490 }
491
492 int MachCallRuntimeNode::ret_addr_offset() {
493 if (_entry_point == nullptr) {
494 // CallLeafNoFPInDirect
495 return 3; // callq (register)
496 }
497 int offset = 13; // movq r10,#addr; callq (r10)
498 if (this->ideal_Opcode() != Op_CallLeafVector) {
499 offset += clear_avx_size();
500 }
501 return offset;
502 }
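// The 3-byte case is a bare register-indirect call, e.g. 41 FF D2 for
// callq (r10) (REX prefix + FF /2) with no preceding movq of the target.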
503
504 //
505 // Compute padding required for nodes which need alignment
506 //
507
508 // The address of the call instruction needs to be 4-byte aligned to
509 // ensure that it does not span a cache line so that it can be patched.
510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
511 {
512 current_offset += clear_avx_size(); // skip vzeroupper
513 current_offset += 1; // skip call opcode byte
514 return align_up(current_offset, alignment_required()) - current_offset;
515 }
516
517 // The address of the call instruction needs to be 4-byte aligned to
518 // ensure that it does not span a cache line so that it can be patched.
519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
520 {
521 current_offset += clear_avx_size(); // skip vzeroupper
522 current_offset += 11; // skip movq instruction + call opcode byte
523 return align_up(current_offset, alignment_required()) - current_offset;
875 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
876 if (PreserveFramePointer) {
877 st->print("\n\t");
878 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
879 if (framesize > 0) {
880 st->print("\n\t");
881 st->print("addq rbp, #%d", framesize);
882 }
883 }
884 }
885
886 if (VerifyStackAtCalls) {
887 st->print("\n\t");
888 framesize -= wordSize;
889 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
890 #ifdef ASSERT
891 st->print("\n\t");
892 st->print("# stack alignment check");
893 #endif
894 }
895 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
896 st->print("\n\t");
897 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
898 st->print("\n\t");
899 st->print("je fast_entry\t");
900 st->print("\n\t");
901 st->print("call #nmethod_entry_barrier_stub\t");
902 st->print("\n\tfast_entry:");
903 }
904 st->cr();
905 }
906 #endif
907
908 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
909 Compile* C = ra_->C;
910 C2_MacroAssembler _masm(&cbuf);
911
912 __ verified_entry(C);
913
914 if (ra_->C->stub_function() == nullptr) {
915 __ entry_barrier();
916 }
917
918 if (!Compile::current()->output()->in_scratch_emit_size()) {
919 __ bind(*_verified_entry);
920 }
921
922 C->output()->set_frame_complete(cbuf.insts_size());
923
924 if (C->has_mach_constant_base_node()) {
925 // NOTE: We set the table base offset here because code that uses the
926 // constant table might be emitted before MachConstantBaseNode.
927 ConstantTable& constant_table = C->output()->constant_table();
928 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
929 }
930 }
931
932 int MachPrologNode::reloc() const
933 {
934 return 0; // a large enough number
935 }
936
937 //=============================================================================
938 #ifndef PRODUCT
939 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
940 {
941 Compile* C = ra_->C;
942 if (generate_vzeroupper(C)) {
943 st->print("vzeroupper");
944 st->cr(); st->print("\t");
945 }
946
947 int framesize = C->output()->frame_size_in_bytes();
948 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
949 // Remove two words for the return address (already pushed)
950 // and RBP
951 framesize -= 2*wordSize;
959 if (do_polling() && C->is_method_compilation()) {
960 st->print("\t");
961 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
962 "ja #safepoint_stub\t"
963 "# Safepoint: poll for GC");
964 }
965 }
966 #endif
967
968 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
969 {
970 Compile* C = ra_->C;
971 MacroAssembler _masm(&cbuf);
972
973 if (generate_vzeroupper(C)) {
974 // Clear upper bits of YMM registers when current compiled code uses
975 // wide vectors to avoid AVX <-> SSE transition penalty during call.
976 __ vzeroupper();
977 }
978
979 // Subtract two words to account for return address and rbp
980 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
981 __ remove_frame(initial_framesize, C->needs_stack_repair());
982
983 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
984 __ reserved_stack_check();
985 }
986
987 if (do_polling() && C->is_method_compilation()) {
988 MacroAssembler _masm(&cbuf);
989 Label dummy_label;
990 Label* code_stub = &dummy_label;
991 if (!C->output()->in_scratch_emit_size()) {
992 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
993 C->output()->add_stub(stub);
994 code_stub = &stub->entry();
995 }
996 __ relocate(relocInfo::poll_return_type);
997 __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
998 }
999 }
1000
1001 int MachEpilogNode::reloc() const
1002 {
1003 return 2; // a large enough number
1004 }
1005
1006 const Pipeline* MachEpilogNode::pipeline() const
1007 {
1008 return MachNode::pipeline_class();
1009 }
1010
1011 //=============================================================================
1012
1013 enum RC {
1014 rc_bad,
1015 rc_int,
1016 rc_kreg,
1017 rc_float,
1018 rc_stack
1019 };
1020
1107 src_offset, dst_offset);
1108 break;
1109 case Op_VecZ:
1110 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1111 "vmovdqu xmm0, [rsp + #%d]\n\t"
1112 "vmovdqu [rsp + #%d], xmm0\n\t"
1113 "vmovdqu xmm0, [rsp - #64]",
1114 src_offset, dst_offset);
1115 break;
1116 default:
1117 ShouldNotReachHere();
1118 }
1119 #endif
1120 }
1121 }
1122
1123 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1124 PhaseRegAlloc* ra_,
1125 bool do_size,
1126 outputStream* st) const {
1127 assert(cbuf != nullptr || st != nullptr, "sanity");
1128 // Get registers to move
1129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
1130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
1131 OptoReg::Name dst_second = ra_->get_reg_second(this);
1132 OptoReg::Name dst_first = ra_->get_reg_first(this);
1133
1134 enum RC src_second_rc = rc_class(src_second);
1135 enum RC src_first_rc = rc_class(src_first);
1136 enum RC dst_second_rc = rc_class(dst_second);
1137 enum RC dst_first_rc = rc_class(dst_first);
1138
1139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1140 "must move at least 1 register" );
1141
1142 if (src_first == dst_first && src_second == dst_second) {
1143 // Self copy, no move
1144 return 0;
1145 }
1146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
1147 uint ireg = ideal_reg();
1148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1150 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1151 // mem -> mem
1152 int src_offset = ra_->reg2offset(src_first);
1153 int dst_offset = ra_->reg2offset(dst_first);
1154 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1155 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1156 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
1157 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1158 int stack_offset = ra_->reg2offset(dst_first);
1159 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
1160 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1161 int stack_offset = ra_->reg2offset(src_first);
1162 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
1163 } else {
1164 ShouldNotReachHere();
1165 }
1166 return 0;
1565 st->print("kmovq %s, %s\t# spill",
1566 Matcher::regName[dst_first],
1567 Matcher::regName[src_first]);
1568 #endif
1569 }
1570 }
1571 return 0;
1572 } else if (dst_first_rc == rc_float) {
1573 assert(false, "Illegal spill");
1574 return 0;
1575 }
1576 }
1577
1578 assert(0," foo ");
1579 Unimplemented();
1580 return 0;
1581 }
1582
1583 #ifndef PRODUCT
1584 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1585 implementation(nullptr, ra_, false, st);
1586 }
1587 #endif
1588
1589 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1590 implementation(&cbuf, ra_, false, nullptr);
1591 }
1592
1593 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1594 return MachNode::size(ra_);
1595 }
1596
1597 //=============================================================================
1598 #ifndef PRODUCT
1599 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1600 {
1601 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1602 int reg = ra_->get_reg_first(this);
1603 st->print("leaq %s, [rsp + #%d]\t# box lock",
1604 Matcher::regName[reg], offset);
1605 }
1606 #endif
1607
1608 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1609 {
1610   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1611   int reg = ra_->get_encode(this);
1612   if (offset >= 0x80) {
1613     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1614 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1615 emit_rm(cbuf, 0x2, reg & 7, 0x04);
1616 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1617 emit_d32(cbuf, offset);
1618 } else {
1619 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1620 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1621 emit_rm(cbuf, 0x1, reg & 7, 0x04);
1622 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1623 emit_d8(cbuf, offset);
1624 }
1625 }
1626
1627 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1628 {
1629 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1630 return (offset < 0x80) ? 5 : 8; // REX
1631 }
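// Illustrative size check (editor's sketch): the LEA emitted above is
//   REX prefix (1) + opcode 0x8D (1) + ModRM (1) + SIB (1) + disp8 (1) = 5 bytes
// when the offset fits in a signed byte, and
//   REX (1) + 0x8D (1) + ModRM (1) + SIB (1) + disp32 (4) = 8 bytes
// otherwise, matching the (offset < 0x80) ? 5 : 8 computation.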
1632
1633 //=============================================================================
1634 #ifndef PRODUCT
1635 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1636 {
1637 st->print_cr("MachVEPNode");
1638 }
1639 #endif
1640
1641 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1642 {
1643 C2_MacroAssembler _masm(&cbuf);
1644 uint insts_size = cbuf.insts_size();
1645 if (!_verified) {
1646 if (UseCompressedClassPointers) {
1647 __ load_klass(rscratch1, j_rarg0, rscratch2);
1648 __ cmpptr(rax, rscratch1);
1649 } else {
1650 __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1651 }
1652 __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1653 } else {
1654 // TODO 8284443 Avoid creation of temporary frame
1655 if (ra_->C->stub_function() == nullptr) {
1656 __ verified_entry(ra_->C, 0);
1657 __ entry_barrier();
1658 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
1659 __ remove_frame(initial_framesize, false);
1660 }
1661 // Unpack inline type args passed as oop and then jump to
1662 // the verified entry point (skipping the unverified entry).
1663 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
1664 // Emit code for verified entry and save increment for stack repair on return
1665 __ verified_entry(ra_->C, sp_inc);
1666 if (Compile::current()->output()->in_scratch_emit_size()) {
1667 Label dummy_verified_entry;
1668 __ jmp(dummy_verified_entry);
1669 } else {
1670 __ jmp(*_verified_entry);
1671 }
1672 }
1673   /* WARNING: these NOPs are critical so that the verified entry point is
1674      properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
1675 int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1676 nops_cnt &= 0x3; // Do not add nops if code is aligned.
1677 if (nops_cnt > 0) {
1678 __ nop(nops_cnt);
1679 }
1680 }
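// Worked example of the padding computation above (editor's note): if the code
// emitted since insts_size spans, say, 13 bytes, then
//   nops_cnt = 4 - (13 & 0x3) = 4 - 1 = 3
// and three NOPs are emitted so the verified entry point lands on a 4-byte
// boundary (13 + 3 = 16). A size that is already a multiple of 4 yields
//   nops_cnt = (4 - 0) & 0x3 = 0, i.e. no padding.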
1681
1682 //=============================================================================
1683 #ifndef PRODUCT
1684 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1685 {
1686 if (UseCompressedClassPointers) {
1687 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1688 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1689 st->print_cr("\tcmpq rax, rscratch1\t # Inline cache check");
1690 } else {
1691 st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1692 "# Inline cache check");
1693 }
1694 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1695 st->print_cr("\tnop\t# nops to align entry point");
1696 }
1697 #endif
1698
1699 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1700 {
1701   MacroAssembler masm(&cbuf);
1702   uint insts_size = cbuf.insts_size();
1703   if (UseCompressedClassPointers) {
1704     masm.load_klass(rscratch1, j_rarg0, rscratch2);
1705 masm.cmpptr(rax, rscratch1);
1706 } else {
1707 masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1708 }
1709
1710 masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1711
1712   /* WARNING: these NOPs are critical so that the verified entry point is
1713      properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
1714 int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1715 if (OptoBreakpoint) {
1716 // Leave space for int3
1717 nops_cnt -= 1;
1718 }
1719 nops_cnt &= 0x3; // Do not add nops if code is aligned.
1720 if (nops_cnt > 0)
1721 masm.nop(nops_cnt);
1722 }
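// Editor's note on the inline cache protocol (background, assuming the
// standard HotSpot x86 convention): the caller's inline cache loads the
// expected klass into rax before jumping here, so this unverified entry point
// only has to compare rax against the receiver's klass (receiver in j_rarg0)
// and branch to the ic_miss stub on mismatch. With OptoBreakpoint the padding
// leaves one byte for an int3 so the entry still ends up 4-byte aligned.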
1723
1724 //=============================================================================
1725
1726 bool Matcher::supports_vector_calling_convention(void) {
1727 if (EnableVectorSupport && UseVectorStubs) {
1728 return true;
1729 }
1730 return false;
1731 }
1732
1733 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1734 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1735 int lo = XMM0_num;
1736 int hi = XMM0b_num;
1737 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1738 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1739 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1740 return OptoRegPair(hi, lo);
1741 }
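// Illustrative mapping (editor's sketch) of the pairs returned above:
//   Op_VecS/Op_VecD -> (XMM0b, XMM0)   32/64-bit vector in xmm0
//   Op_VecX         -> (XMM0d, XMM0)   128-bit vector in xmm0
//   Op_VecY         -> (XMM0h, XMM0)   256-bit vector in ymm0
//   Op_VecZ         -> (XMM0p, XMM0)   512-bit vector in zmm0
// i.e. vector values are always returned in xmm0/ymm0/zmm0, with the second
// OptoReg marking how far the live range extends into the wide register.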
1742
1743 // Is this branch offset short enough that a short branch can be used?
2118 %}
2119
2120 enc_class enc_cmov(cmpOp cop)
2121 %{
2122 // CMOV
2123 $$$emit8$primary;
2124 emit_cc(cbuf, $secondary, $cop$$cmpcode);
2125 %}
2126
2127 enc_class enc_PartialSubtypeCheck()
2128 %{
2129 Register Rrdi = as_Register(RDI_enc); // result register
2130 Register Rrax = as_Register(RAX_enc); // super class
2131 Register Rrcx = as_Register(RCX_enc); // killed
2132 Register Rrsi = as_Register(RSI_enc); // sub class
2133 Label miss;
2134     const bool set_cond_codes = true;
2135 
2136     MacroAssembler _masm(&cbuf);
2137     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2138                                      nullptr, &miss,
2139                                      /*set_cond_codes:*/ set_cond_codes);
2140 if ($primary) {
2141 __ xorptr(Rrdi, Rrdi);
2142 }
2143 __ bind(miss);
2144 %}
2145
2146 enc_class clear_avx %{
2147 debug_only(int off0 = cbuf.insts_size());
2148 if (generate_vzeroupper(Compile::current())) {
2150 // Clear upper bits of YMM registers when current compiled code uses
2151 // wide vectors to avoid AVX <-> SSE transition penalty during call.
2152 MacroAssembler _masm(&cbuf);
2153 __ vzeroupper();
2154 }
2155 debug_only(int off1 = cbuf.insts_size());
2156 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
2157 %}
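  // Editor's note (hedged): mixing 256-bit AVX code with legacy SSE encodings
  // can incur a costly state-transition stall on some Intel
  // microarchitectures, which is why a vzeroupper is emitted before calls out
  // of wide-vector code. The assert above relies on clear_avx_size()
  // predicting exactly the bytes emitted here -- e.g. 3 bytes for vzeroupper
  // (C5 F8 77) when it is generated, 0 otherwise.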
2158
2180 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
2181 // The NOP here is purely to ensure that eliding a call to
2182 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
2183 __ addr_nop_5();
2184 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
2185 } else {
2186 $$$emit8$primary;
2187 int method_index = resolved_method_index(cbuf);
2188 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
2189 : static_call_Relocation::spec(method_index);
2190 emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2191 rspec, RELOC_DISP32);
2192 address mark = cbuf.insts_mark();
2193 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
2194 // Calls of the same statically bound method can share
2195 // a stub to the interpreter.
2196 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
2197 } else {
2198 // Emit stubs for static call.
2199 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
2200 if (stub == nullptr) {
2201 ciEnv::current()->record_failure("CodeCache is full");
2202 return;
2203 }
2204 }
2205 }
2206 _masm.clear_inst_mark();
2207 __ post_call_nop();
2208 %}
2209
2210 enc_class Java_Dynamic_Call(method meth) %{
2211 MacroAssembler _masm(&cbuf);
2212 __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
2213 __ post_call_nop();
2214 %}
2215
2216 enc_class reg_opc_imm(rRegI dst, immI8 shift)
2217 %{
2218 // SAL, SAR, SHR
2219 int dstenc = $dst$$reg;
2220 if (dstenc >= 8) {
3042 operand immI_64()
3043 %{
3044   predicate(n->get_int() == 64);
3045 match(ConI);
3046
3047 op_cost(0);
3048 format %{ %}
3049 interface(CONST_INTER);
3050 %}
3051
3052 // Pointer Immediate
3053 operand immP()
3054 %{
3055 match(ConP);
3056
3057 op_cost(10);
3058 format %{ %}
3059 interface(CONST_INTER);
3060 %}
3061
3062 // nullptr Pointer Immediate
3063 operand immP0()
3064 %{
3065 predicate(n->get_ptr() == 0);
3066 match(ConP);
3067
3068 op_cost(5);
3069 format %{ %}
3070 interface(CONST_INTER);
3071 %}
3072
3073 // Pointer Immediate
3074 operand immN() %{
3075 match(ConN);
3076
3077 op_cost(10);
3078 format %{ %}
3079 interface(CONST_INTER);
3080 %}
3081
3082 operand immNKlass() %{
3083 match(ConNKlass);
3084
3085 op_cost(10);
3086 format %{ %}
3087 interface(CONST_INTER);
3088 %}
3089
3090 // nullptr Pointer Immediate
3091 operand immN0() %{
3092 predicate(n->get_narrowcon() == 0);
3093 match(ConN);
3094
3095 op_cost(5);
3096 format %{ %}
3097 interface(CONST_INTER);
3098 %}
3099
3100 operand immP31()
3101 %{
3102 predicate(n->as_Type()->type()->reloc() == relocInfo::none
3103 && (n->get_ptr() >> 31) == 0);
3104 match(ConP);
3105
3106 op_cost(5);
3107 format %{ %}
3108 interface(CONST_INTER);
3109 %}
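// Editor's note (illustrative): immP31 accepts only non-relocatable pointer
// constants whose value fits in 31 bits -- e.g. a markWord pattern or a small
// sentinel -- so instructions such as storeImmP further below can encode them
// as a sign-extended 32-bit immediate of a movq without a scratch register.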
3110
3982 %}
3983 %}
3984
3985 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
3986 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3987 %{
3988 constraint(ALLOC_IN_RC(ptr_reg));
3989 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3990 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3991
3992 op_cost(10);
3993 format %{"[$reg + $off + $idx << $scale]" %}
3994 interface(MEMORY_INTER) %{
3995 base($reg);
3996 index($idx);
3997 scale($scale);
3998 disp($off);
3999 %}
4000 %}
4001
4002 // Indirect Narrow Oop Operand
4003 operand indCompressedOop(rRegN reg) %{
4004 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
4005 constraint(ALLOC_IN_RC(ptr_reg));
4006 match(DecodeN reg);
4007
4008 op_cost(10);
4009 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
4010 interface(MEMORY_INTER) %{
4011 base(0xc); // R12
4012 index($reg);
4013 scale(0x3);
4014 disp(0x0);
4015 %}
4016 %}
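// Worked example (editor's sketch): with the heap base 0x800000000 held in r12
// and a narrow oop value 0x12345, the operand above resolves to
//   0x800000000 + (0x12345 << 3) = 0x800091a28,
// i.e. the DecodeN is folded into the addressing mode instead of being
// materialized as a separate shift/add.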
4017
4018 // Indirect Narrow Oop Plus Offset Operand
4019 // Note: the x86 architecture doesn't support "scale * index + offset" addressing
4020 // without a base register, so we can't free r12 even when CompressedOops::base() == nullptr.
4021 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4022 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
4023 constraint(ALLOC_IN_RC(ptr_reg));
4024 match(AddP (DecodeN reg) off);
4025
4026 op_cost(10);
4027 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4028 interface(MEMORY_INTER) %{
4029 base(0xc); // R12
4030 index($reg);
4031 scale(0x3);
4032 disp($off);
4033 %}
4034 %}
4035
4036 // Indirect Memory Operand
4037 operand indirectNarrow(rRegN reg)
4038 %{
4039 predicate(CompressedOops::shift() == 0);
4040 constraint(ALLOC_IN_RC(ptr_reg));
4344 equal(0x4, "e");
4345 not_equal(0x5, "ne");
4346 less(0x2, "b");
4347 greater_equal(0x3, "ae");
4348 less_equal(0x6, "be");
4349 greater(0x7, "a");
4350 overflow(0x0, "o");
4351 no_overflow(0x1, "no");
4352 %}
4353 %}
4354
4355 //----------OPERAND CLASSES----------------------------------------------------
4356 // Operand Classes are groups of operands that are used to simplify
4357 // instruction definitions by not requiring the AD writer to specify separate
4358 // instructions for every form of operand when the instruction accepts
4359 // multiple operand types with the same basic encoding and format. The classic
4360 // case of this is memory operands.
4361
4362 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4363 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
4364 indCompressedOop, indCompressedOopOffset,
4365 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4366 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4367 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
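// Editor's note (illustrative): an opclass lets a single instruct cover every
// listed addressing form; e.g. storeP(memory mem, any_RegP src) further below
// matches whether $mem is a plain [reg], [reg + disp8/disp32],
// [reg + idx*scale + disp], or one of the narrow-oop forms, with the matcher
// choosing the cheapest operand that fits the address expression.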
4368
4369 //----------PIPELINE-----------------------------------------------------------
4370 // Rules which define the behavior of the target architectures pipeline.
4371 pipeline %{
4372
4373 //----------ATTRIBUTES---------------------------------------------------------
4374 attributes %{
4375   variable_size_instructions;        // Variable-size instructions
4376   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4377   instruction_unit_size = 1;         // An instruction is 1 byte long
4378 instruction_fetch_unit_size = 16; // The processor fetches one line
4379 instruction_fetch_units = 1; // of 16 bytes
4380
4381 // List of nop instructions
4382 nops( MachNop );
4383 %}
4384
5844 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
5845 ins_encode %{
5846 __ movl($dst$$Register, $src$$constant);
5847 %}
5848 ins_pipe(ialu_reg);
5849 %}
5850
5851 instruct loadConF(regF dst, immF con) %{
5852 match(Set dst con);
5853 ins_cost(125);
5854 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5855 ins_encode %{
5856 __ movflt($dst$$XMMRegister, $constantaddress($con));
5857 %}
5858 ins_pipe(pipe_slow);
5859 %}
5860
5861 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
5862 match(Set dst src);
5863 effect(KILL cr);
5864   format %{ "xorq $dst, $dst\t# compressed nullptr ptr" %}
5865 ins_encode %{
5866 __ xorq($dst$$Register, $dst$$Register);
5867 %}
5868 ins_pipe(ialu_reg);
5869 %}
5870
5871 instruct loadConN(rRegN dst, immN src) %{
5872 match(Set dst src);
5873
5874 ins_cost(125);
5875 format %{ "movl $dst, $src\t# compressed ptr" %}
5876 ins_encode %{
5877 address con = (address)$src$$constant;
5878 if (con == nullptr) {
5879 ShouldNotReachHere();
5880 } else {
5881 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5882 }
5883 %}
5884 ins_pipe(ialu_reg_fat); // XXX
5885 %}
5886
5887 instruct loadConNKlass(rRegN dst, immNKlass src) %{
5888 match(Set dst src);
5889
5890 ins_cost(125);
5891 format %{ "movl $dst, $src\t# compressed klass ptr" %}
5892 ins_encode %{
5893 address con = (address)$src$$constant;
5894 if (con == nullptr) {
5895 ShouldNotReachHere();
5896 } else {
5897 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
5898 }
5899 %}
5900 ins_pipe(ialu_reg_fat); // XXX
5901 %}
5902
5903 instruct loadConF0(regF dst, immF0 src)
5904 %{
5905 match(Set dst src);
5906 ins_cost(100);
5907
5908 format %{ "xorps $dst, $dst\t# float 0.0" %}
5909 ins_encode %{
5910 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5911 %}
5912 ins_pipe(pipe_slow);
5913 %}
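// Editor's note (hedged): xorps reg, reg is the usual x86 zero idiom -- it is
// shorter than a load from the constant table and is recognized by the
// hardware as dependency-breaking, which is why loadConF0 is costed cheaper
// (100) than the constant-table load in loadConF (125).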
5914
6097 %}
6098 ins_pipe(ialu_mem_reg); // XXX
6099 %}
6100
6101 // Store Pointer
6102 instruct storeP(memory mem, any_RegP src)
6103 %{
6104 predicate(n->as_Store()->barrier_data() == 0);
6105 match(Set mem (StoreP mem src));
6106
6107 ins_cost(125); // XXX
6108 format %{ "movq $mem, $src\t# ptr" %}
6109 ins_encode %{
6110 __ movq($mem$$Address, $src$$Register);
6111 %}
6112 ins_pipe(ialu_mem_reg);
6113 %}
6114
6115 instruct storeImmP0(memory mem, immP0 zero)
6116 %{
6117 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
6118 match(Set mem (StoreP mem zero));
6119
6120 ins_cost(125); // XXX
6121 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
6122 ins_encode %{
6123 __ movq($mem$$Address, r12);
6124 %}
6125 ins_pipe(ialu_mem_reg);
6126 %}
6127
6128 // Store nullptr Pointer, mark word, or other simple pointer constant.
6129 instruct storeImmP(memory mem, immP31 src)
6130 %{
6131 predicate(n->as_Store()->barrier_data() == 0);
6132 match(Set mem (StoreP mem src));
6133
6134 ins_cost(150); // XXX
6135 format %{ "movq $mem, $src\t# ptr" %}
6136 ins_encode %{
6137 __ movq($mem$$Address, $src$$constant);
6138 %}
6139 ins_pipe(ialu_mem_imm);
6140 %}
6141
6142 // Store Compressed Pointer
6143 instruct storeN(memory mem, rRegN src)
6144 %{
6145 match(Set mem (StoreN mem src));
6146
6147 ins_cost(125); // XXX
6148 format %{ "movl $mem, $src\t# compressed ptr" %}
6149 ins_encode %{
6150 __ movl($mem$$Address, $src$$Register);
6151 %}
6152 ins_pipe(ialu_mem_reg);
6153 %}
6154
6155 instruct storeNKlass(memory mem, rRegN src)
6156 %{
6157 match(Set mem (StoreNKlass mem src));
6158
6159 ins_cost(125); // XXX
6160 format %{ "movl $mem, $src\t# compressed klass ptr" %}
6161 ins_encode %{
6162 __ movl($mem$$Address, $src$$Register);
6163 %}
6164 ins_pipe(ialu_mem_reg);
6165 %}
6166
6167 instruct storeImmN0(memory mem, immN0 zero)
6168 %{
6169 predicate(CompressedOops::base() == nullptr);
6170 match(Set mem (StoreN mem zero));
6171
6172 ins_cost(125); // XXX
6173 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6174 ins_encode %{
6175 __ movl($mem$$Address, r12);
6176 %}
6177 ins_pipe(ialu_mem_reg);
6178 %}
6179
6180 instruct storeImmN(memory mem, immN src)
6181 %{
6182 match(Set mem (StoreN mem src));
6183
6184 ins_cost(150); // XXX
6185 format %{ "movl $mem, $src\t# compressed ptr" %}
6186 ins_encode %{
6187 address con = (address)$src$$constant;
6188 if (con == nullptr) {
6189 __ movl($mem$$Address, 0);
6190 } else {
6191 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6192 }
6193 %}
6194 ins_pipe(ialu_mem_imm);
6195 %}
6196
6197 instruct storeImmNKlass(memory mem, immNKlass src)
6198 %{
6199 match(Set mem (StoreNKlass mem src));
6200
6201 ins_cost(150); // XXX
6202 format %{ "movl $mem, $src\t# compressed klass ptr" %}
6203 ins_encode %{
6204 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
6205 %}
6206 ins_pipe(ialu_mem_imm);
6207 %}
6208
6209 // Store Integer Immediate
6210 instruct storeImmI0(memory mem, immI_0 zero)
6211 %{
6212 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
6213 match(Set mem (StoreI mem zero));
6214
6215 ins_cost(125); // XXX
6216 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
6217 ins_encode %{
6218 __ movl($mem$$Address, r12);
6219 %}
6220 ins_pipe(ialu_mem_reg);
6221 %}
6222
6223 instruct storeImmI(memory mem, immI src)
6224 %{
6225 match(Set mem (StoreI mem src));
6226
6227 ins_cost(150);
6228 format %{ "movl $mem, $src\t# int" %}
6229 ins_encode %{
6230 __ movl($mem$$Address, $src$$constant);
6231 %}
6232 ins_pipe(ialu_mem_imm);
6233 %}
6234
6235 // Store Long Immediate
6236 instruct storeImmL0(memory mem, immL0 zero)
6237 %{
6238 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
6239 match(Set mem (StoreL mem zero));
6240
6241 ins_cost(125); // XXX
6242 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
6243 ins_encode %{
6244 __ movq($mem$$Address, r12);
6245 %}
6246 ins_pipe(ialu_mem_reg);
6247 %}
6248
6249 instruct storeImmL(memory mem, immL32 src)
6250 %{
6251 match(Set mem (StoreL mem src));
6252
6253 ins_cost(150);
6254 format %{ "movq $mem, $src\t# long" %}
6255 ins_encode %{
6256 __ movq($mem$$Address, $src$$constant);
6257 %}
6258 ins_pipe(ialu_mem_imm);
6259 %}
6260
6261 // Store Short/Char Immediate
6262 instruct storeImmC0(memory mem, immI_0 zero)
6263 %{
6264 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
6265 match(Set mem (StoreC mem zero));
6266
6267 ins_cost(125); // XXX
6268 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
6269 ins_encode %{
6270 __ movw($mem$$Address, r12);
6271 %}
6272 ins_pipe(ialu_mem_reg);
6273 %}
6274
6275 instruct storeImmI16(memory mem, immI16 src)
6276 %{
6277 predicate(UseStoreImmI16);
6278 match(Set mem (StoreC mem src));
6279
6280 ins_cost(150);
6281 format %{ "movw $mem, $src\t# short/char" %}
6282 ins_encode %{
6283 __ movw($mem$$Address, $src$$constant);
6284 %}
6285 ins_pipe(ialu_mem_imm);
6286 %}
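// Editor's note (hedged): a movw with a 16-bit immediate needs the 0x66
// operand-size prefix, a length-changing prefix that can stall the decoders
// on some Intel cores; UseStoreImmI16 gates this form so affected targets can
// fall back to storing from a register instead.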
6287
6288 // Store Byte Immediate
6289 instruct storeImmB0(memory mem, immI_0 zero)
6290 %{
6291 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
6292 match(Set mem (StoreB mem zero));
6293
6294 ins_cost(125); // XXX
6295   format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
6296 ins_encode %{
6297 __ movb($mem$$Address, r12);
6298 %}
6299 ins_pipe(ialu_mem_reg);
6300 %}
6301
6302 instruct storeImmB(memory mem, immI8 src)
6303 %{
6304 match(Set mem (StoreB mem src));
6305
6306 ins_cost(150); // XXX
6307 format %{ "movb $mem, $src\t# byte" %}
6308 ins_encode %{
6309 __ movb($mem$$Address, $src$$constant);
6310 %}
6311 ins_pipe(ialu_mem_imm);
6312 %}
6313
6314 // Store CMS card-mark Immediate
6315 instruct storeImmCM0_reg(memory mem, immI_0 zero)
6316 %{
6317 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
6318 match(Set mem (StoreCM mem zero));
6319
6320 ins_cost(125); // XXX
6321 format %{ "movb $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6322 ins_encode %{
6323 __ movb($mem$$Address, r12);
6324 %}
6325 ins_pipe(ialu_mem_reg);
6326 %}
6327
6328 instruct storeImmCM0(memory mem, immI_0 src)
6329 %{
6330 match(Set mem (StoreCM mem src));
6331
6332 ins_cost(150); // XXX
6333 format %{ "movb $mem, $src\t# CMS card-mark byte 0" %}
6334 ins_encode %{
6335 __ movb($mem$$Address, $src$$constant);
6336 %}
6337 ins_pipe(ialu_mem_imm);
6338 %}
6339
6340 // Store Float
6341 instruct storeF(memory mem, regF src)
6342 %{
6343 match(Set mem (StoreF mem src));
6344
6345 ins_cost(95); // XXX
6346 format %{ "movss $mem, $src\t# float" %}
6347 ins_encode %{
6348 __ movflt($mem$$Address, $src$$XMMRegister);
6349 %}
6350 ins_pipe(pipe_slow); // XXX
6351 %}
6352
6353 // Store immediate float value (faster than a store from an XMM register)
6354 instruct storeF0(memory mem, immF0 zero)
6355 %{
6356 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
6357 match(Set mem (StoreF mem zero));
6358
6359 ins_cost(25); // XXX
6360 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
6361 ins_encode %{
6362 __ movl($mem$$Address, r12);
6363 %}
6364 ins_pipe(ialu_mem_reg);
6365 %}
6366
6367 instruct storeF_imm(memory mem, immF src)
6368 %{
6369 match(Set mem (StoreF mem src));
6370
6371 ins_cost(50);
6372 format %{ "movl $mem, $src\t# float" %}
6373 ins_encode %{
6374 __ movl($mem$$Address, jint_cast($src$$constant));
6375 %}
6376 ins_pipe(ialu_mem_imm);
6377 %}
6378
6379 // Store Double
6380 instruct storeD(memory mem, regD src)
6381 %{
6382 match(Set mem (StoreD mem src));
6383
6384 ins_cost(95); // XXX
6385 format %{ "movsd $mem, $src\t# double" %}
6386 ins_encode %{
6387 __ movdbl($mem$$Address, $src$$XMMRegister);
6388 %}
6389 ins_pipe(pipe_slow); // XXX
6390 %}
6391
6392 // Store immediate double 0.0 (faster than a store from an XMM register)
6393 instruct storeD0_imm(memory mem, immD0 src)
6394 %{
6395 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
6396 match(Set mem (StoreD mem src));
6397
6398 ins_cost(50);
6399 format %{ "movq $mem, $src\t# double 0." %}
6400 ins_encode %{
6401 __ movq($mem$$Address, $src$$constant);
6402 %}
6403 ins_pipe(ialu_mem_imm);
6404 %}
6405
6406 instruct storeD0(memory mem, immD0 zero)
6407 %{
6408 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
6409 match(Set mem (StoreD mem zero));
6410
6411 ins_cost(25); // XXX
6412 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
6413 ins_encode %{
6414 __ movq($mem$$Address, r12);
6415 %}
6416 ins_pipe(ialu_mem_reg);
6417 %}
6418
6419 instruct storeSSI(stackSlotI dst, rRegI src)
6420 %{
6421 match(Set dst src);
6422
6423 ins_cost(100);
6424 format %{ "movl $dst, $src\t# int stk" %}
6425 opcode(0x89);
6426 ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6427 ins_pipe( ialu_mem_reg );
6428 %}
6932 format %{ "MEMBAR-storestore (empty encoding)" %}
6933 ins_encode( );
6934 ins_pipe(empty);
6935 %}
6936
6937 //----------Move Instructions--------------------------------------------------
6938
6939 instruct castX2P(rRegP dst, rRegL src)
6940 %{
6941 match(Set dst (CastX2P src));
6942
6943 format %{ "movq $dst, $src\t# long->ptr" %}
6944 ins_encode %{
6945 if ($dst$$reg != $src$$reg) {
6946 __ movptr($dst$$Register, $src$$Register);
6947 }
6948 %}
6949 ins_pipe(ialu_reg_reg); // XXX
6950 %}
6951
6952 instruct castN2X(rRegL dst, rRegN src)
6953 %{
6954 match(Set dst (CastP2X src));
6955
6956 format %{ "movq $dst, $src\t# ptr -> long" %}
6957 ins_encode %{
6958 if ($dst$$reg != $src$$reg) {
6959 __ movptr($dst$$Register, $src$$Register);
6960 }
6961 %}
6962 ins_pipe(ialu_reg_reg); // XXX
6963 %}
6964
6965 instruct castP2X(rRegL dst, rRegP src)
6966 %{
6967 match(Set dst (CastP2X src));
6968
6969 format %{ "movq $dst, $src\t# ptr -> long" %}
6970 ins_encode %{
6971 if ($dst$$reg != $src$$reg) {
6972 __ movptr($dst$$Register, $src$$Register);
6973 }
6974 %}
6975 ins_pipe(ialu_reg_reg); // XXX
6976 %}
6977
6978 // Convert oop into int for vector alignment masking
6979 instruct convP2I(rRegI dst, rRegP src)
6980 %{
6981 match(Set dst (ConvL2I (CastP2X src)));
6982
6983 format %{ "movl $dst, $src\t# ptr -> int" %}
6984 ins_encode %{
11543 effect(DEF dst, USE src);
11544 ins_cost(100);
11545 format %{ "movd $dst,$src\t# MoveI2F" %}
11546 ins_encode %{
11547 __ movdl($dst$$XMMRegister, $src$$Register);
11548 %}
11549 ins_pipe( pipe_slow );
11550 %}
11551
11552 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11553 match(Set dst (MoveL2D src));
11554 effect(DEF dst, USE src);
11555 ins_cost(100);
11556 format %{ "movd $dst,$src\t# MoveL2D" %}
11557 ins_encode %{
11558 __ movdq($dst$$XMMRegister, $src$$Register);
11559 %}
11560 ins_pipe( pipe_slow );
11561 %}
11562
11563
11564 // Fast clearing of an array
11565 // Small ClearArray non-AVX512.
11566 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11567 Universe dummy, rFlagsReg cr)
11568 %{
11569 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11570 match(Set dummy (ClearArray (Binary cnt base) val));
11571 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11572
11573 format %{ $$template
11574 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
11575 $$emit$$"jg LARGE\n\t"
11576 $$emit$$"dec rcx\n\t"
11577 $$emit$$"js DONE\t# Zero length\n\t"
11578 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
11579 $$emit$$"dec rcx\n\t"
11580 $$emit$$"jge LOOP\n\t"
11581 $$emit$$"jmp DONE\n\t"
11582 $$emit$$"# LARGE:\n\t"
11583 if (UseFastStosb) {
11584 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11585 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
11586 } else if (UseXMMForObjInit) {
11587 $$emit$$"movdq $tmp, $val\n\t"
11588 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11589 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11590 $$emit$$"jmpq L_zero_64_bytes\n\t"
11591 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11592 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11593 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11594 $$emit$$"add 0x40,rax\n\t"
11595 $$emit$$"# L_zero_64_bytes:\n\t"
11596 $$emit$$"sub 0x8,rcx\n\t"
11597 $$emit$$"jge L_loop\n\t"
11598 $$emit$$"add 0x4,rcx\n\t"
11599 $$emit$$"jl L_tail\n\t"
11600 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11601 $$emit$$"add 0x20,rax\n\t"
11602 $$emit$$"sub 0x4,rcx\n\t"
11603 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11604 $$emit$$"add 0x4,rcx\n\t"
11605 $$emit$$"jle L_end\n\t"
11606 $$emit$$"dec rcx\n\t"
11607 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11608 $$emit$$"vmovq xmm0,(rax)\n\t"
11609 $$emit$$"add 0x8,rax\n\t"
11610 $$emit$$"dec rcx\n\t"
11611 $$emit$$"jge L_sloop\n\t"
11612 $$emit$$"# L_end:\n\t"
11613 } else {
11614 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
11615 }
11616 $$emit$$"# DONE"
11617 %}
11618 ins_encode %{
11619 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11620 $tmp$$XMMRegister, false, false);
11621 %}
11622 ins_pipe(pipe_slow);
11623 %}
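// Editor's note (illustrative): this is the first of eight ClearArray
// variants that follow, selected by predicate along three axes -- small vs.
// is_large(), plain zeroing vs. word_copy_only() (the fill value must be
// stored word by word), and UseAVX <= 2 vs. the EVEX forms (UseAVX > 2),
// which add a kReg temp for the masked tail in MacroAssembler::clear_mem().
// A ninth rule, rep_stos_im, handles small constant-length clears.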
11624
11625 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11626 Universe dummy, rFlagsReg cr)
11627 %{
11628 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11629 match(Set dummy (ClearArray (Binary cnt base) val));
11630 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11631
11632 format %{ $$template
11633 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
11634 $$emit$$"jg LARGE\n\t"
11635 $$emit$$"dec rcx\n\t"
11636 $$emit$$"js DONE\t# Zero length\n\t"
11637 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
11638 $$emit$$"dec rcx\n\t"
11639 $$emit$$"jge LOOP\n\t"
11640 $$emit$$"jmp DONE\n\t"
11641 $$emit$$"# LARGE:\n\t"
11642 if (UseXMMForObjInit) {
11643 $$emit$$"movdq $tmp, $val\n\t"
11644 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11645 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11646 $$emit$$"jmpq L_zero_64_bytes\n\t"
11647 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11648 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11649 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11650 $$emit$$"add 0x40,rax\n\t"
11651 $$emit$$"# L_zero_64_bytes:\n\t"
11652 $$emit$$"sub 0x8,rcx\n\t"
11653 $$emit$$"jge L_loop\n\t"
11654 $$emit$$"add 0x4,rcx\n\t"
11655 $$emit$$"jl L_tail\n\t"
11656 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11657 $$emit$$"add 0x20,rax\n\t"
11658 $$emit$$"sub 0x4,rcx\n\t"
11659 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11660 $$emit$$"add 0x4,rcx\n\t"
11661 $$emit$$"jle L_end\n\t"
11662 $$emit$$"dec rcx\n\t"
11663 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11664 $$emit$$"vmovq xmm0,(rax)\n\t"
11665 $$emit$$"add 0x8,rax\n\t"
11666 $$emit$$"dec rcx\n\t"
11667 $$emit$$"jge L_sloop\n\t"
11668 $$emit$$"# L_end:\n\t"
11669 } else {
11670 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
11671 }
11672 $$emit$$"# DONE"
11673 %}
11674 ins_encode %{
11675 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11676 $tmp$$XMMRegister, false, true);
11677 %}
11678 ins_pipe(pipe_slow);
11679 %}
11680
11681 // Small ClearArray AVX512 non-constant length.
11682 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11683 Universe dummy, rFlagsReg cr)
11684 %{
11685 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11686 match(Set dummy (ClearArray (Binary cnt base) val));
11687 ins_cost(125);
11688 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11689
11690 format %{ $$template
11691 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11692 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
11693 $$emit$$"jg LARGE\n\t"
11694 $$emit$$"dec rcx\n\t"
11695 $$emit$$"js DONE\t# Zero length\n\t"
11696 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
11697 $$emit$$"dec rcx\n\t"
11698 $$emit$$"jge LOOP\n\t"
11699 $$emit$$"jmp DONE\n\t"
11700 $$emit$$"# LARGE:\n\t"
11701 if (UseFastStosb) {
11702 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11703 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
11704 } else if (UseXMMForObjInit) {
11705 $$emit$$"mov rdi,rax\n\t"
11706 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
11707 $$emit$$"jmpq L_zero_64_bytes\n\t"
11708 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11716 $$emit$$"jl L_tail\n\t"
11717 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11718 $$emit$$"add 0x20,rax\n\t"
11719 $$emit$$"sub 0x4,rcx\n\t"
11720 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11721 $$emit$$"add 0x4,rcx\n\t"
11722 $$emit$$"jle L_end\n\t"
11723 $$emit$$"dec rcx\n\t"
11724 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11725 $$emit$$"vmovq xmm0,(rax)\n\t"
11726 $$emit$$"add 0x8,rax\n\t"
11727 $$emit$$"dec rcx\n\t"
11728 $$emit$$"jge L_sloop\n\t"
11729 $$emit$$"# L_end:\n\t"
11730 } else {
11731 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
11732 }
11733 $$emit$$"# DONE"
11734 %}
11735 ins_encode %{
11736 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11737 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
11738 %}
11739 ins_pipe(pipe_slow);
11740 %}
11741
11742 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11743 Universe dummy, rFlagsReg cr)
11744 %{
11745 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11746 match(Set dummy (ClearArray (Binary cnt base) val));
11747 ins_cost(125);
11748 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11749
11750 format %{ $$template
11751 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11752 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
11753 $$emit$$"jg LARGE\n\t"
11754 $$emit$$"dec rcx\n\t"
11755 $$emit$$"js DONE\t# Zero length\n\t"
11756 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
11757 $$emit$$"dec rcx\n\t"
11758 $$emit$$"jge LOOP\n\t"
11759 $$emit$$"jmp DONE\n\t"
11760 $$emit$$"# LARGE:\n\t"
11761 if (UseFastStosb) {
11762 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11763 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
11764 } else if (UseXMMForObjInit) {
11765 $$emit$$"mov rdi,rax\n\t"
11766 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
11767 $$emit$$"jmpq L_zero_64_bytes\n\t"
11768 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11776 $$emit$$"jl L_tail\n\t"
11777 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11778 $$emit$$"add 0x20,rax\n\t"
11779 $$emit$$"sub 0x4,rcx\n\t"
11780 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11781 $$emit$$"add 0x4,rcx\n\t"
11782 $$emit$$"jle L_end\n\t"
11783 $$emit$$"dec rcx\n\t"
11784 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11785 $$emit$$"vmovq xmm0,(rax)\n\t"
11786 $$emit$$"add 0x8,rax\n\t"
11787 $$emit$$"dec rcx\n\t"
11788 $$emit$$"jge L_sloop\n\t"
11789 $$emit$$"# L_end:\n\t"
11790 } else {
11791 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
11792 }
11793 $$emit$$"# DONE"
11794 %}
11795 ins_encode %{
11796 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11797 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
11798 %}
11799 ins_pipe(pipe_slow);
11800 %}
11801
11802 // Large ClearArray non-AVX512.
11803 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11804 Universe dummy, rFlagsReg cr)
11805 %{
11806 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11807 match(Set dummy (ClearArray (Binary cnt base) val));
11808 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11809
11810 format %{ $$template
11811 if (UseFastStosb) {
11812 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11813 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
11814 } else if (UseXMMForObjInit) {
11815 $$emit$$"movdq $tmp, $val\n\t"
11816 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11817 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11818 $$emit$$"jmpq L_zero_64_bytes\n\t"
11819 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11820 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11821 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11822 $$emit$$"add 0x40,rax\n\t"
11823 $$emit$$"# L_zero_64_bytes:\n\t"
11824 $$emit$$"sub 0x8,rcx\n\t"
11825 $$emit$$"jge L_loop\n\t"
11826 $$emit$$"add 0x4,rcx\n\t"
11827 $$emit$$"jl L_tail\n\t"
11828 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11829 $$emit$$"add 0x20,rax\n\t"
11830 $$emit$$"sub 0x4,rcx\n\t"
11831 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11832 $$emit$$"add 0x4,rcx\n\t"
11833 $$emit$$"jle L_end\n\t"
11834 $$emit$$"dec rcx\n\t"
11835 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11836 $$emit$$"vmovq xmm0,(rax)\n\t"
11837 $$emit$$"add 0x8,rax\n\t"
11838 $$emit$$"dec rcx\n\t"
11839 $$emit$$"jge L_sloop\n\t"
11840 $$emit$$"# L_end:\n\t"
11841 } else {
11842 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11843 }
11844 %}
11845 ins_encode %{
11846 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11847 $tmp$$XMMRegister, true, false);
11848 %}
11849 ins_pipe(pipe_slow);
11850 %}
11851
11852 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11853 Universe dummy, rFlagsReg cr)
11854 %{
11855 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11856 match(Set dummy (ClearArray (Binary cnt base) val));
11857 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11858
11859 format %{ $$template
11860 if (UseXMMForObjInit) {
11861 $$emit$$"movdq $tmp, $val\n\t"
11862 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11863 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11864 $$emit$$"jmpq L_zero_64_bytes\n\t"
11865 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11866 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11867 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11868 $$emit$$"add 0x40,rax\n\t"
11869 $$emit$$"# L_zero_64_bytes:\n\t"
11870 $$emit$$"sub 0x8,rcx\n\t"
11871 $$emit$$"jge L_loop\n\t"
11872 $$emit$$"add 0x4,rcx\n\t"
11873 $$emit$$"jl L_tail\n\t"
11874 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11875 $$emit$$"add 0x20,rax\n\t"
11876 $$emit$$"sub 0x4,rcx\n\t"
11877 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11878 $$emit$$"add 0x4,rcx\n\t"
11879 $$emit$$"jle L_end\n\t"
11880 $$emit$$"dec rcx\n\t"
11881 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11882 $$emit$$"vmovq xmm0,(rax)\n\t"
11883 $$emit$$"add 0x8,rax\n\t"
11884 $$emit$$"dec rcx\n\t"
11885 $$emit$$"jge L_sloop\n\t"
11886 $$emit$$"# L_end:\n\t"
11887 } else {
11888 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11889 }
11890 %}
11891 ins_encode %{
11892 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11893 $tmp$$XMMRegister, true, true);
11894 %}
11895 ins_pipe(pipe_slow);
11896 %}
11897
11898 // Large ClearArray AVX512.
11899 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11900 Universe dummy, rFlagsReg cr)
11901 %{
11902 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11903 match(Set dummy (ClearArray (Binary cnt base) val));
11904 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11905
11906 format %{ $$template
11907 if (UseFastStosb) {
11908 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11909 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11910 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
11911 } else if (UseXMMForObjInit) {
11912 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
11913 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
11914 $$emit$$"jmpq L_zero_64_bytes\n\t"
11915 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11916 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11917 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11918 $$emit$$"add 0x40,rax\n\t"
11919 $$emit$$"# L_zero_64_bytes:\n\t"
11920 $$emit$$"sub 0x8,rcx\n\t"
11921 $$emit$$"jge L_loop\n\t"
11922 $$emit$$"add 0x4,rcx\n\t"
11923 $$emit$$"jl L_tail\n\t"
11924 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11925 $$emit$$"add 0x20,rax\n\t"
11926 $$emit$$"sub 0x4,rcx\n\t"
11927 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11928 $$emit$$"add 0x4,rcx\n\t"
11929 $$emit$$"jle L_end\n\t"
11930 $$emit$$"dec rcx\n\t"
11931 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11932 $$emit$$"vmovq xmm0,(rax)\n\t"
11933 $$emit$$"add 0x8,rax\n\t"
11934 $$emit$$"dec rcx\n\t"
11935 $$emit$$"jge L_sloop\n\t"
11936 $$emit$$"# L_end:\n\t"
11937 } else {
11938 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11939 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11940 }
11941 %}
11942 ins_encode %{
11943 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11944 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
11945 %}
11946 ins_pipe(pipe_slow);
11947 %}
11948
11949 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11950 Universe dummy, rFlagsReg cr)
11951 %{
11952 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11953 match(Set dummy (ClearArray (Binary cnt base) val));
11954 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11955
11956 format %{ $$template
11957 if (UseFastStosb) {
11958 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11959 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11960 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
11961 } else if (UseXMMForObjInit) {
11962 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
11963 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
11964 $$emit$$"jmpq L_zero_64_bytes\n\t"
11965 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11966 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11967 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11968 $$emit$$"add 0x40,rax\n\t"
11969 $$emit$$"# L_zero_64_bytes:\n\t"
11970 $$emit$$"sub 0x8,rcx\n\t"
11971 $$emit$$"jge L_loop\n\t"
11972 $$emit$$"add 0x4,rcx\n\t"
11973 $$emit$$"jl L_tail\n\t"
11974 $$emit$$"vmovdqu ymm0,(rax)\n\t"
11975 $$emit$$"add 0x20,rax\n\t"
11976 $$emit$$"sub 0x4,rcx\n\t"
11977 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11978 $$emit$$"add 0x4,rcx\n\t"
11979 $$emit$$"jle L_end\n\t"
11980 $$emit$$"dec rcx\n\t"
11981 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11982 $$emit$$"vmovq xmm0,(rax)\n\t"
11983 $$emit$$"add 0x8,rax\n\t"
11984 $$emit$$"dec rcx\n\t"
11985 $$emit$$"jge L_sloop\n\t"
11986 $$emit$$"# L_end:\n\t"
11987 } else {
11988 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11989 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11990 }
11991 %}
11992 ins_encode %{
11993 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11994 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
11995 %}
11996 ins_pipe(pipe_slow);
11997 %}
11998
11999 // Small ClearArray AVX512 constant length.
12000 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
12001 %{
12002 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
12003 ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
12004 match(Set dummy (ClearArray (Binary cnt base) val));
12005 ins_cost(100);
12006 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
12007   format %{ "clear_mem_imm $base, $cnt\n\t" %}
12008 ins_encode %{
12009 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12010 %}
12011 ins_pipe(pipe_slow);
12012 %}
12013
12014 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12015 rax_RegI result, legRegD tmp1, rFlagsReg cr)
12016 %{
12017 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12018 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12019 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12020
12021 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
12022 ins_encode %{
12023 __ string_compare($str1$$Register, $str2$$Register,
12024 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12025 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12026 %}
12027 ins_pipe( pipe_slow );
12028 %}
12029
12920 ins_pipe(ialu_cr_reg_mem);
12921 %}
12922
12923 // This will generate a signed flags result. This should be OK since
12924 // any compare to a zero should be eq/neq.
12925 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12926 %{
12927 match(Set cr (CmpP src zero));
12928
12929 format %{ "testq $src, $src\t# ptr" %}
12930 ins_encode %{
12931 __ testq($src$$Register, $src$$Register);
12932 %}
12933 ins_pipe(ialu_cr_reg_imm);
12934 %}
12935
12936 // This will generate a signed flags result. This should be OK since
12937 // any compare to a zero should be eq/neq.
12938 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12939 %{
12940 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
12941 n->in(1)->as_Load()->barrier_data() == 0);
12942 match(Set cr (CmpP (LoadP op) zero));
12943
12944 ins_cost(500); // XXX
12945 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
12946 ins_encode %{
12947 __ testq($op$$Address, 0xFFFFFFFF);
12948 %}
12949 ins_pipe(ialu_cr_reg_imm);
12950 %}
12951
12952 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12953 %{
12954 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
12955 n->in(1)->as_Load()->barrier_data() == 0);
12956 match(Set cr (CmpP (LoadP mem) zero));
12957
12958 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
12959 ins_encode %{
12960 __ cmpq(r12, $mem$$Address);
12961 %}
12962 ins_pipe(ialu_cr_reg_mem);
12963 %}
12964
12965 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12966 %{
12967 match(Set cr (CmpN op1 op2));
12968
12969 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
12970 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12971 ins_pipe(ialu_cr_reg_reg);
12972 %}
12973
12974 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
13017 %{
13018 match(Set cr (CmpN src (LoadNKlass mem)));
13019
13020 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
13021 ins_encode %{
13022 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
13023 %}
13024 ins_pipe(ialu_cr_reg_mem);
13025 %}
13026
13027 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
13028 match(Set cr (CmpN src zero));
13029
13030 format %{ "testl $src, $src\t# compressed ptr" %}
13031 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
13032 ins_pipe(ialu_cr_reg_imm);
13033 %}
13034
13035 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
13036 %{
13037 predicate(CompressedOops::base() != nullptr);
13038 match(Set cr (CmpN (LoadN mem) zero));
13039
13040 ins_cost(500); // XXX
13041 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
13042 ins_encode %{
13043     __ testl($mem$$Address, 0xFFFFFFFF);
13044 %}
13045 ins_pipe(ialu_cr_reg_mem);
13046 %}
13047
13048 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
13049 %{
13050 predicate(CompressedOops::base() == nullptr);
13051 match(Set cr (CmpN (LoadN mem) zero));
13052
13053 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
13054 ins_encode %{
13055 __ cmpl(r12, $mem$$Address);
13056 %}
13057 ins_pipe(ialu_cr_reg_mem);
13058 %}
13059
13060 // Yanked all unsigned pointer compare operations.
13061 // Pointer compares are done with CmpP which is already unsigned.
13062
13063 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
13064 %{
13065 match(Set cr (CmpL op1 op2));
13066
13067 format %{ "cmpq $op1, $op2" %}
13068 ins_encode %{
13069 __ cmpq($op1$$Register, $op2$$Register);
13070 %}
13781
13782 ins_cost(300);
13783 format %{ "call_leaf,runtime " %}
13784 ins_encode(clear_avx, Java_To_Runtime(meth));
13785 ins_pipe(pipe_slow);
13786 %}
13787
13788 // Call runtime without safepoint and with vector arguments
13789 instruct CallLeafDirectVector(method meth)
13790 %{
13791 match(CallLeafVector);
13792 effect(USE meth);
13793
13794 ins_cost(300);
13795 format %{ "call_leaf,vector " %}
13796 ins_encode(Java_To_Runtime(meth));
13797 ins_pipe(pipe_slow);
13798 %}
13799
13800 // Call runtime without safepoint
13801 // Entry point is null; the target register holds the address to call.
13802 instruct CallLeafNoFPInDirect(rRegP target)
13803 %{
13804 predicate(n->as_Call()->entry_point() == nullptr);
13805 match(CallLeafNoFP target);
13806
13807 ins_cost(300);
13808 format %{ "call_leaf_nofp,runtime indirect " %}
13809 ins_encode %{
13810 __ call($target$$Register);
13811 %}
13812
13813 ins_pipe(pipe_slow);
13814 %}
13815
13816 instruct CallLeafNoFPDirect(method meth)
13817 %{
13818 predicate(n->as_Call()->entry_point() != nullptr);
13819 match(CallLeafNoFP);
13820 effect(USE meth);
13821
13822 ins_cost(300);
13823 format %{ "call_leaf_nofp,runtime " %}
13824 ins_encode(clear_avx, Java_To_Runtime(meth));
13825 ins_pipe(pipe_slow);
13826 %}
13827
13828 // Return Instruction
13829 // Remove the return address & jump to it.
13830 // Notice: We always emit a nop after a ret to make sure there is room
13831 // for safepoint patching
13832 instruct Ret()
13833 %{
13834 match(Return);
13835
13836 format %{ "ret" %}
13837 ins_encode %{
13838 __ ret(0);
|