598 }
599
// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset()
{
  // Direct call: 1 opcode byte + 4-byte displacement = 5 bytes.
  int offset = 5; // 5 bytes from start of call to where return address points
  // A vzeroupper may be emitted ahead of the call; include its size.
  offset += clear_avx_size();
  return offset;
}
609
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // Dynamic (inline-cache) call sequence is 15 bytes: presumably a 10-byte
  // movabs of the IC data plus the 5-byte call — confirm against emitter.
  int offset = 15; // 15 bytes from start of call to where return address points
  // Plus an optional vzeroupper emitted ahead of the call.
  offset += clear_avx_size();
  return offset;
}
616
617 int MachCallRuntimeNode::ret_addr_offset() {
618 int offset = 13; // movq r10,#addr; callq (r10)
619 if (this->ideal_Opcode() != Op_CallLeafVector) {
620 offset += clear_avx_size();
621 }
622 return offset;
623 }
624 //
625 // Compute padding required for nodes which need alignment
626 //
627
628 // The address of the call instruction needs to be 4-byte aligned to
629 // ensure that it does not span a cache line so that it can be patched.
630 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
631 {
632 current_offset += clear_avx_size(); // skip vzeroupper
633 current_offset += 1; // skip call opcode byte
634 return align_up(current_offset, alignment_required()) - current_offset;
635 }
636
637 // The address of the call instruction needs to be 4-byte aligned to
638 // ensure that it does not span a cache line so that it can be patched.
639 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
640 {
641 current_offset += clear_avx_size(); // skip vzeroupper
642 current_offset += 11; // skip movq instruction + call opcode byte
643 return align_up(current_offset, alignment_required()) - current_offset;
829 st->print("\n\t");
830 st->print("# stack alignment check");
831 #endif
832 }
833 if (C->stub_function() != nullptr) {
834 st->print("\n\t");
835 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
836 st->print("\n\t");
837 st->print("je fast_entry\t");
838 st->print("\n\t");
839 st->print("call #nmethod_entry_barrier_stub\t");
840 st->print("\n\tfast_entry:");
841 }
842 st->cr();
843 }
844 #endif
845
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Frame size and stack-bang size as computed by code output.
  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Fast class-initialization barrier: if the holder class is still being
    // initialized, divert callers to the wrong-method stub (slow path).
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Emit the verified entry: optional stack bang plus frame setup.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  // The frame is fully set up at this code offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
878
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  // Prolog size depends on too many factors (stack bang, clinit barrier,
  // frame setup), so measure the emitted code instead of predicting it.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
884
int MachPrologNode::reloc() const
{
  // Upper bound on relocation entries this node emits.
  return 0; // a large enough number
}
889
890 //=============================================================================
891 #ifndef PRODUCT
892 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
893 {
894 Compile* C = ra_->C;
895 if (generate_vzeroupper(C)) {
896 st->print("vzeroupper");
897 st->cr(); st->print("\t");
898 }
899
900 int framesize = C->output()->frame_size_in_bytes();
901 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
902 // Remove word for return adr already pushed
903 // and RBP
904 framesize -= 2*wordSize;
911 st->print_cr("popq rbp");
912 if (do_polling() && C->is_method_compilation()) {
913 st->print("\t");
914 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
915 "ja #safepoint_stub\t"
916 "# Safepoint: poll for GC");
917 }
918 }
919 #endif
920
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // Tear down the frame: pop locals, then the saved RBP.
  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-point safepoint poll; branches to a C2SafepointPollStub when a
    // safepoint is pending.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Don't register stubs during the scratch (sizing) emission pass.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
961
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  // Epilog size varies (vzeroupper, reserved-stack check, poll); measure it.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
967
int MachEpilogNode::reloc() const
{
  // Upper bound on relocation entries (e.g. the return poll relocation).
  return 2; // a large enough number
}
972
const Pipeline* MachEpilogNode::pipeline() const
{
  // Use the generic pipeline class; no special scheduling for the epilog.
  return MachNode::pipeline_class();
}
977
978 //=============================================================================
979
// Register classes for spilled values — presumably consumed by the
// spill-copy implementation that follows (outside this view); confirm.
enum RC {
  rc_bad,    // not a register / no class
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
987
1549 #endif
1550
1551 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1552 {
1553 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1554 int reg = ra_->get_encode(this);
1555
1556 __ lea(as_Register(reg), Address(rsp, offset));
1557 }
1558
1559 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1560 {
1561 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1562 if (ra_->get_encode(this) > 15) {
1563 return (offset < 0x80) ? 6 : 9; // REX2
1564 } else {
1565 return (offset < 0x80) ? 5 : 8; // REX
1566 }
1567 }
1568
1569 //=============================================================================
1570 #ifndef PRODUCT
1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1572 {
1573 if (UseCompressedClassPointers) {
1574 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1575 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1576 } else {
1577 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1578 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1579 }
1580 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1581 }
1582 #endif
1583
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  // Emit the inline-cache check, aligning the subsequent (verified) entry.
  __ ic_check(InteriorEntryAlignment);
}
1588
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  // Size depends on compressed-klass mode and alignment padding; measure it.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1594
1595
1596 //=============================================================================
1597
// Vector calling convention is available whenever the Vector API support
// flag is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
1601
1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1603 assert(EnableVectorSupport, "sanity");
1604 int lo = XMM0_num;
1605 int hi = XMM0b_num;
1606 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1607 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1608 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1609 return OptoRegPair(hi, lo);
1610 }
1611
1612 // Is this branch offset short enough that a short branch can be used?
1613 //
1614 // NOTE: If the platform does not provide any short branch variants, then
1615 // this method should return false for offset 0.
3050 %}
3051 %}
3052
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is provably non-negative (type low bound
  // >= 0) — presumably so the I2L widening is safe to fold; confirm.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3069
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when compressed oops decode as base + (narrow << 3).
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12 — reserved as the heap base register
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3087
3088 // Indirect Memory Operand
3089 operand indirectNarrow(rRegN reg)
3396 equal(0x4, "e");
3397 not_equal(0x5, "ne");
3398 less(0x2, "b");
3399 greater_equal(0x3, "ae");
3400 less_equal(0x6, "be");
3401 greater(0x7, "a");
3402 overflow(0x0, "o");
3403 no_overflow(0x1, "no");
3404 %}
3405 %}
3406
3407 //----------OPERAND CLASSES----------------------------------------------------
3408 // Operand Classes are groups of operands that are used as to simplify
3409 // instruction definitions by not requiring the AD writer to specify separate
3410 // instructions for every form of operand when the instruction accepts
3411 // multiple operand types with the same basic encoding and format. The classic
3412 // case of this is memory operands.
3413
// All addressing modes an instruction with a generic "memory" operand
// accepts, including the narrow-oop (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3420
3421 //----------PIPELINE-----------------------------------------------------------
3422 // Rules which define the behavior of the target architectures pipeline.
3423 pipeline %{
3424
3425 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;       // Instructions are of variable size
  max_instructions_per_bundle = 3;  // Up to 3 instructions per bundle
  instruction_unit_size = 1;        // An instruction is 1 byte long
  instruction_fetch_unit_size = 16; // The processor fetches one line
  instruction_fetch_units = 1;      // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3436
5981 format %{ "MEMBAR-storestore (empty encoding)" %}
5982 ins_encode( );
5983 ins_pipe(empty);
5984 %}
5985
5986 //----------Move Instructions--------------------------------------------------
5987
// Reinterpret a long as a pointer; at most a register-to-register move.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6000
// Reinterpret a pointer as a long; at most a register-to-register move.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6013
6014 // Convert oop into int for vectors alignment masking
6015 instruct convP2I(rRegI dst, rRegP src)
6016 %{
6017 match(Set dst (ConvL2I (CastP2X src)));
6018
6019 format %{ "movl $dst, $src\t# ptr -> int" %}
6020 ins_encode %{
12112 effect(DEF dst, USE src);
12113 ins_cost(100);
12114 format %{ "movd $dst,$src\t# MoveI2F" %}
12115 ins_encode %{
12116 __ movdl($dst$$XMMRegister, $src$$Register);
12117 %}
12118 ins_pipe( pipe_slow );
12119 %}
12120
// Bitwise move of a long GPR into a double XMM register (no conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12131
12132 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
12134 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12135 Universe dummy, rFlagsReg cr)
12136 %{
12137 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12138 match(Set dummy (ClearArray cnt base));
12139 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
12140
12141 format %{ $$template
12142 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12143 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12144 $$emit$$"jg LARGE\n\t"
12145 $$emit$$"dec rcx\n\t"
12146 $$emit$$"js DONE\t# Zero length\n\t"
12147 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12148 $$emit$$"dec rcx\n\t"
12149 $$emit$$"jge LOOP\n\t"
12150 $$emit$$"jmp DONE\n\t"
12151 $$emit$$"# LARGE:\n\t"
12152 if (UseFastStosb) {
12153 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12154 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12155 } else if (UseXMMForObjInit) {
12156 $$emit$$"mov rdi,rax\n\t"
12157 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12158 $$emit$$"jmpq L_zero_64_bytes\n\t"
12159 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12167 $$emit$$"jl L_tail\n\t"
12168 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12169 $$emit$$"add 0x20,rax\n\t"
12170 $$emit$$"sub 0x4,rcx\n\t"
12171 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12172 $$emit$$"add 0x4,rcx\n\t"
12173 $$emit$$"jle L_end\n\t"
12174 $$emit$$"dec rcx\n\t"
12175 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12176 $$emit$$"vmovq xmm0,(rax)\n\t"
12177 $$emit$$"add 0x8,rax\n\t"
12178 $$emit$$"dec rcx\n\t"
12179 $$emit$$"jge L_sloop\n\t"
12180 $$emit$$"# L_end:\n\t"
12181 } else {
12182 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12183 }
12184 $$emit$$"# DONE"
12185 %}
12186 ins_encode %{
12187 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12188 $tmp$$XMMRegister, false, knoreg);
12189 %}
12190 ins_pipe(pipe_slow);
12191 %}
12192
12193 // Small non-constant length ClearArray for AVX512 targets.
12194 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12195 Universe dummy, rFlagsReg cr)
12196 %{
12197 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12198 match(Set dummy (ClearArray cnt base));
12199 ins_cost(125);
12200 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12201
12202 format %{ $$template
12203 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12204 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12205 $$emit$$"jg LARGE\n\t"
12206 $$emit$$"dec rcx\n\t"
12207 $$emit$$"js DONE\t# Zero length\n\t"
12208 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12209 $$emit$$"dec rcx\n\t"
12210 $$emit$$"jge LOOP\n\t"
12211 $$emit$$"jmp DONE\n\t"
12212 $$emit$$"# LARGE:\n\t"
12213 if (UseFastStosb) {
12214 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12215 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12216 } else if (UseXMMForObjInit) {
12217 $$emit$$"mov rdi,rax\n\t"
12218 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12219 $$emit$$"jmpq L_zero_64_bytes\n\t"
12220 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12228 $$emit$$"jl L_tail\n\t"
12229 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12230 $$emit$$"add 0x20,rax\n\t"
12231 $$emit$$"sub 0x4,rcx\n\t"
12232 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12233 $$emit$$"add 0x4,rcx\n\t"
12234 $$emit$$"jle L_end\n\t"
12235 $$emit$$"dec rcx\n\t"
12236 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12237 $$emit$$"vmovq xmm0,(rax)\n\t"
12238 $$emit$$"add 0x8,rax\n\t"
12239 $$emit$$"dec rcx\n\t"
12240 $$emit$$"jge L_sloop\n\t"
12241 $$emit$$"# L_end:\n\t"
12242 } else {
12243 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12244 }
12245 $$emit$$"# DONE"
12246 %}
12247 ins_encode %{
12248 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12249 $tmp$$XMMRegister, false, $ktmp$$KRegister);
12250 %}
12251 ins_pipe(pipe_slow);
12252 %}
12253
// Large non-constant length ClearArray for non-AVX512 targets.
// cnt is a count of 8-byte double-words; base/cnt/rax are consumed.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the large-array path; knoreg: no opmask register is
    // used on this non-AVX512 path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12304
// Large non-constant length ClearArray for AVX512 targets.
// Same as rep_stos_large but reserves an opmask register for masked stores.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the large-array path; the opmask temp enables masked
    // AVX512 stores in the macro assembler.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12355
// Small constant length ClearArray for AVX512 targets.
// The element count is a compile-time constant, passed as an immediate.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Requires 256-bit vectors and AVX512VL for the masked-store sequence.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12369
// Compare two Latin-1 (byte[]) strings; result in rax.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  // Non-AVX512VLBW variant; the kReg-using twin handles newer CPUs.
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12385
14218
14219 ins_cost(300);
14220 format %{ "call_leaf,runtime " %}
14221 ins_encode(clear_avx, Java_To_Runtime(meth));
14222 ins_pipe(pipe_slow);
14223 %}
14224
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Note: no clear_avx here — vector arguments must survive up to the call
  // (matches MachCallRuntimeNode::ret_addr_offset skipping clear_avx_size()).
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14236
// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // clear_avx emits a vzeroupper before the call when needed.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14248
14249 // Return Instruction
14250 // Remove the return address & jump to it.
14251 // Notice: We always emit a nop after a ret to make sure there is room
14252 // for safepoint patching
14253 instruct Ret()
14254 %{
14255 match(Return);
14256
14257 format %{ "ret" %}
14258 ins_encode %{
14259 __ ret(0);
|
598 }
599
// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset()
{
  // Direct call: 1 opcode byte + 4-byte displacement = 5 bytes.
  int offset = 5; // 5 bytes from start of call to where return address points
  // A vzeroupper may be emitted ahead of the call; include its size.
  offset += clear_avx_size();
  return offset;
}
609
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // Dynamic (inline-cache) call sequence is 15 bytes: presumably a 10-byte
  // movabs of the IC data plus the 5-byte call — confirm against emitter.
  int offset = 15; // 15 bytes from start of call to where return address points
  // Plus an optional vzeroupper emitted ahead of the call.
  offset += clear_avx_size();
  return offset;
}
616
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // CallLeafVector keeps vector state live, so no vzeroupper is emitted.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
628
629 //
630 // Compute padding required for nodes which need alignment
631 //
632
633 // The address of the call instruction needs to be 4-byte aligned to
634 // ensure that it does not span a cache line so that it can be patched.
635 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
636 {
637 current_offset += clear_avx_size(); // skip vzeroupper
638 current_offset += 1; // skip call opcode byte
639 return align_up(current_offset, alignment_required()) - current_offset;
640 }
641
642 // The address of the call instruction needs to be 4-byte aligned to
643 // ensure that it does not span a cache line so that it can be patched.
644 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
645 {
646 current_offset += clear_avx_size(); // skip vzeroupper
647 current_offset += 11; // skip movq instruction + call opcode byte
648 return align_up(current_offset, alignment_required()) - current_offset;
834 st->print("\n\t");
835 st->print("# stack alignment check");
836 #endif
837 }
838 if (C->stub_function() != nullptr) {
839 st->print("\n\t");
840 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
841 st->print("\n\t");
842 st->print("je fast_entry\t");
843 st->print("\n\t");
844 st->print("call #nmethod_entry_barrier_stub\t");
845 st->print("\n\tfast_entry:");
846 }
847 st->cr();
848 }
849 #endif
850
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Frame setup (stack bang, push frame, etc.) is delegated to the assembler.
  __ verified_entry(C);

  // Runtime stubs do not take the nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // Bind the verified-entry label (other entry points jump here), except
  // during the scratch sizing pass where labels must not be bound.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  // The frame is fully set up at this code offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
873
int MachPrologNode::reloc() const
{
  // Upper bound on relocation entries this node emits.
  return 0; // a large enough number
}
878
879 //=============================================================================
880 #ifndef PRODUCT
881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
882 {
883 Compile* C = ra_->C;
884 if (generate_vzeroupper(C)) {
885 st->print("vzeroupper");
886 st->cr(); st->print("\t");
887 }
888
889 int framesize = C->output()->frame_size_in_bytes();
890 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
891 // Remove word for return adr already pushed
892 // and RBP
893 framesize -= 2*wordSize;
900 st->print_cr("popq rbp");
901 if (do_polling() && C->is_method_compilation()) {
902 st->print("\t");
903 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
904 "ja #safepoint_stub\t"
905 "# Safepoint: poll for GC");
906 }
907 }
908 #endif
909
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-point safepoint poll; branches to a C2SafepointPollStub when a
    // safepoint is pending.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Don't register stubs during the scratch (sizing) emission pass.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
940
int MachEpilogNode::reloc() const
{
  // Upper bound on relocation entries (e.g. the return poll relocation).
  return 2; // a large enough number
}
945
const Pipeline* MachEpilogNode::pipeline() const
{
  // Use the generic pipeline class; no special scheduling for the epilog.
  return MachNode::pipeline_class();
}
950
951 //=============================================================================
952
// Register classes for spilled values — presumably consumed by the
// spill-copy implementation that follows (outside this view); confirm.
enum RC {
  rc_bad,    // not a register / no class
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
960
1522 #endif
1523
1524 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1525 {
1526 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1527 int reg = ra_->get_encode(this);
1528
1529 __ lea(as_Register(reg), Address(rsp, offset));
1530 }
1531
1532 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1533 {
1534 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1535 if (ra_->get_encode(this) > 15) {
1536 return (offset < 0x80) ? 6 : 9; // REX2
1537 } else {
1538 return (offset < 0x80) ? 5 : 8; // REX
1539 }
1540 }
1541
1542 //=============================================================================
1543 #ifndef PRODUCT
// Debug-only pretty print; the emitted sequence is too variable to list.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
1548 #endif
1549
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  // Remember the start offset for the alignment fix-up at the end.
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    // Unverified entry: just the inline cache check.
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build a temporary frame so unpacking can run, then tear it down.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Sizing pass: branch to a throwaway label, not the real entry.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
1584
1585 //=============================================================================
1586 #ifndef PRODUCT
1587 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1588 {
1589 if (UseCompressedClassPointers) {
1590 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1591 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1592 } else {
1593 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1594 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1595 }
1596 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1597 }
1598 #endif
1599
// Emit the Unverified Entry Point: an inline-cache klass check (see
// MachUEPNode::format above for the corresponding debug listing).
// InteriorEntryAlignment is passed as the alignment argument to ic_check.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1604
1605 //=============================================================================
1606
// Whether this platform supports passing/returning vectors in vector
// registers across calls; controlled entirely by the EnableVectorSupport flag.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
1610
1611 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1612 assert(EnableVectorSupport, "sanity");
1613 int lo = XMM0_num;
1614 int hi = XMM0b_num;
1615 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1616 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1617 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1618 return OptoRegPair(hi, lo);
1619 }
1620
1621 // Is this branch offset short enough that a short branch can be used?
1622 //
1623 // NOTE: If the platform does not provide any short branch variants, then
1624 // this method should return false for offset 0.
3059 %}
3060 %}
3061
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the index is provably non-negative: the predicate checks
  // the lower bound of the long type of the ConvI2L input.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3078
// Indirect Narrow Oop Operand
// A DecodeN used directly as an address: r12 (reserved heap base) plus the
// narrow oop shifted left by 3. Only valid when compressed oops use an
// 8-byte shift.
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
3094
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset"
// addressing without a base register, so r12 stays reserved even when
// CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3112
3113 // Indirect Memory Operand
3114 operand indirectNarrow(rRegN reg)
3421 equal(0x4, "e");
3422 not_equal(0x5, "ne");
3423 less(0x2, "b");
3424 greater_equal(0x3, "ae");
3425 less_equal(0x6, "be");
3426 greater(0x7, "a");
3427 overflow(0x0, "o");
3428 no_overflow(0x1, "no");
3429 %}
3430 %}
3431
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.

// Every addressing mode accepted by instructions that take a generic
// 'memory' operand, covering both regular and narrow-oop (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3445
3446 //----------PIPELINE-----------------------------------------------------------
3447 // Rules which define the behavior of the target architectures pipeline.
3448 pipeline %{
3449
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3461
6006 format %{ "MEMBAR-storestore (empty encoding)" %}
6007 ins_encode( );
6008 ins_pipe(empty);
6009 %}
6010
6011 //----------Move Instructions--------------------------------------------------
6012
// Reinterpret a long as a pointer: a plain register move, no bits change.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned src and dst to the same
    // register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6025
// Reinterpret an int as a narrow pointer: a 32-bit register move.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // Fix: the emitted instruction is a 32-bit movl (see ins_encode), so the
  // debug format should not claim "movq".
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    // Elide the move when src and dst share a register.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6038
// CastP2X applied to a narrow (compressed) pointer: move the raw narrow-oop
// bits into a long register. No decode is performed here.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when src and dst share a register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6051
// Reinterpret a pointer as a long: a plain register move, no bits change.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when src and dst share a register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6064
6065 // Convert oop into int for vectors alignment masking
6066 instruct convP2I(rRegI dst, rRegP src)
6067 %{
6068 match(Set dst (ConvL2I (CastP2X src)));
6069
6070 format %{ "movl $dst, $src\t# ptr -> int" %}
6071 ins_encode %{
12163 effect(DEF dst, USE src);
12164 ins_cost(100);
12165 format %{ "movd $dst,$src\t# MoveI2F" %}
12166 ins_encode %{
12167 __ movdl($dst$$XMMRegister, $src$$Register);
12168 %}
12169 ins_pipe( pipe_slow );
12170 %}
12171
// Reinterpret the raw 64 bits of a long as a double (GPR -> XMM move).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    // movdq is the 64-bit GPR<->XMM move helper; the format string shows
    // "movd", presumably matching the base mnemonic -- confirm against the
    // assembler.
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12182
12183
12184 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small (non-large) ClearArray, 64-bit stores allowed (not word-copy-only),
  // non-AVX512 targets.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xtmp, is_large = false, word_copy = false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
12244
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  // Small (non-large) ClearArray restricted to word copies, non-AVX512
  // targets.
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xtmp, is_large = false, word_copy = true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
12300
// Small non-constant length ClearArray for AVX512 targets.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  // Small (non-large) ClearArray, 64-bit stores allowed, AVX512 (UseAVX > 2);
  // uses an opmask register as an extra temp.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xtmp, is_large = false, word_copy = false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12361
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  // Small (non-large) ClearArray restricted to word copies, AVX512 targets.
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xtmp, is_large = false, word_copy = true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12421
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  // Large ClearArray, 64-bit stores allowed, non-AVX512 targets.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xtmp, is_large = true, word_copy = false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
12471
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  // Large ClearArray restricted to word copies, non-AVX512 targets.
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xtmp, is_large = true, word_copy = true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
12517
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  // Large ClearArray, 64-bit stores allowed, AVX512 targets; uses an opmask
  // register as an extra temp.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xtmp, is_large = true, word_copy = false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12568
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  // Large ClearArray restricted to word copies, AVX512 targets.
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, xtmp, is_large = true, word_copy = true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12618
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Count is a compile-time constant; requires 32-byte vectors and AVX512VL.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Constant-count overload of clear_mem.
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12633
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  // Latin1 x Latin1 string compare on targets without AVX512VLBW.
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register is used on this non-AVX512VLBW path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12649
14482
14483 ins_cost(300);
14484 format %{ "call_leaf,runtime " %}
14485 ins_encode(clear_avx, Java_To_Runtime(meth));
14486 ins_pipe(pipe_slow);
14487 %}
14488
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Note: unlike the other leaf-call instructs, no clear_avx is emitted here
  // -- presumably a vzeroupper would clobber the vector arguments. This is
  // consistent with MachCallRuntimeNode::ret_addr_offset() skipping
  // clear_avx_size() for Op_CallLeafVector.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14500
// Call runtime without safepoint
// entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Indirect variant: selected when the call node carries no static entry
  // point; the address to call is in a register.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
14516
instruct CallLeafNoFPDirect(method meth)
%{
  // Direct variant: selected when the call node has a static entry point.
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // clear_avx is emitted before the runtime call.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14528
14529 // Return Instruction
14530 // Remove the return address & jump to it.
14531 // Notice: We always emit a nop after a ret to make sure there is room
14532 // for safepoint patching
14533 instruct Ret()
14534 %{
14535 match(Return);
14536
14537 format %{ "ret" %}
14538 ins_encode %{
14539 __ ret(0);
|