598 }
599
600 // !!!!! Special hack to get all types of calls to specify the byte offset
601 // from the start of the call to the point where the return address
602 // will point.
603 int MachCallStaticJavaNode::ret_addr_offset()
604 {
605 int offset = 5; // 5 bytes from start of call to where return address points
606 offset += clear_avx_size();
607 return offset;
608 }
609
610 int MachCallDynamicJavaNode::ret_addr_offset()
611 {
612 int offset = 15; // 15 bytes from start of call to where return address points
613 offset += clear_avx_size();
614 return offset;
615 }
616
// Return-address offset for runtime calls: 10-byte movq of the absolute
// target into r10 plus a 3-byte indirect call through r10.
int MachCallRuntimeNode::ret_addr_offset() {
  int offset = 13; // movq r10,#addr; callq (r10)
  // Vector leaf calls are emitted without the vzeroupper prefix (their
  // arguments live in the vector registers), so don't count it.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
624 //
625 // Compute padding required for nodes which need alignment
626 //
627
628 // The address of the call instruction needs to be 4-byte aligned to
629 // ensure that it does not span a cache line so that it can be patched.
// Returns the number of padding bytes needed so that the call is 4-byte
// aligned for patching. Note that the one opcode byte is skipped first, so
// it is the 32-bit displacement that ends up on the 4-byte boundary.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}
636
637 // The address of the call instruction needs to be 4-byte aligned to
638 // ensure that it does not span a cache line so that it can be patched.
639 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
640 {
641 current_offset += clear_avx_size(); // skip vzeroupper
642 current_offset += 11; // skip movq instruction + call opcode byte
643 return align_up(current_offset, alignment_required()) - current_offset;
829 st->print("\n\t");
830 st->print("# stack alignment check");
831 #endif
832 }
833 if (C->stub_function() != nullptr) {
834 st->print("\n\t");
835 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
836 st->print("\n\t");
837 st->print("je fast_entry\t");
838 st->print("\n\t");
839 st->print("call #nmethod_entry_barrier_stub\t");
840 st->print("\n\tfast_entry:");
841 }
842 st->cr();
843 }
844 #endif
845
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  // Fast class-initialization check on entry: if the holder class is not
  // initialized for the current thread, jump to the wrong-method stub so the
  // call is re-resolved instead of entering this compiled code.
  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Build the frame; stack-bang only when the frame is large enough to need
  // it. NOTE(review): the last argument appears to mark stub compilations
  // (C->stub_function() != nullptr) -- confirm against verified_entry().
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  // From here on the frame is fully set up.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
878
// Size in bytes of the emitted prolog; defers to the generic implementation,
// which measures the actual emitted code.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
884
// Upper bound on the number of relocations the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
889
890 //=============================================================================
891 #ifndef PRODUCT
892 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
893 {
894 Compile* C = ra_->C;
895 if (generate_vzeroupper(C)) {
896 st->print("vzeroupper");
897 st->cr(); st->print("\t");
898 }
899
900 int framesize = C->output()->frame_size_in_bytes();
901 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
902 // Remove word for return adr already pushed
903 // and RBP
904 framesize -= 2*wordSize;
911 st->print_cr("popq rbp");
912 if (do_polling() && C->is_method_compilation()) {
913 st->print("\t");
914 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
915 "ja #safepoint_stub\t"
916 "# Safepoint: poll for GC");
917 }
918 }
919 #endif
920
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // Tear down the frame: pop locals/spills, then the saved rbp.
  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // Return poll: emit a safepoint poll before the ret. The out-of-line slow
  // path is a C2SafepointPollStub appended to the method; while sizing in a
  // scratch buffer only a dummy label is used.
  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
961
// Size in bytes of the emitted epilog; defers to the generic implementation,
// which measures the actual emitted code.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
967
// Upper bound on the number of relocations the epilog may emit
// (e.g. the poll_return relocation above).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
972
// Pipeline description for scheduling; the generic class suffices here.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
977
978 //=============================================================================
979
// Register-class tags used by the spill/copy code to select the right kind
// of move for a given register or stack location.
enum RC {
  rc_bad,   // not a valid register/stack location
  rc_int,   // general-purpose register
  rc_kreg,  // opmask (k) register
  rc_float, // XMM register
  rc_stack  // stack slot
};
987
1549 #endif
1550
// Materialize the stack address of this box (rsp + offset) into the
// register the allocator assigned to the node.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
1558
// Size in bytes of the lea emitted by BoxLockNode::emit above: prefix plus
// lea with either an 8-bit or a 32-bit displacement.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  // Register encodings above 15 need the 2-byte REX2 prefix instead of REX.
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
1568
1569 //=============================================================================
1570 #ifndef PRODUCT
1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1572 {
1573 if (UseCompressedClassPointers) {
1574 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1575 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1576 } else {
1577 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1578 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1579 }
1580 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1581 }
1582 #endif
1583
// Unverified entry point: emit the inline-cache check, padded/aligned per
// InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1588
// Size in bytes of the emitted UEP; defers to the generic implementation,
// which measures the actual emitted code.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1594
1595
1596 //=============================================================================
1597
// Vector calling convention availability is controlled solely by the
// EnableVectorSupport flag on x86_64.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
1601
1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1603 assert(EnableVectorSupport, "sanity");
1604 int lo = XMM0_num;
1605 int hi = XMM0b_num;
1606 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1607 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1608 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1609 return OptoRegPair(hi, lo);
1610 }
1611
1612 // Is this branch offset short enough that a short branch can be used?
1613 //
1614 // NOTE: If the platform does not provide any short branch variants, then
1615 // this method should return false for offset 0.
3047 %}
3048 %}
3049
3050 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is known non-negative (its long type has
  // _lo >= 0), so folding the ConvI2L into the address is value-preserving.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3066
3067 // Indirect Narrow Oop Plus Offset Operand
3068 // Note: x86 architecture doesn't support "scale * index + offset" without a base
3069 // we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when compressed oops use shift 3; R12 serves as the fixed
  // compressed-oop base register.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3084
3085 // Indirect Memory Operand
3086 operand indirectNarrow(rRegN reg)
3393 equal(0x4, "e");
3394 not_equal(0x5, "ne");
3395 less(0x2, "b");
3396 greater_equal(0x3, "ae");
3397 less_equal(0x6, "be");
3398 greater(0x7, "a");
3399 overflow(0x0, "o");
3400 no_overflow(0x1, "no");
3401 %}
3402 %}
3403
3404 //----------OPERAND CLASSES----------------------------------------------------
3405 // Operand Classes are groups of operands that are used as to simplify
3406 // instruction definitions by not requiring the AD writer to specify separate
3407 // instructions for every form of operand when the instruction accepts
3408 // multiple operand types with the same basic encoding and format. The classic
3409 // case of this is memory operands.
3410
// All addressing-mode operands accepted wherever an instruction declares a
// generic "memory" operand (plain and narrow-oop variants).
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3417
3418 //----------PIPELINE-----------------------------------------------------------
3419 // Rules which define the behavior of the target architectures pipeline.
3420 pipeline %{
3421
3422 //----------ATTRIBUTES---------------------------------------------------------
3423 attributes %{
3424 variable_size_instructions; // Fixed size instructions
3425 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
3426 instruction_unit_size = 1; // An instruction is 1 bytes long
3427 instruction_fetch_unit_size = 16; // The processor fetches one line
3428 instruction_fetch_units = 1; // of 16 bytes
3429
3430 // List of nop instructions
3431 nops( MachNop );
3432 %}
3433
5934 format %{ "MEMBAR-storestore (empty encoding)" %}
5935 ins_encode( );
5936 ins_pipe(empty);
5937 %}
5938
5939 //----------Move Instructions--------------------------------------------------
5940
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned src and dst the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5953
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned src and dst the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5966
5967 // Convert oop into int for vectors alignment masking
5968 instruct convP2I(rRegI dst, rRegP src)
5969 %{
5970 match(Set dst (ConvL2I (CastP2X src)));
5971
5972 format %{ "movl $dst, $src\t# ptr -> int" %}
5973 ins_encode %{
12185 effect(DEF dst, USE src);
12186 ins_cost(100);
12187 format %{ "movd $dst,$src\t# MoveI2F" %}
12188 ins_encode %{
12189 __ movdl($dst$$XMMRegister, $src$$Register);
12190 %}
12191 ins_pipe( pipe_slow );
12192 %}
12193
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    // Raw 64-bit GPR -> XMM bit move (reinterpretation, no conversion).
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12204
12205 // Fast clearing of an array
12206 // Small non-constant length ClearArray for non-AVX512 targets.
12207 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12208 Universe dummy, rFlagsReg cr)
12209 %{
12210 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12211 match(Set dummy (ClearArray cnt base));
12212 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
12213
12214 format %{ $$template
12215 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12216 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12217 $$emit$$"jg LARGE\n\t"
12218 $$emit$$"dec rcx\n\t"
12219 $$emit$$"js DONE\t# Zero length\n\t"
12220 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12221 $$emit$$"dec rcx\n\t"
12222 $$emit$$"jge LOOP\n\t"
12223 $$emit$$"jmp DONE\n\t"
12224 $$emit$$"# LARGE:\n\t"
12225 if (UseFastStosb) {
12226 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12227 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12228 } else if (UseXMMForObjInit) {
12229 $$emit$$"mov rdi,rax\n\t"
12230 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12231 $$emit$$"jmpq L_zero_64_bytes\n\t"
12232 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12240 $$emit$$"jl L_tail\n\t"
12241 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12242 $$emit$$"add 0x20,rax\n\t"
12243 $$emit$$"sub 0x4,rcx\n\t"
12244 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12245 $$emit$$"add 0x4,rcx\n\t"
12246 $$emit$$"jle L_end\n\t"
12247 $$emit$$"dec rcx\n\t"
12248 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12249 $$emit$$"vmovq xmm0,(rax)\n\t"
12250 $$emit$$"add 0x8,rax\n\t"
12251 $$emit$$"dec rcx\n\t"
12252 $$emit$$"jge L_sloop\n\t"
12253 $$emit$$"# L_end:\n\t"
12254 } else {
12255 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12256 }
12257 $$emit$$"# DONE"
12258 %}
12259 ins_encode %{
12260 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12261 $tmp$$XMMRegister, false, knoreg);
12262 %}
12263 ins_pipe(pipe_slow);
12264 %}
12265
12266 // Small non-constant length ClearArray for AVX512 targets.
12267 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12268 Universe dummy, rFlagsReg cr)
12269 %{
12270 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12271 match(Set dummy (ClearArray cnt base));
12272 ins_cost(125);
12273 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12274
12275 format %{ $$template
12276 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12277 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12278 $$emit$$"jg LARGE\n\t"
12279 $$emit$$"dec rcx\n\t"
12280 $$emit$$"js DONE\t# Zero length\n\t"
12281 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12282 $$emit$$"dec rcx\n\t"
12283 $$emit$$"jge LOOP\n\t"
12284 $$emit$$"jmp DONE\n\t"
12285 $$emit$$"# LARGE:\n\t"
12286 if (UseFastStosb) {
12287 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12288 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12289 } else if (UseXMMForObjInit) {
12290 $$emit$$"mov rdi,rax\n\t"
12291 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12292 $$emit$$"jmpq L_zero_64_bytes\n\t"
12293 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12301 $$emit$$"jl L_tail\n\t"
12302 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12303 $$emit$$"add 0x20,rax\n\t"
12304 $$emit$$"sub 0x4,rcx\n\t"
12305 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12306 $$emit$$"add 0x4,rcx\n\t"
12307 $$emit$$"jle L_end\n\t"
12308 $$emit$$"dec rcx\n\t"
12309 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12310 $$emit$$"vmovq xmm0,(rax)\n\t"
12311 $$emit$$"add 0x8,rax\n\t"
12312 $$emit$$"dec rcx\n\t"
12313 $$emit$$"jge L_sloop\n\t"
12314 $$emit$$"# L_end:\n\t"
12315 } else {
12316 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12317 }
12318 $$emit$$"# DONE"
12319 %}
12320 ins_encode %{
12321 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12322 $tmp$$XMMRegister, false, $ktmp$$KRegister);
12323 %}
12324 ins_pipe(pipe_slow);
12325 %}
12326
12327 // Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // The template below is only the debug listing; the real code is produced
  // by clear_mem() in ins_encode. It mirrors the three strategies there:
  // rep stosb, XMM/YMM stores, or rep stosq.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, no opmask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12377
12378 // Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // Debug listing only; the real code is produced by clear_mem() in
  // ins_encode (AVX512 variant: passes an opmask temp register).
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, with opmask temp for the AVX512 path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12428
12429 // Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Constant-length variant; needs an opmask temp (ktmp), hence the
  // AVX-512 VL predicate, plus at least 32-byte vectors.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // $cnt is an immediate here ($$constant), unlike the register variants.
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12442
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  // Latin1 x Latin1 compare on pre-AVX512VLBW hardware (the AVX512 variant
  // is selected elsewhere when supports_avx512vlbw() holds).
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12458
14291
14292 ins_cost(300);
14293 format %{ "call_leaf,runtime " %}
14294 ins_encode(clear_avx, Java_To_Runtime(meth));
14295 ins_pipe(pipe_slow);
14296 %}
14297
14298 // Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Unlike the plain leaf call, no clear_avx here -- this matches the
  // Op_CallLeafVector special case in MachCallRuntimeNode::ret_addr_offset.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14309
14310 // Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // clear_avx first (vzeroupper), then the movq/callq runtime-call sequence.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14321
14322 // Return Instruction
14323 // Remove the return address & jump to it.
14324 // Notice: We always emit a nop after a ret to make sure there is room
14325 // for safepoint patching
14326 instruct Ret()
14327 %{
14328 match(Return);
14329
14330 format %{ "ret" %}
14331 ins_encode %{
14332 __ ret(0);
|
598 }
599
600 // !!!!! Special hack to get all types of calls to specify the byte offset
601 // from the start of the call to the point where the return address
602 // will point.
603 int MachCallStaticJavaNode::ret_addr_offset()
604 {
605 int offset = 5; // 5 bytes from start of call to where return address points
606 offset += clear_avx_size();
607 return offset;
608 }
609
610 int MachCallDynamicJavaNode::ret_addr_offset()
611 {
612 int offset = 15; // 15 bytes from start of call to where return address points
613 offset += clear_avx_size();
614 return offset;
615 }
616
// Return-address offset for runtime calls: 10-byte movq of the absolute
// target into r10 plus a 3-byte indirect call through r10, unless the call
// is fully indirect (no static entry point).
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // Vector leaf calls are emitted without the vzeroupper prefix, so don't
  // count it for them.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
628
629 //
630 // Compute padding required for nodes which need alignment
631 //
632
633 // The address of the call instruction needs to be 4-byte aligned to
634 // ensure that it does not span a cache line so that it can be patched.
// Returns the number of padding bytes needed so that the call is 4-byte
// aligned for patching. Note that the one opcode byte is skipped first, so
// it is the 32-bit displacement that ends up on the 4-byte boundary.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}
641
642 // The address of the call instruction needs to be 4-byte aligned to
643 // ensure that it does not span a cache line so that it can be patched.
644 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
645 {
646 current_offset += clear_avx_size(); // skip vzeroupper
647 current_offset += 11; // skip movq instruction + call opcode byte
648 return align_up(current_offset, alignment_required()) - current_offset;
834 st->print("\n\t");
835 st->print("# stack alignment check");
836 #endif
837 }
838 if (C->stub_function() != nullptr) {
839 st->print("\n\t");
840 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
841 st->print("\n\t");
842 st->print("je fast_entry\t");
843 st->print("\n\t");
844 st->print("call #nmethod_entry_barrier_stub\t");
845 st->print("\n\tfast_entry:");
846 }
847 st->cr();
848 }
849 #endif
850
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Build the frame for this compilation (details inside verified_entry).
  __ verified_entry(C);

  // Stub compilations do not get an nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // Bind the label that MachVEPNode::emit jumps to after unpacking inline
  // type args; skipped while sizing code in a scratch buffer.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  // From here on the frame is fully set up.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
873
// Upper bound on the number of relocations the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
878
879 //=============================================================================
880 #ifndef PRODUCT
881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
882 {
883 Compile* C = ra_->C;
884 if (generate_vzeroupper(C)) {
885 st->print("vzeroupper");
886 st->cr(); st->print("\t");
887 }
888
889 int framesize = C->output()->frame_size_in_bytes();
890 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
891 // Remove word for return adr already pushed
892 // and RBP
893 framesize -= 2*wordSize;
900 st->print_cr("popq rbp");
901 if (do_polling() && C->is_method_compilation()) {
902 st->print("\t");
903 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
904 "ja #safepoint_stub\t"
905 "# Safepoint: poll for GC");
906 }
907 }
908 #endif
909
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  // remove_frame also handles stack repair for inline-type calling convention.
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // Return poll: emit a safepoint poll before the ret. The out-of-line slow
  // path is a C2SafepointPollStub appended to the method; while sizing in a
  // scratch buffer only a dummy label is used.
  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
940
// Upper bound on the number of relocations the epilog may emit
// (e.g. the poll_return relocation above).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
945
// Pipeline description for scheduling; the generic class suffices here.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
950
951 //=============================================================================
952
// Register-class tags used by the spill/copy code to select the right kind
// of move for a given register or stack location.
enum RC {
  rc_bad,   // not a valid register/stack location
  rc_int,   // general-purpose register
  rc_kreg,  // opmask (k) register
  rc_float, // XMM register
  rc_stack  // stack slot
};
960
1522 #endif
1523
// Materialize the stack address of this box (rsp + offset) into the
// register the allocator assigned to the node.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
1531
// Size in bytes of the lea emitted by BoxLockNode::emit above: prefix plus
// lea with either an 8-bit or a 32-bit displacement.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  // Register encodings above 15 need the 2-byte REX2 prefix instead of REX.
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
1541
1542 //=============================================================================
1543 #ifndef PRODUCT
// Debug-only placeholder listing; see MachVEPNode::emit for the actual code.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
1548 #endif
1549
// Value-type entry point: either the unverified entry (inline-cache check)
// or the verified entry that unpacks inline-type arguments passed as oops
// and jumps to the regular verified entry bound in MachPrologNode::emit.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      // Tear the temporary frame back down before unpacking.
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    // During scratch-buffer sizing the real label is unavailable; jump to a
    // dummy so the emitted size still matches.
    if (Compile::current()->output()->in_scratch_emit_size()) {
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
1584
1585 //=============================================================================
1586 #ifndef PRODUCT
1587 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1588 {
1589 if (UseCompressedClassPointers) {
1590 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1591 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1592 } else {
1593 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1594 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1595 }
1596 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1597 }
1598 #endif
1599
// Emit the Unverified Entry Point: an inline-cache check that compares the
// receiver's klass with the speculated klass from the CompiledICData and
// jumps to the ic-miss stub on mismatch (see format() above for the layout).
// The argument is the alignment passed to ic_check().
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1604
1605 //=============================================================================
1606
// True when C2 may use the vector calling convention (vector values passed
// and returned in vector registers); controlled by the EnableVectorSupport flag.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
1610
1611 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1612 assert(EnableVectorSupport, "sanity");
1613 int lo = XMM0_num;
1614 int hi = XMM0b_num;
1615 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1616 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1617 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1618 return OptoRegPair(hi, lo);
1619 }
1620
1621 // Is this branch offset short enough that a short branch can be used?
1622 //
1623 // NOTE: If the platform does not provide any short branch variants, then
1624 // this method should return false for offset 0.
3056 %}
3057 %}
3058
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches base + (int_index << scale) + disp32, but only when the index is
// provably non-negative (type lo bound >= 0) so the ConvI2L can be folded
// safely into the addressing mode.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Dig down to the ConvI2L input's type and require a non-negative range.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3075
// Indirect Narrow Oop Operand
// Addresses a compressed oop directly: [R12 + narrow_oop << 3], where R12
// holds the heap base. Only valid when the compressed-oop shift is 3
// (Address::times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
3091
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Same as indCompressedOop but with an additional 32-bit displacement.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3109
3110 // Indirect Memory Operand
3111 operand indirectNarrow(rRegN reg)
3418 equal(0x4, "e");
3419 not_equal(0x5, "ne");
3420 less(0x2, "b");
3421 greater_equal(0x3, "ae");
3422 less_equal(0x6, "be");
3423 greater(0x7, "a");
3424 overflow(0x0, "o");
3425 no_overflow(0x1, "no");
3426 %}
3427 %}
3428
3429 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
3431 // instruction definitions by not requiring the AD writer to specify separate
3432 // instructions for every form of operand when the instruction accepts
3433 // multiple operand types with the same basic encoding and format. The classic
3434 // case of this is memory operands.
3435
// All memory-operand flavors accepted by instructions that take a generic
// "memory" operand, including the narrow-oop (compressed) addressing forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3442
3443 //----------PIPELINE-----------------------------------------------------------
3444 // Rules which define the behavior of the target architectures pipeline.
3445 pipeline %{
3446
3447 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 has no fixed width)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3458
5959 format %{ "MEMBAR-storestore (empty encoding)" %}
5960 ins_encode( );
5961 ins_pipe(empty);
5962 %}
5963
5964 //----------Move Instructions--------------------------------------------------
5965
// Reinterpret a long as a pointer: pure register move, no bits change.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // The move is elided when register allocation put dst and src in the
    // same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5978
// Reinterpret an int as a narrow (compressed) pointer: 32-bit register move,
// no bits change.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // The encoding emits a 32-bit move (movl), so the disassembly format lists
  // movl rather than movq (which previously mismatched the emitted code).
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    // The move is elided when register allocation put dst and src in the
    // same register.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5991
// Reinterpret a narrow (compressed) pointer as a long: pure register move.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // The move is elided when register allocation put dst and src in the
    // same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6004
// Reinterpret a pointer as a long: pure register move, no bits change.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // The move is elided when register allocation put dst and src in the
    // same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6017
6018 // Convert oop into int for vectors alignment masking
6019 instruct convP2I(rRegI dst, rRegP src)
6020 %{
6021 match(Set dst (ConvL2I (CastP2X src)));
6022
6023 format %{ "movl $dst, $src\t# ptr -> int" %}
6024 ins_encode %{
12236 effect(DEF dst, USE src);
12237 ins_cost(100);
12238 format %{ "movd $dst,$src\t# MoveI2F" %}
12239 ins_encode %{
12240 __ movdl($dst$$XMMRegister, $src$$Register);
12241 %}
12242 ins_pipe( pipe_slow );
12243 %}
12244
// Move the raw 64-bit pattern of a long into a double register (MoveL2D):
// bitwise GPR -> XMM transfer via movdq, no numeric conversion.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12255
12256
12257 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Small (not known-large) non-constant-length ClearArray for pre-AVX512
// targets (UseAVX <= 2). 'val' in rax is the 64-bit fill value; the work is
// done by MacroAssembler::clear_mem.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Trailing bools select the variant: not large, not word-copy-only
    // (cf. the *_large / *_word_copy rules below).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
12317
// Same as rep_stos but for ClearArray nodes flagged word_copy_only(): note
// the format has no rep-stosb path, only word-sized stores.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Trailing bools: not large, word-copy-only.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
12373
// Small non-constant length ClearArray for AVX512 targets.
// Uses an opmask temp register (ktmp) in addition to the XMM temp.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Trailing args: not large, not word-copy-only, with AVX512 opmask temp.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12434
// AVX512 variant of rep_stos for ClearArray nodes flagged word_copy_only().
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Trailing args: not large, word-copy-only, with AVX512 opmask temp.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12494
// Large non-constant length ClearArray for non-AVX512 targets.
// No small-length fast path: the node is known large, so it goes straight
// to the bulk store loop.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing bools: is large, not word-copy-only.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
12544
// Large ClearArray, word-copy-only flavor for non-AVX512 targets: note the
// format has no rep-stosb path, only word-sized stores.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing bools: is large, word-copy-only.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
12590
// Large non-constant length ClearArray for AVX512 targets.
// Uses an opmask temp register (ktmp) in addition to the XMM temp.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing args: is large, not word-copy-only, with AVX512 opmask temp.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12641
// AVX512 variant of rep_stos_large for ClearArray nodes flagged
// word_copy_only().
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Trailing args: is large, word-copy-only, with AVX512 opmask temp.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12691
// Small constant length ClearArray for AVX512 targets.
// The length is a compile-time immediate ($cnt$$constant), so clear_mem can
// emit a fully unrolled masked-store sequence. Requires AVX512VL and
// MaxVectorSize >= 32.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Immediate-count overload of clear_mem.
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12706
// String compare, both operands byte[] (StrIntrinsicNode::LL encoding).
// Fixed-register intrinsic: inputs in rdi/rcx/rsi/rdx, result in rax.
// The predicate excludes AVX512VLBW CPUs, which presumably match a separate
// evex variant elsewhere in this file.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register needed on the non-evex path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12722
14555
14556 ins_cost(300);
14557 format %{ "call_leaf,runtime " %}
14558 ins_encode(clear_avx, Java_To_Runtime(meth));
14559 ins_pipe(pipe_slow);
14560 %}
14561
// Call runtime without safepoint and with vector arguments
// Note: deliberately does NOT emit clear_avx (vzeroupper) before the call,
// since vector arguments/results live in the wide registers; this matches
// MachCallRuntimeNode::ret_addr_offset, which skips clear_avx_size() for
// Op_CallLeafVector.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14573
// Call runtime without safepoint
// entry point is null, target holds the address to call
// Indirect form: used when the call target is only known at run time and is
// passed in a register.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
14589
// Direct leaf runtime call (no safepoint, no FP arguments) to a statically
// known entry point; clear_avx emits vzeroupper before the call.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14601
14602 // Return Instruction
14603 // Remove the return address & jump to it.
14604 // Notice: We always emit a nop after a ret to make sure there is room
14605 // for safepoint patching
14606 instruct Ret()
14607 %{
14608 match(Return);
14609
14610 format %{ "ret" %}
14611 ins_encode %{
14612 __ ret(0);
|