586 }
587
588 // !!!!! Special hack to get all types of calls to specify the byte offset
589 // from the start of the call to the point where the return address
590 // will point.
591 int MachCallStaticJavaNode::ret_addr_offset()
592 {
593 int offset = 5; // 5 bytes from start of call to where return address points
594 offset += clear_avx_size();
595 return offset;
596 }
597
598 int MachCallDynamicJavaNode::ret_addr_offset()
599 {
600 int offset = 15; // 15 bytes from start of call to where return address points
601 offset += clear_avx_size();
602 return offset;
603 }
604
605 int MachCallRuntimeNode::ret_addr_offset() {
606 int offset = 13; // movq r10,#addr; callq (r10)
607 if (this->ideal_Opcode() != Op_CallLeafVector) {
608 offset += clear_avx_size();
609 }
610 return offset;
611 }
612 //
613 // Compute padding required for nodes which need alignment
614 //
615
616 // The address of the call instruction needs to be 4-byte aligned to
617 // ensure that it does not span a cache line so that it can be patched.
618 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
619 {
620 current_offset += clear_avx_size(); // skip vzeroupper
621 current_offset += 1; // skip call opcode byte
622 return align_up(current_offset, alignment_required()) - current_offset;
623 }
624
625 // The address of the call instruction needs to be 4-byte aligned to
626 // ensure that it does not span a cache line so that it can be patched.
627 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
628 {
629 current_offset += clear_avx_size(); // skip vzeroupper
630 current_offset += 11; // skip movq instruction + call opcode byte
631 return align_up(current_offset, alignment_required()) - current_offset;
818 st->print("\n\t");
819 st->print("# stack alignment check");
820 #endif
821 }
822 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
823 st->print("\n\t");
824 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
825 st->print("\n\t");
826 st->print("je fast_entry\t");
827 st->print("\n\t");
828 st->print("call #nmethod_entry_barrier_stub\t");
829 st->print("\n\tfast_entry:");
830 }
831 st->cr();
832 }
833 #endif
834
// Emit the method prologue: optional class-initialization barrier,
// stack bang + frame setup, and constant-table base bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Ensure the holder class is initialized before running the body;
    // uninitialized callers are rerouted to the wrong-method stub.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Bang the stack (if needed) and build the frame; the last argument
  // flags stub compilations.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
867
// Prologue size varies (clinit barrier, stack bang, ...), so measure it
// by emitting into a scratch buffer via the generic path.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// Estimated number of relocation entries contributed by the prologue.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
878
879 //=============================================================================
880 #ifndef PRODUCT
881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
882 {
883 Compile* C = ra_->C;
884 if (generate_vzeroupper(C)) {
885 st->print("vzeroupper");
886 st->cr(); st->print("\t");
887 }
888
889 int framesize = C->output()->frame_size_in_bytes();
890 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
891 // Remove word for return adr already pushed
892 // and RBP
893 framesize -= 2*wordSize;
900 st->print_cr("popq rbp");
901 if (do_polling() && C->is_method_compilation()) {
902 st->print("\t");
903 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
904 "ja #safepoint_stub\t"
905 "# Safepoint: poll for GC");
906 }
907 }
908 #endif
909
// Emit the method epilogue: tear down the frame, optionally check the
// reserved stack pages, and emit the return-path safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-path safepoint poll; the out-of-line stub is only created
    // when we are really emitting, not when measuring code size.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
950
// Epilogue size varies (vzeroupper, frame pop, poll), so measure it by
// emitting into a scratch buffer via the generic path.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// Estimated number of relocation entries (the poll relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}

const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
966
967 //=============================================================================
968
// Classification of a value's location, used below when choosing how to
// emit spill/copy moves between register files and stack slots.
enum RC {
  rc_bad,   // not a usable location
  rc_int,   // general purpose register
  rc_kreg,  // k (opmask) register -- AVX-512
  rc_float, // XMM register
  rc_stack  // stack slot
};
976
1534 int reg = ra_->get_reg_first(this);
1535 st->print("leaq %s, [rsp + #%d]\t# box lock",
1536 Matcher::regName[reg], offset);
1537 }
1538 #endif
1539
// Materialize the address of the on-stack lock box into the assigned
// register: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}

// Size of the lea above: 5 bytes with a disp8, 8 bytes with a disp32.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  return (offset < 0x80) ? 5 : 8; // REX
}
1553
1554 //=============================================================================
1555 #ifndef PRODUCT
// Debug printout of the unverified entry point: the inline cache check
// comparing the receiver's klass against the speculated klass.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  if (UseCompressedClassPointers) {
    st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  } else {
    st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  }
  st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
}
1567 #endif
1568
// Emit the inline-cache check; on a klass mismatch it jumps to the
// ic-miss stub (see format() above for the shape of the code).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1579
1580
1581 //=============================================================================
1582
1583 bool Matcher::supports_vector_calling_convention(void) {
1584 if (EnableVectorSupport && UseVectorStubs) {
1585 return true;
1586 }
1587 return false;
1588 }
1589
1590 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1591 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1592 int lo = XMM0_num;
1593 int hi = XMM0b_num;
1594 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1595 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1596 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1597 return OptoRegPair(hi, lo);
1598 }
1599
1600 // Is this branch offset short enough that a short branch can be used?
3043 %}
3044 %}
3045
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches [reg + off + (idx << scale)] where the int index, converted to
// long, is known non-negative (see predicate) -- presumably so the
// addressing-mode extension of the index is safe; TODO confirm.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3062
3063 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset"
// addressing without a base register, so R12 cannot be freed even when
// CompressedOops::base() == nullptr.
// Compressed-oop addressing [R12 + narrow_oop << 3 + off]; R12 holds the
// heap base. Only valid when the decode shift is 3 (times_8).
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3080
3081 // Indirect Memory Operand
3082 operand indirectNarrow(rRegN reg)
3389 equal(0x4, "e");
3390 not_equal(0x5, "ne");
3391 less(0x2, "b");
3392 greater_equal(0x3, "ae");
3393 less_equal(0x6, "be");
3394 greater(0x7, "a");
3395 overflow(0x0, "o");
3396 no_overflow(0x1, "no");
3397 %}
3398 %}
3399
3400 //----------OPERAND CLASSES----------------------------------------------------
3401 // Operand Classes are groups of operands that are used as to simplify
3402 // instruction definitions by not requiring the AD writer to specify separate
3403 // instructions for every form of operand when the instruction accepts
3404 // multiple operand types with the same basic encoding and format. The classic
3405 // case of this is memory operands.
3406
// All addressing modes accepted by memory-form instructions, including
// the narrow-oop (compressed) variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3413
3414 //----------PIPELINE-----------------------------------------------------------
3415 // Rules which define the behavior of the target architectures pipeline.
3416 pipeline %{
3417
3418 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // x86 instructions are variable length
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3429
5920 format %{ "MEMBAR-storestore (empty encoding)" %}
5921 ins_encode( );
5922 ins_pipe(empty);
5923 %}
5924
5925 //----------Move Instructions--------------------------------------------------
5926
// Reinterpret a machine word (long) as a pointer: at most a register
// move, elided entirely when src and dst share a register.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5939
// Reinterpret a pointer as a machine word (long): at most a register
// move, elided entirely when src and dst share a register.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5952
5953 // Convert oop into int for vectors alignment masking
5954 instruct convP2I(rRegI dst, rRegP src)
5955 %{
5956 match(Set dst (ConvL2I (CastP2X src)));
5957
5958 format %{ "movl $dst, $src\t# ptr -> int" %}
5959 ins_encode %{
10479 effect(DEF dst, USE src);
10480 ins_cost(100);
10481 format %{ "movd $dst,$src\t# MoveI2F" %}
10482 ins_encode %{
10483 __ movdl($dst$$XMMRegister, $src$$Register);
10484 %}
10485 ins_pipe( pipe_slow );
10486 %}
10487
// Raw bit move GPR -> XMM (no numeric conversion) for Long.doubleBits-style
// reinterpretation.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10498
10499 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
10501 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10502 Universe dummy, rFlagsReg cr)
10503 %{
10504 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
10505 match(Set dummy (ClearArray cnt base));
10506 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10507
10508 format %{ $$template
10509 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10510 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10511 $$emit$$"jg LARGE\n\t"
10512 $$emit$$"dec rcx\n\t"
10513 $$emit$$"js DONE\t# Zero length\n\t"
10514 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10515 $$emit$$"dec rcx\n\t"
10516 $$emit$$"jge LOOP\n\t"
10517 $$emit$$"jmp DONE\n\t"
10518 $$emit$$"# LARGE:\n\t"
10519 if (UseFastStosb) {
10520 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10521 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10522 } else if (UseXMMForObjInit) {
10523 $$emit$$"mov rdi,rax\n\t"
10524 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10525 $$emit$$"jmpq L_zero_64_bytes\n\t"
10526 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10534 $$emit$$"jl L_tail\n\t"
10535 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10536 $$emit$$"add 0x20,rax\n\t"
10537 $$emit$$"sub 0x4,rcx\n\t"
10538 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10539 $$emit$$"add 0x4,rcx\n\t"
10540 $$emit$$"jle L_end\n\t"
10541 $$emit$$"dec rcx\n\t"
10542 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10543 $$emit$$"vmovq xmm0,(rax)\n\t"
10544 $$emit$$"add 0x8,rax\n\t"
10545 $$emit$$"dec rcx\n\t"
10546 $$emit$$"jge L_sloop\n\t"
10547 $$emit$$"# L_end:\n\t"
10548 } else {
10549 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10550 }
10551 $$emit$$"# DONE"
10552 %}
10553 ins_encode %{
10554 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10555 $tmp$$XMMRegister, false, knoreg);
10556 %}
10557 ins_pipe(pipe_slow);
10558 %}
10559
10560 // Small non-constant length ClearArray for AVX512 targets.
10561 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10562 Universe dummy, rFlagsReg cr)
10563 %{
10564 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
10565 match(Set dummy (ClearArray cnt base));
10566 ins_cost(125);
10567 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10568
10569 format %{ $$template
10570 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10571 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10572 $$emit$$"jg LARGE\n\t"
10573 $$emit$$"dec rcx\n\t"
10574 $$emit$$"js DONE\t# Zero length\n\t"
10575 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10576 $$emit$$"dec rcx\n\t"
10577 $$emit$$"jge LOOP\n\t"
10578 $$emit$$"jmp DONE\n\t"
10579 $$emit$$"# LARGE:\n\t"
10580 if (UseFastStosb) {
10581 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10582 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10583 } else if (UseXMMForObjInit) {
10584 $$emit$$"mov rdi,rax\n\t"
10585 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10586 $$emit$$"jmpq L_zero_64_bytes\n\t"
10587 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10595 $$emit$$"jl L_tail\n\t"
10596 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10597 $$emit$$"add 0x20,rax\n\t"
10598 $$emit$$"sub 0x4,rcx\n\t"
10599 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10600 $$emit$$"add 0x4,rcx\n\t"
10601 $$emit$$"jle L_end\n\t"
10602 $$emit$$"dec rcx\n\t"
10603 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10604 $$emit$$"vmovq xmm0,(rax)\n\t"
10605 $$emit$$"add 0x8,rax\n\t"
10606 $$emit$$"dec rcx\n\t"
10607 $$emit$$"jge L_sloop\n\t"
10608 $$emit$$"# L_end:\n\t"
10609 } else {
10610 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10611 }
10612 $$emit$$"# DONE"
10613 %}
10614 ins_encode %{
10615 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10616 $tmp$$XMMRegister, false, $ktmp$$KRegister);
10617 %}
10618 ins_pipe(pipe_slow);
10619 %}
10620
// Large non-constant length ClearArray for non-AVX512 targets.
// The $$template below only documents the generated code; the actual
// emission is one MacroAssembler::clear_mem() call with is_large=true.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
10671
// Large non-constant length ClearArray for AVX512 targets.
// Same shape as rep_stos_large, but reserves an opmask temp (ktmp) for
// the AVX-512 code path inside MacroAssembler::clear_mem().
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10722
// Small constant length ClearArray for AVX512 targets.
// The length is a compile-time constant, so clear_mem() can emit a
// fully unrolled, masked zeroing sequence.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10736
// Latin1 x Latin1 (byte[]) string comparison for non-AVX512VLBW targets;
// a sibling rule with an opmask temp handles the AVX-512 case.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
10752
12515
12516 ins_cost(300);
12517 format %{ "call_leaf,runtime " %}
12518 ins_encode(clear_avx, Java_To_Runtime(meth));
12519 ins_pipe(pipe_slow);
12520 %}
12521
// Call runtime without safepoint and with vector arguments
// Note: unlike the scalar leaf calls this does NOT emit clear_avx --
// the vector arguments live in the wide registers across the call.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12533
// Call runtime without safepoint
// Leaf call that does not need an FP environment; still clears the AVX
// upper state before transferring to the runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12545
12546 // Return Instruction
12547 // Remove the return address & jump to it.
12548 // Notice: We always emit a nop after a ret to make sure there is room
12549 // for safepoint patching
12550 instruct Ret()
12551 %{
12552 match(Return);
12553
12554 format %{ "ret" %}
12555 ins_encode %{
12556 __ ret(0);
|
586 }
587
588 // !!!!! Special hack to get all types of calls to specify the byte offset
589 // from the start of the call to the point where the return address
590 // will point.
591 int MachCallStaticJavaNode::ret_addr_offset()
592 {
593 int offset = 5; // 5 bytes from start of call to where return address points
594 offset += clear_avx_size();
595 return offset;
596 }
597
598 int MachCallDynamicJavaNode::ret_addr_offset()
599 {
600 int offset = 15; // 15 bytes from start of call to where return address points
601 offset += clear_avx_size();
602 return offset;
603 }
604
// Byte offset from the start of a runtime call to the point the return
// address refers to; handles both indirect-through-register and the
// usual movq+callq forms.
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    // Vector leaf calls keep AVX state; everything else may need vzeroupper.
    offset += clear_avx_size();
  }
  return offset;
}
616
617 //
618 // Compute padding required for nodes which need alignment
619 //
620
621 // The address of the call instruction needs to be 4-byte aligned to
622 // ensure that it does not span a cache line so that it can be patched.
623 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
624 {
625 current_offset += clear_avx_size(); // skip vzeroupper
626 current_offset += 1; // skip call opcode byte
627 return align_up(current_offset, alignment_required()) - current_offset;
628 }
629
630 // The address of the call instruction needs to be 4-byte aligned to
631 // ensure that it does not span a cache line so that it can be patched.
632 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
633 {
634 current_offset += clear_avx_size(); // skip vzeroupper
635 current_offset += 11; // skip movq instruction + call opcode byte
636 return align_up(current_offset, alignment_required()) - current_offset;
823 st->print("\n\t");
824 st->print("# stack alignment check");
825 #endif
826 }
827 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
828 st->print("\n\t");
829 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
830 st->print("\n\t");
831 st->print("je fast_entry\t");
832 st->print("\n\t");
833 st->print("call #nmethod_entry_barrier_stub\t");
834 st->print("\n\tfast_entry:");
835 }
836 st->cr();
837 }
838 #endif
839
// Emit the method prologue: frame setup, nmethod entry barrier for
// non-stub compilations, and constant-table base bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Stubs don't need the nmethod entry barrier.
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the label only when really emitting (a label cannot be bound
    // during the scratch size-measurement pass).
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
862
// Estimated number of relocation entries contributed by the prologue.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
867
868 //=============================================================================
869 #ifndef PRODUCT
870 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
871 {
872 Compile* C = ra_->C;
873 if (generate_vzeroupper(C)) {
874 st->print("vzeroupper");
875 st->cr(); st->print("\t");
876 }
877
878 int framesize = C->output()->frame_size_in_bytes();
879 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
880 // Remove word for return adr already pushed
881 // and RBP
882 framesize -= 2*wordSize;
889 st->print_cr("popq rbp");
890 if (do_polling() && C->is_method_compilation()) {
891 st->print("\t");
892 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
893 "ja #safepoint_stub\t"
894 "# Safepoint: poll for GC");
895 }
896 }
897 #endif
898
// Emit the method epilogue: tear down the frame (with stack repair if
// needed), optionally check the reserved stack pages, and emit the
// return-path safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-path safepoint poll; the out-of-line stub is only created
    // when we are really emitting, not when measuring code size.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
929
// Estimated number of relocation entries (the poll relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}

const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
939
940 //=============================================================================
941
// Classification of a value's location, used below when choosing how to
// emit spill/copy moves between register files and stack slots.
enum RC {
  rc_bad,   // not a usable location
  rc_int,   // general purpose register
  rc_kreg,  // k (opmask) register -- AVX-512
  rc_float, // XMM register
  rc_stack  // stack slot
};
949
1507 int reg = ra_->get_reg_first(this);
1508 st->print("leaq %s, [rsp + #%d]\t# box lock",
1509 Matcher::regName[reg], offset);
1510 }
1511 #endif
1512
// Materialize the address of the on-stack lock box into the assigned
// register: lea reg, [rsp + offset].
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}

// Size of the lea above: 5 bytes with a disp8, 8 bytes with a disp32.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  return (offset < 0x80) ? 5 : 8; // REX
}
1526
1527 //=============================================================================
1528 #ifndef PRODUCT
// Debug printout placeholder for the inline-type (value) entry point.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
1533 #endif
1534
// Emit the value-type entry point: either the unverified inline-cache
// check, or the verified entry that unpacks inline-type arguments and
// jumps to the real verified entry point.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    // Unverified entry: inline-cache check only.
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // During size measurement the real label cannot be used.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
1569
1570 //=============================================================================
1571 #ifndef PRODUCT
// Debug printout of the unverified entry point: the inline cache check
// comparing the receiver's klass against the speculated klass.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  if (UseCompressedClassPointers) {
    st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  } else {
    st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  }
  st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
}
1583 #endif
1584
// Emit the unverified entry point: the inline-cache check, aligned to
// InteriorEntryAlignment. The format() routine above documents the emitted
// instruction sequence.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1589
1590 //=============================================================================
1591
1592 bool Matcher::supports_vector_calling_convention(void) {
1593 if (EnableVectorSupport && UseVectorStubs) {
1594 return true;
1595 }
1596 return false;
1597 }
1598
1599 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1600 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1601 int lo = XMM0_num;
1602 int hi = XMM0b_num;
1603 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1604 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1605 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1606 return OptoRegPair(hi, lo);
1607 }
1608
1609 // Is this branch offset short enough that a short branch can be used?
3052 %}
3053 %}
3054
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches [reg + (idx << scale) + off] where the int index is known
// non-negative, so the ConvI2L zero/sign-extension can be folded into the
// addressing mode safely.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Require the long type of the converted index to have a non-negative
  // lower bound (index provably >= 0).
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3071
// Indirect Narrow Oop Operand
// Addresses the object referenced by a compressed oop directly as
// [R12 + reg << 3], where R12 holds the compressed-oops heap base.
// Only valid when the decode shift is exactly 3 (times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
3087
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Addresses [R12 + reg << 3 + off], i.e. a field at a fixed offset within a
// compressed-oop-referenced object.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3105
3106 // Indirect Memory Operand
3107 operand indirectNarrow(rRegN reg)
3414 equal(0x4, "e");
3415 not_equal(0x5, "ne");
3416 less(0x2, "b");
3417 greater_equal(0x3, "ae");
3418 less_equal(0x6, "be");
3419 greater(0x7, "a");
3420 overflow(0x0, "o");
3421 no_overflow(0x1, "no");
3422 %}
3423 %}
3424
3425 //----------OPERAND CLASSES----------------------------------------------------
3426 // Operand Classes are groups of operands that are used as to simplify
3427 // instruction definitions by not requiring the AD writer to specify separate
3428 // instructions for every form of operand when the instruction accepts
3429 // multiple operand types with the same basic encoding and format. The classic
3430 // case of this is memory operands.
3431
// "memory" groups every addressing-mode operand (plain, offset, indexed,
// scaled, and the narrow-oop variants) accepted by memory-form instructions.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3438
3439 //----------PIPELINE-----------------------------------------------------------
3440 // Rules which define the behavior of the target architectures pipeline.
3441 pipeline %{
3442
3443 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 encoding)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3454
5945 format %{ "MEMBAR-storestore (empty encoding)" %}
5946 ins_encode( );
5947 ins_pipe(empty);
5948 %}
5949
5950 //----------Move Instructions--------------------------------------------------
5951
// Cast a raw machine word (long) to a pointer. Just a register move, and the
// move is elided entirely when source and destination coincide.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5964
// Reinterpret a narrow (compressed) oop register as a raw long.
// NOTE(review): matches CastP2X with a narrow-oop (rRegN) input — confirm
// this is the intended matching for narrow operands in this port.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5977
// Reinterpret a pointer as a raw long. Register move, elided when source and
// destination coincide.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5990
5991 // Convert oop into int for vectors alignment masking
5992 instruct convP2I(rRegI dst, rRegP src)
5993 %{
5994 match(Set dst (ConvL2I (CastP2X src)));
5995
5996 format %{ "movl $dst, $src\t# ptr -> int" %}
5997 ins_encode %{
10517 effect(DEF dst, USE src);
10518 ins_cost(100);
10519 format %{ "movd $dst,$src\t# MoveI2F" %}
10520 ins_encode %{
10521 __ movdl($dst$$XMMRegister, $src$$Register);
10522 %}
10523 ins_pipe( pipe_slow );
10524 %}
10525
// Move the raw bits of a long GPR into a double XMM register (bitwise
// reinterpretation, no numeric conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10536
10537
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small (not statically large), full clearing (not word-copy-only), pre-AVX512.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=false, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
10598
// Small non-constant length ClearArray, word-copy-only variant (rep stosb
// byte path is not permitted), for non-AVX512 targets.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=false, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
10654
// Small non-constant length ClearArray for AVX512 targets.
// NOTE(review): part of the format template appears truncated in this copy
// (the 64-byte loop body between "# L_loop" and "jl L_tail") — verify against
// upstream before relying on the printed listing; the emitted code comes from
// clear_mem and is unaffected.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=false, word_copy_only=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10715
// Small non-constant length ClearArray, word-copy-only variant, for AVX512
// targets.
// NOTE(review): part of the format template appears truncated in this copy
// (the 64-byte loop body between "# L_loop" and "jl L_tail") — verify against
// upstream; the emitted code comes from clear_mem and is unaffected.
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=false, word_copy_only=true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10775
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=true, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
10825
// Large non-constant length ClearArray, word-copy-only variant, for
// non-AVX512 targets.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=true, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
10871
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=true, word_copy_only=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10922
// Large non-constant length ClearArray, word-copy-only variant, for AVX512
// targets.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt-in-words, val, xmm tmp, is_large=true, word_copy_only=true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10972
// Small constant length ClearArray for AVX512 targets.
// cnt is a compile-time constant here, so clear_mem can emit a fully
// unrolled, masked (ktmp) clearing sequence.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10987
// Compare two Latin-1 (byte[]) strings; result in rax. Non-AVX512vlbw path
// (the knoreg argument selects the maskless string_compare variant).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11003
12766
12767 ins_cost(300);
12768 format %{ "call_leaf,runtime " %}
12769 ins_encode(clear_avx, Java_To_Runtime(meth));
12770 ins_pipe(pipe_slow);
12771 %}
12772
// Call runtime without safepoint and with vector arguments
// Note: deliberately no clear_avx here — vector registers carry live
// arguments (see MachCallRuntimeNode::ret_addr_offset, which likewise skips
// the vzeroupper size for Op_CallLeafVector).
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12784
// Call runtime without safepoint
// entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Used when the call target is only known at runtime (register-indirect).
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
12800
// Call runtime without safepoint, direct form: entry point known at compile
// time. clear_avx emits vzeroupper before the call when needed.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12812
12813 // Return Instruction
12814 // Remove the return address & jump to it.
12815 // Notice: We always emit a nop after a ret to make sure there is room
12816 // for safepoint patching
12817 instruct Ret()
12818 %{
12819 match(Return);
12820
12821 format %{ "ret" %}
12822 ins_encode %{
12823 __ ret(0);
|