586 }
587
588 // !!!!! Special hack to get all types of calls to specify the byte offset
589 // from the start of the call to the point where the return address
590 // will point.
591 int MachCallStaticJavaNode::ret_addr_offset()
592 {
593 int offset = 5; // 5 bytes from start of call to where return address points
594 offset += clear_avx_size();
595 return offset;
596 }
597
598 int MachCallDynamicJavaNode::ret_addr_offset()
599 {
600 int offset = 15; // 15 bytes from start of call to where return address points
601 offset += clear_avx_size();
602 return offset;
603 }
604
605 int MachCallRuntimeNode::ret_addr_offset() {
606 int offset = 13; // movq r10,#addr; callq (r10)
607 if (this->ideal_Opcode() != Op_CallLeafVector) {
608 offset += clear_avx_size();
609 }
610 return offset;
611 }
612 //
613 // Compute padding required for nodes which need alignment
614 //
615
616 // The address of the call instruction needs to be 4-byte aligned to
617 // ensure that it does not span a cache line so that it can be patched.
618 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
619 {
620 current_offset += clear_avx_size(); // skip vzeroupper
621 current_offset += 1; // skip call opcode byte
622 return align_up(current_offset, alignment_required()) - current_offset;
623 }
624
625 // The address of the call instruction needs to be 4-byte aligned to
626 // ensure that it does not span a cache line so that it can be patched.
627 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
628 {
629 current_offset += clear_avx_size(); // skip vzeroupper
630 current_offset += 11; // skip movq instruction + call opcode byte
631 return align_up(current_offset, alignment_required()) - current_offset;
817 st->print("\n\t");
818 st->print("# stack alignment check");
819 #endif
820 }
821 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
822 st->print("\n\t");
823 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
824 st->print("\n\t");
825 st->print("je fast_entry\t");
826 st->print("\n\t");
827 st->print("call #nmethod_entry_barrier_stub\t");
828 st->print("\n\tfast_entry:");
829 }
830 st->cr();
831 }
832 #endif
833
// Emit the method prolog: optional class-initialization barrier for the
// holder, then the verified entry (frame setup / stack bang), and record
// the frame-complete offset.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Fast path falls through to L_skip_barrier when the holder class
    // is already initialized for the current thread.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Stack bang only if needed; the last arg tells verified_entry whether
  // this is a stub compilation.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
866
// Size of the emitted prolog; depends on too many variables to compute
// statically, so measure via the generic scratch-emit implementation.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
872
// Upper bound on relocation entries the prolog may need.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
877
878 //=============================================================================
879 #ifndef PRODUCT
880 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
881 {
882 Compile* C = ra_->C;
883 if (generate_vzeroupper(C)) {
884 st->print("vzeroupper");
885 st->cr(); st->print("\t");
886 }
887
888 int framesize = C->output()->frame_size_in_bytes();
889 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
890 // Remove word for return adr already pushed
891 // and RBP
892 framesize -= 2*wordSize;
899 st->print_cr("popq rbp");
900 if (do_polling() && C->is_method_compilation()) {
901 st->print("\t");
902 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
903 "ja #safepoint_stub\t"
904 "# Safepoint: poll for GC");
905 }
906 }
907 #endif
908
// Emit the method epilog: optional vzeroupper, frame teardown, optional
// reserved-stack check, and a return-time safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Poll branches to an out-of-line stub; during scratch-buffer sizing
    // no stub is registered and a dummy local label is used instead.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
949
// Size of the emitted epilog; measured via scratch emission because the
// length depends on too many configuration variables.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
955
// Upper bound on relocation entries the epilog may need (e.g. the
// return-poll relocation emitted above).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
960
// The epilog uses the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
965
966 //=============================================================================
967
// Coarse register classes used by the spill/copy code in this file.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general purpose register
  rc_kreg,   // mask register (kReg)
  rc_float,  // floating point / XMM register
  rc_stack   // stack slot
};
975
1533 int reg = ra_->get_reg_first(this);
1534 st->print("leaq %s, [rsp + #%d]\t# box lock",
1535 Matcher::regName[reg], offset);
1536 }
1537 #endif
1538
1539 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1540 {
1541 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1542 int reg = ra_->get_encode(this);
1543
1544 __ lea(as_Register(reg), Address(rsp, offset));
1545 }
1546
1547 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1548 {
1549 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1550 return (offset < 0x80) ? 5 : 8; // REX
1551 }
1552
1553 //=============================================================================
1554 #ifndef PRODUCT
1555 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1556 {
1557 if (UseCompressedClassPointers) {
1558 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1559 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1560 } else {
1561 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1562 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1563 }
1564 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1565 }
1566 #endif
1567
// Emit the unverified entry point: the inline-cache klass check
// (branches to the IC miss stub on mismatch; see format() above).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1572
// Size of the unverified entry point; measured via scratch emission.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1578
1579
1580 //=============================================================================
1581
1582 bool Matcher::supports_vector_calling_convention(void) {
1583 if (EnableVectorSupport && UseVectorStubs) {
1584 return true;
1585 }
1586 return false;
1587 }
1588
1589 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1590 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1591 int lo = XMM0_num;
1592 int hi = XMM0b_num;
1593 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1594 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1595 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1596 return OptoRegPair(hi, lo);
1597 }
1598
1599 // Is this branch offset short enough that a short branch can be used?
3042 %}
3043 %}
3044
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the index's long type is provably non-negative
  // (_lo >= 0), so folding the ConvI2L into the address is safe.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3061
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when compressed oops use an 8-byte shift, so the narrow
  // oop can be decoded directly in the addressing mode (base R12 + reg<<3).
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3079
3080 // Indirect Memory Operand
3081 operand indirectNarrow(rRegN reg)
3388 equal(0x4, "e");
3389 not_equal(0x5, "ne");
3390 less(0x2, "b");
3391 greater_equal(0x3, "ae");
3392 less_equal(0x6, "be");
3393 greater(0x7, "a");
3394 overflow(0x0, "o");
3395 no_overflow(0x1, "no");
3396 %}
3397 %}
3398
3399 //----------OPERAND CLASSES----------------------------------------------------
3400 // Operand Classes are groups of operands that are used as to simplify
3401 // instruction definitions by not requiring the AD writer to specify separate
3402 // instructions for every form of operand when the instruction accepts
3403 // multiple operand types with the same basic encoding and format. The classic
3404 // case of this is memory operands.
3405
// All addressing-mode operands accepted by instructions that take a
// generic "memory" operand.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3412
3413 //----------PIPELINE-----------------------------------------------------------
3414 // Rules which define the behavior of the target architectures pipeline.
3415 pipeline %{
3416
3417 //----------ATTRIBUTES---------------------------------------------------------
  attributes %{
    variable_size_instructions;        // Instructions are of variable size
    max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
    instruction_unit_size = 1;         // An instruction is 1 byte long
    instruction_fetch_unit_size = 16;  // The processor fetches one line
    instruction_fetch_units = 1;       // of 16 bytes

    // List of nop instructions
    nops( MachNop );
  %}
3428
5896 format %{ "MEMBAR-storestore (empty encoding)" %}
5897 ins_encode( );
5898 ins_pipe(empty);
5899 %}
5900
5901 //----------Move Instructions--------------------------------------------------
5902
// Reinterpret a long value as a pointer (no bits change).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      // Elide the move when the allocator assigned the same register.
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5915
// Reinterpret a pointer as a long value (no bits change).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      // Elide the move when the allocator assigned the same register.
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5928
5929 // Convert oop into int for vectors alignment masking
5930 instruct convP2I(rRegI dst, rRegP src)
5931 %{
5932 match(Set dst (ConvL2I (CastP2X src)));
5933
5934 format %{ "movl $dst, $src\t# ptr -> int" %}
5935 ins_encode %{
10444 effect(DEF dst, USE src);
10445 ins_cost(100);
10446 format %{ "movd $dst,$src\t# MoveI2F" %}
10447 ins_encode %{
10448 __ movdl($dst$$XMMRegister, $src$$Register);
10449 %}
10450 ins_pipe( pipe_slow );
10451 %}
10452
// Bitwise move of a long GPR into an XMM register (no conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10463
10464 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
10466 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10467 Universe dummy, rFlagsReg cr)
10468 %{
10469 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
10470 match(Set dummy (ClearArray cnt base));
10471 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10472
10473 format %{ $$template
10474 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10475 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10476 $$emit$$"jg LARGE\n\t"
10477 $$emit$$"dec rcx\n\t"
10478 $$emit$$"js DONE\t# Zero length\n\t"
10479 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10480 $$emit$$"dec rcx\n\t"
10481 $$emit$$"jge LOOP\n\t"
10482 $$emit$$"jmp DONE\n\t"
10483 $$emit$$"# LARGE:\n\t"
10484 if (UseFastStosb) {
10485 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10486 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10487 } else if (UseXMMForObjInit) {
10488 $$emit$$"mov rdi,rax\n\t"
10489 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10490 $$emit$$"jmpq L_zero_64_bytes\n\t"
10491 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10499 $$emit$$"jl L_tail\n\t"
10500 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10501 $$emit$$"add 0x20,rax\n\t"
10502 $$emit$$"sub 0x4,rcx\n\t"
10503 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10504 $$emit$$"add 0x4,rcx\n\t"
10505 $$emit$$"jle L_end\n\t"
10506 $$emit$$"dec rcx\n\t"
10507 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10508 $$emit$$"vmovq xmm0,(rax)\n\t"
10509 $$emit$$"add 0x8,rax\n\t"
10510 $$emit$$"dec rcx\n\t"
10511 $$emit$$"jge L_sloop\n\t"
10512 $$emit$$"# L_end:\n\t"
10513 } else {
10514 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10515 }
10516 $$emit$$"# DONE"
10517 %}
10518 ins_encode %{
10519 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10520 $tmp$$XMMRegister, false, knoreg);
10521 %}
10522 ins_pipe(pipe_slow);
10523 %}
10524
10525 // Small non-constant length ClearArray for AVX512 targets.
10526 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10527 Universe dummy, rFlagsReg cr)
10528 %{
10529 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
10530 match(Set dummy (ClearArray cnt base));
10531 ins_cost(125);
10532 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10533
10534 format %{ $$template
10535 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10536 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10537 $$emit$$"jg LARGE\n\t"
10538 $$emit$$"dec rcx\n\t"
10539 $$emit$$"js DONE\t# Zero length\n\t"
10540 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10541 $$emit$$"dec rcx\n\t"
10542 $$emit$$"jge LOOP\n\t"
10543 $$emit$$"jmp DONE\n\t"
10544 $$emit$$"# LARGE:\n\t"
10545 if (UseFastStosb) {
10546 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10547 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10548 } else if (UseXMMForObjInit) {
10549 $$emit$$"mov rdi,rax\n\t"
10550 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10551 $$emit$$"jmpq L_zero_64_bytes\n\t"
10552 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10560 $$emit$$"jl L_tail\n\t"
10561 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10562 $$emit$$"add 0x20,rax\n\t"
10563 $$emit$$"sub 0x4,rcx\n\t"
10564 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10565 $$emit$$"add 0x4,rcx\n\t"
10566 $$emit$$"jle L_end\n\t"
10567 $$emit$$"dec rcx\n\t"
10568 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10569 $$emit$$"vmovq xmm0,(rax)\n\t"
10570 $$emit$$"add 0x8,rax\n\t"
10571 $$emit$$"dec rcx\n\t"
10572 $$emit$$"jge L_sloop\n\t"
10573 $$emit$$"# L_end:\n\t"
10574 } else {
10575 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10576 }
10577 $$emit$$"# DONE"
10578 %}
10579 ins_encode %{
10580 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10581 $tmp$$XMMRegister, false, $ktmp$$KRegister);
10582 %}
10583 ins_pipe(pipe_slow);
10584 %}
10585
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  // Large arrays only; the _evex variant below handles UseAVX > 2.
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large == true, no mask register on this (non-AVX512) path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
10636
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  // Large arrays only; requires AVX-512 (UseAVX > 2) for the ktmp mask.
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large == true; pass the AVX-512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10687
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Constant (small) length; requires 256-bit vectors and AVX-512 VL.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Length is an immediate here ($cnt$$constant), not a register.
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10701
// Latin1 (byte[]) vs Latin1 string compare for targets without AVX-512 VL/BW.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no mask register on this non-AVX512 path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
10717
12478
12479 ins_cost(300);
12480 format %{ "call_leaf,runtime " %}
12481 ins_encode(clear_avx, Java_To_Runtime(meth));
12482 ins_pipe(pipe_slow);
12483 %}
12484
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Note: no clear_avx here, unlike the other leaf calls — consistent
  // with MachCallRuntimeNode::ret_addr_offset skipping it for
  // Op_CallLeafVector.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12496
// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // clear_avx first (vzeroupper if needed), then the direct runtime call.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12508
12509 // Return Instruction
12510 // Remove the return address & jump to it.
12511 // Notice: We always emit a nop after a ret to make sure there is room
12512 // for safepoint patching
12513 instruct Ret()
12514 %{
12515 match(Return);
12516
12517 format %{ "ret" %}
12518 ins_encode %{
12519 __ ret(0);
|
586 }
587
588 // !!!!! Special hack to get all types of calls to specify the byte offset
589 // from the start of the call to the point where the return address
590 // will point.
591 int MachCallStaticJavaNode::ret_addr_offset()
592 {
593 int offset = 5; // 5 bytes from start of call to where return address points
594 offset += clear_avx_size();
595 return offset;
596 }
597
598 int MachCallDynamicJavaNode::ret_addr_offset()
599 {
600 int offset = 15; // 15 bytes from start of call to where return address points
601 offset += clear_avx_size();
602 return offset;
603 }
604
605 int MachCallRuntimeNode::ret_addr_offset() {
606 if (_entry_point == nullptr) {
607 // CallLeafNoFPInDirect
608 return 3; // callq (register)
609 }
610 int offset = 13; // movq r10,#addr; callq (r10)
611 if (this->ideal_Opcode() != Op_CallLeafVector) {
612 offset += clear_avx_size();
613 }
614 return offset;
615 }
616
617 //
618 // Compute padding required for nodes which need alignment
619 //
620
621 // The address of the call instruction needs to be 4-byte aligned to
622 // ensure that it does not span a cache line so that it can be patched.
623 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
624 {
625 current_offset += clear_avx_size(); // skip vzeroupper
626 current_offset += 1; // skip call opcode byte
627 return align_up(current_offset, alignment_required()) - current_offset;
628 }
629
630 // The address of the call instruction needs to be 4-byte aligned to
631 // ensure that it does not span a cache line so that it can be patched.
632 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
633 {
634 current_offset += clear_avx_size(); // skip vzeroupper
635 current_offset += 11; // skip movq instruction + call opcode byte
636 return align_up(current_offset, alignment_required()) - current_offset;
822 st->print("\n\t");
823 st->print("# stack alignment check");
824 #endif
825 }
826 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
827 st->print("\n\t");
828 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
829 st->print("\n\t");
830 st->print("je fast_entry\t");
831 st->print("\n\t");
832 st->print("call #nmethod_entry_barrier_stub\t");
833 st->print("\n\tfast_entry:");
834 }
835 st->cr();
836 }
837 #endif
838
// Emit the method prolog: verified entry (frame setup), nmethod entry
// barrier for non-stub compilations, and bind the verified-entry label.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Stub compilations do not get an nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the verified entry label (target of MachVEPNode's jump);
    // skipped while measuring in a scratch buffer.
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
861
// Upper bound on relocation entries the prolog may need.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
866
867 //=============================================================================
868 #ifndef PRODUCT
869 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
870 {
871 Compile* C = ra_->C;
872 if (generate_vzeroupper(C)) {
873 st->print("vzeroupper");
874 st->cr(); st->print("\t");
875 }
876
877 int framesize = C->output()->frame_size_in_bytes();
878 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
879 // Remove word for return adr already pushed
880 // and RBP
881 framesize -= 2*wordSize;
888 st->print_cr("popq rbp");
889 if (do_polling() && C->is_method_compilation()) {
890 st->print("\t");
891 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
892 "ja #safepoint_stub\t"
893 "# Safepoint: poll for GC");
894 }
895 }
896 #endif
897
// Emit the method epilog: optional vzeroupper, frame removal (with stack
// repair if needed), reserved-stack check, and return-time safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Poll branches to an out-of-line stub; during scratch-buffer sizing
    // no stub is registered and a dummy local label is used instead.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
928
// Upper bound on relocation entries the epilog may need (e.g. the
// return-poll relocation emitted above).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
933
// The epilog uses the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
938
939 //=============================================================================
940
// Coarse register classes used by the spill/copy code in this file.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general purpose register
  rc_kreg,   // mask register (kReg)
  rc_float,  // floating point / XMM register
  rc_stack   // stack slot
};
948
1506 int reg = ra_->get_reg_first(this);
1507 st->print("leaq %s, [rsp + #%d]\t# box lock",
1508 Matcher::regName[reg], offset);
1509 }
1510 #endif
1511
1512 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1513 {
1514 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1515 int reg = ra_->get_encode(this);
1516
1517 __ lea(as_Register(reg), Address(rsp, offset));
1518 }
1519
1520 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1521 {
1522 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1523 return (offset < 0x80) ? 5 : 8; // REX
1524 }
1525
1526 //=============================================================================
1527 #ifndef PRODUCT
// Debug-only print for the inline-type value entry point node.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
1532 #endif
1533
// Emit the (un)verified inline-type entry point. For the unverified case
// this is just the inline-cache check; for the verified case it sets up a
// temporary frame, unpacks inline-type arguments, and jumps to the real
// verified entry.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size(); // remember start to size the emitted code below
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      // Tear the temporary frame down again before unpacking.
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // The real label cannot be bound during scratch sizing; use a dummy.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
1568
1569 //=============================================================================
1570 #ifndef PRODUCT
1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1572 {
1573 if (UseCompressedClassPointers) {
1574 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1575 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1576 } else {
1577 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1578 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1579 }
1580 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1581 }
1582 #endif
1583
// Emit the unverified entry point: an inline cache check, aligned to
// InteriorEntryAlignment so the subsequent verified entry is patchable.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1588
1589 //=============================================================================
1590
1591 bool Matcher::supports_vector_calling_convention(void) {
1592 if (EnableVectorSupport && UseVectorStubs) {
1593 return true;
1594 }
1595 return false;
1596 }
1597
1598 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1599 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1600 int lo = XMM0_num;
1601 int hi = XMM0b_num;
1602 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1603 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1604 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1605 return OptoRegPair(hi, lo);
1606 }
1607
1608 // Is this branch offset short enough that a short branch can be used?
3051 %}
3052 %}
3053
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches [reg + off + (idx << scale)] where idx is an int index.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the ConvI2L'd index has a non-negative lower bound, so
  // folding the int->long conversion into the address cannot change its value.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3070
// Indirect Narrow Oop Operand
// Folds a DecodeN directly into an addressing mode: [R12 + reg << 3], where
// R12 holds the compressed-oop heap base. Only valid when the decode is a
// pure base+shift (shift == 3, i.e. 8-byte scaling).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
3086
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Folds (DecodeN reg) + off into [R12 + reg << 3 + off].
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3104
3105 // Indirect Memory Operand
3106 operand indirectNarrow(rRegN reg)
3413 equal(0x4, "e");
3414 not_equal(0x5, "ne");
3415 less(0x2, "b");
3416 greater_equal(0x3, "ae");
3417 less_equal(0x6, "be");
3418 greater(0x7, "a");
3419 overflow(0x0, "o");
3420 no_overflow(0x1, "no");
3421 %}
3422 %}
3423
3424 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.
3430
// All addressing modes accepted by instructions taking a generic "memory"
// operand, including the narrow-oop (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3437
3438 //----------PIPELINE-----------------------------------------------------------
3439 // Rules which define the behavior of the target architectures pipeline.
3440 pipeline %{
3441
3442 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 encodings are 1-15 bytes)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3453
5921 format %{ "MEMBAR-storestore (empty encoding)" %}
5922 ins_encode( );
5923 ins_pipe(empty);
5924 %}
5925
5926 //----------Move Instructions--------------------------------------------------
5927
// Reinterpret a long as a pointer. Emits a move only when source and
// destination registers differ; otherwise it is a no-op.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5940
// Reinterpret a narrow oop's raw bits as a long.
// NOTE(review): this matches the same ideal op (CastP2X) as castP2X below,
// differing only in the narrow-oop source register class — confirm the
// shared ideal opcode is intended.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5953
// Reinterpret a pointer's raw bits as a long. Move is elided when source
// and destination registers coincide.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5966
5967 // Convert oop into int for vectors alignment masking
5968 instruct convP2I(rRegI dst, rRegP src)
5969 %{
5970 match(Set dst (ConvL2I (CastP2X src)));
5971
5972 format %{ "movl $dst, $src\t# ptr -> int" %}
5973 ins_encode %{
10482 effect(DEF dst, USE src);
10483 ins_cost(100);
10484 format %{ "movd $dst,$src\t# MoveI2F" %}
10485 ins_encode %{
10486 __ movdl($dst$$XMMRegister, $src$$Register);
10487 %}
10488 ins_pipe( pipe_slow );
10489 %}
10490
// Bit-preserving move of a long into a double register (GPR -> XMM via movdq).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10501
10502
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Fill 'cnt' words at 'base' with 'val': small, non-constant length,
  // pre-AVX512 targets, byte-granular fill allowed (word_copy_only false).
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=false)
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
10563
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  // Same as rep_stos but word_copy_only: stores must stay word-granular, so
  // the format (unlike rep_stos) has no "rep stosb" fast path.
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=true)
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
10619
// Small non-constant length ClearArray for AVX512 targets.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  // AVX512 variant of rep_stos; additionally reserves an opmask register
  // (ktmp) for clear_mem.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=false, ktmp)
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10680
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  // AVX512 variant with word_copy_only semantics (encode passes word_copy=true).
  // NOTE(review): unlike rep_stos_word_copy, this format template still shows
  // a UseFastStosb/"rep stosb" branch — verify the debug format matches the
  // actual word-granular emission.
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  // clear_mem(base, cnt, val, tmp, is_large=false, word_copy=true, ktmp)
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10740
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  // Large-length variant: no short-size check, goes straight to the bulk
  // fill (is_large=true in the encode below).
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=false)
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
10790
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  // Large-length, word-granular variant: no "rep stosb" path in the format
  // since byte stores are not allowed (word_copy_only).
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=true)
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
10836
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  // AVX512 large-length variant; reserves an opmask register (ktmp) for
  // clear_mem.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=false, ktmp)
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10887
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  // AVX512 large-length, word-granular variant (encode passes word_copy=true).
  // NOTE(review): the format template still shows a UseFastStosb/"rep stosb"
  // branch, unlike rep_stos_large_word_copy — verify the debug format matches
  // the actual word-granular emission.
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // clear_mem(base, cnt, val, tmp, is_large=true, word_copy=true, ktmp)
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10937
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Constant-length variant: requires 32-byte vectors and AVX512VL; an
  // opmask register (ktmp) is reserved, presumably for masked tail stores —
  // see clear_mem's immediate-count overload.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10952
// Latin1 x Latin1 string comparison for targets without AVX-512 VL/BW;
// passes knoreg since no opmask register is available on this path.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
10968
12729
12730 ins_cost(300);
12731 format %{ "call_leaf,runtime " %}
12732 ins_encode(clear_avx, Java_To_Runtime(meth));
12733 ins_pipe(pipe_slow);
12734 %}
12735
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  // No clear_avx here, unlike the other leaf-call instructs: vector
  // arguments must stay live across the call. This matches
  // MachCallRuntimeNode::ret_addr_offset, which skips clear_avx_size()
  // for Op_CallLeafVector.
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12747
// Call runtime without safepoint
// entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Indirect form: selected only when the call node carries no static entry
  // point; the address to call is supplied in 'target'.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
12763
// Direct form of CallLeafNoFP: used when a static entry point is known.
// Clears AVX state (vzeroupper) before calling into the runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12775
12776 // Return Instruction
12777 // Remove the return address & jump to it.
12778 // Notice: We always emit a nop after a ret to make sure there is room
12779 // for safepoint patching
12780 instruct Ret()
12781 %{
12782 match(Return);
12783
12784 format %{ "ret" %}
12785 ins_encode %{
12786 __ ret(0);
|