473 }
474
475 // !!!!! Special hack to get all types of calls to specify the byte offset
476 // from the start of the call to the point where the return address
477 // will point.
// Byte offset from the start of this call sequence to the point the return
// address will refer to: a 5-byte direct call, shifted by the size of any
// vzeroupper emitted before it (clear_avx_size()).
478 int MachCallStaticJavaNode::ret_addr_offset()
479 {
480 int offset = 5; // 5 bytes from start of call to where return address points
481 offset += clear_avx_size(); // vzeroupper (if any) precedes the call
482 return offset;
483 }
484
// Return-address offset for dynamic (inline-cache) calls: a 10-byte movq of
// the IC data plus the 5-byte call (CallDynamicJavaDirectNode::compute_padding
// correspondingly skips 11 bytes = movq + call opcode byte), again shifted by
// an optional vzeroupper.
485 int MachCallDynamicJavaNode::ret_addr_offset()
486 {
487 int offset = 15; // 15 bytes from start of call to where return address points
488 offset += clear_avx_size(); // vzeroupper (if any) precedes the sequence
489 return offset;
490 }
491
// Return-address offset for runtime calls.
// A null _entry_point denotes an indirect leaf call through a register
// (CallLeafNoFPInDirect), which is just a 3-byte "callq (reg)" — this case is
// handled the same way in the later copy of this function in this file.
// Otherwise the sequence is a 13-byte "movq r10,#addr; callq (r10)", preceded
// by an optional vzeroupper except for vector leaf calls, whose encoding
// omits clear_avx (see CallLeafDirectVector).
492 int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
493 int offset = 13; // movq r10,#addr; callq (r10)
494 if (this->ideal_Opcode() != Op_CallLeafVector) {
495 offset += clear_avx_size();
496 }
497 return offset;
498 }
499 //
500 // Compute padding required for nodes which need alignment
501 //
502
503 // The address of the call instruction needs to be 4-byte aligned to
504 // ensure that it does not span a cache line so that it can be patched.
// Padding (in bytes) needed before this node so that the call's 4-byte
// displacement does not straddle a cache line and can be patched atomically.
// Skips past the optional vzeroupper and the call opcode byte so alignment is
// computed at the displacement itself.
505 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
506 {
507 current_offset += clear_avx_size(); // skip vzeroupper
508 current_offset += 1; // skip call opcode byte
509 return align_up(current_offset, alignment_required()) - current_offset;
510 }
511
512 // The address of the call instruction needs to be 4-byte aligned to
513 // ensure that it does not span a cache line so that it can be patched.
514 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
515 {
516 current_offset += clear_avx_size(); // skip vzeroupper
517 current_offset += 11; // skip movq instruction + call opcode byte
518 return align_up(current_offset, alignment_required()) - current_offset;
706 st->print("# stack alignment check");
707 #endif
708 }
709 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
710 st->print("\n\t");
711 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
712 st->print("\n\t");
713 st->print("je fast_entry\t");
714 st->print("\n\t");
715 st->print("call #nmethod_entry_barrier_stub\t");
716 st->print("\n\tfast_entry:");
717 }
718 st->cr();
719 }
720 #endif
721
// Emit the method prologue: optional class-initialization entry barrier,
// then the verified entry (frame setup + stack bang), then record where the
// frame becomes complete and fix the constant-table base offset.
722 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
723 Compile* C = ra_->C;
724 C2_MacroAssembler _masm(&cbuf);
725
726 int framesize = C->output()->frame_size_in_bytes();
727 int bangsize = C->output()->bang_size_in_bytes();
728
// Fast class-init check: if the holder class is not yet fully initialized,
// divert to the wrong-method stub instead of entering the compiled code.
729 if (C->clinit_barrier_on_entry()) {
730 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
731 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
732
733 Label L_skip_barrier;
734 Register klass = rscratch1;
735
736 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
737 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
738
739 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
740
741 __ bind(L_skip_barrier);
742 }
743
// Build the frame; bang the stack only when the bang size requires it.
744 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
745
746 C->output()->set_frame_complete(cbuf.insts_size());
747
748 if (C->has_mach_constant_base_node()) {
749 // NOTE: We set the table base offset here because users might be
750 // emitted before MachConstantBaseNode.
751 ConstantTable& constant_table = C->output()->constant_table();
752 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
753 }
754 }
755
// Prologue size depends on too many factors (barriers, bang, vzeroupper), so
// fall back to the generic emit-and-measure implementation.
756 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
757 {
758 return MachNode::size(ra_); // too many variables; just compute it
759 // the hard way
760 }
761
// Upper bound on the number of relocation entries the prologue may emit.
762 int MachPrologNode::reloc() const
763 {
764 return 0; // a large enough number
765 }
766
767 //=============================================================================
768 #ifndef PRODUCT
// Debug-only pretty printer for the epilogue; mirrors the code generated by
// MachEpilogNode::emit (vzeroupper, frame pop, return-poll safepoint).
769 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
770 {
771 Compile* C = ra_->C;
772 if (generate_vzeroupper(C)) {
773 st->print("vzeroupper");
774 st->cr(); st->print("\t");
775 }
776
777 int framesize = C->output()->frame_size_in_bytes();
778 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
779 // Remove word for return adr already pushed
780 // and RBP
781 framesize -= 2*wordSize;
789 if (do_polling() && C->is_method_compilation()) {
790 st->print("\t");
791 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
792 "ja #safepoint_stub\t"
793 "# Safepoint: poll for GC");
794 }
795 }
796 #endif
797
// Emit the method epilogue: clear AVX upper state when needed, pop the frame
// and RBP, run the reserved-stack check, then the return-poll safepoint.
798 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
799 {
800 Compile* C = ra_->C;
801 MacroAssembler _masm(&cbuf);
802
803 if (generate_vzeroupper(C)) {
804 // Clear upper bits of YMM registers when current compiled code uses
805 // wide vectors to avoid AVX <-> SSE transition penalty during call.
806 __ vzeroupper();
807 }
808
809 int framesize = C->output()->frame_size_in_bytes();
810 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
811 // Remove word for return adr already pushed
812 // and RBP
813 framesize -= 2*wordSize;
814
815 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
816
817 if (framesize) {
818 __ addq(rsp, framesize);
819 }
820
821 __ popq(rbp);
822
823 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
824 __ reserved_stack_check();
825 }
826
// Return-address safepoint poll. The real stub (and label) is only created
// when this is not a scratch (size-measuring) emission pass.
827 if (do_polling() && C->is_method_compilation()) {
828 MacroAssembler _masm(&cbuf);
829 Label dummy_label;
830 Label* code_stub = &dummy_label;
831 if (!C->output()->in_scratch_emit_size()) {
832 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
833 C->output()->add_stub(stub);
834 code_stub = &stub->entry();
835 }
836 __ relocate(relocInfo::poll_return_type);
837 __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
838 }
839 }
840
// Epilogue size varies (vzeroupper, reserved-stack check, poll), so measure.
841 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
842 {
843 return MachNode::size(ra_); // too many variables; just compute it
844 // the hard way
845 }
846
// Upper bound on relocation entries the epilogue may emit (the return poll).
847 int MachEpilogNode::reloc() const
848 {
849 return 2; // a large enough number
850 }
851
// Use the generic pipeline description for the epilogue node.
852 const Pipeline* MachEpilogNode::pipeline() const
853 {
854 return MachNode::pipeline_class();
855 }
856
857 //=============================================================================
858
// Register class of a value being spilled or copied; used to select the
// appropriate move instruction for a given source/destination pair.
859 enum RC {
860 rc_bad,
861 rc_int,
862 rc_kreg, // k (opmask) registers
863 rc_float, // XMM registers
864 rc_stack
865 };
866
1449 st->print("leaq %s, [rsp + #%d]\t# box lock",
1450 Matcher::regName[reg], offset);
1451 }
1452 #endif
1453
// Materialize the stack address of this box lock into its assigned register:
// lea reg, [rsp + offset].
1454 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1455 {
1456 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1457 int reg = ra_->get_encode(this);
1458
1459 MacroAssembler masm(&cbuf);
1460 masm.lea(as_Register(reg), Address(rsp, offset));
1461 }
1462
// Size of the lea emitted above: 5 bytes with an 8-bit displacement, 8 bytes
// once a 32-bit displacement is required (offset >= 0x80).
1463 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1464 {
1465 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1466 return (offset < 0x80) ? 5 : 8; // REX
1467 }
1468
1469 //=============================================================================
1470 #ifndef PRODUCT
// Debug-only listing of the unverified entry point: load the receiver klass,
// compare against the inline cache's speculated klass, and jump to the IC
// miss stub on mismatch.
1471 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1472 {
1473 if (UseCompressedClassPointers) {
1474 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1475 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1476 } else {
1477 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1478 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1479 }
1480 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1481 }
1482 #endif
1483
// Emit the inline-cache check for the unverified entry point, aligned
// according to InteriorEntryAlignment.
1484 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1485 {
1486 MacroAssembler masm(&cbuf);
1487 masm.ic_check(InteriorEntryAlignment);
1488 }
1489
// IC-check size depends on alignment and compressed-class mode; measure it.
1490 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1491 {
1492 return MachNode::size(ra_); // too many variables; just compute it
1493 // the hard way
1494 }
1495
1496
1497 //=============================================================================
1498
// The vector calling convention (vector values passed/returned in vector
// registers) is supported exactly when both the Vector API runtime support
// and the vector stubs are enabled.
1499 bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport && UseVectorStubs;
1504 }
1505
// Vector values are returned in XMM0; the high half of the returned pair
// grows with the ideal vector width (default VecD, then VecX/VecY/VecZ).
1506 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1507 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1508 int lo = XMM0_num;
1509 int hi = XMM0b_num;
1510 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1511 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1512 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1513 return OptoRegPair(hi, lo);
1514 }
1515
1516 // Is this branch offset short enough that a short branch can be used?
3081 %}
3082 %}
3083
3084 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches [reg + off + idx << scale] where the predicate proves the scaled
// index is non-negative (the ConvI2L input's type range has _lo >= 0).
3085 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3086 %{
3087 constraint(ALLOC_IN_RC(ptr_reg));
3088 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3089 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3090
3091 op_cost(10);
3092 format %{"[$reg + $off + $idx << $scale]" %}
3093 interface(MEMORY_INTER) %{
3094 base($reg);
3095 index($idx);
3096 scale($scale);
3097 disp($off);
3098 %}
3099 %}
3100
3101 // Indirect Narrow Oop Plus Offset Operand
3102 // Note: x86 architecture doesn't support "scale * index + offset" without a base
3103 // we can't free r12 even with CompressedOops::base() == nullptr.
// Decodes a compressed oop in the addressing mode itself:
// address = R12 (heap base) + narrow_oop << 3 + off. Only valid when the
// compressed-oop shift is 3 (times_8), as the predicate requires.
3104 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3105 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
3106 constraint(ALLOC_IN_RC(ptr_reg));
3107 match(AddP (DecodeN reg) off);
3108
3109 op_cost(10);
3110 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3111 interface(MEMORY_INTER) %{
3112 base(0xc); // R12
3113 index($reg);
3114 scale(0x3);
3115 disp($off);
3116 %}
3117 %}
3118
3119 // Indirect Memory Operand
3120 operand indirectNarrow(rRegN reg)
3427 equal(0x4, "e");
3428 not_equal(0x5, "ne");
3429 less(0x2, "b");
3430 greater_equal(0x3, "ae");
3431 less_equal(0x6, "be");
3432 greater(0x7, "a");
3433 overflow(0x0, "o");
3434 no_overflow(0x1, "no");
3435 %}
3436 %}
3437
3438 //----------OPERAND CLASSES----------------------------------------------------
3439 // Operand Classes are groups of operands that are used as to simplify
3440 // instruction definitions by not requiring the AD writer to specify separate
3441 // instructions for every form of operand when the instruction accepts
3442 // multiple operand types with the same basic encoding and format. The classic
3443 // case of this is memory operands.
3444
// All addressing-mode operands a generic "memory" instruction form may
// match: plain, indexed, scaled, positive-index and narrow-oop variants.
3445 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
3446 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
3447 indCompressedOopOffset,
3448 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
3449 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
3450 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3451
3452 //----------PIPELINE-----------------------------------------------------------
3453 // Rules which define the behavior of the target architectures pipeline.
3454 pipeline %{
3455
3456 //----------ATTRIBUTES---------------------------------------------------------
// Global pipeline attributes for the x86_64 scheduler model.
3457 attributes %{
3458 variable_size_instructions; // Variable size instructions (x86 encoding)
3459 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
3460 instruction_unit_size = 1; // An instruction is 1 bytes long
3461 instruction_fetch_unit_size = 16; // The processor fetches one line
3462 instruction_fetch_units = 1; // of 16 bytes
3463
3464 // List of nop instructions
3465 nops( MachNop );
3466 %}
3467
5958 format %{ "MEMBAR-storestore (empty encoding)" %}
5959 ins_encode( );
5960 ins_pipe(empty);
5961 %}
5962
5963 //----------Move Instructions--------------------------------------------------
5964
// Reinterpret a long as a pointer: a plain register move, elided when the
// source and destination registers already coincide.
5965 instruct castX2P(rRegP dst, rRegL src)
5966 %{
5967 match(Set dst (CastX2P src));
5968
5969 format %{ "movq $dst, $src\t# long->ptr" %}
5970 ins_encode %{
5971 if ($dst$$reg != $src$$reg) {
5972 __ movptr($dst$$Register, $src$$Register);
5973 }
5974 %}
5975 ins_pipe(ialu_reg_reg); // XXX
5976 %}
5977
// Reinterpret a pointer as a long: a plain register move, elided when the
// source and destination registers already coincide.
5978 instruct castP2X(rRegL dst, rRegP src)
5979 %{
5980 match(Set dst (CastP2X src));
5981
5982 format %{ "movq $dst, $src\t# ptr -> long" %}
5983 ins_encode %{
5984 if ($dst$$reg != $src$$reg) {
5985 __ movptr($dst$$Register, $src$$Register);
5986 }
5987 %}
5988 ins_pipe(ialu_reg_reg); // XXX
5989 %}
5990
5991 // Convert oop into int for vectors alignment masking
5992 instruct convP2I(rRegI dst, rRegP src)
5993 %{
5994 match(Set dst (ConvL2I (CastP2X src)));
5995
5996 format %{ "movl $dst, $src\t# ptr -> int" %}
5997 ins_encode %{
10499 effect(DEF dst, USE src);
10500 ins_cost(100);
10501 format %{ "movd $dst,$src\t# MoveI2F" %}
10502 ins_encode %{
10503 __ movdl($dst$$XMMRegister, $src$$Register);
10504 %}
10505 ins_pipe( pipe_slow );
10506 %}
10507
// Bitwise move of a long GPR into an XMM register (MoveL2D): raw bit copy,
// no numeric conversion.
10508 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10509 match(Set dst (MoveL2D src));
10510 effect(DEF dst, USE src);
10511 ins_cost(100);
10512 format %{ "movd $dst,$src\t# MoveL2D" %}
10513 ins_encode %{
10514 __ movdq($dst$$XMMRegister, $src$$Register);
10515 %}
10516 ins_pipe( pipe_slow );
10517 %}
10518
10519 // Fast clearing of an array
10520 // Small ClearArray non-AVX512.
// Zeroes cnt doublewords starting at base (non-constant length, UseAVX <= 2).
// Kills rcx, rdi, rax and flags; the template below documents the loop shapes
// (rep stosb / XMM / rep stosq) selected by UseFastStosb / UseXMMForObjInit.
10521 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10522 Universe dummy, rFlagsReg cr)
10523 %{
10524 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
10525 match(Set dummy (ClearArray cnt base));
10526 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10527
10528 format %{ $$template
10529 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10530 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10531 $$emit$$"jg LARGE\n\t"
10532 $$emit$$"dec rcx\n\t"
10533 $$emit$$"js DONE\t# Zero length\n\t"
10534 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10535 $$emit$$"dec rcx\n\t"
10536 $$emit$$"jge LOOP\n\t"
10537 $$emit$$"jmp DONE\n\t"
10538 $$emit$$"# LARGE:\n\t"
10539 if (UseFastStosb) {
10540 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10541 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10542 } else if (UseXMMForObjInit) {
10543 $$emit$$"mov rdi,rax\n\t"
10544 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10545 $$emit$$"jmpq L_zero_64_bytes\n\t"
10546 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10554 $$emit$$"jl L_tail\n\t"
10555 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10556 $$emit$$"add 0x20,rax\n\t"
10557 $$emit$$"sub 0x4,rcx\n\t"
10558 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10559 $$emit$$"add 0x4,rcx\n\t"
10560 $$emit$$"jle L_end\n\t"
10561 $$emit$$"dec rcx\n\t"
10562 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10563 $$emit$$"vmovq xmm0,(rax)\n\t"
10564 $$emit$$"add 0x8,rax\n\t"
10565 $$emit$$"dec rcx\n\t"
10566 $$emit$$"jge L_sloop\n\t"
10567 $$emit$$"# L_end:\n\t"
10568 } else {
10569 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10570 }
10571 $$emit$$"# DONE"
10572 %}
10573 ins_encode %{
10574 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10575 $tmp$$XMMRegister, false, knoreg);
10576 %}
10577 ins_pipe(pipe_slow);
10578 %}
10579
10580 // Small ClearArray AVX512 non-constant length.
// AVX-512 (UseAVX > 2) variant of rep_stos: same contract, but clear_mem is
// given an opmask temporary (ktmp) instead of knoreg.
10581 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10582 Universe dummy, rFlagsReg cr)
10583 %{
10584 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
10585 match(Set dummy (ClearArray cnt base));
10586 ins_cost(125);
10587 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10588
10589 format %{ $$template
10590 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10591 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10592 $$emit$$"jg LARGE\n\t"
10593 $$emit$$"dec rcx\n\t"
10594 $$emit$$"js DONE\t# Zero length\n\t"
10595 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10596 $$emit$$"dec rcx\n\t"
10597 $$emit$$"jge LOOP\n\t"
10598 $$emit$$"jmp DONE\n\t"
10599 $$emit$$"# LARGE:\n\t"
10600 if (UseFastStosb) {
10601 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10602 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10603 } else if (UseXMMForObjInit) {
10604 $$emit$$"mov rdi,rax\n\t"
10605 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10606 $$emit$$"jmpq L_zero_64_bytes\n\t"
10607 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10615 $$emit$$"jl L_tail\n\t"
10616 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10617 $$emit$$"add 0x20,rax\n\t"
10618 $$emit$$"sub 0x4,rcx\n\t"
10619 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10620 $$emit$$"add 0x4,rcx\n\t"
10621 $$emit$$"jle L_end\n\t"
10622 $$emit$$"dec rcx\n\t"
10623 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10624 $$emit$$"vmovq xmm0,(rax)\n\t"
10625 $$emit$$"add 0x8,rax\n\t"
10626 $$emit$$"dec rcx\n\t"
10627 $$emit$$"jge L_sloop\n\t"
10628 $$emit$$"# L_end:\n\t"
10629 } else {
10630 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10631 }
10632 $$emit$$"# DONE"
10633 %}
10634 ins_encode %{
10635 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10636 $tmp$$XMMRegister, false, $ktmp$$KRegister);
10637 %}
10638 ins_pipe(pipe_slow);
10639 %}
10640
10641 // Large ClearArray non-AVX512.
// Large-array variant (is_large(), UseAVX <= 2): no short-length fast path;
// clear_mem is called with is_large = true.
10642 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10643 Universe dummy, rFlagsReg cr)
10644 %{
10645 predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
10646 match(Set dummy (ClearArray cnt base));
10647 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10648
10649 format %{ $$template
10650 if (UseFastStosb) {
10651 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10652 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10653 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
10654 } else if (UseXMMForObjInit) {
10655 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
10656 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10657 $$emit$$"jmpq L_zero_64_bytes\n\t"
10658 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10659 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10660 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10661 $$emit$$"add 0x40,rax\n\t"
10662 $$emit$$"# L_zero_64_bytes:\n\t"
10663 $$emit$$"sub 0x8,rcx\n\t"
10664 $$emit$$"jge L_loop\n\t"
10665 $$emit$$"add 0x4,rcx\n\t"
10666 $$emit$$"jl L_tail\n\t"
10667 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10668 $$emit$$"add 0x20,rax\n\t"
10669 $$emit$$"sub 0x4,rcx\n\t"
10670 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10671 $$emit$$"add 0x4,rcx\n\t"
10672 $$emit$$"jle L_end\n\t"
10673 $$emit$$"dec rcx\n\t"
10674 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10675 $$emit$$"vmovq xmm0,(rax)\n\t"
10676 $$emit$$"add 0x8,rax\n\t"
10677 $$emit$$"dec rcx\n\t"
10678 $$emit$$"jge L_sloop\n\t"
10679 $$emit$$"# L_end:\n\t"
10680 } else {
10681 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10682 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
10683 }
10684 %}
10685 ins_encode %{
10686 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10687 $tmp$$XMMRegister, true, knoreg);
10688 %}
10689 ins_pipe(pipe_slow);
10690 %}
10691
10692 // Large ClearArray AVX512.
// AVX-512 (UseAVX > 2) large-array variant: same as rep_stos_large but
// clear_mem receives an opmask temporary (ktmp).
10693 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10694 Universe dummy, rFlagsReg cr)
10695 %{
10696 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
10697 match(Set dummy (ClearArray cnt base));
10698 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10699
10700 format %{ $$template
10701 if (UseFastStosb) {
10702 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10703 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10704 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
10705 } else if (UseXMMForObjInit) {
10706 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
10707 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10708 $$emit$$"jmpq L_zero_64_bytes\n\t"
10709 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10710 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10711 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10712 $$emit$$"add 0x40,rax\n\t"
10713 $$emit$$"# L_zero_64_bytes:\n\t"
10714 $$emit$$"sub 0x8,rcx\n\t"
10715 $$emit$$"jge L_loop\n\t"
10716 $$emit$$"add 0x4,rcx\n\t"
10717 $$emit$$"jl L_tail\n\t"
10718 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10719 $$emit$$"add 0x20,rax\n\t"
10720 $$emit$$"sub 0x4,rcx\n\t"
10721 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10722 $$emit$$"add 0x4,rcx\n\t"
10723 $$emit$$"jle L_end\n\t"
10724 $$emit$$"dec rcx\n\t"
10725 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10726 $$emit$$"vmovq xmm0,(rax)\n\t"
10727 $$emit$$"add 0x8,rax\n\t"
10728 $$emit$$"dec rcx\n\t"
10729 $$emit$$"jge L_sloop\n\t"
10730 $$emit$$"# L_end:\n\t"
10731 } else {
10732 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10733 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
10734 }
10735 %}
10736 ins_encode %{
10737 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10738 $tmp$$XMMRegister, true, $ktmp$$KRegister);
10739 %}
10740 ins_pipe(pipe_slow);
10741 %}
10742
10743 // Small ClearArray AVX512 constant length.
// Compile-time-constant length variant (requires AVX512VL/BW): length is an
// immediate, so arbitrary registers can be used and nothing is USE_KILLed.
10744 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
10745 %{
10746 predicate(!((ClearArrayNode*)n)->is_large() &&
10747 ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
10748 match(Set dummy (ClearArray cnt base));
10749 ins_cost(100);
10750 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
10751 format %{ "clear_mem_imm $base , $cnt \n\t" %}
10752 ins_encode %{
10753 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
10754 %}
10755 ins_pipe(pipe_slow);
10756 %}
10757
// Latin1 (byte[]) string comparison intrinsic for the non-AVX512VLBW path;
// inputs and counts are killed, result lands in rax.
10758 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10759 rax_RegI result, legRegD tmp1, rFlagsReg cr)
10760 %{
10761 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
10762 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10763 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10764
10765 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
10766 ins_encode %{
10767 __ string_compare($str1$$Register, $str2$$Register,
10768 $cnt1$$Register, $cnt2$$Register, $result$$Register,
10769 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
10770 %}
10771 ins_pipe( pipe_slow );
10772 %}
10773
12528
12529 ins_cost(300);
12530 format %{ "call_leaf,runtime " %}
12531 ins_encode(clear_avx, Java_To_Runtime(meth));
12532 ins_pipe(pipe_slow);
12533 %}
12534
12535 // Call runtime without safepoint and with vector arguments
// Note: unlike the other leaf-call forms, no clear_avx in the encoding —
// vector registers carry live arguments across this call.
12536 instruct CallLeafDirectVector(method meth)
12537 %{
12538 match(CallLeafVector);
12539 effect(USE meth);
12540
12541 ins_cost(300);
12542 format %{ "call_leaf,vector " %}
12543 ins_encode(Java_To_Runtime(meth));
12544 ins_pipe(pipe_slow);
12545 %}
12546
12547 // Call runtime without safepoint
// Leaf call with no FP arguments: clears AVX state before the call.
12548 instruct CallLeafNoFPDirect(method meth)
12549 %{
12550 match(CallLeafNoFP);
12551 effect(USE meth);
12552
12553 ins_cost(300);
12554 format %{ "call_leaf_nofp,runtime " %}
12555 ins_encode(clear_avx, Java_To_Runtime(meth));
12556 ins_pipe(pipe_slow);
12557 %}
12558
12559 // Return Instruction
12560 // Remove the return address & jump to it.
12561 // Notice: We always emit a nop after a ret to make sure there is room
12562 // for safepoint patching
12563 instruct Ret()
12564 %{
12565 match(Return);
12566
12567 format %{ "ret" %}
12568 ins_encode %{
12569 __ ret(0);
|
473 }
474
475 // !!!!! Special hack to get all types of calls to specify the byte offset
476 // from the start of the call to the point where the return address
477 // will point.
// Distance in bytes from the start of the call sequence to the return
// address: a 5-byte direct call, plus the optional preceding vzeroupper.
478 int MachCallStaticJavaNode::ret_addr_offset()
479 {
480 int offset = 5; // 5 bytes from start of call to where return address points
481 offset += clear_avx_size(); // vzeroupper (if any) precedes the call
482 return offset;
483 }
484
// Return-address offset for dynamic (inline-cache) calls: 10-byte movq of
// the IC data plus a 5-byte call, shifted by an optional vzeroupper.
485 int MachCallDynamicJavaNode::ret_addr_offset()
486 {
487 int offset = 15; // 15 bytes from start of call to where return address points
488 offset += clear_avx_size(); // vzeroupper (if any) precedes the sequence
489 return offset;
490 }
491
// Return-address offset for runtime calls. A null _entry_point denotes an
// indirect leaf call through a register (CallLeafNoFPInDirect): 3 bytes.
// Otherwise a 13-byte movq+callq sequence, plus vzeroupper except for vector
// leaf calls, whose encoding omits clear_avx (see CallLeafDirectVector).
492 int MachCallRuntimeNode::ret_addr_offset() {
493 if (_entry_point == nullptr) {
494 // CallLeafNoFPInDirect
495 return 3; // callq (register)
496 }
497 int offset = 13; // movq r10,#addr; callq (r10)
498 if (this->ideal_Opcode() != Op_CallLeafVector) {
499 offset += clear_avx_size();
500 }
501 return offset;
502 }
503
504 //
505 // Compute padding required for nodes which need alignment
506 //
507
508 // The address of the call instruction needs to be 4-byte aligned to
509 // ensure that it does not span a cache line so that it can be patched.
// Padding needed so the call's 4-byte displacement does not straddle a cache
// line and can be patched atomically; alignment is computed after the
// optional vzeroupper and the call opcode byte.
510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
511 {
512 current_offset += clear_avx_size(); // skip vzeroupper
513 current_offset += 1; // skip call opcode byte
514 return align_up(current_offset, alignment_required()) - current_offset;
515 }
516
517 // The address of the call instruction needs to be 4-byte aligned to
518 // ensure that it does not span a cache line so that it can be patched.
519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
520 {
521 current_offset += clear_avx_size(); // skip vzeroupper
522 current_offset += 11; // skip movq instruction + call opcode byte
523 return align_up(current_offset, alignment_required()) - current_offset;
711 st->print("# stack alignment check");
712 #endif
713 }
714 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
715 st->print("\n\t");
716 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
717 st->print("\n\t");
718 st->print("je fast_entry\t");
719 st->print("\n\t");
720 st->print("call #nmethod_entry_barrier_stub\t");
721 st->print("\n\tfast_entry:");
722 }
723 st->cr();
724 }
725 #endif
726
// Emit the prologue (inline-type-aware variant): verified_entry(C) builds the
// frame, an nmethod entry barrier is added for normal methods (not stubs),
// and the _verified_entry label — jumped to by MachVEPNode — is bound,
// skipped during scratch (size-measuring) emission.
727 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
728 Compile* C = ra_->C;
729 C2_MacroAssembler _masm(&cbuf);
730
731 __ verified_entry(C);
732
733 if (ra_->C->stub_function() == nullptr) {
734 __ entry_barrier();
735 }
736
737 if (!Compile::current()->output()->in_scratch_emit_size()) {
738 __ bind(*_verified_entry);
739 }
740
741 C->output()->set_frame_complete(cbuf.insts_size());
742
743 if (C->has_mach_constant_base_node()) {
744 // NOTE: We set the table base offset here because users might be
745 // emitted before MachConstantBaseNode.
746 ConstantTable& constant_table = C->output()->constant_table();
747 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
748 }
749 }
750
// Upper bound on the number of relocation entries the prologue may emit.
751 int MachPrologNode::reloc() const
752 {
753 return 0; // a large enough number
754 }
755
756 //=============================================================================
757 #ifndef PRODUCT
// Debug-only pretty printer for the epilogue; mirrors MachEpilogNode::emit
// below (vzeroupper, frame removal, return-poll safepoint).
758 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
759 {
760 Compile* C = ra_->C;
761 if (generate_vzeroupper(C)) {
762 st->print("vzeroupper");
763 st->cr(); st->print("\t");
764 }
765
766 int framesize = C->output()->frame_size_in_bytes();
767 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
768 // Remove word for return adr already pushed
769 // and RBP
770 framesize -= 2*wordSize;
778 if (do_polling() && C->is_method_compilation()) {
779 st->print("\t");
780 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
781 "ja #safepoint_stub\t"
782 "# Safepoint: poll for GC");
783 }
784 }
785 #endif
786
// Emit the epilogue (inline-type-aware variant): clear AVX upper state when
// needed, remove the frame via remove_frame (which also handles stack repair
// for inline-type calling conventions), run the reserved-stack check, and
// emit the return-poll safepoint.
787 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
788 {
789 Compile* C = ra_->C;
790 MacroAssembler _masm(&cbuf);
791
792 if (generate_vzeroupper(C)) {
793 // Clear upper bits of YMM registers when current compiled code uses
794 // wide vectors to avoid AVX <-> SSE transition penalty during call.
795 __ vzeroupper();
796 }
797
798 // Subtract two words to account for return address and rbp
799 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
800 __ remove_frame(initial_framesize, C->needs_stack_repair());
801
802 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
803 __ reserved_stack_check();
804 }
805
// Return-address safepoint poll; the real stub and label exist only outside
// scratch (size-measuring) emission passes.
806 if (do_polling() && C->is_method_compilation()) {
807 MacroAssembler _masm(&cbuf);
808 Label dummy_label;
809 Label* code_stub = &dummy_label;
810 if (!C->output()->in_scratch_emit_size()) {
811 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
812 C->output()->add_stub(stub);
813 code_stub = &stub->entry();
814 }
815 __ relocate(relocInfo::poll_return_type);
816 __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
817 }
818 }
819
// Upper bound on relocation entries the epilogue may emit (the return poll).
820 int MachEpilogNode::reloc() const
821 {
822 return 2; // a large enough number
823 }
824
// Use the generic pipeline description for the epilogue node.
825 const Pipeline* MachEpilogNode::pipeline() const
826 {
827 return MachNode::pipeline_class();
828 }
829
830 //=============================================================================
831
// Register class of a value being spilled or copied; used to select the
// appropriate move instruction for a given source/destination pair.
832 enum RC {
833 rc_bad,
834 rc_int,
835 rc_kreg, // k (opmask) registers
836 rc_float, // XMM registers
837 rc_stack
838 };
839
1422 st->print("leaq %s, [rsp + #%d]\t# box lock",
1423 Matcher::regName[reg], offset);
1424 }
1425 #endif
1426
// Materialize the stack address of this box lock into its assigned register:
// lea reg, [rsp + offset].
1427 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1428 {
1429 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1430 int reg = ra_->get_encode(this);
1431
1432 MacroAssembler masm(&cbuf);
1433 masm.lea(as_Register(reg), Address(rsp, offset));
1434 }
1435
// Size of the lea emitted above: 5 bytes with an 8-bit displacement, 8 bytes
// once a 32-bit displacement is required (offset >= 0x80).
1436 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1437 {
1438 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1439 return (offset < 0x80) ? 5 : 8; // REX
1440 }
1441
1442 //=============================================================================
1443 #ifndef PRODUCT
// Debug-only listing for the inline-type entry-point node (see emit below).
1444 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1445 {
1446 st->print_cr("MachVEPNode");
1447 }
1448 #endif
1449
// Inline-type entry point. Unverified case: emit the inline-cache klass
// check and jump to the IC miss stub on mismatch. Verified case: build a
// temporary frame (normal methods only), unpack inline-type arguments passed
// as oops, re-enter via verified_entry with the stack increment, and jump to
// the real verified entry. Trailing nops keep the next entry 4-byte aligned.
1450 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1451 {
1452 C2_MacroAssembler _masm(&cbuf);
1453 uint insts_size = cbuf.insts_size();
1454 if (!_verified) {
1455 if (UseCompressedClassPointers) {
1456 __ load_klass(rscratch1, j_rarg0, rscratch2);
1457 __ cmpptr(rax, rscratch1);
1458 } else {
1459 __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1460 }
1461 __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1462 } else {
1463 // TODO 8284443 Avoid creation of temporary frame
1464 if (ra_->C->stub_function() == nullptr) {
1465 __ verified_entry(ra_->C, 0);
1466 __ entry_barrier();
1467 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
1468 __ remove_frame(initial_framesize, false);
1469 }
1470 // Unpack inline type args passed as oop and then jump to
1471 // the verified entry point (skipping the unverified entry).
1472 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
1473 // Emit code for verified entry and save increment for stack repair on return
1474 __ verified_entry(ra_->C, sp_inc);
1475 if (Compile::current()->output()->in_scratch_emit_size()) {
1476 Label dummy_verified_entry;
1477 __ jmp(dummy_verified_entry);
1478 } else {
1479 __ jmp(*_verified_entry);
1480 }
1481 }
1482 /* WARNING these NOPs are critical so that verified entry point is properly
1483 4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1484 int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1485 nops_cnt &= 0x3; // Do not add nops if code is aligned.
1486 if (nops_cnt > 0) {
1487 __ nop(nops_cnt);
1488 }
1489 }
1490
1491 //=============================================================================
1492 #ifndef PRODUCT
// Debug-only listing of the unverified entry point: load the receiver klass,
// compare against the inline cache's speculated klass, and jump to the IC
// miss stub on mismatch.
1493 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1494 {
1495 if (UseCompressedClassPointers) {
1496 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1497 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1498 } else {
1499 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1500 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1501 }
1502 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1503 }
1504 #endif
1505
1506 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1507 {
1508 MacroAssembler masm(&cbuf);
1509 masm.ic_check(InteriorEntryAlignment);
1510 }
1511
1512 //=============================================================================
1513
1514 bool Matcher::supports_vector_calling_convention(void) {
1515 if (EnableVectorSupport && UseVectorStubs) {
1516 return true;
1517 }
1518 return false;
1519 }
1520
1521 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1522 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1523 int lo = XMM0_num;
1524 int hi = XMM0b_num;
1525 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1526 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1527 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1528 return OptoRegPair(hi, lo);
1529 }
1530
1531 // Is this branch offset short enough that a short branch can be used?
3096 %}
3097 %}
3098
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches addresses of the form [reg + off + idx << scale].
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the (ConvI2L idx) input is known non-negative, so the
  // 32-bit index can be used directly as an unsigned address component.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3115
// Indirect Narrow Oop Operand
// Compressed-oop addressing [R12 + reg << 3]: R12 holds the heap base and
// the decode shift must be 3 (CompressedOops::shift() == times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
3131
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Matches [R12 + reg << 3 + off], i.e. a field/element access off a
// compressed oop decoded via the R12 heap base.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3149
3150 // Indirect Memory Operand
3151 operand indirectNarrow(rRegN reg)
3458 equal(0x4, "e");
3459 not_equal(0x5, "ne");
3460 less(0x2, "b");
3461 greater_equal(0x3, "ae");
3462 less_equal(0x6, "be");
3463 greater(0x7, "a");
3464 overflow(0x0, "o");
3465 no_overflow(0x1, "no");
3466 %}
3467 %}
3468
3469 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
3471 // instruction definitions by not requiring the AD writer to specify separate
3472 // instructions for every form of operand when the instruction accepts
3473 // multiple operand types with the same basic encoding and format. The classic
3474 // case of this is memory operands.
3475
// All addressing modes -- both regular-pointer and narrow-oop forms -- that
// an instruction taking a generic "memory" operand may match.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3482
3483 //----------PIPELINE-----------------------------------------------------------
3484 // Rules which define the behavior of the target architectures pipeline.
3485 pipeline %{
3486
3487 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // x86-64 instructions are variable-sized
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3498
5989 format %{ "MEMBAR-storestore (empty encoding)" %}
5990 ins_encode( );
5991 ins_pipe(empty);
5992 %}
5993
5994 //----------Move Instructions--------------------------------------------------
5995
// Reinterpret a raw long value as a pointer (CastX2P). Pure register move;
// elided entirely when source and destination registers coincide.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6008
// Expose the raw bits of a narrow (compressed) pointer as a long (CastP2X
// with an rRegN source). Pure register move, elided if registers coincide.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6021
// Expose the raw bits of a full-width pointer as a long (CastP2X).
// Pure register move, elided if registers coincide.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6034
6035 // Convert oop into int for vectors alignment masking
6036 instruct convP2I(rRegI dst, rRegP src)
6037 %{
6038 match(Set dst (ConvL2I (CastP2X src)));
6039
6040 format %{ "movl $dst, $src\t# ptr -> int" %}
6041 ins_encode %{
10543 effect(DEF dst, USE src);
10544 ins_cost(100);
10545 format %{ "movd $dst,$src\t# MoveI2F" %}
10546 ins_encode %{
10547 __ movdl($dst$$XMMRegister, $src$$Register);
10548 %}
10549 ins_pipe( pipe_slow );
10550 %}
10551
// Bit-level reinterpretation of a long as a double (MoveL2D) via a
// 64-bit GPR -> XMM move; no numeric conversion takes place.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10562
10563
// Fast clearing of an array
// Small ClearArray non-AVX512.
// cnt = number of 8-byte words, base = start address, val = fill value.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Not known-large, qword (non word-copy-only) stores, pre-AVX512 hardware.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(..., is_large=false, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
10624
// Small ClearArray non-AVX512, word-copy-only variant: byte-granular
// "rep stosb" is not allowed, so there is no UseFastStosb fast path.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(..., is_large=false, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
10680
10681 // Small ClearArray AVX512 non-constant length.
10682 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10683 Universe dummy, rFlagsReg cr)
10684 %{
10685 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10686 match(Set dummy (ClearArray (Binary cnt base) val));
10687 ins_cost(125);
10688 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10689
10690 format %{ $$template
10691 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10692 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10693 $$emit$$"jg LARGE\n\t"
10694 $$emit$$"dec rcx\n\t"
10695 $$emit$$"js DONE\t# Zero length\n\t"
10696 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10697 $$emit$$"dec rcx\n\t"
10698 $$emit$$"jge LOOP\n\t"
10699 $$emit$$"jmp DONE\n\t"
10700 $$emit$$"# LARGE:\n\t"
10701 if (UseFastStosb) {
10702 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10703 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10704 } else if (UseXMMForObjInit) {
10705 $$emit$$"mov rdi,rax\n\t"
10706 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10707 $$emit$$"jmpq L_zero_64_bytes\n\t"
10708 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10716 $$emit$$"jl L_tail\n\t"
10717 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10718 $$emit$$"add 0x20,rax\n\t"
10719 $$emit$$"sub 0x4,rcx\n\t"
10720 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10721 $$emit$$"add 0x4,rcx\n\t"
10722 $$emit$$"jle L_end\n\t"
10723 $$emit$$"dec rcx\n\t"
10724 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10725 $$emit$$"vmovq xmm0,(rax)\n\t"
10726 $$emit$$"add 0x8,rax\n\t"
10727 $$emit$$"dec rcx\n\t"
10728 $$emit$$"jge L_sloop\n\t"
10729 $$emit$$"# L_end:\n\t"
10730 } else {
10731 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10732 }
10733 $$emit$$"# DONE"
10734 %}
10735 ins_encode %{
10736 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10737 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
10738 %}
10739 ins_pipe(pipe_slow);
10740 %}
10741
10742 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10743 Universe dummy, rFlagsReg cr)
10744 %{
10745 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10746 match(Set dummy (ClearArray (Binary cnt base) val));
10747 ins_cost(125);
10748 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10749
10750 format %{ $$template
10751 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10752 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10753 $$emit$$"jg LARGE\n\t"
10754 $$emit$$"dec rcx\n\t"
10755 $$emit$$"js DONE\t# Zero length\n\t"
10756 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10757 $$emit$$"dec rcx\n\t"
10758 $$emit$$"jge LOOP\n\t"
10759 $$emit$$"jmp DONE\n\t"
10760 $$emit$$"# LARGE:\n\t"
10761 if (UseFastStosb) {
10762 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10763 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10764 } else if (UseXMMForObjInit) {
10765 $$emit$$"mov rdi,rax\n\t"
10766 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10767 $$emit$$"jmpq L_zero_64_bytes\n\t"
10768 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10776 $$emit$$"jl L_tail\n\t"
10777 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10778 $$emit$$"add 0x20,rax\n\t"
10779 $$emit$$"sub 0x4,rcx\n\t"
10780 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10781 $$emit$$"add 0x4,rcx\n\t"
10782 $$emit$$"jle L_end\n\t"
10783 $$emit$$"dec rcx\n\t"
10784 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10785 $$emit$$"vmovq xmm0,(rax)\n\t"
10786 $$emit$$"add 0x8,rax\n\t"
10787 $$emit$$"dec rcx\n\t"
10788 $$emit$$"jge L_sloop\n\t"
10789 $$emit$$"# L_end:\n\t"
10790 } else {
10791 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10792 }
10793 $$emit$$"# DONE"
10794 %}
10795 ins_encode %{
10796 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10797 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
10798 %}
10799 ins_pipe(pipe_slow);
10800 %}
10801
// Large ClearArray non-AVX512.
// Known-large clear: no short-length fast path is emitted.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(..., is_large=true, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
10851
// Large ClearArray non-AVX512, word-copy-only variant: byte-granular
// "rep stosb" is not allowed, so there is no UseFastStosb fast path.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(..., is_large=true, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
10897
// Large ClearArray AVX512.
// Same as rep_stos_large but for UseAVX > 2; needs a legacy-encodable XMM
// temp and an AVX512 mask register temp for clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(..., is_large=true, word_copy_only=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10948
// Large ClearArray AVX512, word-copy-only variant.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(..., is_large=true, word_copy_only=true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10998
// Small ClearArray AVX512 constant length.
// Length is a compile-time constant ('cnt' is an immediate); requires
// AVX512VL+BW (see VM_Version::supports_avx512vlbw()).
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Constant-length overload of clear_mem.
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11013
// Latin1-vs-Latin1 (byte[] x byte[]) string compare, pre-AVX512VLBW path;
// passes knoreg since no mask register is available/needed here.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11029
12784
12785 ins_cost(300);
12786 format %{ "call_leaf,runtime " %}
12787 ins_encode(clear_avx, Java_To_Runtime(meth));
12788 ins_pipe(pipe_slow);
12789 %}
12790
// Call runtime without safepoint and with vector arguments
// Note: deliberately no clear_avx before the call -- vector arguments live
// in the YMM/ZMM registers; this matches MachCallRuntimeNode::ret_addr_offset(),
// which skips clear_avx_size() for Op_CallLeafVector.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12802
// Call runtime without safepoint
// entry point is null, target holds the address to call
// (indirect leaf call through a register; selected only when the call node
// carries no static entry point).
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
12818
// Direct leaf runtime call (no safepoint, no FP arguments) to a statically
// known entry point; clears the upper AVX state before the call.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12830
12831 // Return Instruction
12832 // Remove the return address & jump to it.
12833 // Notice: We always emit a nop after a ret to make sure there is room
12834 // for safepoint patching
12835 instruct Ret()
12836 %{
12837 match(Return);
12838
12839 format %{ "ret" %}
12840 ins_encode %{
12841 __ ret(0);
|