598 }
599
600 // !!!!! Special hack to get all types of calls to specify the byte offset
601 // from the start of the call to the point where the return address
602 // will point.
603 int MachCallStaticJavaNode::ret_addr_offset()
604 {
605 int offset = 5; // 5 bytes from start of call to where return address points
606 offset += clear_avx_size();
607 return offset;
608 }
609
610 int MachCallDynamicJavaNode::ret_addr_offset()
611 {
612 int offset = 15; // 15 bytes from start of call to where return address points
613 offset += clear_avx_size();
614 return offset;
615 }
616
int MachCallRuntimeNode::ret_addr_offset() {
  // 10-byte movabs of the target into r10 plus a 3-byte indirect call.
  int offset = 13; // movq r10,#addr; callq (r10)
  // Vector leaf calls do not emit vzeroupper (vector arguments must survive),
  // so only account for clear_avx for the other runtime call kinds.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
624 //
625 // Compute padding required for nodes which need alignment
626 //
627
628 // The address of the call instruction needs to be 4-byte aligned to
629 // ensure that it does not span a cache line so that it can be patched.
630 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
631 {
632 current_offset += clear_avx_size(); // skip vzeroupper
633 current_offset += 1; // skip call opcode byte
634 return align_up(current_offset, alignment_required()) - current_offset;
635 }
636
637 // The address of the call instruction needs to be 4-byte aligned to
638 // ensure that it does not span a cache line so that it can be patched.
639 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
640 {
641 current_offset += clear_avx_size(); // skip vzeroupper
642 current_offset += 11; // skip movq instruction + call opcode byte
643 return align_up(current_offset, alignment_required()) - current_offset;
829 st->print("\n\t");
830 st->print("# stack alignment check");
831 #endif
832 }
833 if (C->stub_function() != nullptr) {
834 st->print("\n\t");
835 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
836 st->print("\n\t");
837 st->print("je fast_entry\t");
838 st->print("\n\t");
839 st->print("call #nmethod_entry_barrier_stub\t");
840 st->print("\n\tfast_entry:");
841 }
842 st->cr();
843 }
844 #endif
845
// Emit the method prolog: optional class-initialization barrier, stack-bang
// + frame setup, and constant-table base bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    // Fast class-initialization check: if the holder class is not fully
    // initialized for the current thread, re-dispatch through the
    // wrong-method stub instead of entering the compiled code.
    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Build the frame; bang the stack only when the frame is large enough to
  // need it. The last argument selects the stub-entry variant.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
878
879 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
880 {
881 return MachNode::size(ra_); // too many variables; just compute it
882 // the hard way
883 }
884
// Upper bound on the number of relocation entries the prolog emits.
// NOTE(review): 0 looks inconsistent with the "large enough" comment;
// presumably the prolog emits no relocatable immediates here — confirm.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
889
890 //=============================================================================
891 #ifndef PRODUCT
892 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
893 {
894 Compile* C = ra_->C;
895 if (generate_vzeroupper(C)) {
896 st->print("vzeroupper");
897 st->cr(); st->print("\t");
898 }
899
900 int framesize = C->output()->frame_size_in_bytes();
901 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
902 // Remove word for return adr already pushed
903 // and RBP
904 framesize -= 2*wordSize;
911 st->print_cr("popq rbp");
912 if (do_polling() && C->is_method_compilation()) {
913 st->print("\t");
914 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
915 "ja #safepoint_stub\t"
916 "# Safepoint: poll for GC");
917 }
918 }
919 #endif
920
// Emit the method epilog: tear down the frame and emit the return-polling
// safepoint check.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return poll: branch to an out-of-line C2SafepointPollStub when a
    // safepoint is pending. No stub is registered while only measuring
    // code size (scratch emission) — a dummy label stands in.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
961
962 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
963 {
964 return MachNode::size(ra_); // too many variables; just compute it
965 // the hard way
966 }
967
// Conservative upper bound on relocation entries in the epilog
// (covers the poll-return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
972
// The epilog has no specialized pipeline description; use the generic one.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
977
978 //=============================================================================
979
// Register class of a machine operand, used to decide how spill copies
// are emitted.
enum RC {
  rc_bad,   // not a register / no valid class
  rc_int,   // general-purpose register
  rc_kreg,  // opmask (k) register
  rc_float, // XMM register
  rc_stack  // stack slot
};
987
1549 #endif
1550
// Materialize the address of this node's stack slot (the lock box)
// into the register assigned by the allocator.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
1558
// Size of the single lea emitted above: the displacement encodes as
// disp8 when it fits below 0x80, disp32 otherwise, and destination
// encodings above 15 require the longer REX2 prefix.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
1568
1569 //=============================================================================
1570 #ifndef PRODUCT
1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1572 {
1573 if (UseCompressedClassPointers) {
1574 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1575 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1576 } else {
1577 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1578 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1579 }
1580 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1581 }
1582 #endif
1583
// Emit the inline-cache check, padded to InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1588
// The ic_check sequence varies with alignment and pointer compression;
// measure the emitted code instead of predicting it.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1594
1595
1596 //=============================================================================
1597
// Vector calling convention is available whenever Vector API intrinsics
// support is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
1601
1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1603 assert(EnableVectorSupport, "sanity");
1604 int lo = XMM0_num;
1605 int hi = XMM0b_num;
1606 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1607 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1608 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1609 return OptoRegPair(hi, lo);
1610 }
1611
1612 // Is this branch offset short enough that a short branch can be used?
1613 //
1614 // NOTE: If the platform does not provide any short branch variants, then
1615 // this method should return false for offset 0.
3046 %}
3047 %}
3048
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is provably non-negative (type lower
  // bound >= 0), so sign extension cannot produce a negative scaled index.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3065
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid with compressed oops and shift 3: the effective address is
  // R12 (heap base) + (narrow oop << 3) + offset.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3083
3084 // Indirect Memory Operand
3085 operand indirectNarrow(rRegN reg)
3392 equal(0x4, "e");
3393 not_equal(0x5, "ne");
3394 less(0x2, "b");
3395 greater_equal(0x3, "ae");
3396 less_equal(0x6, "be");
3397 greater(0x7, "a");
3398 overflow(0x0, "o");
3399 no_overflow(0x1, "no");
3400 %}
3401 %}
3402
3403 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
3405 // instruction definitions by not requiring the AD writer to specify separate
3406 // instructions for every form of operand when the instruction accepts
3407 // multiple operand types with the same basic encoding and format. The classic
3408 // case of this is memory operands.
3409
// All addressing modes accepted by instructions that take a generic
// "memory" operand, covering both plain and narrow-oop (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3416
3417 //----------PIPELINE-----------------------------------------------------------
3418 // Rules which define the behavior of the target architectures pipeline.
3419 pipeline %{
3420
3421 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // x86 instructions are variable-sized
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3432
5933 format %{ "MEMBAR-storestore (empty encoding)" %}
5934 ins_encode( );
5935 ins_pipe(empty);
5936 %}
5937
5938 //----------Move Instructions--------------------------------------------------
5939
// Reinterpret a long as a pointer (no-op at the bit level).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5952
// Reinterpret a pointer as a long (no-op at the bit level).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator assigned the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5965
5966 // Convert oop into int for vectors alignment masking
5967 instruct convP2I(rRegI dst, rRegP src)
5968 %{
5969 match(Set dst (ConvL2I (CastP2X src)));
5970
5971 format %{ "movl $dst, $src\t# ptr -> int" %}
5972 ins_encode %{
12193 effect(DEF dst, USE src);
12194 ins_cost(100);
12195 format %{ "movd $dst,$src\t# MoveI2F" %}
12196 ins_encode %{
12197 __ movdl($dst$$XMMRegister, $src$$Register);
12198 %}
12199 ins_pipe( pipe_slow );
12200 %}
12201
// Bitwise move of a long GPR into an XMM register (no numeric conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12212
12213 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
12215 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12216 Universe dummy, rFlagsReg cr)
12217 %{
12218 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12219 match(Set dummy (ClearArray cnt base));
12220 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
12221
12222 format %{ $$template
12223 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12224 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12225 $$emit$$"jg LARGE\n\t"
12226 $$emit$$"dec rcx\n\t"
12227 $$emit$$"js DONE\t# Zero length\n\t"
12228 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12229 $$emit$$"dec rcx\n\t"
12230 $$emit$$"jge LOOP\n\t"
12231 $$emit$$"jmp DONE\n\t"
12232 $$emit$$"# LARGE:\n\t"
12233 if (UseFastStosb) {
12234 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12235 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12236 } else if (UseXMMForObjInit) {
12237 $$emit$$"mov rdi,rax\n\t"
12238 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12239 $$emit$$"jmpq L_zero_64_bytes\n\t"
12240 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12248 $$emit$$"jl L_tail\n\t"
12249 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12250 $$emit$$"add 0x20,rax\n\t"
12251 $$emit$$"sub 0x4,rcx\n\t"
12252 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12253 $$emit$$"add 0x4,rcx\n\t"
12254 $$emit$$"jle L_end\n\t"
12255 $$emit$$"dec rcx\n\t"
12256 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12257 $$emit$$"vmovq xmm0,(rax)\n\t"
12258 $$emit$$"add 0x8,rax\n\t"
12259 $$emit$$"dec rcx\n\t"
12260 $$emit$$"jge L_sloop\n\t"
12261 $$emit$$"# L_end:\n\t"
12262 } else {
12263 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12264 }
12265 $$emit$$"# DONE"
12266 %}
12267 ins_encode %{
12268 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12269 $tmp$$XMMRegister, false, knoreg);
12270 %}
12271 ins_pipe(pipe_slow);
12272 %}
12273
12274 // Small non-constant length ClearArray for AVX512 targets.
12275 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12276 Universe dummy, rFlagsReg cr)
12277 %{
12278 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12279 match(Set dummy (ClearArray cnt base));
12280 ins_cost(125);
12281 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12282
12283 format %{ $$template
12284 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12285 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12286 $$emit$$"jg LARGE\n\t"
12287 $$emit$$"dec rcx\n\t"
12288 $$emit$$"js DONE\t# Zero length\n\t"
12289 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12290 $$emit$$"dec rcx\n\t"
12291 $$emit$$"jge LOOP\n\t"
12292 $$emit$$"jmp DONE\n\t"
12293 $$emit$$"# LARGE:\n\t"
12294 if (UseFastStosb) {
12295 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12296 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12297 } else if (UseXMMForObjInit) {
12298 $$emit$$"mov rdi,rax\n\t"
12299 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12300 $$emit$$"jmpq L_zero_64_bytes\n\t"
12301 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12309 $$emit$$"jl L_tail\n\t"
12310 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12311 $$emit$$"add 0x20,rax\n\t"
12312 $$emit$$"sub 0x4,rcx\n\t"
12313 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12314 $$emit$$"add 0x4,rcx\n\t"
12315 $$emit$$"jle L_end\n\t"
12316 $$emit$$"dec rcx\n\t"
12317 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12318 $$emit$$"vmovq xmm0,(rax)\n\t"
12319 $$emit$$"add 0x8,rax\n\t"
12320 $$emit$$"dec rcx\n\t"
12321 $$emit$$"jge L_sloop\n\t"
12322 $$emit$$"# L_end:\n\t"
12323 } else {
12324 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12325 }
12326 $$emit$$"# DONE"
12327 %}
12328 ins_encode %{
12329 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12330 $tmp$$XMMRegister, false, $ktmp$$KRegister);
12331 %}
12332 ins_pipe(pipe_slow);
12333 %}
12334
// Large non-constant length ClearArray for non-AVX512 targets.
// rcx/rdi/rax are fixed registers because the rep-stos idiom requires them.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // Pseudo-assembly only; the real code is emitted by clear_mem below and
  // depends on UseFastStosb / UseXMMForObjInit at compile time.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // 'true' selects the large-array path; knoreg = no opmask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12385
// Large non-constant length ClearArray for AVX512 targets.
// Same as rep_stos_large but reserves an opmask register for the masked
// AVX-512 tail handling in clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // Pseudo-assembly only; the real code is emitted by clear_mem below.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12436
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Constant-length variant: clear_mem receives the immediate count and can
  // emit a fixed, mask-based sequence (requires AVX-512 VL and >=256-bit vectors).
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12450
// Latin1 x Latin1 string compare for targets without AVX-512 VL/BW.
// Fixed registers are dictated by the string_compare stub calling pattern.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12466
14299
14300 ins_cost(300);
14301 format %{ "call_leaf,runtime " %}
14302 ins_encode(clear_avx, Java_To_Runtime(meth));
14303 ins_pipe(pipe_slow);
14304 %}
14305
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Unlike the plain leaf call, no clear_avx here: vzeroupper would
  // destroy the vector arguments.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14317
// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14329
14330 // Return Instruction
14331 // Remove the return address & jump to it.
14332 // Notice: We always emit a nop after a ret to make sure there is room
14333 // for safepoint patching
14334 instruct Ret()
14335 %{
14336 match(Return);
14337
14338 format %{ "ret" %}
14339 ins_encode %{
14340 __ ret(0);
|
598 }
599
600 // !!!!! Special hack to get all types of calls to specify the byte offset
601 // from the start of the call to the point where the return address
602 // will point.
603 int MachCallStaticJavaNode::ret_addr_offset()
604 {
605 int offset = 5; // 5 bytes from start of call to where return address points
606 offset += clear_avx_size();
607 return offset;
608 }
609
610 int MachCallDynamicJavaNode::ret_addr_offset()
611 {
612 int offset = 15; // 15 bytes from start of call to where return address points
613 offset += clear_avx_size();
614 return offset;
615 }
616
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    // Target is in a register: just the 3-byte indirect call.
    return 3; // callq (register)
  }
  // 10-byte movabs of the target into r10 plus a 3-byte indirect call.
  int offset = 13; // movq r10,#addr; callq (r10)
  // Vector leaf calls do not emit vzeroupper (vector arguments must survive),
  // so only account for clear_avx for the other runtime call kinds.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
628
629 //
630 // Compute padding required for nodes which need alignment
631 //
632
633 // The address of the call instruction needs to be 4-byte aligned to
634 // ensure that it does not span a cache line so that it can be patched.
635 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
636 {
637 current_offset += clear_avx_size(); // skip vzeroupper
638 current_offset += 1; // skip call opcode byte
639 return align_up(current_offset, alignment_required()) - current_offset;
640 }
641
642 // The address of the call instruction needs to be 4-byte aligned to
643 // ensure that it does not span a cache line so that it can be patched.
644 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
645 {
646 current_offset += clear_avx_size(); // skip vzeroupper
647 current_offset += 11; // skip movq instruction + call opcode byte
648 return align_up(current_offset, alignment_required()) - current_offset;
834 st->print("\n\t");
835 st->print("# stack alignment check");
836 #endif
837 }
838 if (C->stub_function() != nullptr) {
839 st->print("\n\t");
840 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
841 st->print("\n\t");
842 st->print("je fast_entry\t");
843 st->print("\n\t");
844 st->print("call #nmethod_entry_barrier_stub\t");
845 st->print("\n\tfast_entry:");
846 }
847 st->cr();
848 }
849 #endif
850
// Emit the method prolog: frame setup, nmethod entry barrier, and
// constant-table base bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Set up the frame; geometry comes from the Compile object.
  __ verified_entry(C);

  // Stub compilations do not need the nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // Bind the label that MachVEPNode jumps to after unpacking inline-type
  // arguments. Skipped during scratch emission (size estimation), where
  // the real label must not be bound.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
873
// Upper bound on the number of relocation entries the prolog emits.
// NOTE(review): 0 looks inconsistent with the "large enough" comment;
// presumably the prolog emits no relocatable immediates here — confirm.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
878
879 //=============================================================================
880 #ifndef PRODUCT
881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
882 {
883 Compile* C = ra_->C;
884 if (generate_vzeroupper(C)) {
885 st->print("vzeroupper");
886 st->cr(); st->print("\t");
887 }
888
889 int framesize = C->output()->frame_size_in_bytes();
890 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
891 // Remove word for return adr already pushed
892 // and RBP
893 framesize -= 2*wordSize;
900 st->print_cr("popq rbp");
901 if (do_polling() && C->is_method_compilation()) {
902 st->print("\t");
903 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
904 "ja #safepoint_stub\t"
905 "# Safepoint: poll for GC");
906 }
907 }
908 #endif
909
// Emit the method epilog: tear down the frame and emit the return-polling
// safepoint check.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return poll: branch to an out-of-line C2SafepointPollStub when a
    // safepoint is pending. No stub is registered while only measuring
    // code size (scratch emission) — a dummy label stands in.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
940
// Conservative upper bound on relocation entries in the epilog
// (covers the poll-return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
945
// The epilog has no specialized pipeline description; use the generic one.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
950
951 //=============================================================================
952
// Register class of a machine operand, used to decide how spill copies
// are emitted.
enum RC {
  rc_bad,   // not a register / no valid class
  rc_int,   // general-purpose register
  rc_kreg,  // opmask (k) register
  rc_float, // XMM register
  rc_stack  // stack slot
};
960
1522 #endif
1523
// Materialize the address of this node's stack slot (the lock box)
// into the register assigned by the allocator.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
1531
// Size of the single lea emitted above: the displacement encodes as
// disp8 when it fits below 0x80, disp32 otherwise, and destination
// encodings above 15 require the longer REX2 prefix.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
1541
1542 //=============================================================================
1543 #ifndef PRODUCT
1544 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1545 {
1546 st->print_cr("MachVEPNode");
1547 }
1548 #endif
1549
// Inline-type entry point. The unverified variant only emits the inline
// cache check; the verified variant unpacks inline-type arguments passed as
// oops and jumps to the verified entry bound by MachPrologNode.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // During size estimation the real label is unavailable; jump to a
      // dummy label so the emitted size still matches the final code.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
1584
1585 //=============================================================================
1586 #ifndef PRODUCT
1587 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1588 {
1589 if (UseCompressedClassPointers) {
1590 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1591 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1592 } else {
1593 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1594 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1595 }
1596 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1597 }
1598 #endif
1599
// Emit the unverified entry point: an inline-cache check aligned to
// InteriorEntryAlignment (on a miss it jumps to SharedRuntime::_ic_miss_stub,
// as shown by the PRODUCT-only format() above).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1604
1605 //=============================================================================
1606
// Vector calling convention support on x86_64 is gated solely on the
// EnableVectorSupport flag.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
1610
1611 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1612 assert(EnableVectorSupport, "sanity");
1613 int lo = XMM0_num;
1614 int hi = XMM0b_num;
1615 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1616 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1617 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1618 return OptoRegPair(hi, lo);
1619 }
1620
1621 // Is this branch offset short enough that a short branch can be used?
1622 //
1623 // NOTE: If the platform does not provide any short branch variants, then
1624 // this method should return false for offset 0.
3055 %}
3056 %}
3057
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only matches when the converted index is provably non-negative
  // (type lower bound >= 0) — presumably so the 32-bit index register can be
  // used directly in the addressing mode; TODO confirm against matcher docs.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3074
// Indirect Narrow Oop Operand
// Matches a decoded narrow oop used directly as an address:
// effective address = R12 + (narrow oop << 3).
operand indCompressedOop(rRegN reg) %{
  // Only valid when compressed oops use an 8-byte shift.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
3090
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Same shift requirement as indCompressedOop above.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3108
3109 // Indirect Memory Operand
3110 operand indirectNarrow(rRegN reg)
3417 equal(0x4, "e");
3418 not_equal(0x5, "ne");
3419 less(0x2, "b");
3420 greater_equal(0x3, "ae");
3421 less_equal(0x6, "be");
3422 greater(0x7, "a");
3423 overflow(0x0, "o");
3424 no_overflow(0x1, "no");
3425 %}
3426 %}
3427
3428 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
3430 // instruction definitions by not requiring the AD writer to specify separate
3431 // instructions for every form of operand when the instruction accepts
3432 // multiple operand types with the same basic encoding and format. The classic
3433 // case of this is memory operands.
3434
// All addressing modes accepted by instructions taking a generic "memory"
// operand, covering both regular and narrow-oop (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3441
3442 //----------PIPELINE-----------------------------------------------------------
3443 // Rules which define the behavior of the target architectures pipeline.
3444 pipeline %{
3445
3446 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size instructions (x86 is not a fixed-width ISA)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3457
5958 format %{ "MEMBAR-storestore (empty encoding)" %}
5959 ins_encode( );
5960 ins_pipe(empty);
5961 %}
5962
5963 //----------Move Instructions--------------------------------------------------
5964
instruct castX2P(rRegP dst, rRegL src)
%{
  // Reinterpret a long's bits as a pointer (CastX2P).
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Plain register move, elided entirely when dst and src were allocated
    // to the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5977
instruct castI2N(rRegN dst, rRegI src)
%{
  // Reinterpret an int's bits as a narrow pointer (CastI2N).
  match(Set dst (CastI2N src));

  // Format text fixed to "movl": the encoding below emits a 32-bit movl,
  // not a 64-bit movq.
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    // Register move, elided when allocated to the same register.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5990
instruct castN2X(rRegL dst, rRegN src)
%{
  // Reinterpret a narrow (compressed) pointer's bits as a long.
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Register move, elided when allocated to the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6003
instruct castP2X(rRegL dst, rRegP src)
%{
  // Reinterpret a pointer's bits as a long (CastP2X).
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Register move, elided when allocated to the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6016
6017 // Convert oop into int for vectors alignment masking
6018 instruct convP2I(rRegI dst, rRegP src)
6019 %{
6020 match(Set dst (ConvL2I (CastP2X src)));
6021
6022 format %{ "movl $dst, $src\t# ptr -> int" %}
6023 ins_encode %{
12244 effect(DEF dst, USE src);
12245 ins_cost(100);
12246 format %{ "movd $dst,$src\t# MoveI2F" %}
12247 ins_encode %{
12248 __ movdl($dst$$XMMRegister, $src$$Register);
12249 %}
12250 ins_pipe( pipe_slow );
12251 %}
12252
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  // Raw bit-pattern transfer from a long GPR to an XMM double register
  // (MoveL2D) — a register move, no numeric conversion.
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12263
12264
12265 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small (not statically-known-large) fill, any store strategy, pre-AVX512.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  // All the real work is in MacroAssembler::clear_mem
  // (is_large = false, word_copy_only = false).
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
12325
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  // Same as rep_stos above, but for nodes flagged word_copy_only()
  // (note: no UseFastStosb byte-store fast path in the predicate's cases).
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  // MacroAssembler::clear_mem with is_large = false, word_copy_only = true.
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
12381
12382 // Small non-constant length ClearArray for AVX512 targets.
12383 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12384 Universe dummy, rFlagsReg cr)
12385 %{
12386 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12387 match(Set dummy (ClearArray (Binary cnt base) val));
12388 ins_cost(125);
12389 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12390
12391 format %{ $$template
12392 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12393 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12394 $$emit$$"jg LARGE\n\t"
12395 $$emit$$"dec rcx\n\t"
12396 $$emit$$"js DONE\t# Zero length\n\t"
12397 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12398 $$emit$$"dec rcx\n\t"
12399 $$emit$$"jge LOOP\n\t"
12400 $$emit$$"jmp DONE\n\t"
12401 $$emit$$"# LARGE:\n\t"
12402 if (UseFastStosb) {
12403 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12404 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12405 } else if (UseXMMForObjInit) {
12406 $$emit$$"mov rdi,rax\n\t"
12407 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12408 $$emit$$"jmpq L_zero_64_bytes\n\t"
12409 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12417 $$emit$$"jl L_tail\n\t"
12418 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12419 $$emit$$"add 0x20,rax\n\t"
12420 $$emit$$"sub 0x4,rcx\n\t"
12421 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12422 $$emit$$"add 0x4,rcx\n\t"
12423 $$emit$$"jle L_end\n\t"
12424 $$emit$$"dec rcx\n\t"
12425 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12426 $$emit$$"vmovq xmm0,(rax)\n\t"
12427 $$emit$$"add 0x8,rax\n\t"
12428 $$emit$$"dec rcx\n\t"
12429 $$emit$$"jge L_sloop\n\t"
12430 $$emit$$"# L_end:\n\t"
12431 } else {
12432 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12433 }
12434 $$emit$$"# DONE"
12435 %}
12436 ins_encode %{
12437 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12438 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
12439 %}
12440 ins_pipe(pipe_slow);
12441 %}
12442
12443 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12444 Universe dummy, rFlagsReg cr)
12445 %{
12446 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12447 match(Set dummy (ClearArray (Binary cnt base) val));
12448 ins_cost(125);
12449 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12450
12451 format %{ $$template
12452 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12453 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12454 $$emit$$"jg LARGE\n\t"
12455 $$emit$$"dec rcx\n\t"
12456 $$emit$$"js DONE\t# Zero length\n\t"
12457 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12458 $$emit$$"dec rcx\n\t"
12459 $$emit$$"jge LOOP\n\t"
12460 $$emit$$"jmp DONE\n\t"
12461 $$emit$$"# LARGE:\n\t"
12462 if (UseFastStosb) {
12463 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12464 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12465 } else if (UseXMMForObjInit) {
12466 $$emit$$"mov rdi,rax\n\t"
12467 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12468 $$emit$$"jmpq L_zero_64_bytes\n\t"
12469 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12477 $$emit$$"jl L_tail\n\t"
12478 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12479 $$emit$$"add 0x20,rax\n\t"
12480 $$emit$$"sub 0x4,rcx\n\t"
12481 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12482 $$emit$$"add 0x4,rcx\n\t"
12483 $$emit$$"jle L_end\n\t"
12484 $$emit$$"dec rcx\n\t"
12485 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12486 $$emit$$"vmovq xmm0,(rax)\n\t"
12487 $$emit$$"add 0x8,rax\n\t"
12488 $$emit$$"dec rcx\n\t"
12489 $$emit$$"jge L_sloop\n\t"
12490 $$emit$$"# L_end:\n\t"
12491 } else {
12492 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12493 }
12494 $$emit$$"# DONE"
12495 %}
12496 ins_encode %{
12497 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12498 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
12499 %}
12500 ins_pipe(pipe_slow);
12501 %}
12502
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  // Statically-known-large fill: no short-length fast path is emitted.
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // MacroAssembler::clear_mem with is_large = true, word_copy_only = false.
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
12552
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  // Large fill, word_copy_only() variant (no rep-stosb byte path).
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // MacroAssembler::clear_mem with is_large = true, word_copy_only = true.
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
12598
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  // AVX512 (UseAVX > 2) variant: clear_mem additionally gets an opmask
  // temp register (ktmp).
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // MacroAssembler::clear_mem with is_large = true, word_copy_only = false,
  // plus the AVX512 opmask temp.
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12649
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  // AVX512 (UseAVX > 2) large fill, word_copy_only() variant.
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  // MacroAssembler::clear_mem with is_large = true, word_copy_only = true,
  // plus the AVX512 opmask temp.
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12699
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Compile-time-constant count (immL cnt); requires 32-byte vectors and
  // AVX512VL per the predicate. Note: base is any rRegP and cnt/base are
  // not killed, unlike the register-count variants above.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Constant-length overload of clear_mem ($cnt$$constant, not a register).
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12714
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  // Latin1 vs Latin1 (LL encoding) string compare; variant for CPUs without
  // AVX512VLBW — passes knoreg (no opmask) to the stub.
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12730
14563
14564 ins_cost(300);
14565 format %{ "call_leaf,runtime " %}
14566 ins_encode(clear_avx, Java_To_Runtime(meth));
14567 ins_pipe(pipe_slow);
14568 %}
14569
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Unlike the other leaf calls, no clear_avx here — consistent with
  // MachCallRuntimeNode::ret_addr_offset() skipping clear_avx_size() for
  // Op_CallLeafVector (vector arguments are live across the call).
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14581
// Call runtime without safepoint
// entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Indirect form: selected only when the call node has no static entry point.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    // Call through the register holding the runtime entry address.
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
14597
instruct CallLeafNoFPDirect(method meth)
%{
  // Direct form: selected when the call node's entry point is known.
  // clear_avx runs before the call (see the vzeroupper/clear_avx_size
  // accounting in MachCallRuntimeNode::ret_addr_offset()).
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14609
14610 // Return Instruction
14611 // Remove the return address & jump to it.
14612 // Notice: We always emit a nop after a ret to make sure there is room
14613 // for safepoint patching
14614 instruct Ret()
14615 %{
14616 match(Return);
14617
14618 format %{ "ret" %}
14619 ins_encode %{
14620 __ ret(0);
|