598 }
599
600 // !!!!! Special hack to get all types of calls to specify the byte offset
601 // from the start of the call to the point where the return address
602 // will point.
603 int MachCallStaticJavaNode::ret_addr_offset()
604 {
605 int offset = 5; // 5 bytes from start of call to where return address points
606 offset += clear_avx_size();
607 return offset;
608 }
609
610 int MachCallDynamicJavaNode::ret_addr_offset()
611 {
612 int offset = 15; // 15 bytes from start of call to where return address points
613 offset += clear_avx_size();
614 return offset;
615 }
616
617 int MachCallRuntimeNode::ret_addr_offset() {
618 int offset = 13; // movq r10,#addr; callq (r10)
619 if (this->ideal_Opcode() != Op_CallLeafVector) {
620 offset += clear_avx_size();
621 }
622 return offset;
623 }
624 //
625 // Compute padding required for nodes which need alignment
626 //
627
628 // The address of the call instruction needs to be 4-byte aligned to
629 // ensure that it does not span a cache line so that it can be patched.
630 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
631 {
632 current_offset += clear_avx_size(); // skip vzeroupper
633 current_offset += 1; // skip call opcode byte
634 return align_up(current_offset, alignment_required()) - current_offset;
635 }
636
637 // The address of the call instruction needs to be 4-byte aligned to
638 // ensure that it does not span a cache line so that it can be patched.
639 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
640 {
641 current_offset += clear_avx_size(); // skip vzeroupper
642 current_offset += 11; // skip movq instruction + call opcode byte
643 return align_up(current_offset, alignment_required()) - current_offset;
829 st->print("\n\t");
830 st->print("# stack alignment check");
831 #endif
832 }
833 if (C->stub_function() != nullptr) {
834 st->print("\n\t");
835 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
836 st->print("\n\t");
837 st->print("je fast_entry\t");
838 st->print("\n\t");
839 st->print("call #nmethod_entry_barrier_stub\t");
840 st->print("\n\tfast_entry:");
841 }
842 st->cr();
843 }
844 #endif
845
// Emit the method prolog: optional class-initialization barrier, frame
// setup (with stack banging when needed), and constant-table base fixup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Fast class-initialization check: if the holder class is not yet
    // initialized for this thread, divert to the wrong-method stub.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Build the frame; bang the stack only when the frame size requires it.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
878
// Prolog size depends on too many runtime flags to compute statically;
// fall back to measuring the emitted code.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
884
// Upper bound on relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
889
890 //=============================================================================
891 #ifndef PRODUCT
892 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
893 {
894 Compile* C = ra_->C;
895 if (generate_vzeroupper(C)) {
896 st->print("vzeroupper");
897 st->cr(); st->print("\t");
898 }
899
900 int framesize = C->output()->frame_size_in_bytes();
901 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
902 // Remove word for return adr already pushed
903 // and RBP
904 framesize -= 2*wordSize;
911 st->print_cr("popq rbp");
912 if (do_polling() && C->is_method_compilation()) {
913 st->print("\t");
914 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
915 "ja #safepoint_stub\t"
916 "# Safepoint: poll for GC");
917 }
918 }
919 #endif
920
// Emit the method epilog: optional vzeroupper, frame teardown, reserved
// stack check, and the return-address safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Poll for a safepoint at return. The out-of-line poll stub is only
    // created when actually emitting code, not during size estimation.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
961
// Epilog size depends on too many runtime flags to compute statically;
// fall back to measuring the emitted code.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
967
// Upper bound on relocation entries the epilog may emit (e.g. the
// return-poll relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
972
// The epilog has no special scheduling needs; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
977
978 //=============================================================================
979
// Register class classification used when emitting spill copies: tells the
// copy-emission code what kind of location a value lives in.
enum RC {
  rc_bad,    // not a register / unallocated
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM/vector register
  rc_stack   // stack slot
};
987
1549 #endif
1550
// Materialize the address of this node's stack slot (the lock box) into
// the allocated register.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
1558
1559 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1560 {
1561 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1562 if (ra_->get_encode(this) > 15) {
1563 return (offset < 0x80) ? 6 : 9; // REX2
1564 } else {
1565 return (offset < 0x80) ? 5 : 8; // REX
1566 }
1567 }
1568
1569 //=============================================================================
1570 #ifndef PRODUCT
1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1572 {
1573 if (UseCompressedClassPointers) {
1574 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1575 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1576 } else {
1577 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1578 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1579 }
1580 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1581 }
1582 #endif
1583
// Emit the unverified entry point: inline-cache klass check that jumps to
// the IC-miss stub on mismatch.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1588
// Size varies with alignment and compressed-pointer mode; measure instead
// of computing.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1594
1595
1596 //=============================================================================
1597
// Vector calling convention is available whenever Vector API support is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
1601
1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1603 assert(EnableVectorSupport, "sanity");
1604 int lo = XMM0_num;
1605 int hi = XMM0b_num;
1606 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1607 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1608 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1609 return OptoRegPair(hi, lo);
1610 }
1611
1612 // Is this branch offset short enough that a short branch can be used?
1613 //
1614 // NOTE: If the platform does not provide any short branch variants, then
1615 // this method should return false for offset 0.
3045 %}
3046 %}
3047
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is provably non-negative, so the implicit
  // sign-extension of ConvI2L cannot change the effective address.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3064
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when decoding is exactly "base(R12) + narrow_oop << 3".
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3082
3083 // Indirect Memory Operand
3084 operand indirectNarrow(rRegN reg)
3391 equal(0x4, "e");
3392 not_equal(0x5, "ne");
3393 less(0x2, "b");
3394 greater_equal(0x3, "ae");
3395 less_equal(0x6, "be");
3396 greater(0x7, "a");
3397 overflow(0x0, "o");
3398 no_overflow(0x1, "no");
3399 %}
3400 %}
3401
3402 //----------OPERAND CLASSES----------------------------------------------------
3403 // Operand Classes are groups of operands that are used as to simplify
3404 // instruction definitions by not requiring the AD writer to specify separate
3405 // instructions for every form of operand when the instruction accepts
3406 // multiple operand types with the same basic encoding and format. The classic
3407 // case of this is memory operands.
3408
// All supported addressing modes, grouped so instructions can take a single
// generic "memory" operand.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3415
3416 //----------PIPELINE-----------------------------------------------------------
3417 // Rules which define the behavior of the target architectures pipeline.
3418 pipeline %{
3419
3420 //----------ATTRIBUTES---------------------------------------------------------
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
3428
3429 //----------RESOURCES----------------------------------------------------------
3430 // Resources are the functional units available to the machine
3431
5973 format %{ "MEMBAR-storestore (empty encoding)" %}
5974 ins_encode( );
5975 ins_pipe(empty);
5976 %}
5977
5978 //----------Move Instructions--------------------------------------------------
5979
// Reinterpret a machine word (long) as a pointer.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // No code needed when source and destination are the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5992
// Reinterpret a pointer as a machine word (long).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // No code needed when source and destination are the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6005
6006 // Convert oop into int for vectors alignment masking
6007 instruct convP2I(rRegI dst, rRegP src)
6008 %{
6009 match(Set dst (ConvL2I (CastP2X src)));
6010
6011 format %{ "movl $dst, $src\t# ptr -> int" %}
6012 ins_encode %{
12196 effect(DEF dst, USE src);
12197 ins_cost(100);
12198 format %{ "movd $dst,$src\t# MoveI2F" %}
12199 ins_encode %{
12200 __ movdl($dst$$XMMRegister, $src$$Register);
12201 %}
12202 ins_pipe( pipe_slow );
12203 %}
12204
// Bit-preserving move of a long GPR into a double XMM register (MoveL2D).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12215
12216 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
12218 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12219 Universe dummy, rFlagsReg cr)
12220 %{
12221 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12222 match(Set dummy (ClearArray cnt base));
12223 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
12224
12225 format %{ $$template
12226 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12227 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12228 $$emit$$"jg LARGE\n\t"
12229 $$emit$$"dec rcx\n\t"
12230 $$emit$$"js DONE\t# Zero length\n\t"
12231 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12232 $$emit$$"dec rcx\n\t"
12233 $$emit$$"jge LOOP\n\t"
12234 $$emit$$"jmp DONE\n\t"
12235 $$emit$$"# LARGE:\n\t"
12236 if (UseFastStosb) {
12237 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12238 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12239 } else if (UseXMMForObjInit) {
12240 $$emit$$"mov rdi,rax\n\t"
12241 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12242 $$emit$$"jmpq L_zero_64_bytes\n\t"
12243 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12251 $$emit$$"jl L_tail\n\t"
12252 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12253 $$emit$$"add 0x20,rax\n\t"
12254 $$emit$$"sub 0x4,rcx\n\t"
12255 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12256 $$emit$$"add 0x4,rcx\n\t"
12257 $$emit$$"jle L_end\n\t"
12258 $$emit$$"dec rcx\n\t"
12259 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12260 $$emit$$"vmovq xmm0,(rax)\n\t"
12261 $$emit$$"add 0x8,rax\n\t"
12262 $$emit$$"dec rcx\n\t"
12263 $$emit$$"jge L_sloop\n\t"
12264 $$emit$$"# L_end:\n\t"
12265 } else {
12266 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12267 }
12268 $$emit$$"# DONE"
12269 %}
12270 ins_encode %{
12271 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12272 $tmp$$XMMRegister, false, knoreg);
12273 %}
12274 ins_pipe(pipe_slow);
12275 %}
12276
12277 // Small non-constant length ClearArray for AVX512 targets.
12278 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12279 Universe dummy, rFlagsReg cr)
12280 %{
12281 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12282 match(Set dummy (ClearArray cnt base));
12283 ins_cost(125);
12284 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12285
12286 format %{ $$template
12287 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12288 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12289 $$emit$$"jg LARGE\n\t"
12290 $$emit$$"dec rcx\n\t"
12291 $$emit$$"js DONE\t# Zero length\n\t"
12292 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12293 $$emit$$"dec rcx\n\t"
12294 $$emit$$"jge LOOP\n\t"
12295 $$emit$$"jmp DONE\n\t"
12296 $$emit$$"# LARGE:\n\t"
12297 if (UseFastStosb) {
12298 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12299 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12300 } else if (UseXMMForObjInit) {
12301 $$emit$$"mov rdi,rax\n\t"
12302 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12303 $$emit$$"jmpq L_zero_64_bytes\n\t"
12304 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12312 $$emit$$"jl L_tail\n\t"
12313 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12314 $$emit$$"add 0x20,rax\n\t"
12315 $$emit$$"sub 0x4,rcx\n\t"
12316 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12317 $$emit$$"add 0x4,rcx\n\t"
12318 $$emit$$"jle L_end\n\t"
12319 $$emit$$"dec rcx\n\t"
12320 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12321 $$emit$$"vmovq xmm0,(rax)\n\t"
12322 $$emit$$"add 0x8,rax\n\t"
12323 $$emit$$"dec rcx\n\t"
12324 $$emit$$"jge L_sloop\n\t"
12325 $$emit$$"# L_end:\n\t"
12326 } else {
12327 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12328 }
12329 $$emit$$"# DONE"
12330 %}
12331 ins_encode %{
12332 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12333 $tmp$$XMMRegister, false, $ktmp$$KRegister);
12334 %}
12335 ins_pipe(pipe_slow);
12336 %}
12337
// Large non-constant length ClearArray for non-AVX512 targets.
// The listed format shows the three clear_mem strategies selected at
// compile time: rep stosb, XMM/YMM stores, or rep stosq.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large == true: no short-length fast path; knoreg: no opmask temp.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12388
// Large non-constant length ClearArray for AVX512 targets.
// Same as rep_stos_large but reserves an opmask temp (ktmp) for the
// AVX-512 clear_mem path.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large == true: no short-length fast path; ktmp used by AVX-512 path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12439
// Small constant length ClearArray for AVX512 targets.
// The element count is a compile-time immediate, so clear_mem is given the
// constant directly and can emit a fully unrolled clear.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12453
// Compare two Latin-1 (byte[]) strings; result is the comparison outcome
// in rax. Non-AVX512VLBW variant (uses knoreg for the opmask slot).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12469
14275
14276 ins_cost(300);
14277 format %{ "call_leaf,runtime " %}
14278 ins_encode(clear_avx, Java_To_Runtime(meth));
14279 ins_pipe(pipe_slow);
14280 %}
14281
// Call runtime without safepoint and with vector arguments
// Note: unlike the other leaf calls, no clear_avx precedes the call —
// vector arguments are passed in the AVX registers.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14293
// Call runtime without safepoint
// (CallLeafNoFP: leaf call that does not need a frame pointer setup.)
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14305
14306 // Return Instruction
14307 // Remove the return address & jump to it.
14308 // Notice: We always emit a nop after a ret to make sure there is room
14309 // for safepoint patching
14310 instruct Ret()
14311 %{
14312 match(Return);
14313
14314 format %{ "ret" %}
14315 ins_encode %{
14316 __ ret(0);
|
598 }
599
600 // !!!!! Special hack to get all types of calls to specify the byte offset
601 // from the start of the call to the point where the return address
602 // will point.
603 int MachCallStaticJavaNode::ret_addr_offset()
604 {
605 int offset = 5; // 5 bytes from start of call to where return address points
606 offset += clear_avx_size();
607 return offset;
608 }
609
610 int MachCallDynamicJavaNode::ret_addr_offset()
611 {
612 int offset = 15; // 15 bytes from start of call to where return address points
613 offset += clear_avx_size();
614 return offset;
615 }
616
617 int MachCallRuntimeNode::ret_addr_offset() {
618 if (_entry_point == nullptr) {
619 // CallLeafNoFPInDirect
620 return 3; // callq (register)
621 }
622 int offset = 13; // movq r10,#addr; callq (r10)
623 if (this->ideal_Opcode() != Op_CallLeafVector) {
624 offset += clear_avx_size();
625 }
626 return offset;
627 }
628
629 //
630 // Compute padding required for nodes which need alignment
631 //
632
633 // The address of the call instruction needs to be 4-byte aligned to
634 // ensure that it does not span a cache line so that it can be patched.
635 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
636 {
637 current_offset += clear_avx_size(); // skip vzeroupper
638 current_offset += 1; // skip call opcode byte
639 return align_up(current_offset, alignment_required()) - current_offset;
640 }
641
642 // The address of the call instruction needs to be 4-byte aligned to
643 // ensure that it does not span a cache line so that it can be patched.
644 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
645 {
646 current_offset += clear_avx_size(); // skip vzeroupper
647 current_offset += 11; // skip movq instruction + call opcode byte
648 return align_up(current_offset, alignment_required()) - current_offset;
834 st->print("\n\t");
835 st->print("# stack alignment check");
836 #endif
837 }
838 if (C->stub_function() != nullptr) {
839 st->print("\n\t");
840 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
841 st->print("\n\t");
842 st->print("je fast_entry\t");
843 st->print("\n\t");
844 st->print("call #nmethod_entry_barrier_stub\t");
845 st->print("\n\tfast_entry:");
846 }
847 st->cr();
848 }
849 #endif
850
// Emit the method prolog: frame setup, nmethod entry barrier (for methods,
// not stubs), verified-entry label binding, and constant-table base fixup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Stubs do not need an nmethod entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // Bind the label that the inline-type entry (MachVEPNode) jumps to; skip
  // during the size-estimation pass where labels must not be bound.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
873
// Upper bound on relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
878
879 //=============================================================================
880 #ifndef PRODUCT
881 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
882 {
883 Compile* C = ra_->C;
884 if (generate_vzeroupper(C)) {
885 st->print("vzeroupper");
886 st->cr(); st->print("\t");
887 }
888
889 int framesize = C->output()->frame_size_in_bytes();
890 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
891 // Remove word for return adr already pushed
892 // and RBP
893 framesize -= 2*wordSize;
900 st->print_cr("popq rbp");
901 if (do_polling() && C->is_method_compilation()) {
902 st->print("\t");
903 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
904 "ja #safepoint_stub\t"
905 "# Safepoint: poll for GC");
906 }
907 }
908 #endif
909
// Emit the method epilog: optional vzeroupper, frame removal (with stack
// repair when needed), reserved stack check, and the return safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Poll for a safepoint at return. The out-of-line poll stub is only
    // created when actually emitting code, not during size estimation.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
940
// Upper bound on relocation entries the epilog may emit (e.g. the
// return-poll relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
945
// The epilog has no special scheduling needs; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
950
951 //=============================================================================
952
// Register class classification used when emitting spill copies: tells the
// copy-emission code what kind of location a value lives in.
enum RC {
  rc_bad,    // not a register / unallocated
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM/vector register
  rc_stack   // stack slot
};
960
1522 #endif
1523
// Materialize the address of this node's stack slot (the lock box) into
// the allocated register.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
1531
1532 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1533 {
1534 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1535 if (ra_->get_encode(this) > 15) {
1536 return (offset < 0x80) ? 6 : 9; // REX2
1537 } else {
1538 return (offset < 0x80) ? 5 : 8; // REX
1539 }
1540 }
1541
1542 //=============================================================================
1543 #ifndef PRODUCT
// Debug listing for the inline-type (value) entry point.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
1548 #endif
1549
// Emit the inline-type entry point: either the unverified IC check, or the
// verified path that unpacks inline-type (value) arguments and jumps to the
// normal verified entry.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // _verified_entry is not bound during size estimation; use a dummy.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
1584
1585 //=============================================================================
1586 #ifndef PRODUCT
1587 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1588 {
1589 if (UseCompressedClassPointers) {
1590 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1591 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1592 } else {
1593 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1594 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1595 }
1596 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1597 }
1598 #endif
1599
// Emit the unverified entry point: an inline cache check, aligned to
// InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1604
1605 //=============================================================================
1606
// x86_64 supports the vector calling convention exactly when Vector API
// support is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
1610
1611 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1612 assert(EnableVectorSupport, "sanity");
1613 int lo = XMM0_num;
1614 int hi = XMM0b_num;
1615 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1616 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1617 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1618 return OptoRegPair(hi, lo);
1619 }
1620
1621 // Is this branch offset short enough that a short branch can be used?
1622 //
1623 // NOTE: If the platform does not provide any short branch variants, then
1624 // this method should return false for offset 0.
3054 %}
3055 %}
3056
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches addresses of the form [$reg + $off + $idx << $scale] where the
// int index is known non-negative.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // n->in(2)->in(3)->in(1) navigates to the ConvI2L input of the LShiftL in
  // the matched tree below; require its type's lower bound to be >= 0.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3073
// Indirect Narrow Oop Operand
// Addresses memory as [R12 + narrow_oop << 3], i.e. the compressed oop is
// decoded on the fly by the addressing mode. Only legal when the
// compressed-oop shift is exactly 3 (times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
3089
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Same as indCompressedOop but with an additional 32-bit displacement:
// [R12 + narrow_oop << 3 + $off].
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3107
3108 // Indirect Memory Operand
3109 operand indirectNarrow(rRegN reg)
3416 equal(0x4, "e");
3417 not_equal(0x5, "ne");
3418 less(0x2, "b");
3419 greater_equal(0x3, "ae");
3420 less_equal(0x6, "be");
3421 greater(0x7, "a");
3422 overflow(0x0, "o");
3423 no_overflow(0x1, "no");
3424 %}
3425 %}
3426
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.

// All addressing modes accepted by instructions that take a generic
// "memory" operand, including the narrow-oop (compressed) based forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3440
3441 //----------PIPELINE-----------------------------------------------------------
3442 // Rules which define the behavior of the target architectures pipeline.
3443 pipeline %{
3444
3445 //----------ATTRIBUTES---------------------------------------------------------
3446 attributes %{
variable_size_instructions; // Instructions are variable in size
3448 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
3449 instruction_unit_size = 1; // An instruction is 1 bytes long
3450 instruction_fetch_unit_size = 16; // The processor fetches one line
3451 instruction_fetch_units = 1; // of 16 bytes
3452 %}
3453
3454 //----------RESOURCES----------------------------------------------------------
3455 // Resources are the functional units available to the machine
3456
5998 format %{ "MEMBAR-storestore (empty encoding)" %}
5999 ins_encode( );
6000 ins_pipe(empty);
6001 %}
6002
6003 //----------Move Instructions--------------------------------------------------
6004
// Reinterpret a long as a pointer (no-op move; elided when the register
// allocator assigns the same register to source and destination).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6017
// Reinterpret an int as a narrow pointer (32-bit register move; elided
// when source and destination registers coincide).
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // The encoding emits a 32-bit movl, so the debug format must say movl
  // (it previously claimed movq).
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6030
// Reinterpret a narrow pointer as a long.
// NOTE(review): this matches CastP2X with a narrow (rRegN) source and
// copies the raw register bits with movptr — no compressed-oop decode is
// performed; confirm that callers only need the raw bits.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6043
// Reinterpret a pointer as a long (no-op move; elided when source and
// destination registers coincide).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6056
6057 // Convert oop into int for vectors alignment masking
6058 instruct convP2I(rRegI dst, rRegP src)
6059 %{
6060 match(Set dst (ConvL2I (CastP2X src)));
6061
6062 format %{ "movl $dst, $src\t# ptr -> int" %}
6063 ins_encode %{
12247 effect(DEF dst, USE src);
12248 ins_cost(100);
12249 format %{ "movd $dst,$src\t# MoveI2F" %}
12250 ins_encode %{
12251 __ movdl($dst$$XMMRegister, $src$$Register);
12252 %}
12253 ins_pipe( pipe_slow );
12254 %}
12255
// Raw bit move of a long (GPR) into a double (XMM) register; movdq emits
// the 64-bit GPR->XMM form. No value conversion takes place.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12266
12267
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Selected when !is_large && !word_copy_only && UseAVX <= 2. Inputs are
// pinned to fixed registers (cnt=rcx, base=rdi, val=rax) and all are
// clobbered (USE_KILL).
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large=false, word_copy_only=false
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
12328
// Same as rep_stos, but for word_copy_only ClearArray nodes (the large
// path never uses rep stosb, since the fill value must be stored in
// whole words). Selected when !is_large && word_copy_only && UseAVX <= 2.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large=false, word_copy_only=true
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
12384
// Small non-constant length ClearArray for AVX512 targets.
// Same selection as rep_stos but for UseAVX > 2; passes an opmask
// register (ktmp) through to clear_mem for the EVEX path.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large=false, word_copy_only=false, EVEX opmask supplied
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12445
// AVX512 variant of rep_stos_word_copy: small, word_copy_only ClearArray
// with an opmask register for the EVEX path.
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large=false, word_copy_only=true, EVEX opmask supplied
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12505
// Large non-constant length ClearArray for non-AVX512 targets.
// Like rep_stos but for nodes marked is_large(): no short path; goes
// straight to the bulk-clear code (is_large=true in clear_mem).
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large=true, word_copy_only=false
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
12555
// Large, word_copy_only ClearArray for non-AVX512 targets (no rep stosb
// option: the fill value must be stored in whole words).
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large=true, word_copy_only=true
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
12601
// Large non-constant length ClearArray for AVX512 targets.
// Like rep_stos_large but with an opmask register (ktmp) for the EVEX
// path of clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large=true, word_copy_only=false, EVEX opmask supplied
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12652
// AVX512 variant of rep_stos_large_word_copy: large, word_copy_only
// ClearArray with an opmask register for the EVEX path.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large=true, word_copy_only=true, EVEX opmask supplied
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12702
// Small constant length ClearArray for AVX512 targets.
// The element count is an immediate (immL cnt), so clear_mem is called
// with a compile-time constant; requires MaxVectorSize >= 32 and AVX512VL.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12717
// Compare two Latin-1 (byte[]) strings (StrIntrinsicNode::LL encoding).
// Selected when AVX-512 VL/BW is not available; passes knoreg so
// string_compare takes its non-masked path. Fixed registers: str1=rdi,
// cnt1=rcx, str2=rsi, cnt2=rdx, result=rax; all inputs are clobbered.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12733
14539
14540 ins_cost(300);
14541 format %{ "call_leaf,runtime " %}
14542 ins_encode(clear_avx, Java_To_Runtime(meth));
14543 ins_pipe(pipe_slow);
14544 %}
14545
// Call runtime without safepoint and with vector arguments
// Unlike the other leaf-call encodings, no clear_avx is emitted here —
// presumably so the vector arguments survive in the upper lanes; this is
// consistent with MachCallRuntimeNode::ret_addr_offset(), which skips
// clear_avx_size() for Op_CallLeafVector.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14557
// Call runtime without safepoint
// entry point is null, target holds the address to call
// Indirect variant: selected only when the CallLeafNoFP node carries no
// static entry point; the destination is taken from the target register.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
14573
// Direct leaf runtime call (no safepoint, no FP arguments): clears the
// upper AVX state (clear_avx) before calling the static entry point.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14585
14586 // Return Instruction
14587 // Remove the return address & jump to it.
14588 // Notice: We always emit a nop after a ret to make sure there is room
14589 // for safepoint patching
14590 instruct Ret()
14591 %{
14592 match(Return);
14593
14594 format %{ "ret" %}
14595 ins_encode %{
14596 __ ret(0);
|