< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
        // A direct call is `call rel32`: 1 opcode byte + 4-byte displacement.
  480   int offset = 5; // 5 bytes from start of call to where return address points
        // An optional vzeroupper may be emitted before the call; account for it.
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
        // NOTE(review): presumably a 10-byte movabs of the inline-cache data
        // plus the 5-byte call — confirm against the dynamic-call emitter.
  487   int offset = 15; // 15 bytes from start of call to where return address points
        // Optional vzeroupper emitted before the call sequence.
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {




        // movabs r10, imm64 (10 bytes) + call through r10 (3 bytes).
  493   int offset = 13; // movq r10,#addr; callq (r10)
        // Vector leaf calls keep AVX state live, so no vzeroupper is emitted
        // for them; all other runtime calls include its size.
  494   if (this->ideal_Opcode() != Op_CallLeafVector) {
  495     offset += clear_avx_size();
  496   }
  497   return offset;
  498 }

  499 //
  500 // Compute padding required for nodes which need alignment
  501 //
  502 
  503 // The address of the call instruction needs to be 4-byte aligned to
  504 // ensure that it does not span a cache line so that it can be patched.
  505 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  506 {
  507   current_offset += clear_avx_size(); // skip vzeroupper
        // NOTE(review): skipping the opcode byte aligns the 4 bytes that follow
        // it (the patched displacement) — confirm against patching code.
  508   current_offset += 1; // skip call opcode byte
  509   return align_up(current_offset, alignment_required()) - current_offset;
  510 }
  511 
  512 // The address of the call instruction needs to be 4-byte aligned to
  513 // ensure that it does not span a cache line so that it can be patched.
  514 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  515 {
  516   current_offset += clear_avx_size(); // skip vzeroupper
  517   current_offset += 11; // skip movq instruction + call opcode byte
  518   return align_up(current_offset, alignment_required()) - current_offset;

  887     st->print("# stack alignment check");
  888 #endif
  889   }
  890   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  891     st->print("\n\t");
  892     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  893     st->print("\n\t");
  894     st->print("je      fast_entry\t");
  895     st->print("\n\t");
  896     st->print("call    #nmethod_entry_barrier_stub\t");
  897     st->print("\n\tfast_entry:");
  898   }
  899   st->cr();
  900 }
  901 #endif
  902 
  903 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  904   Compile* C = ra_->C;
  905   C2_MacroAssembler _masm(&cbuf);
  906 
  907   int framesize = C->output()->frame_size_in_bytes();
  908   int bangsize = C->output()->bang_size_in_bytes();
  909 
        // Fast class-initialization check: load the holder's Klass* and branch
        // past the barrier when initialization is complete; otherwise jump to
        // the wrong-method stub (slow path).
  910   if (C->clinit_barrier_on_entry()) {
  911     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  912     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  913 
  914     Label L_skip_barrier;
  915     Register klass = rscratch1;
  916 
  917     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  918     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  919 
  920     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  921 
  922     __ bind(L_skip_barrier);

  923   }
  924 
        // Build the frame (and bang the stack when a bang is required).
  925   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);


  926 
  927   C->output()->set_frame_complete(cbuf.insts_size());
  928 
  929   if (C->has_mach_constant_base_node()) {
  930     // NOTE: We set the table base offset here because users might be
  931     // emitted before MachConstantBaseNode.
  932     ConstantTable& constant_table = C->output()->constant_table();
  933     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  934   }
  935 }
  936 
  937 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  938 {
        // Prolog length varies (clinit barrier, stack bang, vzeroupper), so
        // fall back to the generic measurement.
  939   return MachNode::size(ra_); // too many variables; just compute it
  940                               // the hard way
  941 }
  942 
  943 int MachPrologNode::reloc() const
  944 {
        // Upper bound on relocation entries emitted by the prolog.
  945   return 0; // a large enough number
  946 }
  947 
  948 //=============================================================================
  949 #ifndef PRODUCT
  950 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  951 {
  952   Compile* C = ra_->C;
  953   if (generate_vzeroupper(C)) {
  954     st->print("vzeroupper");
  955     st->cr(); st->print("\t");
  956   }
  957 
  958   int framesize = C->output()->frame_size_in_bytes();
  959   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  960   // Remove word for return adr already pushed
  961   // and RBP
  962   framesize -= 2*wordSize;

  970   if (do_polling() && C->is_method_compilation()) {
  971     st->print("\t");
  972     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  973                  "ja      #safepoint_stub\t"
  974                  "# Safepoint: poll for GC");
  975   }
  976 }
  977 #endif
  978 
  979 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  980 {
  981   Compile* C = ra_->C;
  982   MacroAssembler _masm(&cbuf);
  983 
  984   if (generate_vzeroupper(C)) {
  985     // Clear upper bits of YMM registers when current compiled code uses
  986     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  987     __ vzeroupper();
  988   }
  989 
  990   int framesize = C->output()->frame_size_in_bytes();
  991   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  992   // Remove word for return adr already pushed
  993   // and RBP
  994   framesize -= 2*wordSize;
  995 
  996   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  997 
        // Pop the frame: addq rsp, #framesize using the short imm8 encoding
        // (REX.W 83 /0) when it fits, otherwise the imm32 form (REX.W 81 /0).
  998   if (framesize) {
  999     emit_opcode(cbuf, Assembler::REX_W);
 1000     if (framesize < 0x80) {
 1001       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 1002       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1003       emit_d8(cbuf, framesize);
 1004     } else {
 1005       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 1006       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1007       emit_d32(cbuf, framesize);
 1008     }
 1009   }
 1010 
 1011   // popq rbp
 1012   emit_opcode(cbuf, 0x58 | RBP_enc);
 1013 
 1014   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1015     __ reserved_stack_check();
 1016   }
 1017 
        // Return-poll for safepoints: a real poll stub is only allocated when
        // not measuring size; otherwise a dummy label keeps the layout stable.
 1018   if (do_polling() && C->is_method_compilation()) {
 1019     MacroAssembler _masm(&cbuf);
 1020     Label dummy_label;
 1021     Label* code_stub = &dummy_label;
 1022     if (!C->output()->in_scratch_emit_size()) {
 1023       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1024       C->output()->add_stub(stub);
 1025       code_stub = &stub->entry();
 1026     }
 1027     __ relocate(relocInfo::poll_return_type);
 1028     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1029   }
 1030 }
 1031 
 1032 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1033 {
        // Epilog length varies (vzeroupper, imm8/imm32 frame pop, poll), so
        // measure generically.
 1034   return MachNode::size(ra_); // too many variables; just compute it
 1035                               // the hard way
 1036 }
 1037 
 1038 int MachEpilogNode::reloc() const
 1039 {
        // Upper bound on relocation entries (e.g. the return poll).
 1040   return 2; // a large enough number
 1041 }
 1042 
 1043 const Pipeline* MachEpilogNode::pipeline() const
 1044 {
        // No special scheduling for the epilog; use the default pipeline class.
 1045   return MachNode::pipeline_class();
 1046 }
 1047 
 1048 //=============================================================================
 1049 
      // Register classes used to categorize locations when emitting copies/
      // spills — NOTE(review): usage is outside this view; confirm.
 1050 enum RC {
 1051   rc_bad,
 1052   rc_int,
 1053   rc_kreg,
 1054   rc_float,
 1055   rc_stack
 1056 };
 1057 

 1650     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1651     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1652     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1653     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1654     emit_d32(cbuf, offset);
 1655   } else {
 1656     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1657     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1658     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1659     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1660     emit_d8(cbuf, offset);
 1661   }
 1662 }
 1663 
 1664 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1665 {
 1666   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
        // Mirrors the LEA emission above: REX + 8D + modrm + sib + disp8 is
        // 5 bytes; the disp32 form is 8 bytes.
 1667   return (offset < 0x80) ? 5 : 8; // REX
 1668 }
 1669 

















































 1670 //=============================================================================
 1671 #ifndef PRODUCT
 1672 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1673 {
        // Pretty-print the unverified entry point: the inline-cache klass
        // check, mirroring the emission in MachUEPNode::emit below.
 1674   if (UseCompressedClassPointers) {
 1675     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1676     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1677     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1678   } else {
 1679     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1680                  "# Inline cache check");
 1681   }
 1682   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1683   st->print_cr("\tnop\t# nops to align entry point");
 1684 }
 1685 #endif
 1686 
 1687 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1688 {
 1689   MacroAssembler masm(&cbuf);

 1692     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1693     masm.cmpptr(rax, rscratch1);
 1694   } else {
 1695     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1696   }
 1697 
 1698   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1699 
 1700   /* WARNING these NOPs are critical so that verified entry point is properly
 1701      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1702   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1703   if (OptoBreakpoint) {
 1704     // Leave space for int3
 1705     nops_cnt -= 1;
 1706   }
 1707   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1708   if (nops_cnt > 0)
 1709     masm.nop(nops_cnt);
 1710 }
 1711 
 1712 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1713 {
        // Length depends on compressed-class mode and alignment nops; measure
        // generically.
 1714   return MachNode::size(ra_); // too many variables; just compute it
 1715                               // the hard way
 1716 }
 1717 
 1718 
 1719 //=============================================================================
 1720 
 1721 const bool Matcher::supports_vector_calling_convention(void) {
        // Vector calling convention is available only when both the vector API
        // support and its stubs are enabled.
 1722   if (EnableVectorSupport && UseVectorStubs) {
 1723     return true;
 1724   }
 1725   return false;
 1726 }
 1727 
 1728 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1729   assert(EnableVectorSupport && UseVectorStubs, "sanity");
        // Vectors are returned in XMM0; the high half of the pair widens with
        // the ideal vector width (X=128, Y=256, Z=512 bits).
 1730   int lo = XMM0_num;
 1731   int hi = XMM0b_num;
 1732   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1733   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1734   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1735   return OptoRegPair(hi, lo);
 1736 }
 1737 
 1738 // Is this branch offset short enough that a short branch can be used?

 3977   %}
 3978 %}
 3979 
 3980 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3981 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3982 %{
 3983   constraint(ALLOC_IN_RC(ptr_reg));
        // Only matches when the int index is known non-negative (type _lo >= 0),
        // so the ConvI2L cannot change the effective address.
 3984   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3985   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3986 
 3987   op_cost(10);
 3988   format %{"[$reg + $off + $idx << $scale]" %}
 3989   interface(MEMORY_INTER) %{
 3990     base($reg);
 3991     index($idx);
 3992     scale($scale);
 3993     disp($off);
 3994   %}
 3995 %}
 3996 
















 3997 // Indirect Narrow Oop Plus Offset Operand
 3998 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3999 // we can't free r12 even with CompressedOops::base() == NULL.
 4000 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // The fixed scale of 8 matches the predicate's shift requirement below.
 4001   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4002   constraint(ALLOC_IN_RC(ptr_reg));
 4003   match(AddP (DecodeN reg) off);
 4004 
 4005   op_cost(10);
 4006   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4007   interface(MEMORY_INTER) %{
 4008     base(0xc); // R12
 4009     index($reg);
 4010     scale(0x3);
 4011     disp($off);
 4012   %}
 4013 %}
 4014 
 4015 // Indirect Memory Operand
 4016 operand indirectNarrow(rRegN reg)

 4323     equal(0x4, "e");
 4324     not_equal(0x5, "ne");
 4325     less(0x2, "b");
 4326     greater_equal(0x3, "ae");
 4327     less_equal(0x6, "be");
 4328     greater(0x7, "a");
 4329     overflow(0x0, "o");
 4330     no_overflow(0x1, "no");
 4331   %}
 4332 %}
 4333 
 4334 //----------OPERAND CLASSES----------------------------------------------------
 4335 // Operand Classes are groups of operands that are used as to simplify
 4336 // instruction definitions by not requiring the AD writer to specify separate
 4337 // instructions for every form of operand when the instruction accepts
 4338 // multiple operand types with the same basic encoding and format.  The classic
 4339 // case of this is memory operands.
 4340 
      // All supported addressing-mode operands, including the narrow-oop
      // forms, grouped so instructions can accept any of them as "memory".
 4341 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4342                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4343                indCompressedOopOffset,
 4344                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4345                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4346                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4347 
 4348 //----------PIPELINE-----------------------------------------------------------
 4349 // Rules which define the behavior of the target architectures pipeline.
 4350 pipeline %{
 4351 
 4352 //----------ATTRIBUTES---------------------------------------------------------
 4353 attributes %{
 4354   variable_size_instructions;        // Fixed size instructions
 4355   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4356   instruction_unit_size = 1;         // An instruction is 1 bytes long
 4357   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4358   instruction_fetch_units = 1;       // of 16 bytes
 4359 
 4360   // List of nop instructions
 4361   nops( MachNop );
 4362 %}
 4363 

 6911   format %{ "MEMBAR-storestore (empty encoding)" %}
 6912   ins_encode( );
 6913   ins_pipe(empty);
 6914 %}
 6915 
 6916 //----------Move Instructions--------------------------------------------------
 6917 
      // Reinterpret a long as a pointer; a register move only when source and
      // destination registers differ.
 6918 instruct castX2P(rRegP dst, rRegL src)
 6919 %{
 6920   match(Set dst (CastX2P src));
 6921 
 6922   format %{ "movq    $dst, $src\t# long->ptr" %}
 6923   ins_encode %{
 6924     if ($dst$$reg != $src$$reg) {
 6925       __ movptr($dst$$Register, $src$$Register);
 6926     }
 6927   %}
 6928   ins_pipe(ialu_reg_reg); // XXX
 6929 %}
 6930 













      // Reinterpret a pointer as a long; mirror of castX2P above — no code is
      // emitted when the registers already coincide.
 6931 instruct castP2X(rRegL dst, rRegP src)
 6932 %{
 6933   match(Set dst (CastP2X src));
 6934 
 6935   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6936   ins_encode %{
 6937     if ($dst$$reg != $src$$reg) {
 6938       __ movptr($dst$$Register, $src$$Register);
 6939     }
 6940   %}
 6941   ins_pipe(ialu_reg_reg); // XXX
 6942 %}
 6943 
 6944 // Convert oop into int for vectors alignment masking
 6945 instruct convP2I(rRegI dst, rRegP src)
 6946 %{
 6947   match(Set dst (ConvL2I (CastP2X src)));
 6948 
 6949   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6950   ins_encode %{

11426   effect(DEF dst, USE src);
11427   ins_cost(100);
11428   format %{ "movd    $dst,$src\t# MoveI2F" %}
11429   ins_encode %{
11430     __ movdl($dst$$XMMRegister, $src$$Register);
11431   %}
11432   ins_pipe( pipe_slow );
11433 %}
11434 
      // Bitwise move of a long GPR into a double XMM register (no conversion).
11435 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11436   match(Set dst (MoveL2D src));
11437   effect(DEF dst, USE src);
11438   ins_cost(100);
11439   format %{ "movd    $dst,$src\t# MoveL2D" %}
11440   ins_encode %{
11441      __ movdq($dst$$XMMRegister, $src$$Register);
11442   %}
11443   ins_pipe( pipe_slow );
11444 %}
11445 

11446 // Fast clearing of an array
11447 // Small ClearArray non-AVX512.
11448 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11449                   Universe dummy, rFlagsReg cr)
11450 %{
11451   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11452   match(Set dummy (ClearArray cnt base));
11453   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































11454 
11455   format %{ $$template
11456     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11457     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11458     $$emit$$"jg      LARGE\n\t"
11459     $$emit$$"dec     rcx\n\t"
11460     $$emit$$"js      DONE\t# Zero length\n\t"
11461     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11462     $$emit$$"dec     rcx\n\t"
11463     $$emit$$"jge     LOOP\n\t"
11464     $$emit$$"jmp     DONE\n\t"
11465     $$emit$$"# LARGE:\n\t"
11466     if (UseFastStosb) {
11467        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11468        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11469     } else if (UseXMMForObjInit) {
11470        $$emit$$"mov     rdi,rax\n\t"
11471        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11472        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11473        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11481        $$emit$$"jl      L_tail\n\t"
11482        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11483        $$emit$$"add     0x20,rax\n\t"
11484        $$emit$$"sub     0x4,rcx\n\t"
11485        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11486        $$emit$$"add     0x4,rcx\n\t"
11487        $$emit$$"jle     L_end\n\t"
11488        $$emit$$"dec     rcx\n\t"
11489        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11490        $$emit$$"vmovq   xmm0,(rax)\n\t"
11491        $$emit$$"add     0x8,rax\n\t"
11492        $$emit$$"dec     rcx\n\t"
11493        $$emit$$"jge     L_sloop\n\t"
11494        $$emit$$"# L_end:\n\t"
11495     } else {
11496        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11497     }
11498     $$emit$$"# DONE"
11499   %}
11500   ins_encode %{
11501     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11502                  $tmp$$XMMRegister, false, knoreg);
11503   %}
11504   ins_pipe(pipe_slow);
11505 %}
11506 
11507 // Small ClearArray AVX512 non-constant length.
11508 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11509                        Universe dummy, rFlagsReg cr)
11510 %{
11511   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11512   match(Set dummy (ClearArray cnt base));
11513   ins_cost(125);
11514   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11515 
11516   format %{ $$template
11517     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11518     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11519     $$emit$$"jg      LARGE\n\t"
11520     $$emit$$"dec     rcx\n\t"
11521     $$emit$$"js      DONE\t# Zero length\n\t"
11522     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11523     $$emit$$"dec     rcx\n\t"
11524     $$emit$$"jge     LOOP\n\t"
11525     $$emit$$"jmp     DONE\n\t"
11526     $$emit$$"# LARGE:\n\t"
11527     if (UseFastStosb) {
11528        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11529        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11530     } else if (UseXMMForObjInit) {
11531        $$emit$$"mov     rdi,rax\n\t"
11532        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11533        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11534        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11542        $$emit$$"jl      L_tail\n\t"
11543        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11544        $$emit$$"add     0x20,rax\n\t"
11545        $$emit$$"sub     0x4,rcx\n\t"
11546        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11547        $$emit$$"add     0x4,rcx\n\t"
11548        $$emit$$"jle     L_end\n\t"
11549        $$emit$$"dec     rcx\n\t"
11550        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11551        $$emit$$"vmovq   xmm0,(rax)\n\t"
11552        $$emit$$"add     0x8,rax\n\t"
11553        $$emit$$"dec     rcx\n\t"
11554        $$emit$$"jge     L_sloop\n\t"
11555        $$emit$$"# L_end:\n\t"
11556     } else {
11557        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11558     }
11559     $$emit$$"# DONE"
11560   %}
11561   ins_encode %{
11562     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11563                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11564   %}
11565   ins_pipe(pipe_slow);
11566 %}
11567 
11568 // Large ClearArray non-AVX512.
11569 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11570                         Universe dummy, rFlagsReg cr)
11571 %{
11572   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
11573   match(Set dummy (ClearArray cnt base));
11574   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
































































































11575 
11576   format %{ $$template
11577     if (UseFastStosb) {
11578        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11579        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11580        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11581     } else if (UseXMMForObjInit) {
11582        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11583        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11584        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11585        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11586        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11587        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11588        $$emit$$"add     0x40,rax\n\t"
11589        $$emit$$"# L_zero_64_bytes:\n\t"
11590        $$emit$$"sub     0x8,rcx\n\t"
11591        $$emit$$"jge     L_loop\n\t"
11592        $$emit$$"add     0x4,rcx\n\t"
11593        $$emit$$"jl      L_tail\n\t"
11594        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11595        $$emit$$"add     0x20,rax\n\t"
11596        $$emit$$"sub     0x4,rcx\n\t"
11597        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11598        $$emit$$"add     0x4,rcx\n\t"
11599        $$emit$$"jle     L_end\n\t"
11600        $$emit$$"dec     rcx\n\t"
11601        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11602        $$emit$$"vmovq   xmm0,(rax)\n\t"
11603        $$emit$$"add     0x8,rax\n\t"
11604        $$emit$$"dec     rcx\n\t"
11605        $$emit$$"jge     L_sloop\n\t"
11606        $$emit$$"# L_end:\n\t"
11607     } else {
11608        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11609        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11610     }
11611   %}
11612   ins_encode %{
11613     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11614                  $tmp$$XMMRegister, true, knoreg);
11615   %}
11616   ins_pipe(pipe_slow);
11617 %}
11618 
11619 // Large ClearArray AVX512.
11620 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11621                              Universe dummy, rFlagsReg cr)
11622 %{
11623   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11624   match(Set dummy (ClearArray cnt base));
      // AVX512 variant: identical shape to rep_stos_large but passes a real
      // mask register (ktmp) to clear_mem instead of knoreg.
11625   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11626 
11627   format %{ $$template
11628     if (UseFastStosb) {
11629        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11630        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11631        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11632     } else if (UseXMMForObjInit) {
11633        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11634        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11635        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11636        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11637        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11638        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11639        $$emit$$"add     0x40,rax\n\t"
11640        $$emit$$"# L_zero_64_bytes:\n\t"
11641        $$emit$$"sub     0x8,rcx\n\t"
11642        $$emit$$"jge     L_loop\n\t"
11643        $$emit$$"add     0x4,rcx\n\t"
11644        $$emit$$"jl      L_tail\n\t"
11645        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11646        $$emit$$"add     0x20,rax\n\t"
11647        $$emit$$"sub     0x4,rcx\n\t"
11648        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11649        $$emit$$"add     0x4,rcx\n\t"
11650        $$emit$$"jle     L_end\n\t"
11651        $$emit$$"dec     rcx\n\t"
11652        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11653        $$emit$$"vmovq   xmm0,(rax)\n\t"
11654        $$emit$$"add     0x8,rax\n\t"
11655        $$emit$$"dec     rcx\n\t"
11656        $$emit$$"jge     L_sloop\n\t"
11657        $$emit$$"# L_end:\n\t"
11658     } else {
11659        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11660        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11661     }
11662   %}
11663   ins_encode %{
11664     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11665                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11666   %}
11667   ins_pipe(pipe_slow);
11668 %}
11669 
11670 // Small ClearArray AVX512 constant length.
11671 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
11672 %{
      // Constant-length variant: requires AVX512VL/BW so clear_mem can use a
      // masked store tail for the fixed count.
11673   predicate(!((ClearArrayNode*)n)->is_large() &&
11674               ((UseAVX > 2) && VM_Version::supports_avx512vlbw()))&#x3B;
11675   match(Set dummy (ClearArray cnt base));
11676   ins_cost(100);
11677   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11678   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11679   ins_encode %{
11680    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11681   %}
11682   ins_pipe(pipe_slow);
11683 %}
11684 
      // byte[]/byte[] (Latin1) string comparison; non-AVX512VLBW path, so the
      // mask-register argument to string_compare is knoreg.
11685 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11686                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11687 %{
11688   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11689   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11690   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11691 
11692   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11693   ins_encode %{
11694     __ string_compare($str1$$Register, $str2$$Register,
11695                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11696                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11697   %}
11698   ins_pipe( pipe_slow );
11699 %}
11700 

13464 
13465   ins_cost(300);
13466   format %{ "call_leaf,runtime " %}
13467   ins_encode(clear_avx, Java_To_Runtime(meth));
13468   ins_pipe(pipe_slow);
13469 %}
13470 
13471 // Call runtime without safepoint and with vector arguments
13472 instruct CallLeafDirectVector(method meth)
13473 %{
13474   match(CallLeafVector);
13475   effect(USE meth);
13476 
13477   ins_cost(300);
13478   format %{ "call_leaf,vector " %}
      // No clear_avx here: vector arguments must survive in the YMM/ZMM
      // registers, so vzeroupper is deliberately omitted.
13479   ins_encode(Java_To_Runtime(meth));
13480   ins_pipe(pipe_slow);
13481 %}
13482 
13483 // Call runtime without safepoint















      // Leaf runtime call that takes no FP arguments; still clears AVX state
      // before the call like other (non-vector) runtime calls.
13484 instruct CallLeafNoFPDirect(method meth)
13485 %{

13486   match(CallLeafNoFP);
13487   effect(USE meth);
13488 
13489   ins_cost(300);
13490   format %{ "call_leaf_nofp,runtime " %}
13491   ins_encode(clear_avx, Java_To_Runtime(meth));
13492   ins_pipe(pipe_slow);
13493 %}
13494 
13495 // Return Instruction
13496 // Remove the return address & jump to it.
13497 // Notice: We always emit a nop after a ret to make sure there is room
13498 // for safepoint patching
13499 instruct Ret()
13500 %{
13501   match(Return);
13502 
13503   format %{ "ret" %}
13504   ins_encode %{
13505     __ ret(0);

  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
        // New in this revision: an indirect leaf call through a register has
        // no movabs and no vzeroupper — just the 3-byte callq.
  493   if (_entry_point == NULL) {
  494     // CallLeafNoFPInDirect
  495     return 3; // callq (register)
  496   }
  497   int offset = 13; // movq r10,#addr; callq (r10)
  498   if (this->ideal_Opcode() != Op_CallLeafVector) {
  499     offset += clear_avx_size();
  500   }
  501   return offset;
  502 }
  503 
  504 //
  505 // Compute padding required for nodes which need alignment
  506 //
  507 
  508 // The address of the call instruction needs to be 4-byte aligned to
  509 // ensure that it does not span a cache line so that it can be patched.
  510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  511 {
  512   current_offset += clear_avx_size(); // skip vzeroupper
  513   current_offset += 1; // skip call opcode byte
  514   return align_up(current_offset, alignment_required()) - current_offset;
  515 }
  516 
  517 // The address of the call instruction needs to be 4-byte aligned to
  518 // ensure that it does not span a cache line so that it can be patched.
  519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  520 {
  521   current_offset += clear_avx_size(); // skip vzeroupper
  522   current_offset += 11; // skip movq instruction + call opcode byte
  523   return align_up(current_offset, alignment_required()) - current_offset;

  892     st->print("# stack alignment check");
  893 #endif
  894   }
  895   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  896     st->print("\n\t");
  897     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  898     st->print("\n\t");
  899     st->print("je      fast_entry\t");
  900     st->print("\n\t");
  901     st->print("call    #nmethod_entry_barrier_stub\t");
  902     st->print("\n\tfast_entry:");
  903   }
  904   st->cr();
  905 }
  906 #endif
  907 
  908 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  909   Compile* C = ra_->C;
  910   C2_MacroAssembler _masm(&cbuf);
  911 
        // New in this revision: frame setup (and, presumably, the clinit
        // barrier/stack bang previously emitted inline here) is delegated to
        // verified_entry(C) — confirm against C2_MacroAssembler.
  912   __ verified_entry(C);













  913 
        // nmethod entry barrier only for normal method compilations, not stubs.
  914   if (ra_->C->stub_function() == NULL) {
  915     __ entry_barrier();
  916   }
  917 
        // Bind the verified-entry label only on the real emission pass, not
        // while measuring code size in scratch buffers.
  918   if (!Compile::current()->output()->in_scratch_emit_size()) {
  919     __ bind(*_verified_entry);
  920   }
  921 
  922   C->output()->set_frame_complete(cbuf.insts_size());
  923 
  924   if (C->has_mach_constant_base_node()) {
  925     // NOTE: We set the table base offset here because users might be
  926     // emitted before MachConstantBaseNode.
  927     ConstantTable& constant_table = C->output()->constant_table();
  928     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  929   }
  930 }
  931 






  932 int MachPrologNode::reloc() const
  933 {
      // Upper-bound estimate of relocation entries emitted by the prologue.
  934   return 0; // a large enough number
  935 }
  936 
  937 //=============================================================================
  938 #ifndef PRODUCT
  939 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  940 {
  941   Compile* C = ra_->C;
  942   if (generate_vzeroupper(C)) {
  943     st->print("vzeroupper");
  944     st->cr(); st->print("\t");
  945   }
  946 
  947   int framesize = C->output()->frame_size_in_bytes();
  948   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  949   // Remove word for return adr already pushed
  950   // and RBP
  951   framesize -= 2*wordSize;

  959   if (do_polling() && C->is_method_compilation()) {
  960     st->print("\t");
  961     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  962                  "ja      #safepoint_stub\t"
  963                  "# Safepoint: poll for GC");
  964   }
  965 }
  966 #endif
  967 
  968 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  969 {
      // Emit the method epilogue: optional vzeroupper, frame teardown,
      // reserved-stack check, and the return safepoint poll.
  970   Compile* C = ra_->C;
  971   MacroAssembler _masm(&cbuf);
  972 
  973   if (generate_vzeroupper(C)) {
  974     // Clear upper bits of YMM registers when current compiled code uses
  975     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  976     __ vzeroupper();
  977   }
  978 
  979   // Subtract two words to account for return address and rbp
  980   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  981   __ remove_frame(initial_framesize, C->needs_stack_repair());
  982 
  983   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  984     __ reserved_stack_check();
  985   }
  986 
  987   if (do_polling() && C->is_method_compilation()) {
  988     MacroAssembler _masm(&cbuf);
      // Poll-stub bookkeeping is skipped during scratch-buffer size
      // estimation; a dummy label stands in so the poll itself still
      // occupies the same number of bytes.
  989     Label dummy_label;
  990     Label* code_stub = &dummy_label;
  991     if (!C->output()->in_scratch_emit_size()) {
  992       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  993       C->output()->add_stub(stub);
  994       code_stub = &stub->entry();
  995     }
      // Mark the poll as a return poll so the runtime can identify it.
  996     __ relocate(relocInfo::poll_return_type);
  997     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  998   }
  999 }
 1000 






 1001 int MachEpilogNode::reloc() const
 1002 {
      // Upper-bound estimate of relocation entries emitted by the epilogue
      // (e.g. the poll_return relocation emitted in MachEpilogNode::emit).
 1003   return 2; // a large enough number
 1004 }
 1005 
 1006 const Pipeline* MachEpilogNode::pipeline() const
 1007 {
      // The epilogue has no special scheduling needs; use the default class.
 1008   return MachNode::pipeline_class();
 1009 }
 1010 
 1011 //=============================================================================
 1012 
      // Coarse operand-location classes used when classifying registers and
      // stack slots for copy/spill generation.
 1013 enum RC {
 1014   rc_bad,     // not a valid location
 1015   rc_int,     // general-purpose register
 1016   rc_kreg,    // opmask (k) register -- AVX-512 only
 1017   rc_float,   // XMM/vector register
 1018   rc_stack    // stack slot
 1019 };
 1020 

 1613     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1614     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1615     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1616     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1617     emit_d32(cbuf, offset);
 1618   } else {
 1619     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1620     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1621     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1622     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1623     emit_d8(cbuf, offset);
 1624   }
 1625 }
 1626 
 1627 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1628 {
      // Size in bytes of the LEA reg,[rsp+offset] emitted for this node:
      // REX.W + opcode + ModRM + SIB = 4 bytes, plus a 1-byte disp8 when the
      // offset fits, otherwise a 4-byte disp32.
 1629   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1630   return (offset < 0x80) ? 5 : 8; // REX
 1631 }
 1632 
 1633 //=============================================================================
 1634 #ifndef PRODUCT
 1635 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1636 {
 1637   st->print_cr("MachVEPNode");
 1638 }
 1639 #endif
 1640 
 1641 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1642 {
      // Value-type entry point. In the unverified case this performs the
      // inline-cache klass check; in the verified case it unpacks inline-type
      // arguments (possibly via a temporary frame) and jumps to the verified
      // entry point.
 1643   C2_MacroAssembler _masm(&cbuf);
 1644   uint insts_size = cbuf.insts_size();
 1645   if (!_verified) {
      // Compare receiver klass (loaded from j_rarg0) against the cached
      // klass in rax; mismatch goes to the IC miss stub.
 1646     if (UseCompressedClassPointers) {
 1647       __ load_klass(rscratch1, j_rarg0, rscratch2);
 1648       __ cmpptr(rax, rscratch1);
 1649     } else {
 1650       __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1651     }
 1652     __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1653   } else {
 1654     // TODO 8284443 Avoid creation of temporary frame
 1655     if (ra_->C->stub_function() == NULL) {
      // Build (and immediately tear down) a temporary frame so the entry
      // barrier can run before argument unpacking.
 1656       __ verified_entry(ra_->C, 0);
 1657       __ entry_barrier();
 1658       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1659       __ remove_frame(initial_framesize, false);
 1660     }
 1661     // Unpack inline type args passed as oop and then jump to
 1662     // the verified entry point (skipping the unverified entry).
 1663     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1664     // Emit code for verified entry and save increment for stack repair on return
 1665     __ verified_entry(ra_->C, sp_inc);
      // During size estimation the real label is not available; jump to a
      // local dummy so the emitted size matches the final code.
 1666     if (Compile::current()->output()->in_scratch_emit_size()) {
 1667       Label dummy_verified_entry;
 1668       __ jmp(dummy_verified_entry);
 1669     } else {
 1670       __ jmp(*_verified_entry);
 1671     }
 1672   }
 1673   /* WARNING these NOPs are critical so that verified entry point is properly
 1674      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1675   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1676   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1677   if (nops_cnt > 0) {
 1678     __ nop(nops_cnt);
 1679   }
 1680 }
 1681 
 1682 //=============================================================================
 1683 #ifndef PRODUCT
 1684 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1685 {
 1686   if (UseCompressedClassPointers) {
 1687     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1688     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1689     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1690   } else {
 1691     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1692                  "# Inline cache check");
 1693   }
 1694   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1695   st->print_cr("\tnop\t# nops to align entry point");
 1696 }
 1697 #endif
 1698 
 1699 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1700 {
 1701   MacroAssembler masm(&cbuf);

 1704     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1705     masm.cmpptr(rax, rscratch1);
 1706   } else {
 1707     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1708   }
 1709 
 1710   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1711 
 1712   /* WARNING these NOPs are critical so that verified entry point is properly
 1713      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1714   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1715   if (OptoBreakpoint) {
 1716     // Leave space for int3
 1717     nops_cnt -= 1;
 1718   }
 1719   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1720   if (nops_cnt > 0)
 1721     masm.nop(nops_cnt);
 1722 }
 1723 







 1724 //=============================================================================
 1725 
 1726 const bool Matcher::supports_vector_calling_convention(void) {
 1727   if (EnableVectorSupport && UseVectorStubs) {
 1728     return true;
 1729   }
 1730   return false;
 1731 }
 1732 
 1733 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
      // Vector values are returned in XMM0; the 'hi' half of the pair
      // selects how wide a slice of the register is live, based on the
      // ideal vector register kind (default covers the narrower kinds).
 1734   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1735   int lo = XMM0_num;
 1736   int hi = XMM0b_num;
 1737   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1738   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1739   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1740   return OptoRegPair(hi, lo);
 1741 }
 1742 
 1743 // Is this branch offset short enough that a short branch can be used?

 3982   %}
 3983 %}
 3984 
 3985 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3986 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3987 %{
 3988   constraint(ALLOC_IN_RC(ptr_reg));
      // Only match when the int index is known non-negative, so the
      // ConvI2L can be folded into the addressing mode safely.
 3989   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3990   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3991 
 3992   op_cost(10);
 3993   format %{"[$reg + $off + $idx << $scale]" %}
 3994   interface(MEMORY_INTER) %{
 3995     base($reg);
 3996     index($idx);
 3997     scale($scale);
 3998     disp($off);
 3999   %}
 4000 %}
 4001 
 4002 // Indirect Narrow Oop Operand
 4003 operand indCompressedOop(rRegN reg) %{
      // Addresses the form r12_heapbase + (narrow_oop << 3); only valid when
      // the compressed-oop shift is 3 (times_8).
 4004   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4005   constraint(ALLOC_IN_RC(ptr_reg));
 4006   match(DecodeN reg);
 4007 
 4008   op_cost(10);
 4009   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 4010   interface(MEMORY_INTER) %{
 4011     base(0xc); // R12
 4012     index($reg);
 4013     scale(0x3);
 4014     disp(0x0);
 4015   %}
 4016 %}
 4017 
 4018 // Indirect Narrow Oop Plus Offset Operand
 4019 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 4020 // so we can't free r12 even with CompressedOops::base() == NULL.
 4021 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 4022   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4023   constraint(ALLOC_IN_RC(ptr_reg));
 4024   match(AddP (DecodeN reg) off);
 4025 
 4026   op_cost(10);
 4027   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4028   interface(MEMORY_INTER) %{
 4029     base(0xc); // R12
 4030     index($reg);
 4031     scale(0x3);
 4032     disp($off);
 4033   %}
 4034 %}
 4035 
 4036 // Indirect Memory Operand
 4037 operand indirectNarrow(rRegN reg)

 4344     equal(0x4, "e");
 4345     not_equal(0x5, "ne");
 4346     less(0x2, "b");
 4347     greater_equal(0x3, "ae");
 4348     less_equal(0x6, "be");
 4349     greater(0x7, "a");
 4350     overflow(0x0, "o");
 4351     no_overflow(0x1, "no");
 4352   %}
 4353 %}
 4354 
 4355 //----------OPERAND CLASSES----------------------------------------------------
 4356 // Operand Classes are groups of operands that are used to simplify
 4357 // instruction definitions by not requiring the AD writer to specify separate
 4358 // instructions for every form of operand when the instruction accepts
 4359 // multiple operand types with the same basic encoding and format.  The classic
 4360 // case of this is memory operands.
 4361 
      // "memory" covers every supported addressing form (plain, offset,
      // indexed, scaled, and the compressed-oop/narrow variants) so a single
      // instruction rule matches all of them.
 4362 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4363                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4364                indCompressedOop, indCompressedOopOffset,
 4365                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4366                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4367                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4368 
 4369 //----------PIPELINE-----------------------------------------------------------
 4370 // Rules which define the behavior of the target architectures pipeline.
 4371 pipeline %{
 4372 
 4373 //----------ATTRIBUTES---------------------------------------------------------
 4374 attributes %{
 4375   variable_size_instructions;        // Instructions are variable-length on x86
 4376   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4377   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 4378   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4379   instruction_fetch_units = 1;       // of 16 bytes
 4380 
 4381   // List of nop instructions
 4382   nops( MachNop );
 4383 %}
 4384 

 6932   format %{ "MEMBAR-storestore (empty encoding)" %}
 6933   ins_encode( );
 6934   ins_pipe(empty);
 6935 %}
 6936 
 6937 //----------Move Instructions--------------------------------------------------
 6938 
 6939 instruct castX2P(rRegP dst, rRegL src)
 6940 %{
 6941   match(Set dst (CastX2P src));
 6942 
 6943   format %{ "movq    $dst, $src\t# long->ptr" %}
 6944   ins_encode %{
      // Pure register reinterpretation: only move when the allocator picked
      // different registers.
 6945     if ($dst$$reg != $src$$reg) {
 6946       __ movptr($dst$$Register, $src$$Register);
 6947     }
 6948   %}
 6949   ins_pipe(ialu_reg_reg); // XXX
 6950 %}
 6951 
 6952 instruct castN2X(rRegL dst, rRegN src)
 6953 %{
      // NOTE(review): src is a narrow (compressed) register class; the rule
      // reuses the CastP2X ideal node, same as castP2X below.
 6954   match(Set dst (CastP2X src));
 6955 
 6956   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6957   ins_encode %{
      // Register reinterpretation: move only if registers differ.
 6958     if ($dst$$reg != $src$$reg) {
 6959       __ movptr($dst$$Register, $src$$Register);
 6960     }
 6961   %}
 6962   ins_pipe(ialu_reg_reg); // XXX
 6963 %}
 6964 
 6965 instruct castP2X(rRegL dst, rRegP src)
 6966 %{
 6967   match(Set dst (CastP2X src));
 6968 
 6969   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6970   ins_encode %{
      // Register reinterpretation: move only if registers differ.
 6971     if ($dst$$reg != $src$$reg) {
 6972       __ movptr($dst$$Register, $src$$Register);
 6973     }
 6974   %}
 6975   ins_pipe(ialu_reg_reg); // XXX
 6976 %}
 6977 
 6978 // Convert oop into int for vectors alignment masking
 6979 instruct convP2I(rRegI dst, rRegP src)
 6980 %{
 6981   match(Set dst (ConvL2I (CastP2X src)));
 6982 
 6983   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6984   ins_encode %{

11460   effect(DEF dst, USE src);
11461   ins_cost(100);
11462   format %{ "movd    $dst,$src\t# MoveI2F" %}
11463   ins_encode %{
11464     __ movdl($dst$$XMMRegister, $src$$Register);
11465   %}
11466   ins_pipe( pipe_slow );
11467 %}
11468 
11469 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
      // Bitwise move of a long GPR into an XMM register (no conversion).
11470   match(Set dst (MoveL2D src));
11471   effect(DEF dst, USE src);
11472   ins_cost(100);
      // NOTE(review): format prints "movd" while the encoder emits movdq
      // (the 64-bit GPR->XMM form) -- confirm intended spelling.
11473   format %{ "movd    $dst,$src\t# MoveL2D" %}
11474   ins_encode %{
11475      __ movdq($dst$$XMMRegister, $src$$Register);
11476   %}
11477   ins_pipe( pipe_slow );
11478 %}
11479 
11480 
11481 // Fast clearing of an array
11482 // Small ClearArray non-AVX512.
11483 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11484                   Universe dummy, rFlagsReg cr)
11485 %{
      // Non-large, non-constant-length ClearArray with a caller-supplied
      // 64-bit fill value, on pre-AVX512 hardware (UseAVX <= 2).
11486   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11487   match(Set dummy (ClearArray (Binary cnt base) val));
11488   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11489 
11490   format %{ $$template
11491     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11492     $$emit$$"jg      LARGE\n\t"
11493     $$emit$$"dec     rcx\n\t"
11494     $$emit$$"js      DONE\t# Zero length\n\t"
11495     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11496     $$emit$$"dec     rcx\n\t"
11497     $$emit$$"jge     LOOP\n\t"
11498     $$emit$$"jmp     DONE\n\t"
11499     $$emit$$"# LARGE:\n\t"
11500     if (UseFastStosb) {
11501        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11502        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11503     } else if (UseXMMForObjInit) {
11504        $$emit$$"movdq   $tmp, $val\n\t"
11505        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11506        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11507        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11508        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11509        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11510        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11511        $$emit$$"add     0x40,rax\n\t"
11512        $$emit$$"# L_zero_64_bytes:\n\t"
11513        $$emit$$"sub     0x8,rcx\n\t"
11514        $$emit$$"jge     L_loop\n\t"
11515        $$emit$$"add     0x4,rcx\n\t"
11516        $$emit$$"jl      L_tail\n\t"
11517        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11518        $$emit$$"add     0x20,rax\n\t"
11519        $$emit$$"sub     0x4,rcx\n\t"
11520        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11521        $$emit$$"add     0x4,rcx\n\t"
11522        $$emit$$"jle     L_end\n\t"
11523        $$emit$$"dec     rcx\n\t"
11524        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11525        $$emit$$"vmovq   xmm0,(rax)\n\t"
11526        $$emit$$"add     0x8,rax\n\t"
11527        $$emit$$"dec     rcx\n\t"
11528        $$emit$$"jge     L_sloop\n\t"
11529        $$emit$$"# L_end:\n\t"
11530     } else {
11531        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11532     }
11533     $$emit$$"# DONE"
11534   %}
      // clear_mem(base, cnt, val, tmp, is_large = false, word_copy = false)
11535   ins_encode %{
11536     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11537                  $tmp$$XMMRegister, false, false);
11538   %}
11539   ins_pipe(pipe_slow);
11540 %}
11541 
11542 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11543                             Universe dummy, rFlagsReg cr)
11544 %{
      // word_copy_only variant of rep_stos: note there is no UseFastStosb /
      // "rep stosb" path in the template below.
11545   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11546   match(Set dummy (ClearArray (Binary cnt base) val));
11547   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11548 
11549   format %{ $$template
11550     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11551     $$emit$$"jg      LARGE\n\t"
11552     $$emit$$"dec     rcx\n\t"
11553     $$emit$$"js      DONE\t# Zero length\n\t"
11554     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11555     $$emit$$"dec     rcx\n\t"
11556     $$emit$$"jge     LOOP\n\t"
11557     $$emit$$"jmp     DONE\n\t"
11558     $$emit$$"# LARGE:\n\t"
11559     if (UseXMMForObjInit) {
11560        $$emit$$"movdq   $tmp, $val\n\t"
11561        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11562        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11563        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11564        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11565        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11566        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11567        $$emit$$"add     0x40,rax\n\t"
11568        $$emit$$"# L_zero_64_bytes:\n\t"
11569        $$emit$$"sub     0x8,rcx\n\t"
11570        $$emit$$"jge     L_loop\n\t"
11571        $$emit$$"add     0x4,rcx\n\t"
11572        $$emit$$"jl      L_tail\n\t"
11573        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11574        $$emit$$"add     0x20,rax\n\t"
11575        $$emit$$"sub     0x4,rcx\n\t"
11576        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11577        $$emit$$"add     0x4,rcx\n\t"
11578        $$emit$$"jle     L_end\n\t"
11579        $$emit$$"dec     rcx\n\t"
11580        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11581        $$emit$$"vmovq   xmm0,(rax)\n\t"
11582        $$emit$$"add     0x8,rax\n\t"
11583        $$emit$$"dec     rcx\n\t"
11584        $$emit$$"jge     L_sloop\n\t"
11585        $$emit$$"# L_end:\n\t"
11586     } else {
11587        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11588     }
11589     $$emit$$"# DONE"
11590   %}
      // clear_mem(base, cnt, val, tmp, is_large = false, word_copy = true)
11591   ins_encode %{
11592     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11593                  $tmp$$XMMRegister, false, true);
11594   %}
11595   ins_pipe(pipe_slow);
11596 %}
11597 
11598 // Small ClearArray AVX512 non-constant length.
11599 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11600                        Universe dummy, rFlagsReg cr)
11601 %{
11602   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11603   match(Set dummy (ClearArray (Binary cnt base) val));
11604   ins_cost(125);
11605   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11606 
11607   format %{ $$template
11608     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11609     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11610     $$emit$$"jg      LARGE\n\t"
11611     $$emit$$"dec     rcx\n\t"
11612     $$emit$$"js      DONE\t# Zero length\n\t"
11613     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11614     $$emit$$"dec     rcx\n\t"
11615     $$emit$$"jge     LOOP\n\t"
11616     $$emit$$"jmp     DONE\n\t"
11617     $$emit$$"# LARGE:\n\t"
11618     if (UseFastStosb) {
11619        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11620        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11621     } else if (UseXMMForObjInit) {
11622        $$emit$$"mov     rdi,rax\n\t"
11623        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11624        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11625        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11633        $$emit$$"jl      L_tail\n\t"
11634        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11635        $$emit$$"add     0x20,rax\n\t"
11636        $$emit$$"sub     0x4,rcx\n\t"
11637        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11638        $$emit$$"add     0x4,rcx\n\t"
11639        $$emit$$"jle     L_end\n\t"
11640        $$emit$$"dec     rcx\n\t"
11641        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11642        $$emit$$"vmovq   xmm0,(rax)\n\t"
11643        $$emit$$"add     0x8,rax\n\t"
11644        $$emit$$"dec     rcx\n\t"
11645        $$emit$$"jge     L_sloop\n\t"
11646        $$emit$$"# L_end:\n\t"
11647     } else {
11648        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11649     }
11650     $$emit$$"# DONE"
11651   %}
11652   ins_encode %{
11653     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11654                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
11655   %}
11656   ins_pipe(pipe_slow);
11657 %}
11658 
11659 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11660                                  Universe dummy, rFlagsReg cr)

11661 %{
11662   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11663   match(Set dummy (ClearArray (Binary cnt base) val));
11664   ins_cost(125);
11665   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11666 
11667   format %{ $$template
11668     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11669     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11670     $$emit$$"jg      LARGE\n\t"
11671     $$emit$$"dec     rcx\n\t"
11672     $$emit$$"js      DONE\t# Zero length\n\t"
11673     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11674     $$emit$$"dec     rcx\n\t"
11675     $$emit$$"jge     LOOP\n\t"
11676     $$emit$$"jmp     DONE\n\t"
11677     $$emit$$"# LARGE:\n\t"
11678     if (UseFastStosb) {
11679        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11680        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11681     } else if (UseXMMForObjInit) {
11682        $$emit$$"mov     rdi,rax\n\t"
11683        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11684        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11685        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11693        $$emit$$"jl      L_tail\n\t"
11694        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11695        $$emit$$"add     0x20,rax\n\t"
11696        $$emit$$"sub     0x4,rcx\n\t"
11697        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11698        $$emit$$"add     0x4,rcx\n\t"
11699        $$emit$$"jle     L_end\n\t"
11700        $$emit$$"dec     rcx\n\t"
11701        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11702        $$emit$$"vmovq   xmm0,(rax)\n\t"
11703        $$emit$$"add     0x8,rax\n\t"
11704        $$emit$$"dec     rcx\n\t"
11705        $$emit$$"jge     L_sloop\n\t"
11706        $$emit$$"# L_end:\n\t"
11707     } else {
11708        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11709     }
11710     $$emit$$"# DONE"
11711   %}
11712   ins_encode %{
11713     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11714                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
11715   %}
11716   ins_pipe(pipe_slow);
11717 %}
11718 
11719 // Large ClearArray non-AVX512.
11720 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11721                         Universe dummy, rFlagsReg cr)
11722 %{
      // Large ClearArray on pre-AVX512 hardware; no short-length fast path.
11723   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11724   match(Set dummy (ClearArray (Binary cnt base) val));
11725   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11726 
11727   format %{ $$template
11728     if (UseFastStosb) {
11729        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11730        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11731     } else if (UseXMMForObjInit) {
11732        $$emit$$"movdq   $tmp, $val\n\t"
11733        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11734        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11735        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11736        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11737        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11738        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11739        $$emit$$"add     0x40,rax\n\t"
11740        $$emit$$"# L_zero_64_bytes:\n\t"
11741        $$emit$$"sub     0x8,rcx\n\t"
11742        $$emit$$"jge     L_loop\n\t"
11743        $$emit$$"add     0x4,rcx\n\t"
11744        $$emit$$"jl      L_tail\n\t"
11745        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11746        $$emit$$"add     0x20,rax\n\t"
11747        $$emit$$"sub     0x4,rcx\n\t"
11748        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11749        $$emit$$"add     0x4,rcx\n\t"
11750        $$emit$$"jle     L_end\n\t"
11751        $$emit$$"dec     rcx\n\t"
11752        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11753        $$emit$$"vmovq   xmm0,(rax)\n\t"
11754        $$emit$$"add     0x8,rax\n\t"
11755        $$emit$$"dec     rcx\n\t"
11756        $$emit$$"jge     L_sloop\n\t"
11757        $$emit$$"# L_end:\n\t"
11758     } else {
11759        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11760     }
11761   %}
      // clear_mem(base, cnt, val, tmp, is_large = true, word_copy = false)
11762   ins_encode %{
11763     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11764                  $tmp$$XMMRegister, true, false);
11765   %}
11766   ins_pipe(pipe_slow);
11767 %}
11768 
11769 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11770                                   Universe dummy, rFlagsReg cr)
11771 %{
      // Large, word_copy_only variant: no UseFastStosb / "rep stosb" path.
11772   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11773   match(Set dummy (ClearArray (Binary cnt base) val));
11774   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11775 
11776   format %{ $$template
11777     if (UseXMMForObjInit) {
11778        $$emit$$"movdq   $tmp, $val\n\t"
11779        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11780        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11781        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11782        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11783        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11784        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11785        $$emit$$"add     0x40,rax\n\t"
11786        $$emit$$"# L_zero_64_bytes:\n\t"
11787        $$emit$$"sub     0x8,rcx\n\t"
11788        $$emit$$"jge     L_loop\n\t"
11789        $$emit$$"add     0x4,rcx\n\t"
11790        $$emit$$"jl      L_tail\n\t"
11791        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11792        $$emit$$"add     0x20,rax\n\t"
11793        $$emit$$"sub     0x4,rcx\n\t"
11794        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11795        $$emit$$"add     0x4,rcx\n\t"
11796        $$emit$$"jle     L_end\n\t"
11797        $$emit$$"dec     rcx\n\t"
11798        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11799        $$emit$$"vmovq   xmm0,(rax)\n\t"
11800        $$emit$$"add     0x8,rax\n\t"
11801        $$emit$$"dec     rcx\n\t"
11802        $$emit$$"jge     L_sloop\n\t"
11803        $$emit$$"# L_end:\n\t"
11804     } else {
11805        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11806     }
11807   %}
      // clear_mem(base, cnt, val, tmp, is_large = true, word_copy = true)
11808   ins_encode %{
11809     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11810                  $tmp$$XMMRegister, true, true);
11811   %}
11812   ins_pipe(pipe_slow);
11813 %}
11814 
11815 // Large ClearArray AVX512.
11816 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11817                              Universe dummy, rFlagsReg cr)
11818 %{
      // Large ClearArray on AVX512 hardware; needs an opmask temp (ktmp).
11819   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11820   match(Set dummy (ClearArray (Binary cnt base) val));
11821   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11822 
11823   format %{ $$template
11824     if (UseFastStosb) {
11825        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11826        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11827        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11828     } else if (UseXMMForObjInit) {
11829        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11830        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11831        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11832        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11833        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11834        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11835        $$emit$$"add     0x40,rax\n\t"
11836        $$emit$$"# L_zero_64_bytes:\n\t"
11837        $$emit$$"sub     0x8,rcx\n\t"
11838        $$emit$$"jge     L_loop\n\t"
11839        $$emit$$"add     0x4,rcx\n\t"
11840        $$emit$$"jl      L_tail\n\t"
11841        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11842        $$emit$$"add     0x20,rax\n\t"
11843        $$emit$$"sub     0x4,rcx\n\t"
11844        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11845        $$emit$$"add     0x4,rcx\n\t"
11846        $$emit$$"jle     L_end\n\t"
11847        $$emit$$"dec     rcx\n\t"
11848        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11849        $$emit$$"vmovq   xmm0,(rax)\n\t"
11850        $$emit$$"add     0x8,rax\n\t"
11851        $$emit$$"dec     rcx\n\t"
11852        $$emit$$"jge     L_sloop\n\t"
11853        $$emit$$"# L_end:\n\t"
11854     } else {
11855        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11856        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11857     }
11858   %}
      // clear_mem(base, cnt, val, tmp, is_large = true, word_copy = false, ktmp)
11859   ins_encode %{
11860     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11861                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
11862   %}
11863   ins_pipe(pipe_slow);
11864 %}
11865 
11866 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11867                                        Universe dummy, rFlagsReg cr)

11868 %{
      // Large, word_copy_only ClearArray on AVX512 hardware.
11869   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11870   match(Set dummy (ClearArray (Binary cnt base) val));
11871   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11872 
11873   format %{ $$template
11874     if (UseFastStosb) {
11875        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11876        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11877        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11878     } else if (UseXMMForObjInit) {
11879        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11880        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11881        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11882        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11883        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11884        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11885        $$emit$$"add     0x40,rax\n\t"
11886        $$emit$$"# L_zero_64_bytes:\n\t"
11887        $$emit$$"sub     0x8,rcx\n\t"
11888        $$emit$$"jge     L_loop\n\t"
11889        $$emit$$"add     0x4,rcx\n\t"
11890        $$emit$$"jl      L_tail\n\t"
11891        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11892        $$emit$$"add     0x20,rax\n\t"
11893        $$emit$$"sub     0x4,rcx\n\t"
11894        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11895        $$emit$$"add     0x4,rcx\n\t"
11896        $$emit$$"jle     L_end\n\t"
11897        $$emit$$"dec     rcx\n\t"
11898        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11899        $$emit$$"vmovq   xmm0,(rax)\n\t"
11900        $$emit$$"add     0x8,rax\n\t"
11901        $$emit$$"dec     rcx\n\t"
11902        $$emit$$"jge     L_sloop\n\t"
11903        $$emit$$"# L_end:\n\t"
11904     } else {
11905        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11906        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11907     }
11908   %}
      // clear_mem(base, cnt, val, tmp, is_large = true, word_copy = true, ktmp)
11909   ins_encode %{
11910     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11911                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
11912   %}
11913   ins_pipe(pipe_slow);
11914 %}
11915 
11916 // Small ClearArray AVX512 constant length.
// Matches only when the length is a compile-time immediate (immL cnt) and
// the array is small (!is_large()) with no word-copy restriction.  Requires
// AVX-512 VL+BW.  val (rax) is the fill value and is destroyed; base is a
// plain register (not pinned to rdi — no rep stos here); tmp/ktmp are
// vector and opmask temporaries for MacroAssembler::clear_mem.
// ins_cost(100) makes this cheap so it is preferred over the generic
// variable-length ClearArray rules when it applies.
11917 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
11918 %{
11919   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
11920             ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11921   match(Set dummy (ClearArray (Binary cnt base) val));
11922   ins_cost(100);
11923   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
11924   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11925   ins_encode %{
    // $cnt$$constant: the immediate length is baked into the generated code.
11926     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11927   %}
11928   ins_pipe(pipe_slow);
11929 %}
11930 
11931 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11932                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11933 %{
11934   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11935   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11936   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11937 
11938   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11939   ins_encode %{
11940     __ string_compare($str1$$Register, $str2$$Register,
11941                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11942                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11943   %}
11944   ins_pipe( pipe_slow );
11945 %}
11946 

13710 
13711   ins_cost(300);
13712   format %{ "call_leaf,runtime " %}
13713   ins_encode(clear_avx, Java_To_Runtime(meth));
13714   ins_pipe(pipe_slow);
13715 %}
13716 
13717 // Call runtime without safepoint and with vector arguments
// Unlike CallLeafDirect, this deliberately omits the clear_avx (vzeroupper)
// encoding: vector arguments live in the upper YMM/ZMM lanes and must not be
// zapped before the call.  This pairs with
// MachCallRuntimeNode::ret_addr_offset(), which skips clear_avx_size() for
// Op_CallLeafVector.
13718 instruct CallLeafDirectVector(method meth)
13719 %{
13720   match(CallLeafVector);
13721   effect(USE meth);
13722 
13723   ins_cost(300);
13724   format %{ "call_leaf,vector " %}
13725   ins_encode(Java_To_Runtime(meth));
13726   ins_pipe(pipe_slow);
13727 %}
13728 
13729 // Call runtime without safepoint
13730 // entry point is null, target holds the address to call
// Indirect form: when the CallLeafNoFP node carries no static entry point,
// the destination address is supplied at runtime in a general-purpose
// register and called through it.  No clear_avx here — NOTE(review): the
// direct variants emit vzeroupper via clear_avx; presumably callers of the
// indirect form don't need it — confirm.
13731 instruct CallLeafNoFPInDirect(rRegP target)
13732 %{
13733   predicate(n->as_Call()->entry_point() == NULL);
13734   match(CallLeafNoFP target);
13735 
13736   ins_cost(300);
13737   format %{ "call_leaf_nofp,runtime indirect " %}
13738   ins_encode %{
     // Register-indirect call through the address in $target.
13739      __ call($target$$Register);
13740   %}
13741 
13742   ins_pipe(pipe_slow);
13743 %}
13744 
// Direct leaf runtime call (no safepoint, no FP state saved) with a known
// static entry point; complements CallLeafNoFPInDirect above, which handles
// entry_point() == NULL.  clear_avx emits vzeroupper before the call to
// avoid AVX-SSE transition penalties in the callee.
13745 instruct CallLeafNoFPDirect(method meth)
13746 %{
13747   predicate(n->as_Call()->entry_point() != NULL);
13748   match(CallLeafNoFP);
13749   effect(USE meth);
13750 
13751   ins_cost(300);
13752   format %{ "call_leaf_nofp,runtime " %}
13753   ins_encode(clear_avx, Java_To_Runtime(meth));
13754   ins_pipe(pipe_slow);
13755 %}
13756 
13757 // Return Instruction
13758 // Remove the return address & jump to it.
13759 // Notice: We always emit a nop after a ret to make sure there is room
13760 // for safepoint patching
13761 instruct Ret()
13762 %{
13763   match(Return);
13764 
13765   format %{ "ret" %}
13766   ins_encode %{
13767     __ ret(0);
< prev index next >