< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  451 }
  452 
  453 // !!!!! Special hack to get all types of calls to specify the byte offset
  454 //       from the start of the call to the point where the return address
  455 //       will point.
  // Byte distance from the start of this call sequence to the point the
  // return address will refer to: 5 bytes for the direct call instruction,
  // plus the size of the optional AVX-clearing (vzeroupper) prefix
  // reported by clear_avx_size().
  456 int MachCallStaticJavaNode::ret_addr_offset()
  457 {
  458   int offset = 5; // 5 bytes from start of call to where return address points
  459   offset += clear_avx_size();
  460   return offset;
  461 }
  462 
  // Dynamic (inline-cache) Java calls use a longer, 15-byte sequence before
  // the return point; the optional AVX-clearing prefix is added on top.
  463 int MachCallDynamicJavaNode::ret_addr_offset()
  464 {
  465   int offset = 15; // 15 bytes from start of call to where return address points
  466   offset += clear_avx_size();
  467   return offset;
  468 }
  469 
  // Runtime calls are emitted as a 13-byte movq r10,#addr + callq (r10)
  // pair.  Vector leaf calls (Op_CallLeafVector) are emitted without the
  // AVX-clearing prefix, so clear_avx_size() is added only for the rest.
  470 int MachCallRuntimeNode::ret_addr_offset() {




  471   int offset = 13; // movq r10,#addr; callq (r10)
  472   if (this->ideal_Opcode() != Op_CallLeafVector) {
  473     offset += clear_avx_size();
  474   }
  475   return offset;
  476 }
  477 
  // Native calls use the same 13-byte movq/callq-through-r10 sequence as
  // runtime calls, always preceded by the optional AVX-clearing prefix.
  478 int MachCallNativeNode::ret_addr_offset() {
  479   int offset = 13; // movq r10,#addr; callq (r10)
  480   offset += clear_avx_size();
  481   return offset;
  482 }

  483 //
  484 // Compute padding required for nodes which need alignment
  485 //
  486 
  487 // The address of the call instruction needs to be 4-byte aligned to
  488 // ensure that it does not span a cache line so that it can be patched.
  // Returns the number of padding bytes needed so that the call's 4-byte
  // displacement (which follows the vzeroupper prefix and the 1-byte call
  // opcode) is 4-byte aligned and therefore patchable without spanning a
  // cache line.
  489 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  490 {
  491   current_offset += clear_avx_size(); // skip vzeroupper
  492   current_offset += 1; // skip call opcode byte
  493   return align_up(current_offset, alignment_required()) - current_offset;
  494 }
  495 
  496 // The address of the call instruction needs to be 4-byte aligned to
  497 // ensure that it does not span a cache line so that it can be patched.
  498 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  499 {
  500   current_offset += clear_avx_size(); // skip vzeroupper
  501   current_offset += 11; // skip movq instruction + call opcode byte
  502   return align_up(current_offset, alignment_required()) - current_offset;

  871     st->print("# stack alignment check");
  872 #endif
  873   }
  874   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  875     st->print("\n\t");
  876     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
  877     st->print("\n\t");
  878     st->print("je      fast_entry\t");
  879     st->print("\n\t");
  880     st->print("call    #nmethod_entry_barrier_stub\t");
  881     st->print("\n\tfast_entry:");
  882   }
  883   st->cr();
  884 }
  885 #endif
  886 
  // Emits the compiled-method prolog: an optional class-initialization
  // barrier, then the verified entry (stack bang + frame setup), and
  // finally records frame completion and the constant-table base offset.
  887 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  888   Compile* C = ra_->C;
  889   MacroAssembler _masm(&cbuf);
  890 
  891   int framesize = C->output()->frame_size_in_bytes();
  892   int bangsize = C->output()->bang_size_in_bytes();
  893 
  894   // Fast path: if the method's holder class is already initialized, skip
  895   // over the jump to the wrong-method stub; otherwise take the slow path.
  894   if (C->clinit_barrier_on_entry()) {
  895     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  896     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  897 
  898     Label L_skip_barrier;
  899     Register klass = rscratch1;
  900 
  901     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  902     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  903 
  904     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  905 
  906     __ bind(L_skip_barrier);
  907   }
  908 
  909   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);






  910 
  911   C->output()->set_frame_complete(cbuf.insts_size());
  912 
  913   if (C->has_mach_constant_base_node()) {
  914     // NOTE: We set the table base offset here because users might be
  915     // emitted before MachConstantBaseNode.
  916     ConstantTable& constant_table = C->output()->constant_table();
  917     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  918   }
  919 }
  920 
  // Prolog length varies (clinit barrier, stack bang, vzeroupper), so defer
  // to MachNode::size(), which determines the size the general way.
  921 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  922 {
  923   return MachNode::size(ra_); // too many variables; just compute it
  924                               // the hard way
  925 }
  926 
  // Upper bound on the number of relocation entries the prolog may emit.
  927 int MachPrologNode::reloc() const
  928 {
  929   return 0; // a large enough number
  930 }
  931 
  932 //=============================================================================
  933 #ifndef PRODUCT
  934 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  935 {
  936   Compile* C = ra_->C;
  937   if (generate_vzeroupper(C)) {
  938     st->print("vzeroupper");
  939     st->cr(); st->print("\t");
  940   }
  941 
  942   int framesize = C->output()->frame_size_in_bytes();
  943   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  944   // Remove word for return adr already pushed
  945   // and RBP
  946   framesize -= 2*wordSize;

  954   if (do_polling() && C->is_method_compilation()) {
  955     st->print("\t");
  956     st->print_cr("cmpq     rsp, poll_offset[r15_thread] \n\t"
  957                  "ja       #safepoint_stub\t"
  958                  "# Safepoint: poll for GC");
  959   }
  960 }
  961 #endif
  962 
  // Emits the method epilog: optional vzeroupper, frame teardown
  // (addq rsp / popq rbp), optional reserved-stack check, and the
  // return-point safepoint poll.
  963 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  964 {
  965   Compile* C = ra_->C;
  966   MacroAssembler _masm(&cbuf);
  967 
  968   if (generate_vzeroupper(C)) {
  969     // Clear upper bits of YMM registers when current compiled code uses
  970     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  971     __ vzeroupper();
  972   }
  973 
  974   int framesize = C->output()->frame_size_in_bytes();
  975   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  976   // Remove word for return adr already pushed
  977   // and RBP
  978   framesize -= 2*wordSize;
  979 
  980   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  981 
  982   // Hand-encode REX.W add rsp,imm: opcode 0x83 takes an 8-bit immediate,
  983   // 0x81 a 32-bit immediate; pick the short form when it fits.
  982   if (framesize) {
  983     emit_opcode(cbuf, Assembler::REX_W);
  984     if (framesize < 0x80) {
  985       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
  986       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
  987       emit_d8(cbuf, framesize);
  988     } else {
  989       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
  990       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
  991       emit_d32(cbuf, framesize);
  992     }
  993   }
  994 
  995   // popq rbp
  996   emit_opcode(cbuf, 0x58 | RBP_enc);
  997 
  998   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  999     __ reserved_stack_check();
 1000   }
 1001 
 1002   if (do_polling() && C->is_method_compilation()) {
 1003     MacroAssembler _masm(&cbuf);
 1004     Label dummy_label;
 1005     Label* code_stub = &dummy_label;
 1006     // During scratch-buffer sizing no real stub is registered; the dummy
 1007     // label stands in so the poll encodes at the same length.
 1006     if (!C->output()->in_scratch_emit_size()) {
 1007       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1008     }
 1009     __ relocate(relocInfo::poll_return_type);
 1010     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1011   }
 1012 }
 1013 
 // Epilog length varies (vzeroupper, imm8/imm32 add, polling), so defer to
 // MachNode::size(), which determines the size the general way.
 1014 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1015 {
 1016   return MachNode::size(ra_); // too many variables; just compute it
 1017                               // the hard way
 1018 }
 1019 
 // Upper bound on the number of relocation entries the epilog may emit
 // (e.g. the poll-return relocation above).
 1020 int MachEpilogNode::reloc() const
 1021 {
 1022   return 2; // a large enough number
 1023 }
 1024 
 // Epilog has no special pipeline behavior; use the generic class.
 1025 const Pipeline* MachEpilogNode::pipeline() const
 1026 {
 1027   return MachNode::pipeline_class();
 1028 }
 1029 
 1030 //=============================================================================
 1031 
 // Register-class categories used to classify source/destination locations
 // (general-purpose, opmask, float/vector, or stack slot) — presumably for
 // spill-copy emission; confirm against the MachSpillCopy code below.
 1032 enum RC {
 1033   rc_bad,
 1034   rc_int,
 1035   rc_kreg,
 1036   rc_float,
 1037   rc_stack
 1038 };
 1039 

 1632     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1633     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1634     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1635     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1636     emit_d32(cbuf, offset);
 1637   } else {
 1638     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1639     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1640     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1641     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1642     emit_d8(cbuf, offset);
 1643   }
 1644 }
 1645 
 // Size of the REX-prefixed LEA emitted above: REX + opcode + ModRM + SIB
 // plus a 1-byte displacement (5 total) or a 4-byte displacement (8 total).
 1646 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1647 {
 1648   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1649   return (offset < 0x80) ? 5 : 8; // REX
 1650 }
 1651 






























 1652 //=============================================================================
 1653 #ifndef PRODUCT
 // Debug-only pretty printer for the unverified entry point: shows the
 // inline-cache klass compare (compressed or full), the jump to the
 // ic-miss stub, and the alignment nops.  Mirrors MachUEPNode::emit below.
 1654 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1655 {
 1656   if (UseCompressedClassPointers) {
 1657     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1658     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1659     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1660   } else {
 1661     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1662                  "# Inline cache check");
 1663   }
 1664   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1665   st->print_cr("\tnop\t# nops to align entry point");
 1666 }
 1667 #endif
 1668 
 1669 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1670 {
 1671   MacroAssembler masm(&cbuf);

 1674     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1675     masm.cmpptr(rax, rscratch1);
 1676   } else {
 1677     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1678   }
 1679 
 1680   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1681 
 1682   /* WARNING these NOPs are critical so that verified entry point is properly
 1683      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1684   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1685   if (OptoBreakpoint) {
 1686     // Leave space for int3
 1687     nops_cnt -= 1;
 1688   }
 1689   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1690   if (nops_cnt > 0)
 1691     masm.nop(nops_cnt);
 1692 }
 1693 
 // UEP length varies (compressed-class path, alignment nops), so defer to
 // MachNode::size(), which determines the size the general way.
 1694 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1695 {
 1696   return MachNode::size(ra_); // too many variables; just compute it
 1697                               // the hard way
 1698 }
 1699 
 1700 
 1701 //=============================================================================
 1702 
 // Vector calling convention is available only when both the vector API
 // support and the vector stubs flags are enabled.
 1703 const bool Matcher::supports_vector_calling_convention(void) {
 1704   if (EnableVectorSupport && UseVectorStubs) {
 1705     return true;
 1706   }
 1707   return false;
 1708 }
 1709 
 // Vector values are returned in XMM0; the high register number of the
 // pair widens with the vector width (VecX/VecY/VecZ), the low one is
 // always the base XMM0 slot.
 1710 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1711   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1712   int lo = XMM0_num;
 1713   int hi = XMM0b_num;
 1714   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1715   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1716   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1717   return OptoRegPair(hi, lo);
 1718 }
 1719 
 1720 // Is this branch offset short enough that a short branch can be used?

 3958   %}
 3959 %}
 3960 
 3961 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 // Addressing-mode operand [base + disp32 + (int_index << scale)].  The
 // predicate requires the index to be provably non-negative (its type's
 // low bound >= 0) so the ConvI2L sign extension can be elided safely.
 3962 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3963 %{
 3964   constraint(ALLOC_IN_RC(ptr_reg));
 3965   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3966   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3967 
 3968   op_cost(10);
 3969   format %{"[$reg + $off + $idx << $scale]" %}
 3970   interface(MEMORY_INTER) %{
 3971     base($reg);
 3972     index($idx);
 3973     scale($scale);
 3974     disp($off);
 3975   %}
 3976 %}
 3977 
















 3978 // Indirect Narrow Oop Plus Offset Operand
 3979 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3980 // we can't free r12 even with CompressedOops::base() == NULL.
 // Folds oop decompression into the addressing mode as
 // [r12_heapbase + narrow_oop*8 + disp32]; only valid when compressed
 // oops use a shift of 3 (times_8), matching the hard-coded scale below.
 3981 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 3982   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3983   constraint(ALLOC_IN_RC(ptr_reg));
 3984   match(AddP (DecodeN reg) off);
 3985 
 3986   op_cost(10);
 3987   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3988   interface(MEMORY_INTER) %{
 3989     base(0xc); // R12
 3990     index($reg);
 3991     scale(0x3);
 3992     disp($off);
 3993   %}
 3994 %}
 3995 
 3996 // Indirect Memory Operand
 3997 operand indirectNarrow(rRegN reg)

 4300     equal(0x4, "e");
 4301     not_equal(0x5, "ne");
 4302     less(0x2, "b");
 4303     greater_equal(0x3, "nb");
 4304     less_equal(0x6, "be");
 4305     greater(0x7, "nbe");
 4306     overflow(0x0, "o");
 4307     no_overflow(0x1, "no");
 4308   %}
 4309 %}
 4310 
 4311 //----------OPERAND CLASSES----------------------------------------------------
 4312 // Operand Classes are groups of operands that are used as to simplify
 4313 // instruction definitions by not requiring the AD writer to specify separate
 4314 // instructions for every form of operand when the instruction accepts
 4315 // multiple operand types with the same basic encoding and format.  The classic
 4316 // case of this is memory operands.
 4317 
 // Operand class grouping every memory addressing mode (plain and
 // narrow-oop variants) so instructions can accept any of them with one
 // "memory" operand.
 4318 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4319                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4320                indCompressedOopOffset,
 4321                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4322                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4323                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4324 
 4325 //----------PIPELINE-----------------------------------------------------------
 4326 // Rules which define the behavior of the target architectures pipeline.
 4327 pipeline %{
 4328 
 4329 //----------ATTRIBUTES---------------------------------------------------------
 4330 attributes %{
 4331   variable_size_instructions;        // Fixed size instructions
 4332   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4333   instruction_unit_size = 1;         // An instruction is 1 bytes long
 4334   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4335   instruction_fetch_units = 1;       // of 16 bytes
 4336 
 4337   // List of nop instructions
 4338   nops( MachNop );
 4339 %}
 4340 

 6793   format %{ "MEMBAR-storestore (empty encoding)" %}
 6794   ins_encode( );
 6795   ins_pipe(empty);
 6796 %}
 6797 
 6798 //----------Move Instructions--------------------------------------------------
 6799 
 // Reinterpret a long as a pointer.  Emits nothing when the register
 // allocator assigned src and dst to the same register.
 6800 instruct castX2P(rRegP dst, rRegL src)
 6801 %{
 6802   match(Set dst (CastX2P src));
 6803 
 6804   format %{ "movq    $dst, $src\t# long->ptr" %}
 6805   ins_encode %{
 6806     if ($dst$$reg != $src$$reg) {
 6807       __ movptr($dst$$Register, $src$$Register);
 6808     }
 6809   %}
 6810   ins_pipe(ialu_reg_reg); // XXX
 6811 %}
 6812 













 // Reinterpret a pointer as a long (inverse of castX2P).  Emits nothing
 // when src and dst share a register.
 6813 instruct castP2X(rRegL dst, rRegP src)
 6814 %{
 6815   match(Set dst (CastP2X src));
 6816 
 6817   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6818   ins_encode %{
 6819     if ($dst$$reg != $src$$reg) {
 6820       __ movptr($dst$$Register, $src$$Register);
 6821     }
 6822   %}
 6823   ins_pipe(ialu_reg_reg); // XXX
 6824 %}
 6825 
 6826 // Convert oop into int for vectors alignment masking
 6827 instruct convP2I(rRegI dst, rRegP src)
 6828 %{
 6829   match(Set dst (ConvL2I (CastP2X src)));
 6830 
 6831   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6832   ins_encode %{

11023   effect(DEF dst, USE src);
11024   ins_cost(100);
11025   format %{ "movd    $dst,$src\t# MoveI2F" %}
11026   ins_encode %{
11027     __ movdl($dst$$XMMRegister, $src$$Register);
11028   %}
11029   ins_pipe( pipe_slow );
11030 %}
11031 
// Bitwise move of a long GPR into a double XMM register (MoveL2D),
// implemented with movdq (64-bit GPR -> XMM transfer).
11032 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11033   match(Set dst (MoveL2D src));
11034   effect(DEF dst, USE src);
11035   ins_cost(100);
11036   format %{ "movd    $dst,$src\t# MoveL2D" %}
11037   ins_encode %{
11038      __ movdq($dst$$XMMRegister, $src$$Register);
11039   %}
11040   ins_pipe( pipe_slow );
11041 %}
11042 

11043 // Fast clearing of an array
11044 // Small ClearArray non-AVX512.
11045 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11046                   Universe dummy, rFlagsReg cr)
11047 %{
11048   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11049   match(Set dummy (ClearArray cnt base));
11050   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































11051 
11052   format %{ $$template
11053     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11054     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11055     $$emit$$"jg      LARGE\n\t"
11056     $$emit$$"dec     rcx\n\t"
11057     $$emit$$"js      DONE\t# Zero length\n\t"
11058     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11059     $$emit$$"dec     rcx\n\t"
11060     $$emit$$"jge     LOOP\n\t"
11061     $$emit$$"jmp     DONE\n\t"
11062     $$emit$$"# LARGE:\n\t"
11063     if (UseFastStosb) {
11064        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11065        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11066     } else if (UseXMMForObjInit) {
11067        $$emit$$"mov     rdi,rax\n\t"
11068        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11069        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11070        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11078        $$emit$$"jl      L_tail\n\t"
11079        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11080        $$emit$$"add     0x20,rax\n\t"
11081        $$emit$$"sub     0x4,rcx\n\t"
11082        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11083        $$emit$$"add     0x4,rcx\n\t"
11084        $$emit$$"jle     L_end\n\t"
11085        $$emit$$"dec     rcx\n\t"
11086        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11087        $$emit$$"vmovq   xmm0,(rax)\n\t"
11088        $$emit$$"add     0x8,rax\n\t"
11089        $$emit$$"dec     rcx\n\t"
11090        $$emit$$"jge     L_sloop\n\t"
11091        $$emit$$"# L_end:\n\t"
11092     } else {
11093        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11094     }
11095     $$emit$$"# DONE"
11096   %}
11097   ins_encode %{
11098     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11099                  $tmp$$XMMRegister, false, knoreg);
11100   %}
11101   ins_pipe(pipe_slow);
11102 %}
11103 
11104 // Small ClearArray AVX512 non-constant length.
11105 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11106                        Universe dummy, rFlagsReg cr)
11107 %{
11108   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11109   match(Set dummy (ClearArray cnt base));
11110   ins_cost(125);
11111   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11112 
11113   format %{ $$template
11114     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11115     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11116     $$emit$$"jg      LARGE\n\t"
11117     $$emit$$"dec     rcx\n\t"
11118     $$emit$$"js      DONE\t# Zero length\n\t"
11119     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11120     $$emit$$"dec     rcx\n\t"
11121     $$emit$$"jge     LOOP\n\t"
11122     $$emit$$"jmp     DONE\n\t"
11123     $$emit$$"# LARGE:\n\t"
11124     if (UseFastStosb) {
11125        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11126        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11127     } else if (UseXMMForObjInit) {
11128        $$emit$$"mov     rdi,rax\n\t"
11129        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11130        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11131        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11139        $$emit$$"jl      L_tail\n\t"
11140        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11141        $$emit$$"add     0x20,rax\n\t"
11142        $$emit$$"sub     0x4,rcx\n\t"
11143        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11144        $$emit$$"add     0x4,rcx\n\t"
11145        $$emit$$"jle     L_end\n\t"
11146        $$emit$$"dec     rcx\n\t"
11147        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11148        $$emit$$"vmovq   xmm0,(rax)\n\t"
11149        $$emit$$"add     0x8,rax\n\t"
11150        $$emit$$"dec     rcx\n\t"
11151        $$emit$$"jge     L_sloop\n\t"
11152        $$emit$$"# L_end:\n\t"
11153     } else {
11154        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11155     }
11156     $$emit$$"# DONE"
11157   %}
11158   ins_encode %{
11159     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11160                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11161   %}
11162   ins_pipe(pipe_slow);
11163 %}
11164 
11165 // Large ClearArray non-AVX512.
// Large ClearArray for non-AVX512 targets (UseAVX <= 2): zero rcx
// doublewords starting at rdi via clear_mem (rep stosb/stosq or the
// XMM/YMM object-init loop, as the format template illustrates).
11166 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11167                         Universe dummy, rFlagsReg cr)
11168 %{
11169   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
11170   match(Set dummy (ClearArray cnt base));
11171   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
















































. 
11215 
11216 // Large ClearArray AVX512.
// Large ClearArray for AVX512 targets (UseAVX > 2): same clear_mem call
// as rep_stos_large but passes a real opmask temp ($ktmp) instead of
// knoreg, and uses a legacy-encodable vector temp (legRegD).
11217 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11218                              Universe dummy, rFlagsReg cr)
11219 %{
11220   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11221   match(Set dummy (ClearArray cnt base));
11222   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11223 
11224   format %{ $$template
11225     if (UseFastStosb) {
11226        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11227        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11228        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11229     } else if (UseXMMForObjInit) {
11230        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11231        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11232        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11233        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11234        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11235        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11236        $$emit$$"add     0x40,rax\n\t"
11237        $$emit$$"# L_zero_64_bytes:\n\t"
11238        $$emit$$"sub     0x8,rcx\n\t"
11239        $$emit$$"jge     L_loop\n\t"
11240        $$emit$$"add     0x4,rcx\n\t"
11241        $$emit$$"jl      L_tail\n\t"
11242        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11243        $$emit$$"add     0x20,rax\n\t"
11244        $$emit$$"sub     0x4,rcx\n\t"
11245        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11246        $$emit$$"add     0x4,rcx\n\t"
11247        $$emit$$"jle     L_end\n\t"
11248        $$emit$$"dec     rcx\n\t"
11249        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11250        $$emit$$"vmovq   xmm0,(rax)\n\t"
11251        $$emit$$"add     0x8,rax\n\t"
11252        $$emit$$"dec     rcx\n\t"
11253        $$emit$$"jge     L_sloop\n\t"
11254        $$emit$$"# L_end:\n\t"
11255     } else {
11256        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11257        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11258     }
11259   %}
11260   ins_encode %{
11261     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11262                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11263   %}
11264   ins_pipe(pipe_slow);
11265 %}
11266 
11267 // Small ClearArray AVX512 constant length.
// Small ClearArray with a compile-time-constant length on AVX512VLBW
// targets: passes the immediate count ($cnt$$constant) to the
// constant-length clear_mem overload.  Cheapest variant (ins_cost 100).
11268 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
11269 %{
11270   predicate(!((ClearArrayNode*)n)->is_large() &&
11271               ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11272   match(Set dummy (ClearArray cnt base));
11273   ins_cost(100);
11274   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11275   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11276   ins_encode %{
11277    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11278   %}
11279   ins_pipe(pipe_slow);
11280 %}
11281 
// StrComp for two Latin-1 (LL-encoded) byte arrays on pre-AVX512VLBW
// hardware; result in rax, all string/count registers consumed, one XMM
// temp clobbered.  Passes knoreg since no opmask register is available.
11282 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11283                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11284 %{
11285   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11286   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11287   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11288 
11289   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11290   ins_encode %{
11291     __ string_compare($str1$$Register, $str2$$Register,
11292                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11293                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11294   %}
11295   ins_pipe( pipe_slow );
11296 %}
11297 

13062 
13063   ins_cost(300);
13064   format %{ "call_leaf,vector " %}
13065   ins_encode(Java_To_Runtime(meth));
13066   ins_pipe(pipe_slow);
13067 %}
13068 
13069 //
// Direct native call: clears AVX state first, then uses the common
// Java_To_Runtime call encoding.
13070 instruct CallNativeDirect(method meth)
13071 %{
13072   match(CallNative);
13073   effect(USE meth);
13074 
13075   ins_cost(300);
13076   format %{ "call_native " %}
13077   ins_encode(clear_avx, Java_To_Runtime(meth));
13078   ins_pipe(pipe_slow);
13079 %}
13080 
13081 // Call runtime without safepoint















// Leaf runtime call with no FP arguments and no safepoint: same
// clear_avx + Java_To_Runtime encoding as the other runtime calls.
13082 instruct CallLeafNoFPDirect(method meth)
13083 %{

13084   match(CallLeafNoFP);
13085   effect(USE meth);
13086 
13087   ins_cost(300);
13088   format %{ "call_leaf_nofp,runtime " %}
13089   ins_encode(clear_avx, Java_To_Runtime(meth));
13090   ins_pipe(pipe_slow);
13091 %}
13092 
13093 // Return Instruction
13094 // Remove the return address & jump to it.
13095 // Notice: We always emit a nop after a ret to make sure there is room
13096 // for safepoint patching
13097 instruct Ret()
13098 %{
13099   match(Return);
13100 
13101   format %{ "ret" %}
13102   ins_encode %{
13103     __ ret(0);

  451 }
  452 
  453 // !!!!! Special hack to get all types of calls to specify the byte offset
  454 //       from the start of the call to the point where the return address
  455 //       will point.
  // (new version — unchanged from the old side) 5-byte direct call plus
  // the optional AVX-clearing prefix.
  456 int MachCallStaticJavaNode::ret_addr_offset()
  457 {
  458   int offset = 5; // 5 bytes from start of call to where return address points
  459   offset += clear_avx_size();
  460   return offset;
  461 }
  462 
  // (new version — unchanged) 15-byte inline-cache call sequence plus the
  // optional AVX-clearing prefix.
  463 int MachCallDynamicJavaNode::ret_addr_offset()
  464 {
  465   int offset = 15; // 15 bytes from start of call to where return address points
  466   offset += clear_avx_size();
  467   return offset;
  468 }
  469 
  // (new version) A NULL _entry_point marks the indirect leaf call
  // (CallLeafNoFPInDirect), which is a bare 3-byte callq through a
  // register; otherwise the 13-byte movq/callq pair as before, with the
  // AVX-clearing prefix added for all but vector leaf calls.
  470 int MachCallRuntimeNode::ret_addr_offset() {
  471   if (_entry_point == NULL) {
  472     // CallLeafNoFPInDirect
  473     return 3; // callq (register)
  474   }
  475   int offset = 13; // movq r10,#addr; callq (r10)
  476   if (this->ideal_Opcode() != Op_CallLeafVector) {
  477     offset += clear_avx_size();
  478   }
  479   return offset;
  480 }
  481 
  // (new version — unchanged) 13-byte movq/callq-through-r10 plus the
  // optional AVX-clearing prefix.
  482 int MachCallNativeNode::ret_addr_offset() {
  483   int offset = 13; // movq r10,#addr; callq (r10)
  484   offset += clear_avx_size();
  485   return offset;
  486 }
  487 
  488 //
  489 // Compute padding required for nodes which need alignment
  490 //
  491 
  492 // The address of the call instruction needs to be 4-byte aligned to
  493 // ensure that it does not span a cache line so that it can be patched.
  // (new version — unchanged) pad so the call's 4-byte displacement is
  // 4-byte aligned and patchable without spanning a cache line.
  494 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  495 {
  496   current_offset += clear_avx_size(); // skip vzeroupper
  497   current_offset += 1; // skip call opcode byte
  498   return align_up(current_offset, alignment_required()) - current_offset;
  499 }
  500 
  501 // The address of the call instruction needs to be 4-byte aligned to
  502 // ensure that it does not span a cache line so that it can be patched.
  503 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  504 {
  505   current_offset += clear_avx_size(); // skip vzeroupper
  506   current_offset += 11; // skip movq instruction + call opcode byte
  507   return align_up(current_offset, alignment_required()) - current_offset;

  876     st->print("# stack alignment check");
  877 #endif
  878   }
  879   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  880     st->print("\n\t");
  881     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
  882     st->print("\n\t");
  883     st->print("je      fast_entry\t");
  884     st->print("\n\t");
  885     st->print("call    #nmethod_entry_barrier_stub\t");
  886     st->print("\n\tfast_entry:");
  887   }
  888   st->cr();
  889 }
  890 #endif
  891 
  // (new version) Prolog emission now delegates frame setup to
  // verified_entry(C), binds the node's _verified_entry label, and emits
  // the nmethod entry barrier inline for non-stub compilations.
  892 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  893   Compile* C = ra_->C;
  894   MacroAssembler _masm(&cbuf);
  895 



  896   if (C->clinit_barrier_on_entry()) {
  897     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  898     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  899 
  900     Label L_skip_barrier;
  901     Register klass = rscratch1;
  902 
  903     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  904     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  905 
  906     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  907 
  908     __ bind(L_skip_barrier);
  909   }
  910 
  911   __ verified_entry(C);
  912   __ bind(*_verified_entry);
  913 
  914   // Stub compilations skip the nmethod entry barrier.
  914   if (C->stub_function() == NULL) {
  915     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  916     bs->nmethod_entry_barrier(&_masm);
  917   }
  918 
  919   C->output()->set_frame_complete(cbuf.insts_size());
  920 
  921   if (C->has_mach_constant_base_node()) {
  922     // NOTE: We set the table base offset here because users might be
  923     // emitted before MachConstantBaseNode.
  924     ConstantTable& constant_table = C->output()->constant_table();
  925     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  926   }
  927 }
  928 






  929 int MachPrologNode::reloc() const
  930 {
  931   return 0; // a large enough number
  932 }
  933 
  934 //=============================================================================
  935 #ifndef PRODUCT
  936 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  937 {
  938   Compile* C = ra_->C;
  939   if (generate_vzeroupper(C)) {
  940     st->print("vzeroupper");
  941     st->cr(); st->print("\t");
  942   }
  943 
  944   int framesize = C->output()->frame_size_in_bytes();
  945   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  946   // Remove word for return adr already pushed
  947   // and RBP
  948   framesize -= 2*wordSize;

  956   if (do_polling() && C->is_method_compilation()) {
  957     st->print("\t");
  958     st->print_cr("cmpq     rsp, poll_offset[r15_thread] \n\t"
  959                  "ja       #safepoint_stub\t"
  960                  "# Safepoint: poll for GC");
  961   }
  962 }
  963 #endif
  964 
// Emit the method epilog: optionally clear upper YMM bits, tear down the
// frame, perform the reserved-stack check, and emit the return safepoint
// poll for normal method compilations.
void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    MacroAssembler _masm(&cbuf);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Register this poll site so its slow-path stub gets generated later;
      // skipped while only measuring instruction sizes.
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
  995 






  996 int MachEpilogNode::reloc() const
  997 {
  998   return 2; // a large enough number
  999 }
 1000 
 1001 const Pipeline* MachEpilogNode::pipeline() const
 1002 {
 1003   return MachNode::pipeline_class();
 1004 }
 1005 
 1006 //=============================================================================
 1007 
// Register classes used below to classify the source/destination of a
// spill/copy so the right move encoding can be chosen.
enum RC {
  rc_bad,    // not a register (no valid class)
  rc_int,    // general-purpose (integer) register
  rc_kreg,   // AVX-512 opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
 1015 

 1608     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1609     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1610     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1611     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1612     emit_d32(cbuf, offset);
 1613   } else {
 1614     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1615     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1616     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1617     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1618     emit_d8(cbuf, offset);
 1619   }
 1620 }
 1621 
 1622 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1623 {
 1624   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1625   return (offset < 0x80) ? 5 : 8; // REX
 1626 }
 1627 
 1628 //=============================================================================
 1629 #ifndef PRODUCT
 1630 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1631 {
 1632   st->print_cr("MachVEPNode");
 1633 }
 1634 #endif
 1635 
 1636 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1637 {
 1638   MacroAssembler _masm(&cbuf);
 1639   if (!_verified) {
 1640     uint insts_size = cbuf.insts_size();
 1641     if (UseCompressedClassPointers) {
 1642       __ load_klass(rscratch1, j_rarg0, rscratch2);
 1643       __ cmpptr(rax, rscratch1);
 1644     } else {
 1645       __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1646     }
 1647     __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1648   } else {
 1649     // Unpack inline type args passed as oop and then jump to
 1650     // the verified entry point (skipping the unverified entry).
 1651     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1652     // Emit code for verified entry and save increment for stack repair on return
 1653     __ verified_entry(ra_->C, sp_inc);
 1654     __ jmp(*_verified_entry);
 1655   }
 1656 }
 1657 
 1658 //=============================================================================
 1659 #ifndef PRODUCT
 1660 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1661 {
 1662   if (UseCompressedClassPointers) {
 1663     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1664     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1665     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1666   } else {
 1667     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1668                  "# Inline cache check");
 1669   }
 1670   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1671   st->print_cr("\tnop\t# nops to align entry point");
 1672 }
 1673 #endif
 1674 
 1675 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1676 {
 1677   MacroAssembler masm(&cbuf);

 1680     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1681     masm.cmpptr(rax, rscratch1);
 1682   } else {
 1683     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1684   }
 1685 
 1686   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1687 
 1688   /* WARNING these NOPs are critical so that verified entry point is properly
 1689      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1690   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1691   if (OptoBreakpoint) {
 1692     // Leave space for int3
 1693     nops_cnt -= 1;
 1694   }
 1695   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1696   if (nops_cnt > 0)
 1697     masm.nop(nops_cnt);
 1698 }
 1699 







 1700 //=============================================================================
 1701 
 1702 const bool Matcher::supports_vector_calling_convention(void) {
 1703   if (EnableVectorSupport && UseVectorStubs) {
 1704     return true;
 1705   }
 1706   return false;
 1707 }
 1708 
 1709 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1710   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1711   int lo = XMM0_num;
 1712   int hi = XMM0b_num;
 1713   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1714   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1715   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1716   return OptoRegPair(hi, lo);
 1717 }
 1718 
 1719 // Is this branch offset short enough that a short branch can be used?

 3957   %}
 3958 %}
 3959 
 3960 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index is known non-negative, so folding the
  // ConvI2L into the addressing mode cannot change the effective address.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 3976 
 3977 // Indirect Narrow Oop Operand
operand indCompressedOop(rRegN reg) %{
  // Address a compressed oop directly: R12 holds the heap base, and the
  // narrow oop is scaled by 8 (requires CompressedOops shift == 3).
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
 3992 
 3993 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" addressing
// without a base register, so R12 cannot be freed up even when CompressedOops::base() == NULL.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Like indCompressedOop but with an additional 32-bit displacement.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 4010 
 4011 // Indirect Memory Operand
 4012 operand indirectNarrow(rRegN reg)

 4315     equal(0x4, "e");
 4316     not_equal(0x5, "ne");
 4317     less(0x2, "b");
 4318     greater_equal(0x3, "nb");
 4319     less_equal(0x6, "be");
 4320     greater(0x7, "nbe");
 4321     overflow(0x0, "o");
 4322     no_overflow(0x1, "no");
 4323   %}
 4324 %}
 4325 
 4326 //----------OPERAND CLASSES----------------------------------------------------
 4327 // Operand Classes are groups of operands that are used as to simplify
 4328 // instruction definitions by not requiring the AD writer to specify separate
 4329 // instructions for every form of operand when the instruction accepts
 4330 // multiple operand types with the same basic encoding and format.  The classic
 4331 // case of this is memory operands.
 4332 
// "memory" groups every addressing-mode operand (direct, indexed, scaled,
// compressed-oop and narrow-oop variants) accepted by memory-form instructions.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4339 
 4340 //----------PIPELINE-----------------------------------------------------------
 4341 // Rules which define the behavior of the target architectures pipeline.
 4342 pipeline %{
 4343 
 4344 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 encoding)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
 4355 

 6808   format %{ "MEMBAR-storestore (empty encoding)" %}
 6809   ins_encode( );
 6810   ins_pipe(empty);
 6811 %}
 6812 
 6813 //----------Move Instructions--------------------------------------------------
 6814 
// Reinterpret a long's bits as a pointer (CastX2P).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Pure bit reinterpretation: the move is elided when the allocator
    // already placed src and dst in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6827 
// CastP2X applied to a narrow (compressed) pointer register: copy the raw
// bits into a long register without decoding.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6840 
// Reinterpret a pointer's bits as a long (CastP2X).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Pure bit reinterpretation; elided when src and dst coincide.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6853 
 6854 // Convert oop into int for vectors alignment masking
 6855 instruct convP2I(rRegI dst, rRegP src)
 6856 %{
 6857   match(Set dst (ConvL2I (CastP2X src)));
 6858 
 6859   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6860   ins_encode %{

11051   effect(DEF dst, USE src);
11052   ins_cost(100);
11053   format %{ "movd    $dst,$src\t# MoveI2F" %}
11054   ins_encode %{
11055     __ movdl($dst$$XMMRegister, $src$$Register);
11056   %}
11057   ins_pipe( pipe_slow );
11058 %}
11059 
// Move a long's raw bits into a double (XMM) register, register-to-register
// (MoveL2D, e.g. Double.longBitsToDouble).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11070 
11071 
11072 // Fast clearing of an array
11073 // Small ClearArray non-AVX512.
// ClearArray, runtime length, non-large node flavor, UseAVX <= 2.
// Fills [base, base + cnt*8) with the value in rax (val); cnt is in
// 8-byte words. The format template below mirrors MacroAssembler::clear_mem.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
11132 
// ClearArray, runtime length, non-large, word-copy-only flavor, UseAVX <= 2.
// Same as rep_stos but for nodes that must be cleared word-by-word
// (word_copy_only()), so the byte-granular rep stosb path is not offered.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=false, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
11188 
11189 // Small ClearArray AVX512 non-constant length.
11190 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11191                        Universe dummy, rFlagsReg cr)
11192 %{
11193   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11194   match(Set dummy (ClearArray (Binary cnt base) val));
11195   ins_cost(125);
11196   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11197 
11198   format %{ $$template
11199     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11200     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11201     $$emit$$"jg      LARGE\n\t"
11202     $$emit$$"dec     rcx\n\t"
11203     $$emit$$"js      DONE\t# Zero length\n\t"
11204     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11205     $$emit$$"dec     rcx\n\t"
11206     $$emit$$"jge     LOOP\n\t"
11207     $$emit$$"jmp     DONE\n\t"
11208     $$emit$$"# LARGE:\n\t"
11209     if (UseFastStosb) {
11210        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11211        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11212     } else if (UseXMMForObjInit) {
11213        $$emit$$"mov     rdi,rax\n\t"
11214        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11215        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11216        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11224        $$emit$$"jl      L_tail\n\t"
11225        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11226        $$emit$$"add     0x20,rax\n\t"
11227        $$emit$$"sub     0x4,rcx\n\t"
11228        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11229        $$emit$$"add     0x4,rcx\n\t"
11230        $$emit$$"jle     L_end\n\t"
11231        $$emit$$"dec     rcx\n\t"
11232        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11233        $$emit$$"vmovq   xmm0,(rax)\n\t"
11234        $$emit$$"add     0x8,rax\n\t"
11235        $$emit$$"dec     rcx\n\t"
11236        $$emit$$"jge     L_sloop\n\t"
11237        $$emit$$"# L_end:\n\t"
11238     } else {
11239        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11240     }
11241     $$emit$$"# DONE"
11242   %}
11243   ins_encode %{
11244     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11245                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
11246   %}
11247   ins_pipe(pipe_slow);
11248 %}
11249 
11250 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11251                                  Universe dummy, rFlagsReg cr)

11252 %{
11253   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11254   match(Set dummy (ClearArray (Binary cnt base) val));
11255   ins_cost(125);
11256   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11257 
11258   format %{ $$template
11259     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11260     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11261     $$emit$$"jg      LARGE\n\t"
11262     $$emit$$"dec     rcx\n\t"
11263     $$emit$$"js      DONE\t# Zero length\n\t"
11264     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11265     $$emit$$"dec     rcx\n\t"
11266     $$emit$$"jge     LOOP\n\t"
11267     $$emit$$"jmp     DONE\n\t"
11268     $$emit$$"# LARGE:\n\t"
11269     if (UseFastStosb) {
11270        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11271        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11272     } else if (UseXMMForObjInit) {
11273        $$emit$$"mov     rdi,rax\n\t"
11274        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11275        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11276        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

11284        $$emit$$"jl      L_tail\n\t"
11285        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11286        $$emit$$"add     0x20,rax\n\t"
11287        $$emit$$"sub     0x4,rcx\n\t"
11288        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11289        $$emit$$"add     0x4,rcx\n\t"
11290        $$emit$$"jle     L_end\n\t"
11291        $$emit$$"dec     rcx\n\t"
11292        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11293        $$emit$$"vmovq   xmm0,(rax)\n\t"
11294        $$emit$$"add     0x8,rax\n\t"
11295        $$emit$$"dec     rcx\n\t"
11296        $$emit$$"jge     L_sloop\n\t"
11297        $$emit$$"# L_end:\n\t"
11298     } else {
11299        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11300     }
11301     $$emit$$"# DONE"
11302   %}
11303   ins_encode %{
11304     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11305                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
11306   %}
11307   ins_pipe(pipe_slow);
11308 %}
11309 
11310 // Large ClearArray non-AVX512.
// ClearArray, large node flavor, UseAVX <= 2: the node is known large, so
// the short-length fast path is skipped entirely.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
11359 
// ClearArray, large, word-copy-only flavor, UseAVX <= 2: like
// rep_stos_large but without the byte-granular rep stosb option.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
11405 
11406 // Large ClearArray AVX512.
// ClearArray, large node flavor, AVX-512 (UseAVX > 2): same structure as
// rep_stos_large but with an opmask temp register for clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11456 
// ClearArray, large, word-copy-only flavor on AVX-512 (UseAVX > 2).
// NOTE(review): unlike the non-EVEX word-copy variants, the format template
// here still shows a UseFastStosb branch — confirm it matches what
// clear_mem actually emits for word_copy_only.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large=true, word_copy_only=true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11506 
11507 // Small ClearArray AVX512 constant length.
// 'cnt' is an immediate (immL), so the element count is known at compile
// time and is passed to clear_mem as $cnt$$constant rather than a register.
// Requires AVX-512 VL+BW (predicate below); ktmp provides the opmask
// temporary used by the masked-store fill.
11508 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
11509 %{
11510   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
11511             ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11512   match(Set dummy (ClearArray (Binary cnt base) val));
  // Low cost so the matcher presumably prefers this constant-length form
  // over the variable-count rules when both could apply -- confirm against
  // the costs of the sibling ClearArray rules.
11513   ins_cost(100);
  // Note: 'base' is a generic rRegP here (not pinned to rdi) and is not killed.
11514   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
11515   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11516   ins_encode %{
11517     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11518   %}
11519   ins_pipe(pipe_slow);
11520 %}
11521 
// String compare, both operands Latin-1 encoded (StrIntrinsicNode::LL),
// for CPUs WITHOUT AVX-512 VL+BW: 'knoreg' is passed because this path
// uses no opmask register.  Comparison result (negative/zero/positive)
// lands in rax; all four input registers are consumed.
11522 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11523                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11524 %{
11525   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11526   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11527   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11528 
11529   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11530   ins_encode %{
11531     __ string_compare($str1$$Register, $str2$$Register,
11532                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11533                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11534   %}
11535   ins_pipe( pipe_slow );
11536 %}
11537 

13302 
13303   ins_cost(300);
13304   format %{ "call_leaf,vector " %}
13305   ins_encode(Java_To_Runtime(meth));
13306   ins_pipe(pipe_slow);
13307 %}
13308 
13309 // Direct call to a native entry point (CallNative / MachCallNativeNode).
// clear_avx emits a vzeroupper when AVX state may be dirty (see the
// clear_avx_size() accounting in MachCallNativeNode::ret_addr_offset),
// then Java_To_Runtime emits the 13-byte "movq r10,#addr; callq (r10)"
// sequence that ret_addr_offset assumes.
13310 instruct CallNativeDirect(method meth)
13311 %{
13312   match(CallNative);
13313   effect(USE meth);
13314 
13315   ins_cost(300);
13316   format %{ "call_native " %}
13317   ins_encode(clear_avx, Java_To_Runtime(meth));
13318   ins_pipe(pipe_slow);
13319 %}
13320 
13321 // Call runtime without safepoint
13322 // entry point is null, target holds the address to call
// Indirect leaf runtime call: the callee address is not known at compile
// time (entry_point() == NULL), so it is taken from the 'target' register
// and called with a plain register-indirect call.
// NOTE(review): unlike the direct leaf-call rules, no clear_avx/vzeroupper
// is emitted on this path -- presumably the targets reached indirectly do
// not need it; confirm before relying on AVX state at the callee.
13323 instruct CallLeafNoFPInDirect(rRegP target)
13324 %{
13325   predicate(n->as_Call()->entry_point() == NULL);
13326   match(CallLeafNoFP target);
13327 
13328   ins_cost(300);
13329   format %{ "call_leaf_nofp,runtime indirect " %}
13330   ins_encode %{
13331      __ call($target$$Register);
13332   %}
13333 
13334   ins_pipe(pipe_slow);
13335 %}
13336 
// Direct leaf runtime call, no safepoint: used when the entry point IS
// known at compile time (entry_point() != NULL), complementing the
// indirect rule which handles the NULL-entry-point case.  clear_avx emits
// a vzeroupper when needed, then Java_To_Runtime emits the call.
13337 instruct CallLeafNoFPDirect(method meth)
13338 %{
13339   predicate(n->as_Call()->entry_point() != NULL);
13340   match(CallLeafNoFP);
13341   effect(USE meth);
13342 
13343   ins_cost(300);
13344   format %{ "call_leaf_nofp,runtime " %}
13345   ins_encode(clear_avx, Java_To_Runtime(meth));
13346   ins_pipe(pipe_slow);
13347 %}
13348 
13349 // Return Instruction
13350 // Remove the return address & jump to it.
13351 // Notice: We always emit a nop after a ret to make sure there is room
13352 // for safepoint patching
13353 instruct Ret()
13354 %{
13355   match(Return);
13356 
13357   format %{ "ret" %}
13358   ins_encode %{
13359     __ ret(0);
< prev index next >