< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page
*** 488,16 ***
--- 488,21 ---
    offset += clear_avx_size();
    return offset;
  }
  
  int MachCallRuntimeNode::ret_addr_offset() {
    // Size in bytes of the call sequence emitted for this node, as itemised by
    // the per-case comments below.
+   if (_entry_point == nullptr) {
+     // CallLeafNoFPInDirect: no entry point is recorded on the node, so the target
      // is called through a register instead of the movq/callq pair used below.
      // NOTE(review): 3 bytes matches a REX-prefixed register call; confirm the
      // target register always requires that prefix.
+     return 3; // callq (register)
+   }
    int offset = 13; // movq r10,#addr; callq (r10)
    if (this->ideal_Opcode() != Op_CallLeafVector) {
      // Every call except CallLeafVector additionally accounts for clear_avx_size() bytes.
      offset += clear_avx_size();
    }
    return offset;
  }
+ 
  //
  // Compute padding required for nodes which need alignment
  //
  
  // The address of the call instruction needs to be 4-byte aligned to

*** 704,11 ***
  #ifdef ASSERT
      st->print("\n\t");
      st->print("# stack alignment check");
  #endif
    }
!   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
      st->print("\n\t");
      st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
      st->print("\n\t");
      st->print("je      fast_entry\t");
      st->print("\n\t");
--- 709,11 ---
  #ifdef ASSERT
      st->print("\n\t");
      st->print("# stack alignment check");
  #endif
    }
!   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
      st->print("\n\t");
      st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
      st->print("\n\t");
      st->print("je      fast_entry\t");
      st->print("\n\t");

*** 721,29 ***
  
  void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
    Compile* C = ra_->C;
    C2_MacroAssembler _masm(&cbuf);
  
!   int framesize = C->output()->frame_size_in_bytes();
-   int bangsize = C->output()->bang_size_in_bytes();
- 
-   if (C->clinit_barrier_on_entry()) {
-     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
-     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
- 
-     Label L_skip_barrier;
-     Register klass = rscratch1;
- 
-     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
-     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
- 
-     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  
!     __ bind(L_skip_barrier);
    }
  
!   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
  
    C->output()->set_frame_complete(cbuf.insts_size());
  
    if (C->has_mach_constant_base_node()) {
      // NOTE: We set the table base offset here because users might be
--- 726,19 ---
  
  void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
    Compile* C = ra_->C;
    C2_MacroAssembler _masm(&cbuf);
  
!   __ verified_entry(C);
  
!   if (ra_->C->stub_function() == nullptr) {
+     __ entry_barrier();
    }
  
!   if (!Compile::current()->output()->in_scratch_emit_size()) {
+     __ bind(*_verified_entry);
+   }
  
    C->output()->set_frame_complete(cbuf.insts_size());
  
    if (C->has_mach_constant_base_node()) {
      // NOTE: We set the table base offset here because users might be

*** 751,16 ***
      ConstantTable& constant_table = C->output()->constant_table();
      constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
    }
  }
  
- uint MachPrologNode::size(PhaseRegAlloc* ra_) const
- {
-   return MachNode::size(ra_); // too many variables; just compute it
-                               // the hard way
- }
- 
  // Relocation count reported for the prolog node; fixed at 0.
  // NOTE(review): presumably an upper bound on the relocation entries this node
  // emits -- confirm against the base-class contract.
  int MachPrologNode::reloc() const
  {
    return 0; // a large enough number
  }
  
--- 746,10 ---

*** 804,23 ***
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
  
!   int framesize = C->output()->frame_size_in_bytes();
!   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
!   // Remove word for return adr already pushed
-   // and RBP
-   framesize -= 2*wordSize;
- 
-   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
- 
-   if (framesize) {
-     __ addq(rsp, framesize);
-   }
- 
-   __ popq(rbp);
  
    if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
      __ reserved_stack_check();
    }
  
--- 793,13 ---
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
  
!   // Subtract two words to account for return address and rbp
!   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
!   __ remove_frame(initial_framesize, C->needs_stack_repair());
  
    if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
      __ reserved_stack_check();
    }
  

*** 836,16 ***
      __ relocate(relocInfo::poll_return_type);
      __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
    }
  }
  
- uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
- {
-   return MachNode::size(ra_); // too many variables; just compute it
-                               // the hard way
- }
- 
  // Relocation count reported for the epilog node; fixed at 2.
  // NOTE(review): presumably an upper bound on the relocation entries this node
  // emits -- confirm against the base-class contract.
  int MachEpilogNode::reloc() const
  {
    return 2; // a large enough number
  }
  
--- 815,10 ---

*** 968,11 ***
  
  uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                         PhaseRegAlloc* ra_,
                                         bool do_size,
                                         outputStream* st) const {
!   assert(cbuf != NULL || st  != NULL, "sanity");
    // Get registers to move
    OptoReg::Name src_second = ra_->get_reg_second(in(1));
    OptoReg::Name src_first = ra_->get_reg_first(in(1));
    OptoReg::Name dst_second = ra_->get_reg_second(this);
    OptoReg::Name dst_first = ra_->get_reg_first(this);
--- 941,11 ---
  
  uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                         PhaseRegAlloc* ra_,
                                         bool do_size,
                                         outputStream* st) const {
!   assert(cbuf != nullptr || st  != nullptr, "sanity");
    // Get registers to move
    OptoReg::Name src_second = ra_->get_reg_second(in(1));
    OptoReg::Name src_first = ra_->get_reg_first(in(1));
    OptoReg::Name dst_second = ra_->get_reg_second(this);
    OptoReg::Name dst_first = ra_->get_reg_first(this);

*** 987,11 ***
  
    if (src_first == dst_first && src_second == dst_second) {
      // Self copy, no move
      return 0;
    }
!   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
      uint ireg = ideal_reg();
      assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
      assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
      if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
        // mem -> mem
--- 960,11 ---
  
    if (src_first == dst_first && src_second == dst_second) {
      // Self copy, no move
      return 0;
    }
!   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
      uint ireg = ideal_reg();
      assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
      assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
      if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
        // mem -> mem

*** 1426,16 ***
    return 0;
  }
  
  #ifndef PRODUCT
  void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
!   implementation(NULL, ra_, false, st);
  }
  #endif
  
  void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
!   implementation(&cbuf, ra_, false, NULL);
  }
  
  uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
    return MachNode::size(ra_);
  }
--- 1399,16 ---
    return 0;
  }
  
  #ifndef PRODUCT
  // Prints this spill copy to `st` by running implementation() with no code
  // buffer, so only the text stream is supplied.
  void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
!   implementation(nullptr, ra_, false, st);
  }
  #endif
  
  // Emits this spill copy into `cbuf` by running implementation() with no
  // output stream, so only the code buffer is supplied.
  void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
!   implementation(&cbuf, ra_, false, nullptr);
  }
  
  // Size of this spill copy; delegates to the generic MachNode::size()
  // computation rather than calculating it here.
  uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
    return MachNode::size(ra_);
  }

*** 1464,10 ***
--- 1437,59 ---
  {
    int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
    return (offset < 0x80) ? 5 : 8; // REX
  }
  
+ //=============================================================================
+ #ifndef PRODUCT
+ void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+ {
    // Debug-only printer (compiled under #ifndef PRODUCT): writes just the node
    // name followed by a newline; ra_ is not consulted.
+   st->print_cr("MachVEPNode");
+ }
+ #endif
+ 
+ void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+ {
    // Emits one of two entry sequences, selected by _verified, and then pads the
    // code emitted by this node with nops up to a multiple of 4 bytes.
+   C2_MacroAssembler _masm(&cbuf);
+   uint insts_size = cbuf.insts_size(); // start offset; used below to measure what this node emitted
+   if (!_verified) {
      // Unverified variant: compare rax against the klass of the object in
      // j_rarg0 and jump to the IC miss stub when they differ.
+     if (UseCompressedClassPointers) {
+       __ load_klass(rscratch1, j_rarg0, rscratch2);
+       __ cmpptr(rax, rscratch1);
+     } else {
+       __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
+     }
+     __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+   } else {
+     // TODO 8284443 Avoid creation of temporary frame
+     if (ra_->C->stub_function() == nullptr) {
        // Not a stub: set up a frame (stack increment 0), run the entry barrier,
        // then remove the frame again before the arguments are unpacked.
+       __ verified_entry(ra_->C, 0);
+       __ entry_barrier();
+       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize; // two words less than the full frame size
+       __ remove_frame(initial_framesize, false);
+     }
+     // Unpack inline type args passed as oop and then jump to
+     // the verified entry point (skipping the unverified entry).
+     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
+     // Emit code for verified entry and save increment for stack repair on return
+     __ verified_entry(ra_->C, sp_inc);
+     if (Compile::current()->output()->in_scratch_emit_size()) {
        // Scratch size-measuring pass: jump to a throwaway local label instead of
        // *_verified_entry (presumably because the real label is only bound during
        // the final emit -- confirm).
+       Label dummy_verified_entry;
+       __ jmp(dummy_verified_entry);
+     } else {
+       __ jmp(*_verified_entry);
+     }
+   }
+   /* WARNING these NOPs are critical so that verified entry point is properly
+      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
    // nops_cnt is the number of bytes needed to round the size of the code emitted
    // above (measured from insts_size) up to the next multiple of 4.
+   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
+   nops_cnt &= 0x3; // Do not add nops if code is aligned.
+   if (nops_cnt > 0) {
+     __ nop(nops_cnt);
+   }
+ }
+ 
  //=============================================================================
  #ifndef PRODUCT
  void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  {
    if (UseCompressedClassPointers) {

*** 1506,17 ***
    nops_cnt &= 0x3; // Do not add nops if code is aligned.
    if (nops_cnt > 0)
      masm.nop(nops_cnt);
  }
  
- uint MachUEPNode::size(PhaseRegAlloc* ra_) const
- {
-   return MachNode::size(ra_); // too many variables; just compute it
-                               // the hard way
- }
- 
- 
  //=============================================================================
  
  bool Matcher::supports_vector_calling_convention(void) {
    if (EnableVectorSupport && UseVectorStubs) {
      return true;
--- 1528,10 ---

*** 1782,11 ***
      Label miss;
      const bool set_cond_codes = true;
  
      MacroAssembler _masm(&cbuf);
      __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
!                                      NULL, &miss,
                                       /*set_cond_codes:*/ true);
      if ($primary) {
        __ xorptr(Rrdi, Rrdi);
      }
      __ bind(miss);
--- 1797,11 ---
      Label miss;
      const bool set_cond_codes = true;
  
      MacroAssembler _masm(&cbuf);
      __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
!                                      nullptr, &miss,
                                       /*set_cond_codes:*/ true);
      if ($primary) {
        __ xorptr(Rrdi, Rrdi);
      }
      __ bind(miss);

*** 1839,11 ***
          // a stub to the interpreter.
          cbuf.shared_stub_to_interp_for(_method, call_offset);
        } else {
          // Emit stubs for static call.
          address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
!         if (stub == NULL) {
            ciEnv::current()->record_failure("CodeCache is full");
            return;
          }
        }
      }
--- 1854,11 ---
          // a stub to the interpreter.
          cbuf.shared_stub_to_interp_for(_method, call_offset);
        } else {
          // Emit stubs for static call.
          address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
!         if (stub == nullptr) {
            ciEnv::current()->record_failure("CodeCache is full");
            return;
          }
        }
      }

*** 2177,11 ***
    op_cost(10);
    format %{ %}
    interface(CONST_INTER);
  %}
  
! // NULL Pointer Immediate
  operand immP0()
  %{
    predicate(n->get_ptr() == 0);
    match(ConP);
  
--- 2192,11 ---
    op_cost(10);
    format %{ %}
    interface(CONST_INTER);
  %}
  
! // Null Pointer Immediate
  operand immP0()
  %{
    predicate(n->get_ptr() == 0);
    match(ConP);
  

*** 2205,11 ***
    op_cost(10);
    format %{ %}
    interface(CONST_INTER);
  %}
  
! // NULL Pointer Immediate
  operand immN0() %{
    predicate(n->get_narrowcon() == 0);
    match(ConN);
  
    op_cost(5);
--- 2220,11 ---
    op_cost(10);
    format %{ %}
    interface(CONST_INTER);
  %}
  
! // Null Pointer Immediate
  operand immN0() %{
    predicate(n->get_narrowcon() == 0);
    match(ConN);
  
    op_cost(5);

*** 3117,13 ***
      scale($scale);
      disp($off);
    %}
  %}
  
  // Indirect Narrow Oop Plus Offset Operand
  // Note: x86 architecture doesn't support "scale * index + offset" without a base
! // we can't free r12 even with CompressedOops::base() == NULL.
  operand indCompressedOopOffset(rRegN reg, immL32 off) %{
    predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
    constraint(ALLOC_IN_RC(ptr_reg));
    match(AddP (DecodeN reg) off);
  
--- 3132,29 ---
      scale($scale);
      disp($off);
    %}
  %}
  
+ // Indirect Narrow Oop Operand
+ operand indCompressedOop(rRegN reg) %{
    // Memory operand that matches a bare DecodeN of a narrow-oop register and
    // addresses it as [R12 + reg << 3] with no displacement. Only selected when
    // compressed oops are enabled and their shift equals Address::times_8.
+   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
+   constraint(ALLOC_IN_RC(ptr_reg));
+   match(DecodeN reg);
+ 
+   op_cost(10);
+   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
+   interface(MEMORY_INTER) %{
+     base(0xc); // R12
+     index($reg); // the narrow oop itself is the index
+     scale(0x3); // the "<< 3" shown in the format string
+     disp(0x0); // no displacement
+   %}
+ %}
+ 
  // Indirect Narrow Oop Plus Offset Operand
  // Note: x86 architecture doesn't support "scale * index + offset" without a base
! // we can't free r12 even with CompressedOops::base() == nullptr.
  operand indCompressedOopOffset(rRegN reg, immL32 off) %{
    predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
    constraint(ALLOC_IN_RC(ptr_reg));
    match(AddP (DecodeN reg) off);
  

*** 3463,11 ***
  // multiple operand types with the same basic encoding and format.  The classic
  // case of this is memory operands.
  
  opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
!                indCompressedOopOffset,
                 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
                 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
                 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
  
  //----------PIPELINE-----------------------------------------------------------
--- 3494,11 ---
  // multiple operand types with the same basic encoding and format.  The classic
  // case of this is memory operands.
  
  opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
!                indCompressedOop, indCompressedOopOffset,
                 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
                 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
                 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
  
  //----------PIPELINE-----------------------------------------------------------

*** 4952,11 ***
  %}
  
  instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
    match(Set dst src);
    effect(KILL cr);
!   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
    ins_encode %{
      __ xorq($dst$$Register, $dst$$Register);
    %}
    ins_pipe(ialu_reg);
  %}
--- 4983,11 ---
  %}
  
  instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
    // Loads the compressed null constant: the encoding xors dst with itself, and
    // the flags register is declared killed.
    match(Set dst src);
    effect(KILL cr);
!   format %{ "xorq    $dst, $src\t# compressed null ptr" %}
    ins_encode %{
      __ xorq($dst$$Register, $dst$$Register);
    %}
    ins_pipe(ialu_reg);
  %}

*** 4966,11 ***
  
    ins_cost(125);
    format %{ "movl    $dst, $src\t# compressed ptr" %}
    ins_encode %{
      address con = (address)$src$$constant;
!     if (con == NULL) {
        ShouldNotReachHere();
      } else {
        __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
      }
    %}
--- 4997,11 ---
  
    ins_cost(125);
    format %{ "movl    $dst, $src\t# compressed ptr" %}
    ins_encode %{
      address con = (address)$src$$constant;
!     if (con == nullptr) {
        ShouldNotReachHere();
      } else {
        __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
      }
    %}

*** 4982,11 ***
  
    ins_cost(125);
    format %{ "movl    $dst, $src\t# compressed klass ptr" %}
    ins_encode %{
      address con = (address)$src$$constant;
!     if (con == NULL) {
        ShouldNotReachHere();
      } else {
        __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
      }
    %}
--- 5013,11 ---
  
    ins_cost(125);
    format %{ "movl    $dst, $src\t# compressed klass ptr" %}
    ins_encode %{
      address con = (address)$src$$constant;
!     if (con == nullptr) {
        ShouldNotReachHere();
      } else {
        __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
      }
    %}

*** 5208,22 ***
    ins_pipe(ialu_mem_reg);
  %}
  
  instruct storeImmP0(memory mem, immP0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
    match(Set mem (StoreP mem zero));
  
    ins_cost(125); // XXX
    format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
    ins_encode %{
      __ movq($mem$$Address, r12);
    %}
    ins_pipe(ialu_mem_reg);
  %}
  
! // Store NULL Pointer, mark word, or other simple pointer constant.
  instruct storeImmP(memory mem, immP31 src)
  %{
    predicate(n->as_Store()->barrier_data() == 0);
    match(Set mem (StoreP mem src));
  
--- 5239,22 ---
    ins_pipe(ialu_mem_reg);
  %}
  
  instruct storeImmP0(memory mem, immP0 zero)
  %{
    // Stores a null pointer by writing R12 to memory. The predicate only admits
    // this rule when compressed oops are enabled with a null CompressedOops::base()
    // (the format string notes R12_heapbase==0) and the store carries no barrier data.
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
    match(Set mem (StoreP mem zero));
  
    ins_cost(125); // XXX
    format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
    ins_encode %{
      __ movq($mem$$Address, r12);
    %}
    ins_pipe(ialu_mem_reg);
  %}
  
! // Store null pointer, mark word, or other simple pointer constant.
  instruct storeImmP(memory mem, immP31 src)
  %{
    predicate(n->as_Store()->barrier_data() == 0);
    match(Set mem (StoreP mem src));
  

*** 5260,11 ***
    ins_pipe(ialu_mem_reg);
  %}
  
  instruct storeImmN0(memory mem, immN0 zero)
  %{
!   predicate(CompressedOops::base() == NULL);
    match(Set mem (StoreN mem zero));
  
    ins_cost(125); // XXX
    format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
    ins_encode %{
--- 5291,11 ---
    ins_pipe(ialu_mem_reg);
  %}
  
  instruct storeImmN0(memory mem, immN0 zero)
  %{
!   predicate(CompressedOops::base() == nullptr);
    match(Set mem (StoreN mem zero));
  
    ins_cost(125); // XXX
    format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
    ins_encode %{

*** 5279,11 ***
  
    ins_cost(150); // XXX
    format %{ "movl    $mem, $src\t# compressed ptr" %}
    ins_encode %{
      address con = (address)$src$$constant;
!     if (con == NULL) {
        __ movl($mem$$Address, 0);
      } else {
        __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
      }
    %}
--- 5310,11 ---
  
    ins_cost(150); // XXX
    format %{ "movl    $mem, $src\t# compressed ptr" %}
    ins_encode %{
      address con = (address)$src$$constant;
!     if (con == nullptr) {
        __ movl($mem$$Address, 0);
      } else {
        __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
      }
    %}

*** 5303,11 ***
  %}
  
  // Store Integer Immediate
  instruct storeImmI0(memory mem, immI_0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
    match(Set mem (StoreI mem zero));
  
    ins_cost(125); // XXX
    format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
    ins_encode %{
--- 5334,11 ---
  %}
  
  // Store Integer Immediate
  instruct storeImmI0(memory mem, immI_0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
    match(Set mem (StoreI mem zero));
  
    ins_cost(125); // XXX
    format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
    ins_encode %{

*** 5329,11 ***
  %}
  
  // Store Long Immediate
  instruct storeImmL0(memory mem, immL0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
    match(Set mem (StoreL mem zero));
  
    ins_cost(125); // XXX
    format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
    ins_encode %{
--- 5360,11 ---
  %}
  
  // Store Long Immediate
  instruct storeImmL0(memory mem, immL0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
    match(Set mem (StoreL mem zero));
  
    ins_cost(125); // XXX
    format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
    ins_encode %{

*** 5355,11 ***
  %}
  
  // Store Short/Char Immediate
  instruct storeImmC0(memory mem, immI_0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
    match(Set mem (StoreC mem zero));
  
    ins_cost(125); // XXX
    format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
    ins_encode %{
--- 5386,11 ---
  %}
  
  // Store Short/Char Immediate
  instruct storeImmC0(memory mem, immI_0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
    match(Set mem (StoreC mem zero));
  
    ins_cost(125); // XXX
    format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
    ins_encode %{

*** 5382,11 ***
  %}
  
  // Store Byte Immediate
  instruct storeImmB0(memory mem, immI_0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
    match(Set mem (StoreB mem zero));
  
    ins_cost(125); // XXX
    format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
    ins_encode %{
--- 5413,11 ---
  %}
  
  // Store Byte Immediate
  instruct storeImmB0(memory mem, immI_0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
    match(Set mem (StoreB mem zero));
  
    ins_cost(125); // XXX
    format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
    ins_encode %{

*** 5408,11 ***
  %}
  
  // Store CMS card-mark Immediate
  instruct storeImmCM0_reg(memory mem, immI_0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
    match(Set mem (StoreCM mem zero));
  
    ins_cost(125); // XXX
    format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
    ins_encode %{
--- 5439,11 ---
  %}
  
  // Store CMS card-mark Immediate
  instruct storeImmCM0_reg(memory mem, immI_0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
    match(Set mem (StoreCM mem zero));
  
    ins_cost(125); // XXX
    format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
    ins_encode %{

*** 5447,11 ***
  %}
  
  // Store immediate Float value (it is faster than store from XMM register)
  instruct storeF0(memory mem, immF0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
    match(Set mem (StoreF mem zero));
  
    ins_cost(25); // XXX
    format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
    ins_encode %{
--- 5478,11 ---
  %}
  
  // Store immediate Float value (it is faster than store from XMM register)
  instruct storeF0(memory mem, immF0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
    match(Set mem (StoreF mem zero));
  
    ins_cost(25); // XXX
    format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
    ins_encode %{

*** 5486,11 ***
  %}
  
  // Store immediate double 0.0 (it is faster than store from XMM register)
  instruct storeD0_imm(memory mem, immD0 src)
  %{
!   predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
    match(Set mem (StoreD mem src));
  
    ins_cost(50);
    format %{ "movq    $mem, $src\t# double 0." %}
    ins_encode %{
--- 5517,11 ---
  %}
  
  // Store immediate double 0.0 (it is faster than store from XMM register)
  instruct storeD0_imm(memory mem, immD0 src)
  %{
!   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
    match(Set mem (StoreD mem src));
  
    ins_cost(50);
    format %{ "movq    $mem, $src\t# double 0." %}
    ins_encode %{

*** 5499,11 ***
    ins_pipe(ialu_mem_imm);
  %}
  
  instruct storeD0(memory mem, immD0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
    match(Set mem (StoreD mem zero));
  
    ins_cost(25); // XXX
    format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
    ins_encode %{
--- 5530,11 ---
    ins_pipe(ialu_mem_imm);
  %}
  
  instruct storeD0(memory mem, immD0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
    match(Set mem (StoreD mem zero));
  
    ins_cost(25); // XXX
    format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
    ins_encode %{

*** 6046,10 ***
--- 6077,23 ---
      }
    %}
    ins_pipe(ialu_reg_reg); // XXX
  %}
  
+ instruct castN2X(rRegL dst, rRegN src)
+ %{
    // CastP2X rule whose source is a narrow-oop register (rRegN) rather than a
    // full pointer register. The register contents are copied to dst unchanged,
    // and nothing is emitted when both operands were assigned the same register.
    // NOTE(review): the format text still reads "ptr -> long", identical to
    // castP2X -- confirm that wording is intended for a narrow source.
+   match(Set dst (CastP2X src));
+ 
+   format %{ "movq    $dst, $src\t# ptr -> long" %}
+   ins_encode %{
+     if ($dst$$reg != $src$$reg) {
+       __ movptr($dst$$Register, $src$$Register);
+     }
+   %}
+   ins_pipe(ialu_reg_reg); // XXX
+ %}
+ 
  instruct castP2X(rRegL dst, rRegP src)
  %{
    match(Set dst (CastP2X src));
  
    format %{ "movq    $dst, $src\t# ptr -> long" %}

*** 10587,18 ***
       __ movdq($dst$$XMMRegister, $src$$Register);
    %}
    ins_pipe( pipe_slow );
  %}
  
  // Fast clearing of an array
  // Small ClearArray non-AVX512.
! instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                    Universe dummy, rFlagsReg cr)
  %{
!   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
!   match(Set dummy (ClearArray cnt base));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  
    format %{ $$template
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
      $$emit$$"jg      LARGE\n\t"
--- 10631,136 ---
       __ movdq($dst$$XMMRegister, $src$$Register);
    %}
    ins_pipe( pipe_slow );
  %}
  
+ 
  // Fast clearing of an array
  // Small ClearArray non-AVX512.
! instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                    Universe dummy, rFlagsReg cr)
  %{
    // ClearArray rule for nodes that are neither large nor word_copy_only, used
    // when UseAVX <= 2. cnt is pinned to rcx, base to rdi and the fill value val
    // to rax; all three are consumed and clobbered (USE_KILL), tmp is an XMM
    // scratch register, and the flags are killed.
    // NOTE(review): the UseXMMForObjInit part of the format template below still
    // addresses memory through (rax) and names xmm0, although rax now carries
    // val -- confirm the text against what clear_mem actually emits.
!   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
!   match(Set dummy (ClearArray (Binary cnt base) val));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
+ 
+   format %{ $$template
+     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
+     $$emit$$"jg      LARGE\n\t"
+     $$emit$$"dec     rcx\n\t"
+     $$emit$$"js      DONE\t# Zero length\n\t"
+     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
+     $$emit$$"dec     rcx\n\t"
+     $$emit$$"jge     LOOP\n\t"
+     $$emit$$"jmp     DONE\n\t"
+     $$emit$$"# LARGE:\n\t"
+     if (UseFastStosb) {
+        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
+        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
+     } else if (UseXMMForObjInit) {
+        $$emit$$"movdq   $tmp, $val\n\t"
+        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+        $$emit$$"jmpq    L_zero_64_bytes\n\t"
+        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+        $$emit$$"add     0x40,rax\n\t"
+        $$emit$$"# L_zero_64_bytes:\n\t"
+        $$emit$$"sub     0x8,rcx\n\t"
+        $$emit$$"jge     L_loop\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jl      L_tail\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"add     0x20,rax\n\t"
+        $$emit$$"sub     0x4,rcx\n\t"
+        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jle     L_end\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+        $$emit$$"vmovq   xmm0,(rax)\n\t"
+        $$emit$$"add     0x8,rax\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"jge     L_sloop\n\t"
+        $$emit$$"# L_end:\n\t"
+     } else {
+        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
+     }
+     $$emit$$"# DONE"
+   %}
+   ins_encode %{
      // The two trailing booleans are presumably is_large and word_copy_only
      // (the word-copy sibling rule passes false, true) -- confirm against the
      // clear_mem declaration.
+     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                  $tmp$$XMMRegister, false, false);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
+                             Universe dummy, rFlagsReg cr)
+ %{
    // ClearArray rule for non-large nodes flagged word_copy_only, used when
    // UseAVX <= 2. Operands and effects are the same as in rep_stos; the format
    // template differs only in having no UseFastStosb (rep stosb) branch.
    // NOTE(review): the UseXMMForObjInit part of the format template still
    // addresses memory through (rax) and names xmm0, although rax carries val --
    // confirm the text against what clear_mem actually emits.
+   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
+   match(Set dummy (ClearArray (Binary cnt base) val));
+   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
+ 
+   format %{ $$template
+     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
+     $$emit$$"jg      LARGE\n\t"
+     $$emit$$"dec     rcx\n\t"
+     $$emit$$"js      DONE\t# Zero length\n\t"
+     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
+     $$emit$$"dec     rcx\n\t"
+     $$emit$$"jge     LOOP\n\t"
+     $$emit$$"jmp     DONE\n\t"
+     $$emit$$"# LARGE:\n\t"
+     if (UseXMMForObjInit) {
+        $$emit$$"movdq   $tmp, $val\n\t"
+        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+        $$emit$$"jmpq    L_zero_64_bytes\n\t"
+        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+        $$emit$$"add     0x40,rax\n\t"
+        $$emit$$"# L_zero_64_bytes:\n\t"
+        $$emit$$"sub     0x8,rcx\n\t"
+        $$emit$$"jge     L_loop\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jl      L_tail\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"add     0x20,rax\n\t"
+        $$emit$$"sub     0x4,rcx\n\t"
+        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jle     L_end\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+        $$emit$$"vmovq   xmm0,(rax)\n\t"
+        $$emit$$"add     0x8,rax\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"jge     L_sloop\n\t"
+        $$emit$$"# L_end:\n\t"
+     } else {
+        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
+     }
+     $$emit$$"# DONE"
+   %}
+   ins_encode %{
      // Same call as in rep_stos except that the last argument is true;
      // presumably that argument is the word_copy_only flag -- confirm against
      // the clear_mem declaration.
+     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                  $tmp$$XMMRegister, false, true);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // Small ClearArray AVX512 non-constant length.
+ instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
+                        Universe dummy, rFlagsReg cr)
+ %{
+   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
+   match(Set dummy (ClearArray (Binary cnt base) val));
+   ins_cost(125);
+   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
  
    format %{ $$template
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
      $$emit$$"jg      LARGE\n\t"

*** 10642,24 ***
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
      }
      $$emit$$"# DONE"
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
!                  $tmp$$XMMRegister, false, knoreg);
    %}
    ins_pipe(pipe_slow);
  %}
  
! // Small ClearArray AVX512 non-constant length.
! instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
-                        Universe dummy, rFlagsReg cr)
  %{
!   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
!   match(Set dummy (ClearArray cnt base));
    ins_cost(125);
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  
    format %{ $$template
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
      $$emit$$"jg      LARGE\n\t"
--- 10804,23 ---
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
      }
      $$emit$$"# DONE"
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
!                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
!                                  Universe dummy, rFlagsReg cr)
  %{
!   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
!   match(Set dummy (ClearArray (Binary cnt base) val));
    ins_cost(125);
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
  
    format %{ $$template
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
      $$emit$$"jg      LARGE\n\t"

*** 10703,23 ***
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
      }
      $$emit$$"# DONE"
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
!                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // Large ClearArray non-AVX512.
! instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                          Universe dummy, rFlagsReg cr)
  %{
!   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
!   match(Set dummy (ClearArray cnt base));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  
    format %{ $$template
      if (UseFastStosb) {
         $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
--- 10864,119 ---
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
      }
      $$emit$$"# DONE"
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
!                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // Large ClearArray non-AVX512.
+ // Matches ClearArray nodes that are is_large() but not word_copy_only() when
+ // UseAVX <= 2. cnt (rcx), base (rdi) and val (rax) are pinned by their
+ // operand classes and are consumed and clobbered (USE_KILL); tmp is an
+ // allocator-chosen XMM scratch register (TEMP) and the flags are killed.
+ // cnt counts 8-byte units: the UseFastStosb listing shifts it left by 3 to
+ // obtain a byte count.
+ // The two trailing clear_mem arguments (true, false) mirror the predicate;
+ // presumably they are is_large and word_copy_only -- TODO confirm against
+ // MacroAssembler::clear_mem.
+ // The format block is only the textual listing of the emitted code. It names
+ // the vector register as $tmp throughout, since tmp is not pinned to xmm0.
+ // NOTE(review): the XMM listing addresses memory through (rax) although rax
+ // holds val here and base is rdi -- the listing looks stale, please confirm.
! instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                          Universe dummy, rFlagsReg cr)
  %{
!   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
!   match(Set dummy (ClearArray (Binary cnt base) val));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
+ 
+   format %{ $$template
+     if (UseFastStosb) {
+        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
+        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
+     } else if (UseXMMForObjInit) {
+        $$emit$$"movdq   $tmp, $val\n\t"
+        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+        $$emit$$"jmpq    L_zero_64_bytes\n\t"
+        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+        $$emit$$"add     0x40,rax\n\t"
+        $$emit$$"# L_zero_64_bytes:\n\t"
+        $$emit$$"sub     0x8,rcx\n\t"
+        $$emit$$"jge     L_loop\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jl      L_tail\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"add     0x20,rax\n\t"
+        $$emit$$"sub     0x4,rcx\n\t"
+        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jle     L_end\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+        $$emit$$"vmovq   $tmp,(rax)\n\t"
+        $$emit$$"add     0x8,rax\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"jge     L_sloop\n\t"
+        $$emit$$"# L_end:\n\t"
+     } else {
+        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
+     }
+   %}
+   ins_encode %{
+     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                  $tmp$$XMMRegister, true, false);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // Large ClearArray non-AVX512, word-copy-only variant.
+ // Matches ClearArray nodes that are both is_large() and word_copy_only() when
+ // UseAVX <= 2. cnt (rcx), base (rdi) and val (rax) are pinned by their
+ // operand classes and are consumed and clobbered (USE_KILL); tmp is an
+ // allocator-chosen XMM scratch register (TEMP) and the flags are killed.
+ // Unlike rep_stos_large, the listing has no UseFastStosb (byte-store) branch.
+ // The two trailing clear_mem arguments (true, true) mirror the predicate;
+ // presumably they are is_large and word_copy_only -- TODO confirm against
+ // MacroAssembler::clear_mem.
+ // The format block is only the textual listing of the emitted code. It names
+ // the vector register as $tmp throughout, since tmp is not pinned to xmm0.
+ // NOTE(review): the XMM listing addresses memory through (rax) although rax
+ // holds val here and base is rdi -- the listing looks stale, please confirm.
+ instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
+                                   Universe dummy, rFlagsReg cr)
+ %{
+   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
+   match(Set dummy (ClearArray (Binary cnt base) val));
+   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
+ 
+   format %{ $$template
+     if (UseXMMForObjInit) {
+        $$emit$$"movdq   $tmp, $val\n\t"
+        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+        $$emit$$"jmpq    L_zero_64_bytes\n\t"
+        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+        $$emit$$"add     0x40,rax\n\t"
+        $$emit$$"# L_zero_64_bytes:\n\t"
+        $$emit$$"sub     0x8,rcx\n\t"
+        $$emit$$"jge     L_loop\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jl      L_tail\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"add     0x20,rax\n\t"
+        $$emit$$"sub     0x4,rcx\n\t"
+        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jle     L_end\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+        $$emit$$"vmovq   $tmp,(rax)\n\t"
+        $$emit$$"add     0x8,rax\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"jge     L_sloop\n\t"
+        $$emit$$"# L_end:\n\t"
+     } else {
+        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
+     }
+   %}
+   ins_encode %{
+     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                  $tmp$$XMMRegister, true, true);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // Large ClearArray AVX512.
+ instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
+                              Universe dummy, rFlagsReg cr)
+ %{
+   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
+   match(Set dummy (ClearArray (Binary cnt base) val));
+   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
  
    format %{ $$template
      if (UseFastStosb) {
         $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"

*** 10754,23 ***
         $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
      }
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
!                  $tmp$$XMMRegister, true, knoreg);
    %}
    ins_pipe(pipe_slow);
  %}
  
! // Large ClearArray AVX512.
! instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
-                              Universe dummy, rFlagsReg cr)
  %{
!   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
!   match(Set dummy (ClearArray cnt base));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  
    format %{ $$template
      if (UseFastStosb) {
         $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
--- 11011,22 ---
         $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
      }
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
!                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
!                                        Universe dummy, rFlagsReg cr)
  %{
!   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
!   match(Set dummy (ClearArray (Binary cnt base) val));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
  
    format %{ $$template
      if (UseFastStosb) {
         $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"

*** 10805,27 ***
         $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
      }
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
!                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // Small ClearArray AVX512 constant length.
! instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
  %{
!   predicate(!((ClearArrayNode*)n)->is_large() &&
!               ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
!   match(Set dummy (ClearArray cnt base));
    ins_cost(100);
!   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
    format %{ "clear_mem_imm $base , $cnt  \n\t" %}
    ins_encode %{
!    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
--- 11061,27 ---
         $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
      }
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
!                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // Small ClearArray AVX512 constant length: cnt is an immediate (immL), base may be any pointer register.
! instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
  %{
!   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
!             ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
!   match(Set dummy (ClearArray (Binary cnt base) val));
    ins_cost(100); // lower than the 125 given to the register-count AVX512 variants
!   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr); // val (rax) is consumed and clobbered; tmp/ktmp are scratch; flags killed
    format %{ "clear_mem_imm $base , $cnt  \n\t" %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); // count passed as a compile-time constant
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,

*** 11730,11 ***
  
  // This will generate a signed flags result. This should be OK since
  // any compare to a zero should be eq/neq.
  instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
  %{
!   predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
              n->in(1)->as_Load()->barrier_data() == 0);
    match(Set cr (CmpP (LoadP op) zero));
  
    ins_cost(500); // XXX
    format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
--- 11986,11 ---
  
  // This will generate a signed flags result. This should be OK since
  // any compare to a zero should be eq/neq.
  instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
  %{
!   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
              n->in(1)->as_Load()->barrier_data() == 0);
    match(Set cr (CmpP (LoadP op) zero));
  
    ins_cost(500); // XXX
    format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}

*** 11744,11 ***
    ins_pipe(ialu_cr_reg_imm);
  %}
  
  instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
              n->in(1)->as_Load()->barrier_data() == 0);
    match(Set cr (CmpP (LoadP mem) zero));
  
    format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
    ins_encode %{
--- 12000,11 ---
    ins_pipe(ialu_cr_reg_imm);
  %}
  
  instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
  %{
!   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
              n->in(1)->as_Load()->barrier_data() == 0);
    match(Set cr (CmpP (LoadP mem) zero));
  
    format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
    ins_encode %{

*** 11827,11 ***
    ins_pipe(ialu_cr_reg_imm);
  %}
  
  instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
  %{
!   predicate(CompressedOops::base() != NULL);
    match(Set cr (CmpN (LoadN mem) zero));
  
    ins_cost(500); // XXX
    format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
    ins_encode %{
--- 12083,11 ---
    ins_pipe(ialu_cr_reg_imm);
  %}
  
  instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
  %{
!   predicate(CompressedOops::base() != nullptr);
    match(Set cr (CmpN (LoadN mem) zero));
  
    ins_cost(500); // XXX
    format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
    ins_encode %{

*** 11840,11 ***
    ins_pipe(ialu_cr_reg_mem);
  %}
  
  instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
  %{
!   predicate(CompressedOops::base() == NULL);
    match(Set cr (CmpN (LoadN mem) zero));
  
    format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
    ins_encode %{
      __ cmpl(r12, $mem$$Address);
--- 12096,11 ---
    ins_pipe(ialu_cr_reg_mem);
  %}
  
  instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
  %{
!   predicate(CompressedOops::base() == nullptr);
    match(Set cr (CmpN (LoadN mem) zero));
  
    format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
    ins_encode %{
      __ cmpl(r12, $mem$$Address);

*** 12591,12 ***
--- 12847,28 ---
    ins_encode(Java_To_Runtime(meth));
    ins_pipe(pipe_slow);
  %}
  
  // Call runtime without safepoint
+ // Indirect form of the CallLeafNoFP runtime call: entry point is null, target holds the address to call
+ instruct CallLeafNoFPInDirect(rRegP target)
+ %{
+   predicate(n->as_Call()->entry_point() == nullptr); // only call nodes without a static entry point
+   match(CallLeafNoFP target); // callee address is an explicit pointer-register input
+ 
+   ins_cost(300);
+   format %{ "call_leaf_nofp,runtime indirect " %}
+   ins_encode %{
+      __ call($target$$Register); // keep in sync with MachCallRuntimeNode::ret_addr_offset(), which returns 3 when the entry point is null
+   %}
+ 
+   ins_pipe(pipe_slow);
+ %}
+ 
  instruct CallLeafNoFPDirect(method meth)
  %{
+   predicate(n->as_Call()->entry_point() != nullptr);
    match(CallLeafNoFP);
    effect(USE meth);
  
    ins_cost(300);
    format %{ "call_leaf_nofp,runtime " %}
< prev index next >