< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page
*** 1239,14 ***
--- 1239,24 ---
    uint def_idx = use->operand_index(opnd);
    Node* def = use->in(def_idx);
    return vector_length_encoding(def);
  }
  
+ // Feature check used by the vector popcount match rules: subword element
+ // types require AVX512_BITALG, non-subword integral element types require
+ // AVX512_VPOPCNTDQ.
+ static inline bool is_vector_popcount_predicate(BasicType bt) {
+   if (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) {
+     return true;
+   }
+   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq();
+ }
+ 
  // True when every bit of BoolTest::unsigned_compare is set in bt.
  static inline bool is_unsigned_booltest_pred(int bt) {
    const int unsigned_bits = BoolTest::unsigned_compare;
    return (bt & unsigned_bits) == unsigned_bits;
  }
  
+ // Feature check shared by the EVEX count-leading/trailing-zeros rules for
+ // non-subword integral element types: needs AVX512CD, and additionally either
+ // AVX512VL or a vector that is exactly 64 bytes long.
+ static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
+   if (!is_non_subword_integral_type(bt) || !VM_Version::supports_avx512cd()) {
+     return false;
+   }
+   return VM_Version::supports_avx512vl() || vlen_bytes == 64;
+ }
+ 
  class Node::PD {
  public:
    enum NodeFlags {
      Flag_intel_jcc_erratum = Node::_last_flag << 1,
      _last_flag             = Flag_intel_jcc_erratum

*** 1403,16 ***
        if (!UsePopCountInstruction) {
          return false;
        }
        break;
      case Op_PopCountVI:
!       if (!UsePopCountInstruction || (UseAVX < 2)) {
          return false;
        }
        break;
      case Op_PopCountVL:
!       if (!UsePopCountInstruction || (UseAVX <= 2)) {
          return false;
        }
        break;
      case Op_MulVI:
        if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
--- 1413,16 ---
        if (!UsePopCountInstruction) {
          return false;
        }
        break;
      case Op_PopCountVI:
!       if (UseAVX < 2) {
          return false;
        }
        break;
      case Op_PopCountVL:
!       if (UseAVX < 2) {
          return false;
        }
        break;
      case Op_MulVI:
        if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX

*** 1623,10 ***
--- 1633,21 ---
      case Op_SignumD:
        if (UseSSE < 2) {
          return false;
        }
        break;
+     case Op_CompressM:
+       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
+         return false;
+       }
+       break;
+     case Op_CompressV:
+     case Op_ExpandV:
+       if (!VM_Version::supports_avx512vl()) {
+         return false;
+       }
+       break;
      case Op_SqrtF:
        if (UseSSE < 1) {
          return false;
        }
        break;

*** 1644,10 ***
--- 1665,15 ---
    return true;  // Match rules are supported by default.
  }
  
  //------------------------------------------------------------------------
  
+ // True when the CPU feature matching element type bt is present:
+ // AVX512_BITALG for subword types, AVX512_VPOPCNTDQ for non-subword
+ // integral types.
+ static inline bool is_pop_count_instr_target(BasicType bt) {
+   const bool subword_hit = is_subword_type(bt) && VM_Version::supports_avx512_bitalg();
+   return subword_hit ||
+          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
+ }
+ 
  // Identify extra cases that we might want to provide match rules for vector nodes and
  // other intrinsics guarded with vector length (vlen) and element type (bt).
  const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
    const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
    if (!match_rule_supported(opcode)) {

*** 1849,11 ***
        }
        break;
      case Op_LoadVectorGatherMasked:
      case Op_StoreVectorScatterMasked:
      case Op_StoreVectorScatter:
!       if(is_subword_type(bt)) {
          return false;
        } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
          return false;
        }
        // fallthrough
--- 1875,11 ---
        }
        break;
      case Op_LoadVectorGatherMasked:
      case Op_StoreVectorScatterMasked:
      case Op_StoreVectorScatter:
!       if (is_subword_type(bt)) {
          return false;
        } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
          return false;
        }
        // fallthrough

*** 1876,27 ***
      case Op_VectorMaskCmp:
        if (vlen < 2 || size_in_bits < 32) {
          return false;
        }
        break;
      case Op_VectorLongToMask:
        if (UseAVX < 1 || !is_LP64) {
          return false;
        }
        if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
          return false;
        }
        break;
      case Op_PopCountVI:
!       if (!VM_Version::supports_avx512_vpopcntdq() &&
!           (vlen == 16) && !VM_Version::supports_avx512bw()) {
          return false;
        }
        break;
!     case Op_PopCountVL:
!       if (!VM_Version::supports_avx512_vpopcntdq() &&
!           ((vlen <= 4) || ((vlen == 8) && !VM_Version::supports_avx512bw()))) {
          return false;
        }
        break;
    }
    return true;  // Per default match rules are supported.
--- 1902,52 ---
      case Op_VectorMaskCmp:
        if (vlen < 2 || size_in_bits < 32) {
          return false;
        }
        break;
+     case Op_CompressM:
+       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
+         return false;
+       }
+       break;
+     case Op_CompressV:
+     case Op_ExpandV:
+       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
+         return false;
+       }
+       if (size_in_bits < 128 ) {
+         return false;
+       }
+       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
+         return false;
+       }
+       break;
      case Op_VectorLongToMask:
        if (UseAVX < 1 || !is_LP64) {
          return false;
        }
        if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
          return false;
        }
        break;
      case Op_PopCountVI:
!     case Op_PopCountVL: {
!         if (!is_pop_count_instr_target(bt) &&
+             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
+           return false;
+         }
+       }
+       break;
+     case Op_ReverseV:
+     case Op_ReverseBytesV:
+       if (UseAVX < 2) {
          return false;
        }
        break;
!     case Op_CountTrailingZerosV:
!     case Op_CountLeadingZerosV:
!       if (UseAVX < 2) {
          return false;
        }
        break;
    }
    return true;  // Per default match rules are supported.

*** 2040,13 ***
--- 2091,24 ---
        if (vlen > 16 && !VM_Version::supports_avx512bw()) {
          return false; // Implementation limitation
        }
        return true;
  
+     case Op_PopCountVI:
+     case Op_PopCountVL:
+       if (!is_pop_count_instr_target(bt)) {
+         return false;
+       }
+       return true;
+ 
      case Op_MaskAll:
        return true;
  
+     case Op_CountLeadingZerosV:
+       if ((bt == T_INT || bt == T_LONG) && VM_Version::supports_avx512cd()) {
+         return true;
+       }
      default:
        return false;
    }
  }
  

*** 8640,62 ***
    ins_cost(10);
  %}
  
  // --------------------------------- PopCount --------------------------------------
  
! instruct vpopcountI_popcntd(vec dst, vec src) %{
    // Pre-patch rule (old side of this hunk): matches PopCountVI when
    // AVX512_VPOPCNTDQ is reported and calls vector_popcount_int with
    // xnoreg/noreg in place of every temporary.
!   predicate(VM_Version::supports_avx512_vpopcntdq());
    match(Set dst (PopCountVI src));
!   format %{ "vector_popcount_int $dst, $src\t! vector popcount packedI" %}
    ins_encode %{
!     assert(UsePopCountInstruction, "not enabled");
!     int vlen_enc = vector_length_encoding(this);
!     __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
    %}
    ins_pipe( pipe_slow );
  %}
  
! instruct vpopcountI(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
    // Pre-patch rule (old side of this hunk): PopCountVI when AVX512_VPOPCNTDQ
    // is not reported. Declares three vector temporaries and one pointer
    // register as TEMP and kills the flags register.
!   predicate(!VM_Version::supports_avx512_vpopcntdq());
!   match(Set dst (PopCountVI src));
!   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
!   format %{ "vector_popcount_int  $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
    ins_encode %{
!     assert(UsePopCountInstruction, "not enabled");
!     int vlen_enc = vector_length_encoding(this);
!     __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
!                            $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
    %}
    ins_pipe( pipe_slow );
  %}
  
! instruct vpopcountL_popcntd(vec dst, vec src) %{
    // Pre-patch rule (old side of this hunk): matches PopCountVL when
    // AVX512_VPOPCNTDQ is reported. The length encoding is taken from the
    // $src operand rather than from the node itself.
!   predicate(VM_Version::supports_avx512_vpopcntdq());
    match(Set dst (PopCountVL src));
!   format %{ "vector_popcount_long  $dst, $src\t! vector popcount packedL" %}
    ins_encode %{
!     assert(UsePopCountInstruction, "not enabled");
      int vlen_enc = vector_length_encoding(this, $src);
!     __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
    %}
    ins_pipe( pipe_slow );
  %}
  
! instruct vpopcountL(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
    // Pre-patch rule (old side of this hunk): PopCountVL when AVX512_VPOPCNTDQ
    // is not reported. Uses three vector temporaries plus a pointer register
    // and kills the flags register; length encoding comes from $src.
!   predicate(!VM_Version::supports_avx512_vpopcntdq());
!   match(Set dst (PopCountVL src));
!   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
!   format %{ "vector_popcount_long  $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
    ins_encode %{
-     assert(UsePopCountInstruction, "not enabled");
      int vlen_enc = vector_length_encoding(this, $src);
!     __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
!                            $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
    %}
    ins_pipe( pipe_slow );
  %}
  
  // --------------------------------- Bitwise Ternary Logic ----------------------------------
  
  instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
    match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
    effect(TEMP dst);
--- 8702,159 ---
    ins_cost(10);
  %}
  
  // --------------------------------- PopCount --------------------------------------
  
! instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
    // Unmasked PopCountVI/PopCountVL rule, selected when
    // is_vector_popcount_predicate() accepts the element type of input 1.
    // Element type and length encoding are both taken from $src, because the
    // result element type can differ from the source (see the narrowing below).
!   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
    match(Set dst (PopCountVI src));
!   match(Set dst (PopCountVL src));
+   ins_cost(400);
+   format %{ "vector_popcount_integral $dst, $src" %}
    ins_encode %{
!     int opcode = this->ideal_Opcode();
!     int vlen_enc = vector_length_encoding(this, $src);
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
      // k0 is passed as the mask register for the unmasked form.
+     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
+     // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
+     // should be succeeded by its corresponding vector IR and following
+     // special handling should be removed.
      // A PopCountVL node typed as T_INT gets its quadword results narrowed
      // to doublewords in place.
+     if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
+       __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+     }
    %}
    ins_pipe( pipe_slow );
  %}
  
! instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
    // Masked PopCountVI/PopCountVL rule; same predicate as the unmasked EVEX
    // rule, with the mask supplied in a k-register operand.
!   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
!   match(Set dst (PopCountVI src mask));
!   match(Set dst (PopCountVL src mask));
!   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
    ins_encode %{
!     int vlen_enc = vector_length_encoding(this, $src);
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
      // dst is seeded with src before the masked operation.
      // NOTE(review): presumably so lanes not selected by $mask keep the
      // source value (the `true` argument looks like a merge flag) - confirm
      // against vector_popcount_integral_evex.
!     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
!     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
    %}
    ins_pipe( pipe_slow );
  %}
  
! instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
    // Fallback PopCountVI/PopCountVL rule for element types rejected by
    // is_vector_popcount_predicate(). Needs two vector temporaries and one
    // pointer register, all declared TEMP.
!   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
+   match(Set dst (PopCountVI src));
    match(Set dst (PopCountVL src));
!   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
+   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
    ins_encode %{
!     int opcode = this->ideal_Opcode();
      int vlen_enc = vector_length_encoding(this, $src);
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
+     // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
+     // should be succeeded by its corresponding vector IR and following
+     // special handling should be removed.
      // Narrow quadword results to doublewords when the node is typed T_INT:
      // evpmovqd when AVX512VL is reported, otherwise a vpshufd + vpermq pair.
+     if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
+       if (VM_Version::supports_avx512vl()) {
+         __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+       } else {
+         assert(VM_Version::supports_avx2(), "");
+         __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
+         __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
+       }
+     }
    %}
    ins_pipe( pipe_slow );
  %}
  
! // --------------------------------- Vector Trailing Zeros Count --------------------------------------
! 
! instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
    // EVEX CountTrailingZerosV rule for the element types and vector lengths
    // accepted by is_clz_non_subword_predicate_evex(). Only one vector
    // temporary is used; the other temp slots of the helper receive xnoreg
    // and the mask slot receives k0.
!   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
!                                               Matcher::vector_length_in_bytes(n->in(1))));
+   match(Set dst (CountTrailingZerosV src));
+   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
+   ins_cost(400);
+   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
+   ins_encode %{
+     int vlen_enc = vector_length_encoding(this, $src);
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     BasicType rbt = Matcher::vector_element_basic_type(this);
+     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
+                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
+     // TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
+     // should be succeeded by its corresponding vector IR and following
+     // special handling should be removed.
      // Source T_LONG with result T_INT: narrow quadwords to doublewords.
+     if (bt == T_LONG && rbt == T_INT) {
+       __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+     }
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
    // EVEX CountTrailingZerosV rule for T_SHORT sources: requires AVX512CD and
    // either AVX512VL or a 64-byte vector.
    // NOTE(review): the length test reads the node `n`, while the element-type
    // test reads n->in(1); other rules in this section use n->in(1) for both -
    // confirm the two always agree here.
+   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
+             VM_Version::supports_avx512cd() &&
+             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
+   match(Set dst (CountTrailingZerosV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
+   ins_cost(400);
+   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
+   ins_encode %{
+     int vlen_enc = vector_length_encoding(this, $src);
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
      // The third vector-temp slot of the helper is unused (xnoreg); $xtmp3 is
      // passed in the fourth slot, and k0 stands in for the mask register.
+     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
    // EVEX CountTrailingZerosV rule for T_BYTE sources, gated on
    // VM_Version::supports_avx512vlbw(). Uses four vector temporaries, a
    // k-register temporary and a pointer register, all declared TEMP.
+   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
+   match(Set dst (CountTrailingZerosV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
+   ins_cost(400);
+   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
+   ins_encode %{
+     int vlen_enc = vector_length_encoding(this, $src);
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
+                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
    // CountTrailingZerosV rule for targets without AVX512VL and source vectors
    // shorter than 64 bytes; the predicate does not restrict the element type.
+   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
+   match(Set dst (CountTrailingZerosV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
+   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
    ins_encode %{
      int vlen_enc = vector_length_encoding(this, $src);
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
!     BasicType rbt = Matcher::vector_element_basic_type(this);
+     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
+     // TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
+     // should be succeeded by its corresponding vector IR and following
+     // special handling should be removed.
      // Source T_LONG with result T_INT: narrow quadwords to doublewords. The
      // predicate already requires !supports_avx512vl(), so only the
      // vpshufd + vpermq branch is reachable from this rule.
+     if (bt == T_LONG && rbt == T_INT) {
+       if (VM_Version::supports_avx512vl()) {
+         __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+       } else {
+         assert(VM_Version::supports_avx2(), "");
+         __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
+         __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
+       }
+     }
    %}
    ins_pipe( pipe_slow );
  %}
  
+ 
  // --------------------------------- Bitwise Ternary Logic ----------------------------------
  
  instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
    match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
    effect(TEMP dst);

*** 8966,12 ***
--- 9125,204 ---
      __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                               $tmp$$Register, mask_len, mbt, vlen_enc);
    %}
    ins_pipe( pipe_slow );
  %}
+ 
+ // --------------------------------- Compress/Expand Operations ---------------------------
+ 
+ instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
    // Single rule for both CompressV and ExpandV under a k-register mask. It
    // has no predicate of its own; the ideal opcode is forwarded so that
    // vector_compress_expand can choose between the two operations.
+   match(Set dst (CompressV src mask));
+   match(Set dst (ExpandV src mask));
+   format %{ "vector_compress_expand $dst, $src, $mask" %}
+   ins_encode %{
+     int opcode = this->ideal_Opcode();
+     int vector_len = vector_length_encoding(this);
+     BasicType bt  = Matcher::vector_element_basic_type(this);
      // NOTE(review): the literal `false` is an opaque flag of
      // vector_compress_expand - confirm its meaning against the helper.
+     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
    // CompressM rule: k-register in, k-register out. Uses two long
    // general-purpose temporaries and kills the flags register.
+   match(Set dst (CompressM mask));
+   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
+   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
+   ins_encode %{
      // The input must be typed as a vector mask.
+     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
      // The mask length is taken from this node's vector length.
+     int mask_len = Matcher::vector_length(this);
+     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
  #endif // _LP64
  
+ // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
+ 
+ instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
    // ReverseV rule used when GFNI is not reported; the GFNI variant is the
    // complementary rule. Needs two vector temporaries and an int register.
+   predicate(!VM_Version::supports_gfni());
+   match(Set dst (ReverseV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
+   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
+   ins_encode %{
+     int vec_enc = vector_length_encoding(this);
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp, rRegI rtmp) %{
    // ReverseV rule used when GFNI is reported. Needs one vector temporary
    // and an int register.
+   predicate(VM_Version::supports_gfni());
+   match(Set dst (ReverseV src));
+   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
+   format %{ "vector_reverse_bit_gfni $dst, $src!\t using $rtmp and $xtmp as TEMP" %}
+   ins_encode %{
+     int vec_enc = vector_length_encoding(this);
+     BasicType bt  = Matcher::vector_element_basic_type(this);
      // A T_LONG constant-table entry holding 0x8040201008040201 is passed to
      // the helper by address.
      // NOTE(review): presumably the GF(2) affine matrix that reverses the
      // bits of each byte - confirm against vector_reverse_bit_gfni.
+     InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
+     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
+                                addr, $rtmp$$Register, vec_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vreverse_byte_reg(vec dst, vec src, rRegI rtmp) %{
    // ReverseBytesV rule for every case except a 64-byte vector without
    // AVX512BW; that remaining case is handled by vreverse_byte64_reg.
+   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
+   match(Set dst (ReverseBytesV src));
+   effect(TEMP dst, TEMP rtmp);
+   format %{ "vector_reverse_byte $dst, $src!\t using $rtmp as TEMP" %}
+   ins_encode %{
+     int vec_enc = vector_length_encoding(this);
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, $rtmp$$Register, vec_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
    // ReverseBytesV rule for 64-byte vectors on targets without AVX512BW.
    // Unlike vreverse_byte_reg it needs two extra vector temporaries.
+   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
+   match(Set dst (ReverseBytesV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
+   format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
+   ins_encode %{
+     int vec_enc = vector_length_encoding(this);
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // ---------------------------------- Vector Count Leading Zeros -----------------------------------
+ 
+ instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
    // Unmasked EVEX CountLeadingZerosV rule for the element types and vector
    // lengths accepted by is_clz_non_subword_predicate_evex(). No temporaries:
    // every temp slot of the helper receives xnoreg/noreg and the mask is k0.
+   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
+                                               Matcher::vector_length_in_bytes(n->in(1))));
+   match(Set dst (CountLeadingZerosV src));
+   format %{ "vector_count_leading_zeros $dst, $src" %}
+   ins_encode %{
+      int vlen_enc = vector_length_encoding(this, $src);
+      BasicType bt = Matcher::vector_element_basic_type(this, $src);
+      BasicType rbt = Matcher::vector_element_basic_type(this);
+      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
+                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
+      // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
+      // should be succeeded by its corresponding vector IR and following
+      // special handling should be removed.
       // Source T_LONG with result T_INT: narrow quadwords to doublewords.
+      if (rbt == T_INT && bt == T_LONG) {
+        __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+      }
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
    // Masked EVEX CountLeadingZerosV rule; same predicate as the unmasked
    // rule, with the mask supplied in a k-register operand.
+   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
+                                               Matcher::vector_length_in_bytes(n->in(1))));
+   match(Set dst (CountLeadingZerosV src mask));
+   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
+   ins_encode %{
+     int vlen_enc = vector_length_encoding(this, $src);
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
      // dst is seeded with src before the masked operation.
      // NOTE(review): presumably so lanes not selected by $mask keep the
      // source value (the `true` argument looks like a merge flag) - confirm
      // against vector_count_leading_zeros_evex.
+     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
+                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
    // EVEX CountLeadingZerosV rule for T_SHORT sources: requires AVX512CD and
    // either AVX512VL or a 64-byte vector. Uses two vector temporaries.
    // NOTE(review): the length test reads the node `n`, while the element-type
    // test reads n->in(1) - confirm the two always agree here.
+   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
+             VM_Version::supports_avx512cd() &&
+             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
+   match(Set dst (CountLeadingZerosV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
+   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
+   ins_encode %{
+     int vlen_enc = vector_length_encoding(this, $src);
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
    // EVEX CountLeadingZerosV rule for T_BYTE sources, gated on
    // VM_Version::supports_avx512vlbw(). Uses three vector temporaries, a
    // k-register temporary and a pointer register, all declared TEMP.
+   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
+   match(Set dst (CountLeadingZerosV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
+   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
+   ins_encode %{
+     int vlen_enc = vector_length_encoding(this, $src);
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
+                                        $rtmp$$Register, true, vlen_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
    // CountLeadingZerosV rule for T_INT sources on targets without AVX512VL
    // and vectors shorter than 64 bytes. No general-purpose temporary is
    // needed: the helper's register slot receives noreg.
+   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
+             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
+   match(Set dst (CountLeadingZerosV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
+   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
+   ins_encode %{
+     int vlen_enc = vector_length_encoding(this, $src);
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // CountLeadingZerosV rule for every source element type other than T_INT on
+ // targets without AVX512VL, for vectors shorter than 64 bytes. Uses three
+ // vector temporaries and a pointer register, all declared TEMP.
+ instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
+   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
+             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
+   match(Set dst (CountLeadingZerosV src));
+   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
+   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
+   ins_encode %{
+     int vlen_enc = vector_length_encoding(this, $src);
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     BasicType rbt = Matcher::vector_element_basic_type(this);
+     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
+     // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
+     // should be succeeded by its corresponding vector IR and following
+     // special handling should be removed.
+     if (rbt == T_INT && bt == T_LONG) {
+       // This rule is only selected when AVX512VL is absent and the vector is
+       // shorter than 512 bits, so the EVEX evpmovqd form must not be emitted
+       // unconditionally. Narrow quadwords to doublewords the same way the
+       // other *_avx rules in this file do.
+       if (VM_Version::supports_avx512vl()) {
+         __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+       } else {
+         assert(VM_Version::supports_avx2(), "");
+         __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
+         __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
+       }
+     }
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
  // ---------------------------------- Vector Masked Operations ------------------------------------
  
  instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
    match(Set dst (AddVB (Binary dst src2) mask));
    match(Set dst (AddVS (Binary dst src2) mask));
< prev index next >