< prev index next >

src/hotspot/cpu/aarch64/aarch64_sve_ad.m4

Print this page
@@ -142,10 +142,12 @@
        case Op_VectorRearrange:
          return vlen >= 4 && length_in_bytes <= MaxVectorSize;
        case Op_LoadVector:
        case Op_StoreVector:
          return Matcher::vector_size_supported(bt, vlen);
+       case Op_ExpandV:
+         if (UseSVE < 2 || is_subword_type(bt)) return false;
        default:
          break;
      }
      // By default, we only support vector operations with no less than 8 bytes and 2 elements.
      return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2;

@@ -1168,22 +1170,82 @@
  UNARY_OP_PREDICATE(vnegI, NegVI, B/H/S, sve_neg)
  UNARY_OP_PREDICATE(vnegL, NegVL, D, sve_neg)
  UNARY_OP_PREDICATE(vnegF, NegVF, S, sve_fneg)
  UNARY_OP_PREDICATE(vnegD, NegVD, D, sve_fneg)
  
- // popcount vector
+ dnl
+ dnl VPOPCOUNT($1,          $2  )
+ dnl VPOPCOUNT(name_suffix, size)
+ dnl
+ dnl Emits the unpredicated rule vpopcount<name_suffix>. The rule matches
+ dnl PopCountV<name_suffix> on nodes that are not predicated vectors and
+ dnl encodes a single sve_cnt governed by ptrue.
+ dnl   name_suffix = I : the register variant is derived from the element
+ dnl                     basic type of the node inside the encode body.
+ dnl   name_suffix = L : the rule additionally requires a T_LONG result
+ dnl                     element type and always uses the D variant.
+ dnl size is only used as the lane-size label printed by the format string.
+ define(`VPOPCOUNT', `
+ instruct vpopcount$1(vReg dst, vReg src) %{
+   predicate(UseSVE > 0 &&
+             !n->as_Vector()->is_predicated_vector()`'ifelse($1, `L', ` &&
+             n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
+   match(Set dst (PopCountV$1 src));
+   ins_cost(SVE_COST);
+   format %{ "sve_cnt $dst, $src\t# vector (sve) ($2)" %}
+   ins_encode %{
+     assert(UsePopCountInstruction, "unsupported");dnl
+ ifelse($1, `I', `
+     BasicType bt = Matcher::vector_element_basic_type(this);', `')
+     __ sve_cnt(as_FloatRegister($dst$$reg), ifelse($1, `I', `__ elemType_to_regVariant(bt)', `__ D'),
+          ptrue, as_FloatRegister($src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}')dnl
+ dnl
+ // vector popcount
+ VPOPCOUNT(I, B/H/S)
+ VPOPCOUNT(L, D)
  
- instruct vpopcountI(vReg dst, vReg src) %{
-   predicate(UseSVE > 0);
-   match(Set dst (PopCountVI src));
-   format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
+ // When PopCountVL is produced by auto-vectorization, the basic type of
+ // dst is T_INT rather than T_LONG, so this rule is selected instead of
+ // "vpopcountL". Once the type definition is unified between the Vector
+ // API and auto-vectorization, this rule can be merged with the
+ // "vpopcountL" rule.
+ instruct vpopcountLI(vReg dst, vReg src, vReg vtmp) %{
+   predicate(UseSVE > 0 &&
+             !n->as_Vector()->is_predicated_vector() &&
+             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+   match(Set dst (PopCountVL src));
+   effect(TEMP_DEF dst, TEMP vtmp);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_cnt $dst, $src\n\t"
+             "sve_dup $vtmp, #0\n\t"
+             "sve_uzp1 $dst, $dst, $vtmp\t# vector (sve) (S)" %}
    ins_encode %{
-      __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+     assert(UsePopCountInstruction, "unsupported");
+     // Count the set bits of every 64-bit (D) lane of src into dst.
+     __ sve_cnt(as_FloatRegister($dst$$reg), __ D,
+          ptrue, as_FloatRegister($src$$reg));
+     // Narrow the D-sized counts held in dst down to S-sized lanes, in place.
+     // vtmp is the scratch vector handed to the helper; going by the text
+     // printed above it is presumably zeroed and used as the second uzp1 input.
+     __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
+          as_FloatRegister($dst$$reg), __ D, as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ dnl
+ dnl VPOPCOUNT_PREDICATE($1,          $2  )
+ dnl VPOPCOUNT_PREDICATE(name_suffix, size)
+ dnl
+ dnl Emits the predicated rule vpopcount<name_suffix>_masked, which matches
+ dnl (PopCountV<name_suffix> dst_src pg). The single vector operand dst_src is
+ dnl both the input and the output of sve_cnt, and pg is passed to sve_cnt as
+ dnl its governing predicate.
+ dnl   name_suffix = I : the register variant is derived from the element
+ dnl                     basic type of the node inside the encode body.
+ dnl   name_suffix = L : the rule additionally requires a T_LONG result
+ dnl                     element type and always uses the D variant.
+ dnl size is only used as the lane-size label printed by the format string.
+ define(`VPOPCOUNT_PREDICATE', `
+ instruct vpopcount$1_masked(vReg dst_src, pRegGov pg) %{
+   predicate(UseSVE > 0`'ifelse($1, `L', ` &&
+             n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
+   match(Set dst_src (PopCountV$1 dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) ($2)" %}
+   ins_encode %{
+     assert(UsePopCountInstruction, "unsupported");dnl
+ ifelse($1, `I', `
+     BasicType bt = Matcher::vector_element_basic_type(this);', `')
+     __ sve_cnt(as_FloatRegister($dst_src$$reg), ifelse($1, `I', `__ elemType_to_regVariant(bt)', `__ D'),
+          as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}')dnl
+ // vector popcount - predicated
+ VPOPCOUNT_PREDICATE(I, B/H/S)
+ VPOPCOUNT_PREDICATE(L, D)
+ 
  // vector blend
  
  instruct vblend(vReg dst, vReg src1, vReg src2, pRegGov pg) %{
    predicate(UseSVE > 0);
    match(Set dst (VectorBlend (Binary src1 src2) pg));

@@ -3178,10 +3240,108 @@
      __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0);
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // ---------------------------- Compress/Expand Operations ---------------------------
+ 
+ // Mask compress (CompressM): builds the result mask from the number of
+ // elements counted in pg. The count goes through rscratch1, and the flags
+ // register is declared as killed.
+ instruct mcompress(pReg dst, pReg pg, rFlagsReg cr) %{
+   predicate(UseSVE > 0);
+   match(Set dst (CompressM pg));
+   effect(KILL cr);
+   ins_cost(2 * SVE_COST);
+   format %{ "sve_cntp rscratch1, $pg\n\t"
+             "sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %}
+   ins_encode %{
+     const PRegister src_mask = as_PRegister($pg$$reg);
+     const PRegister dst_mask = as_PRegister($dst$$reg);
+     const Assembler::SIMD_RegVariant variant =
+         __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
+     // rscratch1 = element count of src_mask, taken under ptrue.
+     __ sve_cntp(rscratch1, variant, ptrue, src_mask);
+     // dst_mask = while-lower-than mask running from zr up to rscratch1.
+     __ sve_whilelo(dst_mask, variant, zr, rscratch1);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // Vector compress (CompressV) for int, long, float and double elements,
+ // encoded as a single sve_compact with pg as the predicate operand.
+ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
+   predicate(UseSVE > 0 &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+   match(Set dst (CompressV src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %}
+   ins_encode %{
+     const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+     const FloatRegister src_reg = as_FloatRegister($src$$reg);
+     const PRegister     mask    = as_PRegister($pg$$reg);
+     const Assembler::SIMD_RegVariant variant =
+         __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
+     __ sve_compact(dst_reg, variant, src_reg, mask);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // Vector compress (CompressV) for byte elements. The code is produced by the
+ // macro-assembler helper sve_compress_byte, which is handed four vector
+ // temporaries and two predicate temporaries in addition to dst, src and pg.
+ instruct vcompressB(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4,
+                     pReg ptmp, pRegGov pgtmp) %{
+   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+   match(Set dst (CompressV src pg));
+   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp);
+   ins_cost(13 * SVE_COST);
+   format %{ "sve_compact $dst, $src, $pg\t# vector compress (B)" %}
+   ins_encode %{
+     const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+     const FloatRegister src_reg = as_FloatRegister($src$$reg);
+     const PRegister     mask    = as_PRegister($pg$$reg);
+     // Scratch registers reserved through the TEMP effects above.
+     const FloatRegister scratch1 = as_FloatRegister($vtmp1$$reg);
+     const FloatRegister scratch2 = as_FloatRegister($vtmp2$$reg);
+     const FloatRegister scratch3 = as_FloatRegister($vtmp3$$reg);
+     const FloatRegister scratch4 = as_FloatRegister($vtmp4$$reg);
+     const PRegister     pscratch = as_PRegister($ptmp$$reg);
+     const PRegister     pgov     = as_PRegister($pgtmp$$reg);
+     __ sve_compress_byte(dst_reg, src_reg, mask,
+                          scratch1, scratch2, scratch3, scratch4,
+                          pscratch, pgov);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // Vector compress (CompressV) for short elements. The code is produced by the
+ // macro-assembler helper sve_compress_short, which is handed two vector
+ // temporaries and one governing-predicate temporary.
+ instruct vcompressS(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{
+   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp);
+   match(Set dst (CompressV src pg));
+   ins_cost(38 * SVE_COST);
+   format %{ "sve_compact $dst, $src, $pg\t# vector compress (H)" %}
+   ins_encode %{
+     const FloatRegister dst_reg  = as_FloatRegister($dst$$reg);
+     const FloatRegister src_reg  = as_FloatRegister($src$$reg);
+     const PRegister     mask     = as_PRegister($pg$$reg);
+     // Scratch registers reserved through the TEMP effects above.
+     const FloatRegister scratch1 = as_FloatRegister($vtmp1$$reg);
+     const FloatRegister scratch2 = as_FloatRegister($vtmp2$$reg);
+     const PRegister     pgov     = as_PRegister($pgtmp$$reg);
+     __ sve_compress_short(dst_reg, src_reg, mask, scratch1, scratch2, pgov);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // Vector expand (ExpandV) for 32-bit and 64-bit elements.
+ //
+ // The sequence uses HISTCNT, so the rule carries its own predicate and is
+ // only selectable when UseSVE is 2 and the element type is not a subword
+ // type. This mirrors the assert in the encode body, which is compiled out
+ // of product builds.
+ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
+   predicate(UseSVE > 1 &&
+             !is_subword_type(n->bottom_type()->is_vect()->element_basic_type()));
+   match(Set dst (ExpandV src pg));
+   effect(TEMP_DEF dst);
+   ins_cost(4 * SVE_COST);
+   format %{ "sve_dup $dst, S/D, 0\n\t"
+             "sve_histcnt $dst, S/D, $pg, $dst, $dst\n\t"
+             "sve_sub $dst, S/D, 1\n\t"
+             "sve_tbl $dst, S/D, $src, $dst\t# vector expand (S/D)" %}
+   ins_encode %{
+     // In the example the lanes are written with the highest lane on the left
+     // and lane 0 on the right.
+     //
+     // Example input:   src   = 1 2 3 4 5 6 7 8
+     //                  pg    = 1 0 0 1 1 0 1 1
+     // Expected result: dst   = 4 0 0 5 6 0 7 8
+ 
+     // The basic idea is to use TBL, which can shuffle the elements of a
+     // vector freely. HISTCNT + SUB builds the second TBL input: for every
+     // active lane it holds the position of the src element to pick.
+ 
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+     // dst = 0 0 0 0 0 0 0 0
+     __ sve_dup(as_FloatRegister($dst$$reg), size, 0);
+     // Every active lane receives a running count of the active lanes up to
+     // and including itself; inactive lanes stay 0.
+     // dst = 5 0 0 4 3 0 2 1
+     __ sve_histcnt(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg),
+                    as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+     // Turn the counts into zero-based positions; inactive lanes become -1.
+     // dst = 4 -1 -1 3 2 -1 1 0
+     __ sve_sub(as_FloatRegister($dst$$reg), size, 1);
+     // The -1 lanes end up as 0 in the result, as the example shows.
+     // dst = 4 0 0 5 6 0 7 8
+     __ sve_tbl(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg),
+                as_FloatRegister($dst$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
    predicate(UseSVE > 0);
    match(Set pg (VectorMaskGen len));
    effect(KILL cr);
    ins_cost(SVE_COST);

@@ -3191,5 +3351,81 @@
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      __ sve_whilelo(as_PRegister($pg$$reg), size, zr, as_Register($len$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
+ 
+ dnl
+ dnl BITWISE_UNARY($1,        $2,      $3  )
+ dnl BITWISE_UNARY(insn_name, op_name, insn)
+ dnl
+ dnl Emits an unpredicated rule named insn_name that matches (op_name src) on
+ dnl nodes that are not predicated vectors and encodes it with the assembler
+ dnl routine insn under the ptrue predicate. The register variant comes from
+ dnl the element basic type of the node.
+ dnl Two op_name values are special-cased in the body:
+ dnl   CountTrailingZerosV - the cost is doubled and sve_rbit is emitted from
+ dnl                         src into dst first; insn then reads dst, not src.
+ dnl   ReverseBytesV       - for T_BYTE elements insn is not emitted; src is
+ dnl                         copied to dst with sve_orr when the two registers
+ dnl                         differ.
+ define(`BITWISE_UNARY', `
+ instruct $1(vReg dst, vReg src) %{
+   predicate(UseSVE > 0 &&
+             !n->as_Vector()->is_predicated_vector());
+   match(Set dst ($2 src));
+   ins_cost(ifelse($2, `CountTrailingZerosV', `2 * ', `')SVE_COST);
+   format %{ ifelse($2, `CountTrailingZerosV', `"sve_rbit $dst, $src\n\t"
+             "$3  $dst, $dst', `"$3 $dst, $src')\t# vector (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);dnl
+ ifelse($2, `CountTrailingZerosV', `
+     __ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));', `')dnl
+ ifelse($2, `ReverseBytesV', `
+     if (bt == T_BYTE) {
+       if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+         __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+       }
+     } else {
+       __ $3(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
+     }', `
+     __ $3(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($ifelse($2, `CountTrailingZerosV', dst, src)$$reg));')
+   %}
+   ins_pipe(pipe_slow);
+ %}')dnl
+ dnl
+ dnl BITWISE_UNARY_PREDICATE($1,        $2,      $3  )
+ dnl BITWISE_UNARY_PREDICATE(insn_name, op_name, insn)
+ dnl
+ dnl Emits a rule named <insn_name>_masked that matches (op_name dst_src pg).
+ dnl The single vector operand dst_src is both input and output, and pg is the
+ dnl governing predicate of every emitted instruction. The register variant
+ dnl comes from the element basic type of the node.
+ dnl Two op_name values are special-cased in the body:
+ dnl   CountTrailingZerosV - the cost is doubled and a predicated sve_rbit is
+ dnl                         applied in place before insn.
+ dnl   ReverseBytesV       - for T_BYTE elements no instruction is emitted.
+ define(`BITWISE_UNARY_PREDICATE', `
+ // The dst and src should use the same register to make sure the
+ // inactive lanes in dst save the same elements as src.
+ instruct $1_masked(vReg dst_src, pRegGov pg) %{
+   predicate(UseSVE > 0);
+   match(Set dst_src ($2 dst_src pg));
+   ins_cost(ifelse($2, `CountTrailingZerosV', `2 * ', `')SVE_COST);
+   format %{ ifelse($2, `CountTrailingZerosV', `"sve_rbit $dst_src, $pg, $dst_src\n\t"
+             "$3  $dst_src, $pg, $dst_src', `"$3 $dst_src, $pg, $dst_src')\t# vector (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);dnl
+ ifelse($2, `CountTrailingZerosV', `
+     __ sve_rbit(as_FloatRegister($dst_src$$reg), size,
+         as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));', `')dnl
+ ifelse($2, `ReverseBytesV', `
+     if (bt == T_BYTE) {
+       // do nothing
+     } else {
+       __ $3(as_FloatRegister($dst_src$$reg), size,
+           as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
+     }', `
+     __ $3(as_FloatRegister($dst_src$$reg), size,
+         as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));')
+   %}
+   ins_pipe(pipe_slow);
+ %}')dnl
+ dnl
+ dnl Every operation below is instantiated twice: once as an unpredicated rule
+ dnl and once as a _masked rule whose extra operand is the governing predicate.
+ // ------------------------------ CountLeadingZerosV ------------------------------
+ BITWISE_UNARY(vcountLeadingZeros, CountLeadingZerosV, sve_clz)
+ BITWISE_UNARY_PREDICATE(vcountLeadingZeros, CountLeadingZerosV, sve_clz)
+ 
+ dnl sve_clz is passed on purpose here: for this op_name both templates emit
+ dnl sve_rbit first and then apply the given instruction to the reversed bits.
+ // ------------------------------ CountTrailingZerosV -----------------------------
+ BITWISE_UNARY(vcountTrailingZeros, CountTrailingZerosV, sve_clz)
+ BITWISE_UNARY_PREDICATE(vcountTrailingZeros, CountTrailingZerosV, sve_clz)
+ 
+ // ---------------------------------- ReverseV ------------------------------------
+ BITWISE_UNARY(vreverse, ReverseV, sve_rbit)
+ BITWISE_UNARY_PREDICATE(vreverse, ReverseV, sve_rbit)
+ 
+ dnl For T_BYTE elements the templates do not emit sve_revb: the unpredicated
+ dnl one copies src to dst when the registers differ, the masked one emits nothing.
+ // -------------------------------- ReverseBytesV ---------------------------------
+ BITWISE_UNARY(vreverseBytes, ReverseBytesV, sve_revb)
+ BITWISE_UNARY_PREDICATE(vreverseBytes, ReverseBytesV, sve_revb)
+ 
< prev index next >