< prev index next >

src/hotspot/cpu/aarch64/aarch64_neon_ad.m4

Print this page
@@ -2443,32 +2443,55 @@
      }
    %}
    ins_pipe(vdop_fp128);
  %}
  dnl
- define(`VPOPCOUNT', `
- instruct vpopcount$1$2`'(vec$5 dst, vec$5 src) %{
-   predicate(UsePopCountInstruction && n->as_Vector()->length() == $1);
-   match(Set dst (PopCountVI src));
-   format %{
-     "cnt     $dst, $src\t# vector ($3B)\n\t"
-     "uaddlp  $dst, $dst\t# vector ($3B)\n\t"
-     "uaddlp  $dst, $dst\t# vector ($4H)"
-   %}
-   ins_encode %{
-     __ cnt(as_FloatRegister($dst$$reg), __ T$3B,
-            as_FloatRegister($src$$reg));
-     __ uaddlp(as_FloatRegister($dst$$reg), __ T$3B,
+ dnl VPOPCOUNT($1,        $2,      $3,      $4,         $5,   $6         )
+ dnl VPOPCOUNT(elem_type, dst_reg, src_reg, length_cmp, cost, arrangement)
+ dnl
+ dnl Emits the rule vpopcount<elem_type><dst_reg> matching PopCountV<elem_type>.
+ dnl   elem_type   - I or L; selects PopCountVI or PopCountVL and the widening path.
+ dnl   dst_reg     - D or X; operand class of dst (vecD or vecX).
+ dnl   src_reg     - D or X; operand class of src, also picks 8B or 16B for cnt.
+ dnl   length_cmp  - operator comparing the vector length_in_bytes against 16.
+ dnl   cost        - multiplier applied to INSN_COST.
+ dnl   arrangement - text shown in the format string only.
+ dnl The encoding counts bits per byte with cnt, then widens the per-byte
+ dnl counts with uaddlp: three steps for L, and for I one step when the
+ dnl element type is T_SHORT or T_INT plus a second step when it is T_INT.
+ dnl The LD variant additionally narrows the 2D result to 2S with xtn and is
+ dnl restricted by its predicate to a T_INT result element type; the LX
+ dnl variant is restricted to T_LONG.
+ define(`VPOPCOUNT', `dnl
+ ifelse($1$2, `LD', `
+ // If the PopCountVL is generated by auto-vectorization, the dst basic
+ // type is T_INT. And once we have unified the type definition for
+ // Vector API and auto-vectorization, this rule can be merged with
+ // "vpopcountLX" rule.', `')
+ instruct vpopcount$1$2`'(vec$2 dst, vec$3 src) %{
+   predicate(n->as_Vector()->length_in_bytes() $4 16`'ifelse($1$2, `LD', ` &&
+             n->bottom_type()->is_vect()->element_basic_type() == T_INT', $1$2, `LX', ` &&
+             n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
+   match(Set dst (PopCountV$1 src));
+   ins_cost($5 * INSN_COST);
+   format %{ "vpopcount$1  $dst, $src\t# vector ($6)" %}
+   ins_encode %{
+     assert(UsePopCountInstruction, "unsupported");dnl
+ ifelse($1, `I', `
+     BasicType bt = Matcher::vector_element_basic_type(this);', `')
+     __ cnt(as_FloatRegister($dst$$reg), __ T`'ifelse($3, D, 8, 16)B,
+            as_FloatRegister($src$$reg));dnl
+ ifelse($1, `L', `
+     __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
                as_FloatRegister($dst$$reg));
-     __ uaddlp(as_FloatRegister($dst$$reg), __ T$4H,
+     __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
                as_FloatRegister($dst$$reg));
+     __ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
+               as_FloatRegister($dst$$reg));', `
+     if (bt == T_SHORT || bt == T_INT) {
+       __ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 8, 16)B,
+                 as_FloatRegister($dst$$reg));
+     }
+     if (bt == T_INT) {
+       __ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 4, 8)H,
+                 as_FloatRegister($dst$$reg));
+     }')dnl
+ ifelse($1$2, `LD', `
+     __ xtn(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($dst$$reg), __ T2D);', `')
    %}
    ins_pipe(pipe_class_default);
  %}')dnl
- dnl       $1 $2 $3  $4 $5
- VPOPCOUNT(4, I, 16, 8, X)
- VPOPCOUNT(2, I, 8,  4, D)
+ dnl       $1 $2 $3 $4  $5 $6
+ VPOPCOUNT(I, D, D, <,  3, 8B/4H/2S)
+ VPOPCOUNT(I, X, X, ==, 3, 16B/8H/4S)
+ VPOPCOUNT(L, D, X, <,  5, 2S)
+ VPOPCOUNT(L, X, X, ==, 4, 2D)
  dnl
  dnl VMASK_TRUECOUNT($1,     $2 )
  dnl VMASK_TRUECOUNT(suffix, reg)
  define(`VMASK_TRUECOUNT', `
  instruct vmask_truecount$1(iRegINoSp dst, $2 src, $2 tmp) %{

@@ -2645,5 +2668,83 @@
      __ orr(as_Register($dst$$reg), as_Register($dst$$reg),
             rscratch1, Assembler::LSL, 8);
    %}
    ins_pipe(pipe_slow);
  %}
+ 
+ dnl
+ dnl CLTZ_D($1     )
+ dnl CLTZ_D(op_name)
+ dnl
+ dnl Emits the 8-byte (vecD) rule count<op_name>D matching Count<op_name>.
+ dnl op_name is LeadingZerosV or TrailingZerosV.
+ dnl   LeadingZerosV  - a single vector clz of src into dst; cost INSN_COST.
+ dnl   TrailingZerosV - src is first bit-reversed into dst via
+ dnl                    neon_reverse_bits, then clz runs on dst in place;
+ dnl                    cost 3 * INSN_COST.
+ dnl The clz arrangement is derived from the element size of the node type.
+ define(`CLTZ_D', `
+ instruct count$1D(vecD dst, vecD src) %{
+   predicate(n->as_Vector()->length_in_bytes() == 8);
+   match(Set dst (Count$1 src));
+   ins_cost(ifelse($1, `TrailingZerosV', `3 * ', `')INSN_COST);
+   format %{ "count$1 $dst, $src\t# vector (8B/4H/2S)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);dnl
+ ifelse($1, `TrailingZerosV', `
+     __ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);', `')
+     __ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}')dnl
+ dnl
+ dnl CLTZ_X($1     )
+ dnl CLTZ_X(op_name)
+ dnl
+ dnl Emits the 16-byte (vecX) rule count<op_name>X matching Count<op_name>.
+ dnl op_name is LeadingZerosV or TrailingZerosV; for TrailingZerosV the
+ dnl source is first bit-reversed into dst via neon_reverse_bits and the
+ dnl count then reads dst instead of src.
+ dnl Element types other than T_LONG use one vector clz. For T_LONG each of
+ dnl the two 64-bit lanes is moved to rscratch1 with umov, counted with the
+ dnl general-purpose clz, and moved back into the same lane of dst.
+ dnl NOTE(review): the per-lane path is presumably needed because the vector
+ dnl clz has no 2D arrangement - confirm against the Arm ISA reference.
+ dnl The rule has no TEMP operand or effect clause, and rscratch1 is
+ dnl overwritten on the T_LONG path.
+ define(`CLTZ_X', `
+ instruct count$1X(vecX dst, vecX src) %{
+   predicate(n->as_Vector()->length_in_bytes() == 16);
+   match(Set dst (Count$1 src));
+   ins_cost(ifelse($1, `TrailingZerosV', `3 * ', `')INSN_COST);
+   format %{ "count$1 $dst, $src\t# vector (16B/8H/4S/2D)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);dnl
+ ifelse($1, `TrailingZerosV', `
+     __ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);', `')
+     if (bt != T_LONG) {
+       __ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg));
+     } else {
+       __ umov(rscratch1, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg), __ D, 0);
+       __ clz(rscratch1, rscratch1);
+       __ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
+       __ umov(rscratch1, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg), __ D, 1);
+       __ clz(rscratch1, rscratch1);
+       __ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
+     }
+   %}
+   ins_pipe(pipe_slow);
+ %}')dnl
+ dnl
+ dnl Instantiates countLeadingZerosVD and countLeadingZerosVX.
+ //------------------------- CountLeadingZerosV -----------------------------
+ CLTZ_D(LeadingZerosV)
+ CLTZ_X(LeadingZerosV)
+ 
+ dnl Instantiates countTrailingZerosVD and countTrailingZerosVX.
+ //------------------------- CountTrailingZerosV ----------------------------
+ CLTZ_D(TrailingZerosV)
+ CLTZ_X(TrailingZerosV)
+ 
+ dnl
+ dnl REVERSE($1,        $2,      $3,   $4  )
+ dnl REVERSE(insn_name, op_name, type, insn)
+ dnl
+ dnl Emits the rule <insn_name> matching the ideal node <op_name>.
+ dnl   insn_name - name of the generated instruct.
+ dnl   op_name   - ReverseV or ReverseBytesV; ReverseV is given a cost of
+ dnl               2 * INSN_COST, anything else INSN_COST.
+ dnl   type      - D or X; selects vecD with an 8-byte length predicate or
+ dnl               vecX with a 16-byte one.
+ dnl   insn      - macro-assembler helper invoked as
+ dnl               insn(dst, src, element basic type, flag), where flag is
+ dnl               false for D and true for X.
+ dnl NOTE(review): the flag presumably selects the 128-bit form - confirm
+ dnl against the helper declarations.
+ define(`REVERSE', `
+ instruct $1(vec$3 dst, vec$3 src) %{
+   predicate(n->as_Vector()->length_in_bytes() == ifelse($3, D, 8, 16));
+   match(Set dst ($2 src));
+   ins_cost(ifelse($2, `ReverseV', `2 * ', `')INSN_COST);
+   format %{ "$2 $dst, $src\t# vector ($3)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     __ $4(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, ifelse($3, D, false, true));
+   %}
+   ins_pipe(pipe_slow);
+ %}')dnl
+ dnl
+ dnl Bit reversal within each element, for 8-byte and 16-byte vectors.
+ //------------------------------ ReverseV -----------------------------------
+ REVERSE(vreverseD, ReverseV, D, neon_reverse_bits)
+ REVERSE(vreverseX, ReverseV, X, neon_reverse_bits)
+ 
+ dnl Byte reversal within each element, for 8-byte and 16-byte vectors.
+ //---------------------------- ReverseBytesV --------------------------------
+ REVERSE(vreverseBytesD, ReverseBytesV, D, neon_reverse_bytes)
+ REVERSE(vreverseBytesX, ReverseBytesV, X, neon_reverse_bytes)
< prev index next >