1 // Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
   2 // Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 dnl Generate the warning
  26 // This file is automatically generated by running "m4 aarch64_neon_ad.m4". Do not edit ----
  27 dnl
  28 
  29 // AArch64 NEON Architecture Description File
  30 
  31 dnl
  // m4 helper: yields the text orL2I when its argument is I, and nothing for any other argument.
  32 define(`ORL2I', `ifelse($1,I,orL2I)')dnl
  33 dnl
  // m4 helper: expands to a diagnostic built from the m4 program name, file, line and the rejected
  // argument, then terminates the m4 run with exit status 1.
  34 define(`error', `__program__:__file__:__line__: Invalid argument ``$1''m4exit(`1')')dnl
  35 dnl
  // m4 helper: maps an integral element type letter to a SIMD size letter (B to B, S to H, I to S,
  // L to D). Any other argument is handed to the diagnostic helper, which stops the m4 run.
  36 define(`iTYPE2SIMD',
  37 `ifelse($1, `B', `B',
  38         $1, `S', `H',
  39         $1, `I', `S',
  40         $1, `L', `D',
  41         `error($1)')')dnl
  42 dnl
  // m4 helper: maps a floating point element type letter to a SIMD size letter (F to S, D to D).
  // Any other argument is handed to the diagnostic helper, which stops the m4 run.
  43 define(`fTYPE2SIMD',
  44 `ifelse($1, `F', `S',
  45         $1, `D', `D',
  46         `error($1)')')dnl
  47 dnl
  // m4 helper: maps an element type letter (B, S, I, L, F, D) to the name BYTE, SHORT, INT, LONG,
  // FLOAT or DOUBLE. The rule templates append the result to T_ when they compare against
  // element_basic_type. Any other argument is handed to the diagnostic helper.
  48 define(`TYPE2DATATYPE',
  49 `ifelse($1, `B', `BYTE',
  50         $1, `S', `SHORT',
  51         $1, `I', `INT',
  52         $1, `L', `LONG',
  53         $1, `F', `FLOAT',
  54         $1, `D', `DOUBLE',
  55         `error($1)')')dnl
  56 dnl
  57 // ====================VECTOR INSTRUCTIONS==================================
  58 
  59 // ------------------------------ Load/store/reinterpret -----------------------
  // NEON vector load and store rules for accesses of 2, 4, 8 and 16 bytes. Each rule is selected by
  // the memory_size of the LoadVector or StoreVector node and is encoded through the matching
  // aarch64_enc_ldrv or aarch64_enc_strv encoding class. Only the load rules are guarded by
  // UseSVE == 0; the store rules carry no extra guard.
  // Template arguments in order: mnemonic for the disassembly text, register width letter used in
  // the encoding class name, the word load or store, access size in bytes, vector operand class
  // letter, access size in bits, vector operand name, optional predicate prefix.
  60 define(`VLoadStore', `
  61 // ifelse(load, $3, Load, Store) Vector ($6 bits)
  62 instruct $3V$4`'(vec$5 $7, vmem$4 mem)
  63 %{
  64   predicate($8`n->as_'ifelse(load, $3, Load, Store)Vector()->memory_size() == $4);
  65   match(Set ifelse(load, $3, dst (LoadVector mem), mem (StoreVector mem src)));
  66   ins_cost(4 * INSN_COST);
  67   format %{ "$1   ifelse(load, $3, `$dst,$mem', `$mem,$src')\t# vector ($6 bits)" %}
  68   ins_encode( `aarch64_enc_'ifelse(load, $3, ldr, str)v$2($7, mem) );
  69   ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64));
  70 %}')dnl
  71 dnl        $1    $2 $3     $4  $5 $6   $7   $8
  72 VLoadStore(ldrh, H, load,  2,  D, 16,  dst, UseSVE == 0 && )
  73 VLoadStore(ldrs, S, load,  4,  D, 32,  dst, UseSVE == 0 && )
  74 VLoadStore(ldrd, D, load,  8,  D, 64,  dst, UseSVE == 0 && )
  75 VLoadStore(ldrq, Q, load, 16,  X, 128, dst, UseSVE == 0 && )
  76 VLoadStore(strh, H, store, 2,  D, 16,  src, )
  77 VLoadStore(strs, S, store, 4,  D, 32,  src, )
  78 VLoadStore(strd, D, store, 8,  D, 64,  src, )
  79 VLoadStore(strq, Q, store, 16, X, 128, src, )
  80 dnl
  // Reinterpret between vector types of equal size (8 or 16 bytes on both sides). The operand dst
  // is matched as both the input and the result, the encoding is empty and the cost is 0.
  // Template arguments in order: vector operand class letter, size in bytes.
  81 define(`REINTERPRET', `
  82 instruct reinterpret$1`'(vec$1 dst)
  83 %{
  84   predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 &&
  85             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2);
  86   match(Set dst (VectorReinterpret dst));
  87   ins_cost(0);
  88   format %{ " # reinterpret $dst" %}
  89   ins_encode %{
  90     // empty
  91   %}
  92   ins_pipe(pipe_class_empty);
  93 %}')dnl
  94 dnl         $1 $2
  95 REINTERPRET(D, 8)
  96 REINTERPRET(X, 16)
  97 dnl
  // Reinterpret between an 8 byte and a 16 byte vector, in either direction. Both rules copy src
  // to dst with an orr of src with itself using the T8B arrangement; the note inside the encoding
  // records that the upper 64 bits of dst have to be zero afterwards.
  // Template arguments in order: source class letter, result class letter, result size in bytes,
  // source size in bytes.
  98 define(`REINTERPRET_DX', `
  99 instruct reinterpret$1`'2$2`'(vec$2 dst, vec$1 src)
 100 %{
 101   predicate(n->bottom_type()->is_vect()->length_in_bytes() == $3 &&
 102             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $4);
 103   match(Set dst (VectorReinterpret src));
 104   ins_cost(INSN_COST);
 105   format %{ " # reinterpret $dst,$src\t# $1 to $2" %}
 106   ins_encode %{
 107     // The higher 64-bits of the "dst" register must be cleared to zero.
 108     __ orr(as_FloatRegister($dst$$reg), __ T8B,
 109            as_FloatRegister($src$$reg),
 110            as_FloatRegister($src$$reg));
 111   %}
 112   ins_pipe(vlogical64);
 113 %}')dnl
 114 dnl            $1 $2 $3  $4
 115 REINTERPRET_DX(D, X, 16, 8)
 116 REINTERPRET_DX(X, D, 8,  16)
 117 dnl
 // Reinterpret between a 4 byte vector and an 8 or 16 byte vector, in either direction. Every rule
 // emits one dup with size S from src to dst; the note inside the encoding records that the
 // higher bits of dst have to be zero afterwards.
 // Template arguments in order: source name letter, result name letter, result operand class
 // letter, source operand class letter, result size in bytes, source size in bytes.
 118 define(`REINTERPRET_SX', `
 119 instruct reinterpret$1`'2$2`'(vec$3 dst, vec$4 src)
 120 %{
 121   predicate(n->bottom_type()->is_vect()->length_in_bytes() == $5 &&
 122             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $6);
 123   match(Set dst (VectorReinterpret src));
 124   ins_cost(INSN_COST);
 125   format %{ " # reinterpret $dst,$src\t# $1 to $2" %}
 126   ins_encode %{
 127     // The higher bits of the "dst" register must be cleared to zero.
 128     __ dup(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
 129   %}
 130   ins_pipe(pipe_slow);
 131 %}')dnl
 132 dnl            $1 $2 $3 $4 $5  $6
 133 REINTERPRET_SX(S, X, X, D, 16, 4)
 134 REINTERPRET_SX(X, S, D, X, 4,  16)
 135 REINTERPRET_SX(S, D, D, D, 8,  4)
 136 REINTERPRET_SX(D, S, D, D, 4,  8)
 137 dnl
 138 
 139 // ------------------------------ Vector cast -------------------------------
 140 dnl
 // Casts between integral vector element types that need a single instruction: sxtl for the
 // widening rules and xtn for the narrowing ones. A rule is chosen by the source element type of
 // the VectorCast node, the lane count and the element type of the result vector.
 // Template arguments in order: lane count, source type letter, result type letter, result
 // operand class letter, source operand class letter, mnemonic, source arrangement, result
 // arrangement.
 141 define(`VECTOR_CAST_I2I', `
 142 instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
 143 %{
 144   predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 145   match(Set dst (VectorCast$2`'2X src));
 146   format %{ "$6  $dst, T$8, $src, T$7\t# convert $1$2 to $1$3 vector" %}
 147   ins_encode %{
 148     __ $6(as_FloatRegister($dst$$reg), __ T$8, as_FloatRegister($src$$reg), __ T$7);
 149   %}
 150   ins_pipe(pipe_class_default);
 151 %}')dnl
 152 dnl             $1 $2 $3 $4 $5 $6    $7  $8
 153 VECTOR_CAST_I2I(8, B, S, X, D, sxtl, 8B, 8H)
 154 VECTOR_CAST_I2I(4, B, S, D, D, sxtl, 8B, 8H)
 155 VECTOR_CAST_I2I(8, S, B, D, X, xtn,  8H, 8B)
 156 VECTOR_CAST_I2I(4, S, B, D, D, xtn,  8H, 8B)
 157 VECTOR_CAST_I2I(4, S, I, X, D, sxtl, 4H, 4S)
 158 VECTOR_CAST_I2I(4, I, S, D, X, xtn,  4S, 4H)
 159 VECTOR_CAST_I2I(2, I, L, X, D, sxtl, 2S, 2D)
 160 VECTOR_CAST_I2I(2, L, I, D, X, xtn,  2D, 2S)
 161 dnl
 // Casts between 4I and 4B vectors, which take two steps of the same instruction (two xtn or two
 // sxtl) passing through the H arrangements. The first step writes dst from src and the second
 // step rewrites dst from dst.
 // Template arguments in order: lane count, source type letter, result type letter, result
 // operand class letter, source operand class letter, mnemonic, then the source and result
 // arrangements of the first step followed by those of the second step.
 162 define(`VECTOR_CAST_I2I_L', `
 163 instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
 164 %{
 165   predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 166   match(Set dst (VectorCast$2`'2X src));
 167   format %{ "$6  $dst, T$8, $src, T$7\n\t"
 168             "$6  $dst, T$10, $dst, T$9\t# convert $1$2 to $1$3 vector"
 169   %}
 170   ins_encode %{
 171     __ $6(as_FloatRegister($dst$$reg), __ T$8, as_FloatRegister($src$$reg), __ T$7);
 172     __ $6(as_FloatRegister($dst$$reg), __ T$10, as_FloatRegister($dst$$reg), __ T$9);
 173   %}
 174   ins_pipe(pipe_class_default);
 175 %}')dnl
 176 dnl               $1 $2 $3 $4 $5 $6    $7  $8  $9  $10
 177 VECTOR_CAST_I2I_L(4, I, B, D, X, xtn,  4S, 4H, 8H, 8B)
 178 VECTOR_CAST_I2I_L(4, B, I, X, D, sxtl, 8B, 8H, 4H, 4S)
 179 dnl
 180 
 // Cast 2L to 2F one lane at a time. Each D lane of src is moved to rscratch1 by umov and
 // converted by the scalar scvtfs: the lane 0 result goes to dst and the lane 1 result to tmp.
 // A final ins of size S with lane arguments 1 and 0 then merges tmp into dst.
 // NOTE(review): the ins lane arguments read as result lane first, source lane second - confirm
 // against the assembler declaration.
 // dst is TEMP_DEF and tmp is TEMP, as declared in the effect list.
 181 instruct vcvt2Lto2F(vecD dst, vecX src, vRegF tmp)
 182 %{
 183   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 184   match(Set dst (VectorCastL2X src));
 185   effect(TEMP_DEF dst, TEMP tmp);
 186   format %{ "umov   rscratch1, $src, D, 0\n\t"
 187             "scvtfs $dst, rscratch1\n\t"
 188             "umov   rscratch1, $src, D, 1\n\t"
 189             "scvtfs $tmp, rscratch1\n\t"
 190             "ins    $dst, S, $tmp, 1, 0\t# convert 2L to 2F vector"
 191   %}
 192   ins_encode %{
 193     __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 0);
 194     __ scvtfs(as_FloatRegister($dst$$reg), rscratch1);
 195     __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 1);
 196     __ scvtfs(as_FloatRegister($tmp$$reg), rscratch1);
 197     __ ins(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($tmp$$reg), 1, 0);
 198   %}
 199   ins_pipe(pipe_slow);
 200 %}
 201 dnl
 // Casts from an integral vector to a floating point vector with the same arrangement (2I to 2F,
 // 4I to 4F, 2L to 2D). Each rule is a single scvtfv and uses the same operand class for dst and
 // src.
 // Template arguments in order: lane count, source type letter, result type letter, operand
 // class letter, arrangement.
 202 define(`VECTOR_CAST_I2F', `
 203 instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$4 src)
 204 %{
 205   predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 206   match(Set dst (VectorCast$2`'2X src));
 207   format %{ "scvtfv  T$5, $dst, $src\t# convert $1$2 to $1$3 vector" %}
 208   ins_encode %{
 209     __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
 210   %}
 211   ins_pipe(pipe_class_default);
 212 %}')dnl
 213 dnl             $1 $2 $3 $4 $5
 214 VECTOR_CAST_I2F(2, I, F, D, 2S)
 215 VECTOR_CAST_I2F(4, I, F, X, 4S)
 216 VECTOR_CAST_I2F(2, L, D, X, 2D)
 217 dnl
 // Casts 4S to 4F and 2I to 2D. A sxtl first widens src into dst, then scvtfv converts dst in
 // place using the widened arrangement.
 // Template arguments in order: lane count, source type letter, result type letter, result
 // operand class letter, source operand class letter, source arrangement, widened arrangement.
 218 define(`VECTOR_CAST_I2F_L', `
 219 instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
 220 %{
 221   predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 222   match(Set dst (VectorCast$2`'2X src));
 223   format %{ "sxtl    $dst, T$7, $src, T$6\n\t"
 224             "scvtfv  T$7, $dst, $dst\t# convert $1$2 to $1$3 vector"
 225   %}
 226   ins_encode %{
 227     __ sxtl(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
 228     __ scvtfv(__ T$7, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
 229   %}
 230   ins_pipe(pipe_slow);
 231 %}')dnl
 232 dnl               $1 $2 $3 $4 $5 $6  $7
 233 VECTOR_CAST_I2F_L(4, S, F, X, D, 4H, 4S)
 234 VECTOR_CAST_I2F_L(2, I, D, X, D, 2S, 2D)
 235 dnl
 236 
 // Cast 4B to 4F. Two sxtl steps widen src into dst (8B to 8H, then 4H to 4S) and a scvtfv in the
 // 4S arrangement converts dst in place.
 237 instruct vcvt4Bto4F(vecX dst, vecD src)
 238 %{
 239   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 240   match(Set dst (VectorCastB2X src));
 241   format %{ "sxtl    $dst, T8H, $src, T8B\n\t"
 242             "sxtl    $dst, T4S, $dst, T4H\n\t"
 243             "scvtfv  T4S, $dst, $dst\t# convert 4B to 4F vector"
 244   %}
 245   ins_encode %{
 246     __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
 247     __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
 248     __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
 249   %}
 250   ins_pipe(pipe_slow);
 251 %}
 252 
 // Cast 2F to 2L. fcvtl converts src from the 2S to the 2D arrangement into dst, then fcvtzs in
 // the 2D arrangement converts dst in place.
 253 instruct vcvt2Fto2L(vecX dst, vecD src)
 254 %{
 255   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 256   match(Set dst (VectorCastF2X src));
 257   format %{ "fcvtl   $dst, T2D, $src, T2S\n\t"
 258             "fcvtzs  $dst, T2D, $dst\t# convert 2F to 2L vector"
 259   %}
 260   ins_encode %{
 261     __ fcvtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
 262     __ fcvtzs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
 263   %}
 264   ins_pipe(pipe_slow);
 265 %}
 266 dnl
 // Casts from a floating point vector to an integral vector with the same arrangement (2F to 2I,
 // 4F to 4I, 2D to 2L). Each rule is a single fcvtzs and uses the same operand class for dst and
 // src.
 // Template arguments in order: lane count, source type letter, result type letter, operand
 // class letter, arrangement.
 267 define(`VECTOR_CAST_F2I', `
 268 instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$4 src)
 269 %{
 270   predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 271   match(Set dst (VectorCast$2`'2X src));
 272   format %{ "fcvtzs  $dst, T$5, $src\t# convert $1$2 to $1$3 vector" %}
 273   ins_encode %{
 274     __ fcvtzs(as_FloatRegister($dst$$reg), __ T$5, as_FloatRegister($src$$reg));
 275   %}
 276   ins_pipe(pipe_class_default);
 277 %}')dnl
 278 dnl             $1 $2 $3 $4 $5
 279 VECTOR_CAST_F2I(2, F, I, D, 2S)
 280 VECTOR_CAST_F2I(4, F, I, X, 4S)
 281 VECTOR_CAST_F2I(2, D, L, X, 2D)
 282 
 // Cast 4F to 4S. fcvtzs in the 4S arrangement writes dst from src, then one xtn from 4S to 4H
 // rewrites dst from dst.
 283 instruct vcvt4Fto4S(vecD dst, vecX src)
 284 %{
 285   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 286   match(Set dst (VectorCastF2X src));
 287   format %{ "fcvtzs  $dst, T4S, $src\n\t"
 288             "xtn     $dst, T4H, $dst, T4S\t# convert 4F to 4S vector"
 289   %}
 290   ins_encode %{
 291     __ fcvtzs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
 292     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
 293   %}
 294   ins_pipe(pipe_slow);
 295 %}
 296 
 // Cast 2D to 2I using the scalar fcvtzdw once per lane instead of a vector conversion; the note
 // inside the encoding gives the reason. The leading ins of size D with lane arguments 0 and 1
 // copies data from src into dst so that the second fcvtzdw can read its input from dst. The two
 // scalar results in rscratch1 and rscratch2 are then written to dst by fmovs and by a mov into
 // S lane 1. dst is declared TEMP_DEF.
 // NOTE(review): the ins lane arguments read as result lane first, source lane second - confirm
 // against the assembler declaration.
 297 instruct vcvt2Dto2I(vecD dst, vecX src)
 298 %{
 299   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 300   match(Set dst (VectorCastD2X src));
 301   effect(TEMP_DEF dst);
 302   format %{ "ins      $dst, D, $src, 0, 1\n\t"
 303             "fcvtzdw  rscratch1, $src\n\t"
 304             "fcvtzdw  rscratch2, $dst\n\t"
 305             "fmovs    $dst, rscratch1\n\t"
 306             "mov      $dst, S, 1, rscratch2\t#convert 2D to 2I vector"
 307   %}
 308   ins_encode %{
 309     __ ins(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), 0, 1);
 310     // We can't use fcvtzs(vector, integer) instruction here because we need
 311     // saturation arithmetic. See JDK-8276151.
 312     __ fcvtzdw(rscratch1, as_FloatRegister($src$$reg));
 313     __ fcvtzdw(rscratch2, as_FloatRegister($dst$$reg));
 314     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
 315     __ mov(as_FloatRegister($dst$$reg), __ S, 1, rscratch2);
 316   %}
 317   ins_pipe(pipe_slow);
 318 %}
 319 
 // Cast 4F to 4B. fcvtzs in the 4S arrangement writes dst from src, then two xtn steps (4S to 4H,
 // then 8H to 8B) rewrite dst from dst.
 320 instruct vcvt4Fto4B(vecD dst, vecX src)
 321 %{
 322   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 323   match(Set dst (VectorCastF2X src));
 324   format %{ "fcvtzs  $dst, T4S, $src\n\t"
 325             "xtn     $dst, T4H, $dst, T4S\n\t"
 326             "xtn     $dst, T8B, $dst, T8H\t# convert 4F to 4B vector"
 327   %}
 328   ins_encode %{
 329     __ fcvtzs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
 330     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
 331     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
 332   %}
 333   ins_pipe(pipe_slow);
 334 %}
 335 dnl
 // Casts between 2F and 2D vectors with one instruction each: fcvtl for F to D and fcvtn for D
 // to F. The lane count 2 is fixed in the template.
 // Template arguments in order: source type letter, result type letter, result operand class
 // letter, source operand class letter, mnemonic, source arrangement, result arrangement.
 336 define(`VECTOR_CAST_F2F', `
 337 instruct vcvt2$1to2$2`'(vec$3 dst, vec$4 src)
 338 %{
 339   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
 340   match(Set dst (VectorCast$1`'2X src));
 341   format %{ "$5  $dst, T$7, $src, T$6\t# convert 2$1 to 2$2 vector" %}
 342   ins_encode %{
 343     __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
 344   %}
 345   ins_pipe(pipe_class_default);
 346 %}')dnl
 347 dnl             $1 $2 $3 $4 $5     $6  $7
 348 VECTOR_CAST_F2F(F, D, X, D, fcvtl, 2S, 2D)
 349 VECTOR_CAST_F2F(D, F, D, X, fcvtn, 2D, 2S)
 350 dnl
 351 
 // RoundVF and RoundVD rules, guarded by UseSVE == 0 and selected by lane count and result
 // element type. The code generation is delegated to vector_round_neon, which receives dst, src,
 // three temporary vector registers and the arrangement. dst is TEMP_DEF and the temporaries are
 // TEMP, as declared in the effect list.
 // Template arguments in order: node suffix letter, source description, result type letter,
 // arrangement, lane count, result basic type name, vector operand class.
 352 define(`VECTOR_JAVA_FROUND', `
 353 instruct vround$7$2to$5$3($7 dst, $7 src, $7 tmp1, $7 tmp2, $7 tmp3)
 354 %{
 355   predicate(UseSVE == 0 &&
 356             n->as_Vector()->length() == $5 && n->bottom_type()->is_vect()->element_basic_type() == T_$6);
 357   match(Set dst (RoundV$1 src));
 358   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
 359   format %{ "vround  $dst, $4, $src\t# round $7 $2 to $5$3 vector" %}
 360   ins_encode %{
 361     __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
 362                          as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
 363                          as_FloatRegister($tmp3$$reg), __ $4);
 364   %}
 365   ins_pipe(pipe_class_default);
 366 %}')dnl           $1  $2  $3   $4 $5    $6    $7
 367 VECTOR_JAVA_FROUND(F, 2F,  I, T2S, 2,  INT, vecD)
 368 VECTOR_JAVA_FROUND(F, 4F,  I, T4S, 4,  INT, vecX)
 369 VECTOR_JAVA_FROUND(D, 2D,  L, T2D, 2, LONG, vecX)
 370 
 371 // ------------------------------ Reduction -------------------------------
 372 dnl
 // Add reduction for byte and short vectors (8B, 16B, 4S, 8S), matched as AddReductionVI and
 // selected by the element type of the vector input. addv sums the lanes of vsrc into tmp, smov
 // moves lane 0 of tmp to dst, addw adds the scalar input isrc, and a final sxtb or sxth is
 // applied to dst.
 // Template arguments in order: lane count, element type letter, vector operand class letter,
 // suffix letter of the final sxt instruction.
 373 define(`REDUCE_ADD_BORS', `
 374 instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 tmp)
 375 %{
 376   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
 377   match(Set dst (AddReductionVI isrc vsrc));
 378   ins_cost(INSN_COST);
 379   effect(TEMP_DEF dst, TEMP tmp);
 380   format %{ "addv  $tmp, T$1`'iTYPE2SIMD($2), $vsrc\n\t"
 381             "smov  $dst, $tmp, iTYPE2SIMD($2), 0\n\t"
 382             "addw  $dst, $dst, $isrc\n\t"
 383             "sxt$4  $dst, $dst\t# add reduction$1$2"
 384   %}
 385   ins_encode %{
 386     __ addv(as_FloatRegister($tmp$$reg), __ T$1`'iTYPE2SIMD($2), as_FloatRegister($vsrc$$reg));
 387     __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($2), 0);
 388     __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
 389     __ sxt$4($dst$$Register, $dst$$Register);
 390   %}
 391   ins_pipe(pipe_slow);
 392 %}')dnl
 393 dnl             $1  $2 $3 $4
 394 REDUCE_ADD_BORS(8,  B, D, b)
 395 REDUCE_ADD_BORS(16, B, X, b)
 396 REDUCE_ADD_BORS(4,  S, D, h)
 397 REDUCE_ADD_BORS(8,  S, X, h)
 398 dnl
 399 
 // Add reduction for 2L, matched as AddReductionVL. addpd reduces vsrc into tmp, umov moves D
 // lane 0 of tmp to dst, and add combines it with the scalar input isrc.
 400 instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp)
 401 %{
 402   match(Set dst (AddReductionVL isrc vsrc));
 403   ins_cost(INSN_COST);
 404   effect(TEMP_DEF dst, TEMP tmp);
 405   format %{ "addpd $tmp, $vsrc\n\t"
 406             "umov  $dst, $tmp, D, 0\n\t"
 407             "add   $dst, $isrc, $dst\t# add reduction2L"
 408   %}
 409   ins_encode %{
 410     __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg));
 411     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
 412     __ add($dst$$Register, $isrc$$Register, $dst$$Register);
 413   %}
 414   ins_pipe(pipe_slow);
 415 %}
 416 
 // Multiply reduction for 8B, matched as MulReductionVI on a byte vector. Two rounds of ins
 // followed by mulv in the 8B arrangement combine vsrc with moved copies of itself: first with
 // size S into vtmp1, then with size H into vtmp2. Bytes 0 and 1 of vtmp2 are then extracted
 // with umov into itmp and multiplied in one after the other, the first one with the scalar input
 // isrc, with a sxtb applied to dst after each mulw.
 // NOTE(review): the ins lane arguments 0 and 1 read as result lane first, source lane second,
 // which would fold the upper part of the vector onto the lower part - confirm against the
 // assembler declaration.
 417 instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp)
 418 %{
 419   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 420   match(Set dst (MulReductionVI isrc vsrc));
 421   ins_cost(INSN_COST);
 422   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
 423   format %{ "ins   $vtmp1, S, $vsrc, 0, 1\n\t"
 424             "mulv  $vtmp1, T8B, $vtmp1, $vsrc\n\t"
 425             "ins   $vtmp2, H, $vtmp1, 0, 1\n\t"
 426             "mulv  $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
 427             "umov  $itmp, $vtmp2, B, 0\n\t"
 428             "mulw  $dst, $itmp, $isrc\n\t"
 429             "sxtb  $dst, $dst\n\t"
 430             "umov  $itmp, $vtmp2, B, 1\n\t"
 431             "mulw  $dst, $itmp, $dst\n\t"
 432             "sxtb  $dst, $dst\t# mul reduction8B"
 433   %}
 434   ins_encode %{
 435     __ ins(as_FloatRegister($vtmp1$$reg), __ S,
 436            as_FloatRegister($vsrc$$reg), 0, 1);
 437     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
 438             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
 439     __ ins(as_FloatRegister($vtmp2$$reg), __ H,
 440            as_FloatRegister($vtmp1$$reg), 0, 1);
 441     __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
 442             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
 443     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
 444     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
 445     __ sxtb($dst$$Register, $dst$$Register);
 446     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
 447     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
 448     __ sxtb($dst$$Register, $dst$$Register);
 449   %}
 450   ins_pipe(pipe_slow);
 451 %}
 452 
 // Multiply reduction for 16B, matched as MulReductionVI on a byte vector. Three rounds of ins
 // followed by mulv in the 8B arrangement combine the vector with moved copies of itself, using
 // sizes D, S and H in turn; the registers alternate so that the last product ends up in vtmp2.
 // Bytes 0 and 1 of vtmp2 are then extracted with umov into itmp and multiplied in one after the
 // other, the first one with the scalar input isrc, with a sxtb applied to dst after each mulw.
 // NOTE(review): this rule and the 8B rule declare the same predicate and match; the vector
 // operand class (vecX here) appears to be what tells them apart - confirm.
 453 instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
 454 %{
 455   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 456   match(Set dst (MulReductionVI isrc vsrc));
 457   ins_cost(INSN_COST);
 458   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
 459   format %{ "ins   $vtmp1, D, $vsrc, 0, 1\n\t"
 460             "mulv  $vtmp1, T8B, $vtmp1, $vsrc\n\t"
 461             "ins   $vtmp2, S, $vtmp1, 0, 1\n\t"
 462             "mulv  $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
 463             "ins   $vtmp2, H, $vtmp1, 0, 1\n\t"
 464             "mulv  $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
 465             "umov  $itmp, $vtmp2, B, 0\n\t"
 466             "mulw  $dst, $itmp, $isrc\n\t"
 467             "sxtb  $dst, $dst\n\t"
 468             "umov  $itmp, $vtmp2, B, 1\n\t"
 469             "mulw  $dst, $itmp, $dst\n\t"
 470             "sxtb  $dst, $dst\t# mul reduction16B"
 471   %}
 472   ins_encode %{
 473     __ ins(as_FloatRegister($vtmp1$$reg), __ D,
 474            as_FloatRegister($vsrc$$reg), 0, 1);
 475     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
 476             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
 477     __ ins(as_FloatRegister($vtmp2$$reg), __ S,
 478            as_FloatRegister($vtmp1$$reg), 0, 1);
 479     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
 480             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
 481     __ ins(as_FloatRegister($vtmp2$$reg), __ H,
 482            as_FloatRegister($vtmp1$$reg), 0, 1);
 483     __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
 484             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
 485     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
 486     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
 487     __ sxtb($dst$$Register, $dst$$Register);
 488     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
 489     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
 490     __ sxtb($dst$$Register, $dst$$Register);
 491   %}
 492   ins_pipe(pipe_slow);
 493 %}
 494 
 // Multiply reduction for 4S, matched as MulReductionVI on a short vector. One ins of size S
 // followed by mulv in the 4H arrangement combines vsrc with a moved copy of itself in vtmp.
 // H lanes 0 and 1 of vtmp are then extracted with umov into itmp and multiplied in one after
 // the other, the first one with the scalar input isrc, with a sxth applied to dst after each
 // mulw.
 495 instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
 496 %{
 497   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 498   match(Set dst (MulReductionVI isrc vsrc));
 499   ins_cost(INSN_COST);
 500   effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);
 501   format %{ "ins   $vtmp, S, $vsrc, 0, 1\n\t"
 502             "mulv  $vtmp, T4H, $vtmp, $vsrc\n\t"
 503             "umov  $itmp, $vtmp, H, 0\n\t"
 504             "mulw  $dst, $itmp, $isrc\n\t"
 505             "sxth  $dst, $dst\n\t"
 506             "umov  $itmp, $vtmp, H, 1\n\t"
 507             "mulw  $dst, $itmp, $dst\n\t"
 508             "sxth  $dst, $dst\t# mul reduction4S"
 509   %}
 510   ins_encode %{
 511     __ ins(as_FloatRegister($vtmp$$reg), __ S,
 512            as_FloatRegister($vsrc$$reg), 0, 1);
 513     __ mulv(as_FloatRegister($vtmp$$reg), __ T4H,
 514             as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
 515     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0);
 516     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
 517     __ sxth($dst$$Register, $dst$$Register);
 518     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1);
 519     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
 520     __ sxth($dst$$Register, $dst$$Register);
 521   %}
 522   ins_pipe(pipe_slow);
 523 %}
 524 
 // Multiply reduction for 8S, matched as MulReductionVI on a short vector. Two rounds of ins
 // followed by mulv in the 4H arrangement combine the vector with moved copies of itself: first
 // with size D into vtmp1, then with size S into vtmp2. H lanes 0 and 1 of vtmp2 are then
 // extracted with umov into itmp and multiplied in one after the other, the first one with the
 // scalar input isrc, with a sxth applied to dst after each mulw.
 525 instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
 526 %{
 527   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 528   match(Set dst (MulReductionVI isrc vsrc));
 529   ins_cost(INSN_COST);
 530   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
 531   format %{ "ins   $vtmp1, D, $vsrc, 0, 1\n\t"
 532             "mulv  $vtmp1, T4H, $vtmp1, $vsrc\n\t"
 533             "ins   $vtmp2, S, $vtmp1, 0, 1\n\t"
 534             "mulv  $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
 535             "umov  $itmp, $vtmp2, H, 0\n\t"
 536             "mulw  $dst, $itmp, $isrc\n\t"
 537             "sxth  $dst, $dst\n\t"
 538             "umov  $itmp, $vtmp2, H, 1\n\t"
 539             "mulw  $dst, $itmp, $dst\n\t"
 540             "sxth  $dst, $dst\t# mul reduction8S"
 541   %}
 542   ins_encode %{
 543     __ ins(as_FloatRegister($vtmp1$$reg), __ D,
 544            as_FloatRegister($vsrc$$reg), 0, 1);
 545     __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H,
 546             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
 547     __ ins(as_FloatRegister($vtmp2$$reg), __ S,
 548            as_FloatRegister($vtmp1$$reg), 0, 1);
 549     __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H,
 550             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
 551     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0);
 552     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
 553     __ sxth($dst$$Register, $dst$$Register);
 554     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1);
 555     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
 556     __ sxth($dst$$Register, $dst$$Register);
 557   %}
 558   ins_pipe(pipe_slow);
 559 %}
 560 
 // Multiply reduction for 2L, matched as MulReductionVL and done entirely with scalar code. Each
 // D lane of vsrc is moved to tmp with umov and multiplied in with mul: lane 0 with the scalar
 // input isrc, lane 1 with the running product in dst.
 561 instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
 562 %{
 563   match(Set dst (MulReductionVL isrc vsrc));
 564   ins_cost(INSN_COST);
 565   effect(TEMP_DEF dst, TEMP tmp);
 566   format %{ "umov  $tmp, $vsrc, D, 0\n\t"
 567             "mul   $dst, $isrc, $tmp\n\t"
 568             "umov  $tmp, $vsrc, D, 1\n\t"
 569             "mul   $dst, $dst, $tmp\t# mul reduction2L"
 570   %}
 571   ins_encode %{
 572     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
 573     __ mul($dst$$Register, $isrc$$Register, $tmp$$Register);
 574     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
 575     __ mul($dst$$Register, $dst$$Register, $tmp$$Register);
 576   %}
 577   ins_pipe(pipe_slow);
 578 %}
 579 dnl
 // Max and min reduction for the 8B, 16B, 4S, 8S and 4I vectors, selected by the element type of
 // the vector input. smaxv or sminv reduces vsrc into tmp, lane 0 of tmp is moved to dst (smov
 // for the byte and short rules, umov for the 4I rules), then cmpw and cselw with GT or LT
 // choose between dst and the scalar input isrc. The flags register cr is declared KILL.
 // Template arguments in order: max or min, lane count, element type letter, vector operand
 // class letter, node name prefix, prefix letter of the mov instruction, condition code.
 580 define(`REDUCE_MAX_MIN_INT', `
 581 instruct reduce_$1$2$3`'(iRegINoSp dst, iRegIorL2I isrc, vec$4 vsrc, vec$4 tmp, rFlagsReg cr)
 582 %{
 583   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 584   match(Set dst ($5ReductionV isrc vsrc));
 585   ins_cost(INSN_COST);
 586   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 587   format %{ "s$1v $tmp, T$2`'iTYPE2SIMD($3), $vsrc\n\t"
 588             "$6mov  $dst, $tmp, iTYPE2SIMD($3), 0\n\t"
 589             "cmpw  $dst, $isrc\n\t"
 590             "cselw $dst, $dst, $isrc $7\t# $1 reduction$2$3"
 591   %}
 592   ins_encode %{
 593     __ s$1v(as_FloatRegister($tmp$$reg), __ T$2`'iTYPE2SIMD($3), as_FloatRegister($vsrc$$reg));
 594     __ $6mov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($3), 0);
 595     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 596     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$7);
 597   %}
 598   ins_pipe(pipe_slow);
 599 %}')dnl
 600 dnl                $1   $2  $3 $4 $5   $6 $7
 601 REDUCE_MAX_MIN_INT(max, 8,  B, D, Max, s, GT)
 602 REDUCE_MAX_MIN_INT(max, 16, B, X, Max, s, GT)
 603 REDUCE_MAX_MIN_INT(max, 4,  S, D, Max, s, GT)
 604 REDUCE_MAX_MIN_INT(max, 8,  S, X, Max, s, GT)
 605 REDUCE_MAX_MIN_INT(max, 4,  I, X, Max, u, GT)
 606 REDUCE_MAX_MIN_INT(min, 8,  B, D, Min, s, LT)
 607 REDUCE_MAX_MIN_INT(min, 16, B, X, Min, s, LT)
 608 REDUCE_MAX_MIN_INT(min, 4,  S, D, Min, s, LT)
 609 REDUCE_MAX_MIN_INT(min, 8,  S, X, Min, s, LT)
 610 REDUCE_MAX_MIN_INT(min, 4,  I, X, Min, u, LT)
 611 dnl
 // Max and min reduction for 2I. smaxp or sminp of vsrc with itself in the 2S arrangement writes
 // tmp, umov moves S lane 0 of tmp to dst, then cmpw and cselw with GT or LT choose between dst
 // and the scalar input isrc. The flags register cr is declared KILL.
 // Template arguments in order: max or min, node name prefix, condition code.
 612 define(`REDUCE_MAX_MIN_2I', `
 613 instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
 614 %{
 615   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
 616   match(Set dst ($2ReductionV isrc vsrc));
 617   ins_cost(INSN_COST);
 618   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 619   format %{ "s$1p $tmp, T2S, $vsrc, $vsrc\n\t"
 620             "umov  $dst, $tmp, S, 0\n\t"
 621             "cmpw  $dst, $isrc\n\t"
 622             "cselw $dst, $dst, $isrc $3\t# $1 reduction2I"
 623   %}
 624   ins_encode %{
 625     __ s$1p(as_FloatRegister($tmp$$reg), __ T2S, as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));
 626     __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
 627     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 628     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$3);
 629   %}
 630   ins_pipe(pipe_slow);
 631 %}')dnl
 632 dnl               $1   $2   $3
 633 REDUCE_MAX_MIN_2I(max, Max, GT)
 634 REDUCE_MAX_MIN_2I(min, Min, LT)
 635 dnl
 // Max and min reduction for 2L, done with scalar instructions only. Each D lane of vsrc is moved
 // to the general register tmp with umov and folded in with cmp and csel (GT for max, LT for
 // min): lane 0 against the scalar input isrc, lane 1 against the running result in dst. The
 // flags register cr is declared KILL.
 // Template arguments in order: max or min, node name prefix, condition code.
 636 define(`REDUCE_MAX_MIN_2L', `
 637 instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
 638 %{
 639   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 640   match(Set dst ($2ReductionV isrc vsrc));
 641   ins_cost(INSN_COST);
 642   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 643   format %{ "umov  $tmp, $vsrc, D, 0\n\t"
 644             "cmp   $isrc,$tmp\n\t"
 645             "csel  $dst, $isrc, $tmp $3\n\t"
 646             "umov  $tmp, $vsrc, D, 1\n\t"
 647             "cmp   $dst, $tmp\n\t"
 648             "csel  $dst, $dst, $tmp $3\t# $1 reduction2L"
 649   %}
 650   ins_encode %{
 651     __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0);
 652     __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg));
 653     __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::$3);
 654     __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1);
 655     __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg));
 656     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::$3);
 657   %}
 658   ins_pipe(pipe_slow);
 659 %}')dnl
 660 dnl               $1   $2   $3
 661 REDUCE_MAX_MIN_2L(max, Max, GT)
 662 REDUCE_MAX_MIN_2L(min, Min, LT)
 663 dnl
 // Max and min reduction for the 2F, 4F and 2D vectors, selected by the element type of the
 // vector input. The first instruction reduces vsrc into dst: fmaxp or fminp with size S or D for
 // the two lane rules, fmaxv or fminv in the 4S arrangement for the four lane rules. The second
 // instruction is the scalar fmaxs, fmins, fmaxd or fmind, which combines dst with the scalar
 // input (named fsrc or dsrc). dst is declared TEMP_DEF.
 // Template arguments in order: max or min, vector reduction mnemonic, scalar mnemonic, lane
 // count, element type letter, prefix letter of the scalar operand name, vector operand class
 // letter.
 664 define(`REDUCE_MINMAX_FORD', `
 665 instruct reduce_$1$4$5(vReg$5 dst, vReg$5 $6src, vec$7 vsrc) %{
 666   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'ifelse($5, F, FLOAT, DOUBLE));
 667   match(Set dst (ifelse($1, max, Max, Min)ReductionV $6src vsrc));
 668   ins_cost(INSN_COST);
 669   effect(TEMP_DEF dst);
 670   format %{ "$2 $dst, ifelse($4, 2, $vsrc`, 'ifelse($5, F, S, D), ` T4S, $vsrc')\n\t"
 671             "$3 $dst, $dst, $$6src\t# $1 reduction$4$5" %}
 672   ins_encode %{
 673     __ $2(as_FloatRegister($dst$$reg), ifelse($4, 4, `__ T4S, as_FloatRegister($vsrc$$reg))',
 674                                               $4$5, 2F, `as_FloatRegister($vsrc$$reg), __ S)',
 675                                               $4$5, 2D, `as_FloatRegister($vsrc$$reg), __ D)');
 676     __ $3(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($$6src$$reg));
 677   %}
 678   ins_pipe(pipe_class_default);
 679 %}')dnl
 680 dnl                $1   $2     $3     $4 $5 $6 $7
 681 REDUCE_MINMAX_FORD(max, fmaxp, fmaxs, 2, F, f, D)
 682 REDUCE_MINMAX_FORD(max, fmaxv, fmaxs, 4, F, f, X)
 683 REDUCE_MINMAX_FORD(max, fmaxp, fmaxd, 2, D, d, X)
 684 REDUCE_MINMAX_FORD(min, fminp, fmins, 2, F, f, D)
 685 REDUCE_MINMAX_FORD(min, fminv, fmins, 4, F, f, X)
 686 REDUCE_MINMAX_FORD(min, fminp, fmind, 2, D, d, X)
 687 dnl
 // And, Or and Xor reduction for 8B, done in general registers. The two S lanes of vsrc are moved
 // to tmp and dst with umov and combined with the 32 bit operation. The result is then folded
 // onto itself twice, with the second operand taken as dst LSR 16 and then dst LSR 8, combined
 // with the scalar input isrc, and finally passed through sxtb.
 // Template arguments in order: mnemonic without the trailing w, node name prefix.
 688 define(`REDUCE_LOGIC_OP_8B', `
 689 instruct reduce_$1`'8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
 690 %{
 691   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 692   match(Set dst ($2ReductionV isrc vsrc));
 693   ins_cost(INSN_COST);
 694   effect(TEMP_DEF dst, TEMP tmp);
 695   format %{ "umov   $tmp, $vsrc, S, 0\n\t"
 696             "umov   $dst, $vsrc, S, 1\n\t"
 697             "$1w   $dst, $dst, $tmp\n\t"
 698             "$1w   $dst, $dst, $dst, LSR #16\n\t"
 699             "$1w   $dst, $dst, $dst, LSR #8\n\t"
 700             "$1w   $dst, $isrc, $dst\n\t"
 701             "sxtb   $dst, $dst\t# $1 reduction8B"
 702   %}
 703   ins_encode %{
 704     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
 705     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
 706     __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
 707     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
 708     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
 709     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
 710     __ sxtb($dst$$Register, $dst$$Register);
 711   %}
 712   ins_pipe(pipe_slow);
 713 %}')dnl
 714 dnl                $1   $2
 715 REDUCE_LOGIC_OP_8B(and, And)
 716 REDUCE_LOGIC_OP_8B(orr, Or)
 717 REDUCE_LOGIC_OP_8B(eor, Xor)
 718 define(`REDUCE_LOGIC_OP_16B', `
 719 instruct reduce_$1`'16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
 720 %{
 721   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 722   match(Set dst ($2ReductionV isrc vsrc));
 723   ins_cost(INSN_COST);
 724   effect(TEMP_DEF dst, TEMP tmp);
 725   format %{ "umov   $tmp, $vsrc, D, 0\n\t"
 726             "umov   $dst, $vsrc, D, 1\n\t"
 727             "$3   $dst, $dst, $tmp\n\t"
 728             "$3   $dst, $dst, $dst, LSR #32\n\t"
 729             "$1w   $dst, $dst, $dst, LSR #16\n\t"
 730             "$1w   $dst, $dst, $dst, LSR #8\n\t"
 731             "$1w   $dst, $isrc, $dst\n\t"
 732             "sxtb   $dst, $dst\t# $1 reduction16B"
 733   %}
 734   ins_encode %{
 735     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
 736     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
 737     __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
 738     __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
 739     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
 740     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
 741     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
 742     __ sxtb($dst$$Register, $dst$$Register);
 743   %}
 744   ins_pipe(pipe_slow);
 745 %}')dnl
     dnl REDUCE_LOGIC_OP_16B($1, $2, $3)
     dnl   $1 - logical mnemonic stem (and / orr / eor); names the rule and
     dnl        selects the 32-bit assembler call "$1w".
     dnl   $2 - ideal node prefix (And / Or / Xor) of the matched $2ReductionV.
     dnl   $3 - assembler method used for the first two, 64-bit wide, steps
     dnl        (andr / orr / eor).
     dnl The generated instruct reduces a vecX of T_BYTE lanes with isrc: the
     dnl two D lanes are combined with $3, folded with LSR #32 (still $3), then
     dnl with LSR #16 and LSR #8 using $1w, combined with isrc and sign-extended
     dnl from the low byte with sxtb.
 746 dnl                 $1   $2   $3
 747 REDUCE_LOGIC_OP_16B(and, And, andr)
 748 REDUCE_LOGIC_OP_16B(orr, Or,  orr )
 749 REDUCE_LOGIC_OP_16B(eor, Xor, eor )
 750 dnl
     dnl REDUCE_LOGIC_OP_4S($1, $2)
     dnl   $1 - logical mnemonic stem (and / orr / eor); names the rule and
     dnl        selects the 32-bit assembler call "$1w".
     dnl   $2 - ideal node prefix (And / Or / Xor) of the matched $2ReductionV.
     dnl Generates an instruct that reduces a vecD of T_SHORT lanes with isrc:
     dnl the two S lanes are combined, the result is folded onto itself with
     dnl LSR #16, combined with isrc and sign-extended from its low half-word
     dnl with sxth.
 751 define(`REDUCE_LOGIC_OP_4S', `
 752 instruct reduce_$1`'4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
 753 %{
 754   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 755   match(Set dst ($2ReductionV isrc vsrc));
 756   ins_cost(INSN_COST);
 757   effect(TEMP_DEF dst, TEMP tmp);
 758   format %{ "umov   $tmp, $vsrc, S, 0\n\t"
 759             "umov   $dst, $vsrc, S, 1\n\t"
 760             "$1w   $dst, $dst, $tmp\n\t"
 761             "$1w   $dst, $dst, $dst, LSR #16\n\t"
 762             "$1w   $dst, $isrc, $dst\n\t"
 763             "sxth   $dst, $dst\t# $1 reduction4S"
 764   %}
 765   ins_encode %{
 766     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
 767     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
 768     __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
 769     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
 770     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
 771     __ sxth($dst$$Register, $dst$$Register);
 772   %}
 773   ins_pipe(pipe_slow);
 774 %}')dnl
 775 dnl                $1   $2
 776 REDUCE_LOGIC_OP_4S(and, And)
 777 REDUCE_LOGIC_OP_4S(orr, Or)
 778 REDUCE_LOGIC_OP_4S(eor, Xor)
 779 dnl
     dnl REDUCE_LOGIC_OP_8S($1, $2, $3)
     dnl   $1 - logical mnemonic stem (and / orr / eor); names the rule and
     dnl        selects the 32-bit assembler call "$1w".
     dnl   $2 - ideal node prefix (And / Or / Xor) of the matched $2ReductionV.
     dnl   $3 - assembler method used for the 64-bit wide steps
     dnl        (andr / orr / eor).
     dnl Generates an instruct that reduces a vecX of T_SHORT lanes with isrc:
     dnl the two D lanes are combined with $3, folded with LSR #32 (still $3)
     dnl and LSR #16 ($1w), combined with isrc and sign-extended with sxth.
 780 define(`REDUCE_LOGIC_OP_8S', `
 781 instruct reduce_$1`'8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
 782 %{
 783   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 784   match(Set dst ($2ReductionV isrc vsrc));
 785   ins_cost(INSN_COST);
 786   effect(TEMP_DEF dst, TEMP tmp);
 787   format %{ "umov   $tmp, $vsrc, D, 0\n\t"
 788             "umov   $dst, $vsrc, D, 1\n\t"
 789             "$3   $dst, $dst, $tmp\n\t"
 790             "$3   $dst, $dst, $dst, LSR #32\n\t"
 791             "$1w   $dst, $dst, $dst, LSR #16\n\t"
 792             "$1w   $dst, $isrc, $dst\n\t"
 793             "sxth   $dst, $dst\t# $1 reduction8S"
 794   %}
 795   ins_encode %{
 796     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
 797     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
 798     __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
 799     __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
 800     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
 801     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
 802     __ sxth($dst$$Register, $dst$$Register);
 803   %}
 804   ins_pipe(pipe_slow);
 805 %}')dnl
 806 dnl                $1   $2   $3
 807 REDUCE_LOGIC_OP_8S(and, And, andr)
 808 REDUCE_LOGIC_OP_8S(orr, Or,  orr )
 809 REDUCE_LOGIC_OP_8S(eor, Xor, eor )
 810 dnl
     dnl REDUCE_LOGIC_OP_2I($1, $2)
     dnl   $1 - logical mnemonic stem (and / orr / eor); names the rule and
     dnl        selects the 32-bit assembler call "$1w".
     dnl   $2 - ideal node prefix (And / Or / Xor) of the matched $2ReductionV.
     dnl Generates an instruct that reduces a vecD of T_INT lanes with isrc:
     dnl S lane 0 is combined with isrc into dst, then S lane 1 is combined
     dnl with dst. tmp is reused for both lane moves.
 811 define(`REDUCE_LOGIC_OP_2I', `
 812 instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
 813 %{
 814   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
 815   match(Set dst ($2ReductionV isrc vsrc));
 816   ins_cost(INSN_COST);
 817   effect(TEMP_DEF dst, TEMP tmp);
 818   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
 819             "$1w  $dst, $tmp, $isrc\n\t"
 820             "umov  $tmp, $vsrc, S, 1\n\t"
 821             "$1w  $dst, $tmp, $dst\t# $1 reduction2I"
 822   %}
 823   ins_encode %{
 824     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
 825     __ $1w($dst$$Register, $tmp$$Register, $isrc$$Register);
 826     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
 827     __ $1w($dst$$Register, $tmp$$Register, $dst$$Register);
 828   %}
 829   ins_pipe(pipe_slow);
 830 %}')dnl
 831 dnl                $1   $2
 832 REDUCE_LOGIC_OP_2I(and, And)
 833 REDUCE_LOGIC_OP_2I(orr, Or)
 834 REDUCE_LOGIC_OP_2I(eor, Xor)
 835 dnl
     dnl REDUCE_LOGIC_OP_4I($1, $2, $3)
     dnl   $1 - logical mnemonic stem (and / orr / eor); names the rule and
     dnl        selects the 32-bit assembler call "$1w".
     dnl   $2 - ideal node prefix (And / Or / Xor) of the matched $2ReductionV.
     dnl   $3 - assembler method used for the 64-bit wide steps
     dnl        (andr / orr / eor).
     dnl Generates an instruct that reduces a vecX of T_INT lanes with isrc:
     dnl the two D lanes are combined with $3, the result is folded onto itself
     dnl with LSR #32 (still $3) and then combined with isrc using $1w.
 836 define(`REDUCE_LOGIC_OP_4I', `
 837 instruct reduce_$1`'4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
 838 %{
 839   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
 840   match(Set dst ($2ReductionV isrc vsrc));
 841   ins_cost(INSN_COST);
 842   effect(TEMP_DEF dst, TEMP tmp);
 843   format %{ "umov   $tmp, $vsrc, D, 0\n\t"
 844             "umov   $dst, $vsrc, D, 1\n\t"
 845             "$3   $dst, $dst, $tmp\n\t"
 846             "$3   $dst, $dst, $dst, LSR #32\n\t"
 847             "$1w   $dst, $isrc, $dst\t# $1 reduction4I"
 848   %}
 849   ins_encode %{
 850     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
 851     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
 852     __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
 853     __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
 854     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
 855   %}
 856   ins_pipe(pipe_slow);
 857 %}')dnl
 858 dnl                $1   $2   $3
 859 REDUCE_LOGIC_OP_4I(and, And, andr)
 860 REDUCE_LOGIC_OP_4I(orr, Or,  orr )
 861 REDUCE_LOGIC_OP_4I(eor, Xor, eor )
 862 dnl
     dnl REDUCE_LOGIC_OP_2L($1, $2, $3)
     dnl   $1 - logical mnemonic stem (and / orr / eor); used for the rule name
     dnl        and the trailing format comment only.
     dnl   $2 - ideal node prefix (And / Or / Xor) of the matched $2ReductionV.
     dnl   $3 - assembler method that performs the operation (andr / orr / eor).
     dnl Generates an instruct that reduces a vecX of T_LONG lanes with the long
     dnl scalar isrc: D lane 0 is combined with isrc into dst, then D lane 1 is
     dnl combined with dst. tmp is reused for both lane moves.
 863 define(`REDUCE_LOGIC_OP_2L', `
 864 instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
 865 %{
 866   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 867   match(Set dst ($2ReductionV isrc vsrc));
 868   ins_cost(INSN_COST);
 869   effect(TEMP_DEF dst, TEMP tmp);
 870   format %{ "umov  $tmp, $vsrc, D, 0\n\t"
 871             "$3  $dst, $isrc, $tmp\n\t"
 872             "umov  $tmp, $vsrc, D, 1\n\t"
 873             "$3  $dst, $dst, $tmp\t# $1 reduction2L"
 874   %}
 875   ins_encode %{
 876     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
 877     __ $3($dst$$Register, $isrc$$Register, $tmp$$Register);
 878     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
 879     __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
 880   %}
 881   ins_pipe(pipe_slow);
 882 %}')dnl
 883 dnl                $1   $2   $3
 884 REDUCE_LOGIC_OP_2L(and, And, andr)
 885 REDUCE_LOGIC_OP_2L(orr, Or,  orr )
 886 REDUCE_LOGIC_OP_2L(eor, Xor, eor )
 887 dnl
 888 
 889 // ------------------------------ Vector insert ---------------------------------
 890 dnl VECTOR_INSERT_I($1,        $2,                     $3,          $4,   $5)
 891 dnl VECTOR_INSERT_I(rule_name, vector_length_in_bytes, reg_variant, vreg, ireg)
     dnl Generates an instruct matching VectorInsert (Binary src val) idx with a
     dnl general-purpose register value.
     dnl   - When $3 is D the predicate requires T_LONG elements and the lane
     dnl     size passed to mov is D. Otherwise the predicate accepts T_BYTE,
     dnl     T_SHORT or T_INT and the lane size is computed at encode time by
     dnl     elemType_to_regVariant(Matcher::vector_element_basic_type(this)).
     dnl   - src is copied to dst with orr only when the two registers differ;
     dnl     val is then written into lane idx of dst.
     dnl   - In the format string the hash sign is written as `#' so that m4
     dnl     does not treat the rest of that line as a comment.
 892 define(`VECTOR_INSERT_I', `
 893 instruct $1($4 dst, $4 src, $5 val, immI idx)
 894 %{
 895   predicate(ifelse($3, D, n->bottom_type()->is_vect()->element_basic_type() == T_LONG,
 896             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
 897              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
 898              n->bottom_type()->is_vect()->element_basic_type() == T_INT)));
 899   match(Set dst (VectorInsert (Binary src val) idx));
 900   ins_cost(2 * INSN_COST);
 901   format %{ "orr    $dst, T$2B, $src, $src\n\t"
 902             "mov    $dst, $3, $idx, $val\t`#' insert into vector ($3)" %}
 903   ins_encode %{
 904     if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
 905       __ orr(as_FloatRegister($dst$$reg), __ T$2B,
 906              as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
 907     }
 908     __ mov(as_FloatRegister($dst$$reg), __ ifelse($3, D, D, elemType_to_regVariant(Matcher::vector_element_basic_type(this))),
 909            $idx$$constant, $val$$Register);
 910   %}
 911   ins_pipe(pipe_slow);
 912 %}')dnl
 913 dnl             $1        $2  $3     $4    $5
 914 VECTOR_INSERT_I(insertID, 8,  B/H/S, vecD, iRegIorL2I)
 915 VECTOR_INSERT_I(insertIX, 16, B/H/S, vecX, iRegIorL2I)
 916 VECTOR_INSERT_I(insert2L, 16, D,     vecX, iRegL)
 917 dnl
     dnl VECTOR_INSERT_F($1, $2, $3)
     dnl   $1 - element type letter (F or D); selects the vReg$1 operand, the
     dnl        T_FLOAT / T_DOUBLE predicate (via TYPE2DATATYPE) and the S / D
     dnl        lane size.
     dnl   $2 - vector register class suffix (D or X); also selects T8B / T16B
     dnl        for the register copy.
     dnl   $3 - rule-name suffix (2F, 4F, 2D).
     dnl Generates an instruct matching VectorInsert (Binary src val) idx with a
     dnl floating-point register value: src is copied to dst with orr
     dnl (unconditionally), then lane 0 of val is inserted into lane idx of dst.
 918 define(`VECTOR_INSERT_F', `
 919 instruct insert$3`'(vec$2 dst, vec$2 src, vReg$1 val, immI idx)
 920 %{
 921   predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($1));
 922   match(Set dst (VectorInsert (Binary src val) idx));
 923   ins_cost(2 * INSN_COST);
 924   effect(TEMP_DEF dst);
 925   format %{ "orr    $dst, ifelse($2, D, T8B, T16B), $src, $src\n\t"
 926             "ins    $dst, ifelse($1, F, S, D), $val, $idx, 0\t# insert into vector($3)" %}
 927   ins_encode %{
 928     __ orr(as_FloatRegister($dst$$reg), __ ifelse($2, D, T8B, T16B),
 929            as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
 930     __ ins(as_FloatRegister($dst$$reg), __ ifelse($1, F, S, D),
 931            as_FloatRegister($val$$reg), $idx$$constant, 0);
 932   %}
 933   ins_pipe(pipe_slow);
 934 %}')dnl
 935 dnl             $1 $2 $3
 936 VECTOR_INSERT_F(F, D, 2F)
 937 VECTOR_INSERT_F(F, X, 4F)
 938 VECTOR_INSERT_F(D, X, 2D)
 939 dnl
 940 
 941 // ------------------------------ Vector extract ---------------------------------
     dnl VECTOR_EXTRACT_I($1, $2, $3, $4, $5, $6)
     dnl   $1 - lane count of the source vector (checked by the predicate).
     dnl   $2 - element type letter; selects the matched Extract$2 node.
     dnl   $3 - result register kind (I or L), giving iReg$3NoSp.
     dnl   $4 - vector register class suffix (D or X).
     dnl   $5 - prefix of the move used, s or u (smov / umov).
     dnl   $6 - SIMD lane size passed to the move (B, H, S or D).
     dnl Generates an instruct that moves lane idx of src into the
     dnl general-purpose register dst with a single $5mov.
 942 define(`VECTOR_EXTRACT_I', `
 943 instruct extract$1$2`'(iReg$3NoSp dst, vec$4 src, immI idx)
 944 %{
 945   predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
 946   match(Set dst (Extract$2 src idx));
 947   ins_cost(INSN_COST);
 948   format %{ "$5mov    $dst, $src, $6, $idx\t# extract from vector($1$2)" %}
 949   ins_encode %{
 950     __ $5mov($dst$$Register, as_FloatRegister($src$$reg), __ $6, $idx$$constant);
 951   %}
 952   ins_pipe(pipe_class_default);
 953 %}')dnl
 954 dnl             $1   $2 $3 $4 $5 $6
 955 VECTOR_EXTRACT_I(8,  B, I, D, s, B)
 956 VECTOR_EXTRACT_I(16, B, I, X, s, B)
 957 VECTOR_EXTRACT_I(4,  S, I, D, s, H)
 958 VECTOR_EXTRACT_I(8,  S, I, X, s, H)
 959 VECTOR_EXTRACT_I(2,  I, I, D, u, S)
 960 VECTOR_EXTRACT_I(4,  I, I, X, u, S)
 961 VECTOR_EXTRACT_I(2,  L, L, X, u, D)
 962 dnl
     dnl VECTOR_EXTRACT_F($1, $2, $3, $4)
     dnl   $1 - lane count of the source vector (checked by the predicate).
     dnl   $2 - element type letter (F or D); selects Extract$2, vReg$2 and
     dnl        fmovs / fmovd.
     dnl   $3 - vector register class suffix (D or X).
     dnl   $4 - SIMD lane size passed to ins (S or D).
     dnl Generates an instruct that extracts lane idx of src into the
     dnl floating-point register dst. Three cases are distinguished at encode
     dnl time: idx 0 with dst equal to src emits nothing, idx 0 otherwise emits
     dnl a register move, and any other idx copies lane idx of src into lane 0
     dnl of dst with ins.
 963 define(`VECTOR_EXTRACT_F', `
 964 instruct extract$1$2`'(vReg$2 dst, vec$3 src, immI idx)
 965 %{
 966   predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
 967   match(Set dst (Extract$2 src idx));
 968   ins_cost(INSN_COST);
 969   format %{ "ins   $dst, $4, $src, 0, $idx\t# extract from vector($1$2)" %}
 970   ins_encode %{
 971     if ((0 == $idx$$constant) &&
 972         (as_FloatRegister($dst$$reg) == as_FloatRegister($src$$reg))) {
 973       /* empty */
 974     } else if ($idx$$constant == 0) {
 975       __ ifelse($2, F, fmovs, fmovd)(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
 976     } else {
 977       __ ins(as_FloatRegister($dst$$reg), __ $4,
 978              as_FloatRegister($src$$reg), 0, $idx$$constant);
 979     }
 980   %}
 981   ins_pipe(pipe_class_default);
 982 %}')dnl
 983 dnl             $1  $2 $3 $4
 984 VECTOR_EXTRACT_F(2, F, D, S)
 985 VECTOR_EXTRACT_F(4, F, X, S)
 986 VECTOR_EXTRACT_F(2, D, X, D)
 987 dnl
 988 
 989 // ------------------------------ Vector comparison ---------------------------------
 990 
     dnl vcmpD: VectorMaskCmp for 8-byte vectors. The element type is taken from
     dnl the node at encode time and the comparison itself is delegated to
     dnl neon_compare with isQ == false. Elements of 8 bytes are rejected by the
     dnl assert below, since a 64-bit vector is handled here.
 991 instruct vcmpD(vecD dst, vecD src1, vecD src2, immI cond)
 992 %{
 993   predicate(n->as_Vector()->length_in_bytes() == 8);
 994   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
 995   format %{ "vcmpD  $dst, $src1, $src2\t# vector compare " %}
 996   ins_cost(INSN_COST);
 997   ins_encode %{
 998     BasicType bt = Matcher::vector_element_basic_type(this);
 999     assert(type2aelembytes(bt) != 8, "not supported");
1000     __ neon_compare(as_FloatRegister($dst$$reg), bt, as_FloatRegister($src1$$reg),
1001                     as_FloatRegister($src2$$reg), (int)$cond$$constant, /*isQ*/ false);
1002   %}
1003   ins_pipe(vdop64);
1004 %}
1005 
     dnl vcmpX: VectorMaskCmp for 16-byte vectors. The element type is taken
     dnl from the node at encode time and the comparison is delegated to
     dnl neon_compare with isQ == true.
1006 instruct vcmpX(vecX dst, vecX src1, vecX src2, immI cond)
1007 %{
1008   predicate(n->as_Vector()->length_in_bytes() == 16);
1009   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
1010   format %{ "vcmpX  $dst, $src1, $src2\t# vector compare " %}
1011   ins_cost(INSN_COST);
1012   ins_encode %{
1013     BasicType bt = Matcher::vector_element_basic_type(this);
1014     __ neon_compare(as_FloatRegister($dst$$reg), bt, as_FloatRegister($src1$$reg),
1015                     as_FloatRegister($src2$$reg), (int)$cond$$constant, /*isQ*/ true);
1016   %}
1017   ins_pipe(vdop128);
1018 %}
1019 
1020 // ------------------------------ Vector mul -----------------------------------
1021 
     dnl vmul2L: MulVL for two long lanes, done lane by lane through
     dnl general-purpose registers. For each D lane the operands are moved out
     dnl with umov, multiplied with the scalar mul, and the product is inserted
     dnl into the same lane of dst. Lane 0 of dst is written before lane 1 of
     dnl src1 / src2 is read; only lane 0 is touched by that write.
1022 instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2)
1023 %{
1024   predicate(n->as_Vector()->length() == 2);
1025   match(Set dst (MulVL src1 src2));
1026   ins_cost(INSN_COST);
1027   effect(TEMP tmp1, TEMP tmp2);
1028   format %{ "umov   $tmp1, $src1, D, 0\n\t"
1029             "umov   $tmp2, $src2, D, 0\n\t"
1030             "mul    $tmp2, $tmp2, $tmp1\n\t"
1031             "mov    $dst,  T2D,   0, $tmp2\t# insert into vector(2L)\n\t"
1032             "umov   $tmp1, $src1, D, 1\n\t"
1033             "umov   $tmp2, $src2, D, 1\n\t"
1034             "mul    $tmp2, $tmp2, $tmp1\n\t"
1035             "mov    $dst,  T2D,   1, $tmp2\t# insert into vector(2L)"
1036   %}
1037   ins_encode %{
1038     __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0);
1039     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0);
1040     __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
1041     __ mov(as_FloatRegister($dst$$reg), __ D, 0, $tmp2$$Register);
1042     __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1);
1043     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1);
1044     __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
1045     __ mov(as_FloatRegister($dst$$reg), __ D, 1, $tmp2$$Register);
1046   %}
1047   ins_pipe(pipe_slow);
1048 %}
1049 
1050 // --------------------------------- Vector not --------------------------------
1051 dnl
     dnl MATCH_RULE($1) is a section-local helper: for $1 == I it expands to
     dnl three match rules (XorV of src with ReplicateB / ReplicateS /
     dnl ReplicateI of m1), otherwise to a single rule using ReplicateL.
     dnl
     dnl VECTOR_NOT($1, $2, $3, $4, $5)
     dnl   $1 - lane count used in the rule name.
     dnl   $2 - I or L; selects the imm$2_M1 operand and the MATCH_RULE variant.
     dnl   $3 - vector register class suffix (D or X).
     dnl   $4 - vector length in bytes checked by the predicate.
     dnl   $5 - arrangement passed to notr (8B or 16B).
     dnl Generates an instruct that replaces an XorV with the replicated m1
     dnl immediate by a single notr.
     dnl
     dnl The helper name is quoted in the undefine call below: an unquoted name
     dnl is expanded by m4 before undefine sees it, which leaves the helper
     dnl defined.
1052 define(`MATCH_RULE', `ifelse($1, I,
1053 `match(Set dst (XorV src (ReplicateB m1)));
1054   match(Set dst (XorV src (ReplicateS m1)));
1055   match(Set dst (XorV src (ReplicateI m1)));',
1056 `match(Set dst (XorV src (ReplicateL m1)));')')dnl
1057 dnl
1058 define(`VECTOR_NOT', `
1059 instruct vnot$1$2`'(vec$3 dst, vec$3 src, imm$2_M1 m1)
1060 %{
1061   predicate(n->as_Vector()->length_in_bytes() == $4);
1062   MATCH_RULE($2)
1063   ins_cost(INSN_COST);
1064   format %{ "not  $dst, T$5, $src\t# vector ($5)" %}
1065   ins_encode %{
1066     __ notr(as_FloatRegister($dst$$reg), __ T$5,
1067             as_FloatRegister($src$$reg));
1068   %}
1069   ins_pipe(pipe_class_default);
1070 %}')dnl
1071 dnl        $1 $2 $3 $4  $5
1072 VECTOR_NOT(2, I, D, 8,  8B)
1073 VECTOR_NOT(4, I, X, 16, 16B)
1074 VECTOR_NOT(2, L, X, 16, 16B)
1075 undefine(`MATCH_RULE')
1076 // ------------------------------ Vector and_not -------------------------------
1077 dnl
     dnl MATCH_RULE($1) is a section-local helper: for $1 == I it expands to
     dnl three match rules (AndV of src1 with XorV of src2 and ReplicateB /
     dnl ReplicateS / ReplicateI of m1), otherwise to a single rule using
     dnl ReplicateL.
     dnl
     dnl VECTOR_AND_NOT($1, $2, $3, $4, $5)
     dnl   $1 - lane count used in the rule name.
     dnl   $2 - I or L; selects the imm$2_M1 operand and the MATCH_RULE variant.
     dnl   $3 - vector register class suffix (D or X).
     dnl   $4 - vector length in bytes checked by the predicate.
     dnl   $5 - arrangement passed to bic (8B or 16B).
     dnl Generates an instruct that replaces the matched AndV / XorV pair by a
     dnl single bic of src1 and src2.
     dnl
     dnl The helper name is quoted in the undefine call below: an unquoted name
     dnl is expanded by m4 before undefine sees it, which leaves the helper
     dnl defined.
1078 define(`MATCH_RULE', `ifelse($1, I,
1079 `match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
1080   match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
1081   match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));',
1082 `match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl
1083 dnl
1084 define(`VECTOR_AND_NOT', `
1085 instruct vand_not$1$2`'(vec$3 dst, vec$3 src1, vec$3 src2, imm$2_M1 m1)
1086 %{
1087   predicate(n->as_Vector()->length_in_bytes() == $4);
1088   MATCH_RULE($2)
1089   ins_cost(INSN_COST);
1090   format %{ "bic  $dst, T$5, $src1, $src2\t# vector ($5)" %}
1091   ins_encode %{
1092     __ bic(as_FloatRegister($dst$$reg), __ T$5,
1093            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
1094   %}
1095   ins_pipe(pipe_class_default);
1096 %}')dnl
1097 dnl            $1 $2 $3 $4  $5
1098 VECTOR_AND_NOT(2, I, D, 8,  8B)
1099 VECTOR_AND_NOT(4, I, X, 16, 16B)
1100 VECTOR_AND_NOT(2, L, X, 16, 16B)
1101 undefine(`MATCH_RULE')
1102 dnl
1103 // ------------------------------ Vector max/min -------------------------------
1104 dnl
     dnl PREDICATE($1, $2, $3) is a section-local helper. When $1 is 8B it
     dnl expands to a predicate accepting T_BYTE vectors of length 4 or 8;
     dnl otherwise it requires length $2 and element type T_$3.
     dnl
     dnl VECTOR_MAX_MIN_INT($1, $2, $3, $4, $5, $6)
     dnl   $1 - max or min; names the rule and selects the "$1v" assembler call.
     dnl   $2 - lane count.
     dnl   $3 - element type letter (B, S or I); mapped through TYPE2DATATYPE
     dnl        for the predicate and iTYPE2SIMD for the arrangement.
     dnl   $4 - vector register class suffix (D or X).
     dnl   $5 - ideal node prefix (Max or Min) of the matched $5V.
     dnl   $6 - suffix of the vdop pipeline class (64 or 128).
     dnl
     dnl The helper name is quoted in the undefine call below: an unquoted name
     dnl is expanded by m4 before undefine sees it, which leaves the helper
     dnl defined.
1105 define(`PREDICATE', `ifelse($1, 8B,
1106 `predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
1107              n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
1108 `predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_$3);')')dnl
1109 dnl
1110 define(`VECTOR_MAX_MIN_INT', `
1111 instruct v$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
1112 %{
1113   PREDICATE(`$2$3', $2, TYPE2DATATYPE($3))
1114   match(Set dst ($5V src1 src2));
1115   ins_cost(INSN_COST);
1116   format %{ "$1v  $dst, T$2`'iTYPE2SIMD($3), $src1, $src2\t# vector ($2$3)" %}
1117   ins_encode %{
1118     __ $1v(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3),
1119             as_FloatRegister($src1$$reg),
1120             as_FloatRegister($src2$$reg));
1121   %}
1122   ins_pipe(vdop$6);
1123 %}')dnl
1124 dnl                $1   $2  $3 $4 $5   $6
1125 VECTOR_MAX_MIN_INT(max, 8,  B, D, Max, 64)
1126 VECTOR_MAX_MIN_INT(max, 16, B, X, Max, 128)
1127 VECTOR_MAX_MIN_INT(max, 4,  S, D, Max, 64)
1128 VECTOR_MAX_MIN_INT(max, 8,  S, X, Max, 128)
1129 VECTOR_MAX_MIN_INT(max, 2,  I, D, Max, 64)
1130 VECTOR_MAX_MIN_INT(max, 4,  I, X, Max, 128)
1131 VECTOR_MAX_MIN_INT(min, 8,  B, D, Min, 64)
1132 VECTOR_MAX_MIN_INT(min, 16, B, X, Min, 128)
1133 VECTOR_MAX_MIN_INT(min, 4,  S, D, Min, 64)
1134 VECTOR_MAX_MIN_INT(min, 8,  S, X, Min, 128)
1135 VECTOR_MAX_MIN_INT(min, 2,  I, D, Min, 64)
1136 VECTOR_MAX_MIN_INT(min, 4,  I, X, Min, 128)
1137 undefine(`PREDICATE')
1138 dnl
     dnl VECTOR_MAX_MIN_LONG($1, $2, $3, $4)
     dnl   $1 - max or min; used in the rule name.
     dnl   $2 - ideal node prefix (Max or Min) of the matched $2V.
     dnl   $3, $4 - names of the operands handed to bsl, in that order.
     dnl Generates an instruct for two long lanes. cmgt writes the per-lane
     dnl result of comparing src1 with src2 into dst, and bsl then uses dst to
     dnl choose between $3 and $4. The two invocations differ only in the order
     dnl of $3 / $4 (src1, src2 for max; src2, src1 for min).
     dnl dst is declared TEMP because it is written by cmgt while src1 and src2
     dnl are still needed by bsl.
1139 define(`VECTOR_MAX_MIN_LONG', `
1140 instruct v$1`'2L`'(vecX dst, vecX src1, vecX src2)
1141 %{
1142   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1143   match(Set dst ($2V src1 src2));
1144   ins_cost(INSN_COST);
1145   effect(TEMP dst);
1146   format %{ "cmgt  $dst, T2D, $src1, $src2\t# vector (2L)\n\t"
1147             "bsl   $dst, T16B, $$3, $$4\t# vector (16B)" %}
1148   ins_encode %{
1149     __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
1150             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
1151     __ bsl(as_FloatRegister($dst$$reg), __ T16B,
1152            as_FloatRegister($$3$$reg), as_FloatRegister($$4$$reg));
1153   %}
1154   ins_pipe(vdop128);
1155 %}')dnl
1156 dnl                 $1   $2   $3    $4
1157 VECTOR_MAX_MIN_LONG(max, Max, src1, src2)
1158 VECTOR_MAX_MIN_LONG(min, Min, src2, src1)
1159 dnl
1160 
1161 // --------------------------------- blend (bsl) ----------------------------
1162 dnl
     dnl VECTOR_BSL($1, $2, $3)
     dnl   $1 - vector length in bytes (checked by the predicate, and used for
     dnl        the T$1B arrangement and the rule name).
     dnl   $2 - vector register class suffix (D or X).
     dnl   $3 - suffix of the vlogical pipeline class (64 or 128).
     dnl Generates an instruct matching VectorBlend (Binary src1 src2) dst. dst
     dnl is both the third input of the blend and the result register. Note
     dnl that bsl receives src2 before src1.
1163 define(`VECTOR_BSL', `
1164 instruct vbsl$1B`'(vec$2 dst, vec$2 src1, vec$2 src2)
1165 %{
1166   predicate(n->as_Vector()->length_in_bytes() == $1);
1167   match(Set dst (VectorBlend (Binary src1 src2) dst));
1168   ins_cost(INSN_COST);
1169   format %{ "bsl  $dst, T$1B, $src2, $src1\t# vector ($1B)" %}
1170   ins_encode %{
1171     __ bsl(as_FloatRegister($dst$$reg), __ T$1B,
1172            as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
1173   %}
1174   ins_pipe(vlogical$3);
1175 %}')dnl
1176 dnl        $1  $2 $3
1177 VECTOR_BSL(8,  D, 64)
1178 VECTOR_BSL(16, X, 128)
1179 dnl
1180 
1181 // --------------------------------- Load/store Mask ----------------------------
1182 dnl
     dnl PREDICATE($1, $2) is a section-local helper. For $1 == load it requires
     dnl length $2 and T_BYTE elements; otherwise only length $2.
     dnl
     dnl VECTOR_LOAD_STORE_MASK_B($1, $2, $3, $4, $5, $6)
     dnl   $1 - load or store; names the rule and selects the predicate.
     dnl   $2 - lane count.
     dnl   $3 - vector register class suffix (D or X) of dst and src.
     dnl   $4 - Load or Store; selects the matched Vector$4Mask node.
     dnl   $5 - extra operand declaration appended to the operand list
     dnl        (empty for load, ", immI_1" for store).
     dnl   $6 - name of that extra operand (empty for load, size for store).
     dnl Both directions are a single negr on byte lanes.
     dnl
     dnl The helper name is quoted in the undefine call below: an unquoted name
     dnl is expanded by m4 before undefine sees it, which leaves the helper
     dnl defined.
1183 define(`PREDICATE', `ifelse($1, load,
1184 `predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
1185 `predicate(n->as_Vector()->length() == $2);')')dnl
1186 dnl
1187 define(`VECTOR_LOAD_STORE_MASK_B', `
1188 instruct $1mask$2B`'(vec$3 dst, vec$3 src $5 $6)
1189 %{
1190   PREDICATE($1, $2)
1191   match(Set dst (Vector$4Mask src $6));
1192   ins_cost(INSN_COST);
1193   format %{ "negr  $dst, T$2B, $src\t# $1 mask ($2B to $2B)" %}
1194   ins_encode %{
1195     __ negr(as_FloatRegister($dst$$reg), __ T$2B, as_FloatRegister($src$$reg));
1196   %}
1197   ins_pipe(pipe_class_default);
1198 %}')dnl
1199 dnl                      $1     $2  $3 $4     $5      $6
1200 VECTOR_LOAD_STORE_MASK_B(load,  8,  D, Load)
1201 VECTOR_LOAD_STORE_MASK_B(load,  16, X, Load)
1202 VECTOR_LOAD_STORE_MASK_B(store, 8,  D, Store, `, immI_1', size)
1203 VECTOR_LOAD_STORE_MASK_B(store, 16, X, Store, `, immI_1', size)
1204 undefine(`PREDICATE')dnl
1205 dnl
     dnl PREDICATE($1, $2) is a section-local helper. For $1 == load it requires
     dnl length $2 and T_SHORT elements; otherwise only length $2.
     dnl
     dnl VECTOR_LOAD_STORE_MASK_S($1 .. $10)
     dnl   $1  - load or store; names the rule and selects the predicate.
     dnl   $2  - lane count.
     dnl   $3  - register class suffix of dst; $4 - register class suffix of src.
     dnl   $5  - Load or Store; selects the matched Vector$5Mask node.
     dnl   $6  - resize instruction (uxtl to widen on load, xtn to narrow on
     dnl         store).
     dnl   $7  - source lane size letter; $8 - destination lane size letter.
     dnl   $9  - extra operand declaration (", immI_2" for store).
     dnl   $10 - name of that extra operand (size for store).
     dnl The generated code resizes the lanes with $6 and then negates them
     dnl with negr on the destination arrangement T8$8.
     dnl
     dnl The helper name is quoted in the undefine call below: an unquoted name
     dnl is expanded by m4 before undefine sees it, which leaves the helper
     dnl defined.
1206 define(`PREDICATE', `ifelse($1, load,
1207 `predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);',
1208 `predicate(n->as_Vector()->length() == $2);')')dnl
1209 dnl
1210 define(`VECTOR_LOAD_STORE_MASK_S', `
1211 instruct $1mask$2S`'(vec$3 dst, vec$4 src $9 $10)
1212 %{
1213   PREDICATE($1, $2)
1214   match(Set dst (Vector$5Mask src $10));
1215   ins_cost(INSN_COST);
1216   format %{ "$6  $dst, T8$8, $src, T8$7\n\t"
1217             "negr  $dst, T8$8, $dst\t# $1 mask ($2$7 to $2$8)" %}
1218   ins_encode %{
1219     __ $6(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($src$$reg), __ T8$7);
1220     __ negr(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($dst$$reg));
1221   %}
1222   ins_pipe(pipe_slow);
1223 %}')dnl
1224 dnl                      $1     $2 $3 $4 $5     $6    $7 $8    $9       $10
1225 VECTOR_LOAD_STORE_MASK_S(load,  4, D, D, Load,  uxtl, B, H)
1226 VECTOR_LOAD_STORE_MASK_S(load,  8, X, D, Load,  uxtl, B, H)
1227 VECTOR_LOAD_STORE_MASK_S(store, 4, D, D, Store, xtn,  H, B, `, immI_2', size)
1228 VECTOR_LOAD_STORE_MASK_S(store, 8, D, X, Store, xtn,  H, B, `, immI_2', size)
1229 undefine(`PREDICATE')dnl
1230 dnl
     dnl PREDICATE($1, $2) is a section-local helper. For $1 == load it requires
     dnl length $2 and T_INT or T_FLOAT elements; otherwise only length $2.
     dnl
     dnl VECTOR_LOAD_STORE_MASK_I($1 .. $13)
     dnl   $1  - load or store; names the rule and selects the predicate.
     dnl   $2  - lane count.
     dnl   $3  - register class suffix of dst; $4 - register class suffix of src.
     dnl   $5  - Load or Store; selects the matched Vector$5Mask node.
     dnl   $6  - resize instruction applied twice (uxtl on load, xtn on store).
     dnl   $7, $8, $9 - lane size letters of the source, the intermediate and
     dnl         the final form.
     dnl   $10, $11 - lane counts used in the arrangements of the first and
     dnl         second resize step.
     dnl   $12 - extra operand declaration (", immI_4" for store).
     dnl   $13 - name of that extra operand (size for store).
     dnl The generated code resizes the lanes in two $6 steps and then negates
     dnl them with negr on the final arrangement T$11$9.
     dnl
     dnl The helper name is quoted in the undefine call below: an unquoted name
     dnl is expanded by m4 before undefine sees it, which leaves the helper
     dnl defined.
1231 define(`PREDICATE', `ifelse($1, load,
1232 `predicate(n->as_Vector()->length() == $2 &&
1233             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
1234              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));',
1235 `predicate(n->as_Vector()->length() == $2);')')dnl
1236 dnl
1237 define(`VECTOR_LOAD_STORE_MASK_I', `
1238 instruct $1mask$2I`'(vec$3 dst, vec$4 src $12 $13)
1239 %{
1240   PREDICATE($1, $2)
1241   match(Set dst (Vector$5Mask src $13));
1242   ins_cost(INSN_COST);
1243   format %{ "$6  $dst, T$10$8, $src, T$10$7\t# $2$7 to $2$8\n\t"
1244             "$6  $dst, T$11$9, $dst, T$11$8\t# $2$8 to $2$9\n\t"
1245             "negr   $dst, T$11$9, $dst\t# $1 mask ($2$7 to $2$9)" %}
1246   ins_encode %{
1247     __ $6(as_FloatRegister($dst$$reg), __ T$10$8, as_FloatRegister($src$$reg), __ T$10$7);
1248     __ $6(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg), __ T$11$8);
1249     __ negr(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg));
1250   %}
1251   ins_pipe(pipe_slow);
1252 %}')dnl
1253 dnl                      $1     $2 $3 $4 $5     $6    $7 $8 $9 $10$11   $12      $13
1254 VECTOR_LOAD_STORE_MASK_I(load,  2, D, D, Load,  uxtl, B, H, S, 8, 4)
1255 VECTOR_LOAD_STORE_MASK_I(load,  4, X, D, Load,  uxtl, B, H, S, 8, 4)
1256 VECTOR_LOAD_STORE_MASK_I(store, 2, D, D, Store, xtn,  S, H, B, 4, 8, `, immI_4', size)
1257 VECTOR_LOAD_STORE_MASK_I(store, 4, D, X, Store, xtn,  S, H, B, 4, 8, `, immI_4', size)
1258 undefine(`PREDICATE')
1259 dnl
     dnl loadmask2L: VectorLoadMask producing two lanes of T_LONG or T_DOUBLE.
     dnl The byte lanes of src are widened in three uxtl steps (B to H, H to S,
     dnl S to D) and the resulting D lanes are negated with negr. The format
     dnl string labels the steps with Java type letters (B, S, I, L).
1260 instruct loadmask2L(vecX dst, vecD src)
1261 %{
1262   predicate(n->as_Vector()->length() == 2 &&
1263             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
1264              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
1265   match(Set dst (VectorLoadMask src));
1266   ins_cost(INSN_COST);
1267   format %{ "uxtl  $dst, T8H, $src, T8B\t# 2B to 2S\n\t"
1268             "uxtl  $dst, T4S, $dst, T4H\t# 2S to 2I\n\t"
1269             "uxtl  $dst, T2D, $dst, T2S\t# 2I to 2L\n\t"
1270             "neg   $dst, T2D, $dst\t# load mask (2B to 2L)" %}
1271   ins_encode %{
1272     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
1273     __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
1274     __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S);
1275     __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
1276   %}
1277   ins_pipe(pipe_slow);
1278 %}
1279 
     dnl storemask2L: VectorStoreMask for a two-lane vector whose size operand
     dnl is immI_8. The D lanes of src are narrowed in three xtn steps (D to S,
     dnl S to H, H to B) and the resulting byte lanes are negated with negr.
1280 instruct storemask2L(vecD dst, vecX src, immI_8 size)
1281 %{
1282   predicate(n->as_Vector()->length() == 2);
1283   match(Set dst (VectorStoreMask src size));
1284   ins_cost(INSN_COST);
1285   format %{ "xtn  $dst, T2S, $src, T2D\t# 2L to 2I\n\t"
1286             "xtn  $dst, T4H, $dst, T4S\t# 2I to 2S\n\t"
1287             "xtn  $dst, T8B, $dst, T8H\t# 2S to 2B\n\t"
1288             "neg  $dst, T8B, $dst\t# store mask (2L to 2B)" %}
1289   ins_encode %{
1290     __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
1291     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
1292     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
1293     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
1294   %}
1295   ins_pipe(pipe_slow);
1296 %}
1297 
1298 // vector mask cast
1299 dnl
     dnl VECTOR_MASK_CAST($1, $2)
     dnl   $1 - vector register class suffix (D or X).
     dnl   $2 - vector length in bytes.
     dnl Generates an instruct matching VectorMaskCast dst in place. The
     dnl predicate restricts it to casts where input and output both occupy $2
     dnl bytes and have the same lane count, so the encoding is empty and the
     dnl cost is 0.
1300 define(`VECTOR_MASK_CAST', `
1301 instruct vmaskcast$1`'(vec$1 dst)
1302 %{
1303   predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 &&
1304             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2 &&
1305             n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length());
1306   match(Set dst (VectorMaskCast dst));
1307   ins_cost(0);
1308   format %{ "vmaskcast $dst\t# empty" %}
1309   ins_encode %{
1310     // empty
1311   %}
1312   ins_pipe(pipe_class_empty);
1313 %}')dnl
1314 dnl              $1 $2
1315 VECTOR_MASK_CAST(D, 8)
1316 VECTOR_MASK_CAST(X, 16)
1317 dnl
1318 
1319 //-------------------------------- LOAD_IOTA_INDICES----------------------------------
1320 dnl
     dnl PREDICATE($1) is a section-local helper. Both variants require
     dnl UseSVE == 0 and T_BYTE elements; for $1 == 8 the accepted lane counts
     dnl are 2, 4 and 8, otherwise the lane count must be 16.
     dnl
     dnl VECTOR_LOAD_CON($1, $2, $3)
     dnl   $1 - lane count used in the rule name and to pick the predicate.
     dnl   $2 - vector register class suffix (D or X).
     dnl   $3 - suffix of the load used (d or q, giving ldrd / ldrq).
     dnl Generates an instruct matching VectorLoadConst: the address returned by
     dnl StubRoutines::aarch64::vector_iota_indices() is materialised in
     dnl rscratch1 and the vector is loaded from it.
     dnl
     dnl The helper name is quoted in the undefine call below: an unquoted name
     dnl is expanded by m4 before undefine sees it, which leaves the helper
     dnl defined.
1321 define(`PREDICATE', `ifelse($1, 8,
1322 `predicate(UseSVE == 0 &&
1323            (n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
1324             n->as_Vector()->length() == 8) &&
1325             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
1326 `predicate(UseSVE == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl
1327 dnl
1328 define(`VECTOR_LOAD_CON', `
1329 instruct loadcon$1B`'(vec$2 dst, immI0 src)
1330 %{
1331   PREDICATE($1)
1332   match(Set dst (VectorLoadConst src));
1333   ins_cost(INSN_COST);
1334   format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
1335   ins_encode %{
1336     __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
1337     __ ldr$3(as_FloatRegister($dst$$reg), rscratch1);
1338   %}
1339   ins_pipe(pipe_class_memory);
1340 %}')dnl
1341 dnl             $1  $2 $3
1342 VECTOR_LOAD_CON(8,  D, d)
1343 VECTOR_LOAD_CON(16, X, q)
1344 undefine(`PREDICATE')
1345 dnl
1346 //-------------------------------- LOAD_SHUFFLE ----------------------------------
1347 dnl
     dnl VECTOR_LOAD_SHUFFLE_B($1, $2)
     dnl   $1 - lane count; also used for the T$1B arrangement.
     dnl   $2 - vector register class suffix (D or X).
     dnl Generates an instruct matching VectorLoadShuffle for T_BYTE lanes. No
     dnl conversion is performed: src is copied to dst by or-ing it with itself.
1348 define(`VECTOR_LOAD_SHUFFLE_B', `
1349 instruct loadshuffle$1B`'(vec$2 dst, vec$2 src)
1350 %{
1351   predicate(n->as_Vector()->length() == $1 &&
1352             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
1353   match(Set dst (VectorLoadShuffle src));
1354   ins_cost(INSN_COST);
1355   format %{ "mov  $dst, T$1B, $src\t# get $1B shuffle" %}
1356   ins_encode %{
1357     __ orr(as_FloatRegister($dst$$reg), __ T$1B,
1358            as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
1359   %}
1360   ins_pipe(pipe_class_default);
1361 %}')dnl
1362 dnl                   $1  $2
1363 VECTOR_LOAD_SHUFFLE_B(8,  D)
1364 VECTOR_LOAD_SHUFFLE_B(16, X)
1365 dnl
     dnl VECTOR_LOAD_SHUFFLE_S($1, $2, $3)
     dnl   $1 - lane count.
     dnl   $2 - register class suffix of dst; $3 - register class suffix of src.
     dnl Generates an instruct matching VectorLoadShuffle for T_SHORT lanes: the
     dnl byte lanes of src are widened to half-word lanes with a single uxtl.
1366 define(`VECTOR_LOAD_SHUFFLE_S', `
1367 instruct loadshuffle$1S`'(vec$2 dst, vec$3 src)
1368 %{
1369   predicate(n->as_Vector()->length() == $1 &&
1370             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
1371   match(Set dst (VectorLoadShuffle src));
1372   ins_cost(INSN_COST);
1373   format %{ "uxtl  $dst, T8H, $src, T8B\t# $1B to $1H" %}
1374   ins_encode %{
1375     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
1376   %}
1377   ins_pipe(pipe_class_default);
1378 %}')dnl
1379 dnl                   $1 $2 $3
1380 VECTOR_LOAD_SHUFFLE_S(4, D, D)
1381 VECTOR_LOAD_SHUFFLE_S(8, X, D)
1382 dnl
1383 
     dnl loadshuffle4I: VectorLoadShuffle for four lanes of T_INT or T_FLOAT.
     dnl The byte lanes of src are widened to word lanes in two uxtl steps
     dnl (B to H, then H to S).
1384 instruct loadshuffle4I(vecX dst, vecD src)
1385 %{
1386   predicate(n->as_Vector()->length() == 4 &&
1387            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
1388             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
1389   match(Set dst (VectorLoadShuffle src));
1390   ins_cost(INSN_COST);
1391   format %{ "uxtl  $dst, T8H, $src, T8B\t# 4B to 4H \n\t"
1392             "uxtl  $dst, T4S, $dst, T4H\t# 4H to 4S" %}
1393   ins_encode %{
1394     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
1395     __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
1396   %}
1397   ins_pipe(pipe_slow);
1398 %}
1399 
1400 //-------------------------------- Rearrange -------------------------------------
1401 // Here is an example that rearranges a NEON vector with 4 ints:
1402 // Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
1403 //   1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
1404 //   2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
1405 //   3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
1406 //   4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
1407 //      and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
1408 //   5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
1409 //      and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
1410 //   6. Use Vm as index register, and use V1 as table register.
1411 //      Then get V2 as the result by tbl NEON instructions.
1412 // Notes:
1413 //   Step 1 matches VectorLoadConst.
1414 //   Step 3 matches VectorLoadShuffle.
1415 //   Step 4, 5, 6 match VectorRearrange.
1416 //   For VectorRearrange short/int, the reason why such complex calculation is
1417 //   required is because NEON tbl supports bytes table only, so for short/int, we
1418 //   need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
1419 //   to implement rearrange.
     dnl VECTOR_REARRANGE_B($1, $2)
     dnl   $1 - lane count; also used for the T$1B arrangement.
     dnl   $2 - vector register class suffix (D or X).
     dnl Generates an instruct matching VectorRearrange for T_BYTE lanes. Byte
     dnl indices can be handed to tbl directly, so the rule is a single tbl
     dnl with src as the one-register table and shuffle as the index vector.
     dnl The format string names $src as the table operand so that the printed
     dnl instruction agrees with the operands passed in ins_encode.
1420 define(`VECTOR_REARRANGE_B', `
1421 instruct rearrange$1B`'(vec$2 dst, vec$2 src, vec$2 shuffle)
1422 %{
1423   predicate(n->as_Vector()->length() == $1 &&
1424             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
1425   match(Set dst (VectorRearrange src shuffle));
1426   ins_cost(INSN_COST);
1427   effect(TEMP_DEF dst);
1428   format %{ "tbl $dst, T$1B, {$src}, $shuffle\t# rearrange $1B" %}
1429   ins_encode %{
1430     __ tbl(as_FloatRegister($dst$$reg), __ T$1B,
1431            as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
1432   %}
1433   ins_pipe(pipe_slow);
1434 %}')dnl
1435 dnl                $1  $2
1436 VECTOR_REARRANGE_B(8,  D)
1437 VECTOR_REARRANGE_B(16, X)
1438 dnl
dnl VECTOR_REARRANGE_S(lanes, reg, bytes): VectorRearrange rule for short vectors.
dnl   $1 = number of short lanes (4 or 8)
dnl   $2 = vector register class suffix (D or X)
dnl   $3 = number of byte lanes in the same register (8 or 16)
dnl tbl looks up bytes only, so each 16-bit lane index i in shuffle is turned
dnl into the byte pair (2i, 2i+1): multiply every halfword by 0x0202 (tmp0),
dnl add 0x0100 per halfword (tmp1), then use the result as the tbl index
dnl vector with src as the table.
dnl The format string prints each arrangement once, matching the operands
dnl actually passed to mulv/addv in ins_encode.
1439 define(`VECTOR_REARRANGE_S', `
1440 instruct rearrange$1S`'(vec$2 dst, vec$2 src, vec$2 shuffle, vec$2 tmp0, vec$2 tmp1)
1441 %{
1442   predicate(n->as_Vector()->length() == $1 &&
1443             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
1444   match(Set dst (VectorRearrange src shuffle));
1445   ins_cost(INSN_COST);
1446   effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
1447   format %{ "mov   $tmp0, T$3B, CONSTANT\t# constant 0x0202020202020202\n\t"
1448             "mov   $tmp1, T$1H, CONSTANT\t# constant 0x0100010001000100\n\t"
1449             "mulv  $dst, T$1H, $shuffle, $tmp0\n\t"
1450             "addv  $dst, T$3B, $dst, $tmp1\n\t"
1451             "tbl   $dst, T$3B, {$src}, 1, $dst\t# rearrange $1S" %}
1452   ins_encode %{
1453     __ mov(as_FloatRegister($tmp0$$reg), __ T$3B, 0x02);
1454     __ mov(as_FloatRegister($tmp1$$reg), __ T$1H, 0x0100);
1455     __ mulv(as_FloatRegister($dst$$reg), __ T$1H,
1456             as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
1457     __ addv(as_FloatRegister($dst$$reg), __ T$3B,
1458             as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
1459     __ tbl(as_FloatRegister($dst$$reg), __ T$3B,
1460            as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
1461   %}
1462   ins_pipe(pipe_slow);
1463 %}')dnl
1464 dnl                $1 $2 $3
1465 VECTOR_REARRANGE_S(4, D, 8)
1466 VECTOR_REARRANGE_S(8, X, 16)
1467 
dnl rearrange4I: VectorRearrange rule for 4-lane int or float vectors (vecX).
dnl shuffle holds one 32-bit lane index per element. The encoding multiplies
dnl each index by 0x04040404 (tmp0 holds 0x04 in every byte), adds the byte
dnl offsets 0x03020100 (tmp1), and uses the result as the tbl byte-index
dnl vector over src. These are steps 4-6 of the worked example in the
dnl Rearrange section comment.
1468 instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
1469 %{
1470   predicate(n->as_Vector()->length() == 4 &&
1471            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
1472             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
1473   match(Set dst (VectorRearrange src shuffle));
1474   ins_cost(INSN_COST);
1475   effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
1476   format %{ "mov   $tmp0, T16B, CONSTANT\t# constant 0x0404040404040404\n\t"
1477             "mov   $tmp1, T4S, CONSTANT\t# constant 0x0302010003020100\n\t"
1478             "mulv  $dst, T4S, $shuffle, $tmp0\n\t"
1479             "addv  $dst, T16B, $dst, $tmp1\n\t"
1480             "tbl   $dst, T16B, {$src}, 1, $dst\t# rearrange 4I" %}
1481   ins_encode %{
1482     __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04);
1483     __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100);
1484     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
1485             as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
1486     __ addv(as_FloatRegister($dst$$reg), __ T16B,
1487             as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
1488     __ tbl(as_FloatRegister($dst$$reg), __ T16B,
1489            as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
1490   %}
1491   ins_pipe(pipe_slow);
1492 %}
1493 
1494 //-------------------------------- Anytrue/alltrue -----------------------------
1495 dnl
dnl ANYTRUE_IN_MASK(lanes, reg): VectorTest rule selected when the node
dnl predicate is BoolTest::ne.
dnl   $1 = number of byte lanes (8 or 16)
dnl   $2 = vector register class suffix (D or X)
dnl The encoding sums all byte lanes of src1 with addv, moves byte 0 of the
dnl sum into dst, compares it with zero and sets dst with csetw NE.
dnl src2 is matched but not read; the flags register is declared KILLed.
1496 define(`ANYTRUE_IN_MASK', `
1497 instruct anytrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
1498 %{
1499   predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
1500   match(Set dst (VectorTest src1 src2 ));
1501   ins_cost(INSN_COST);
1502   effect(TEMP tmp, KILL cr);
1503   format %{ "addv  $tmp, T$1B, $src1\n\t"
1504             "umov  $dst, $tmp, B, 0\n\t"
1505             "cmp   $dst, 0\n\t"
1506             "cset  $dst\t# anytrue $1B" %}
1507   ins_encode %{
1508     // No need to use src2.
1509     __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
1510     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
1511     __ cmpw($dst$$Register, zr);
1512     __ csetw($dst$$Register, Assembler::NE);
1513   %}
1514   ins_pipe(pipe_slow);
1515 %}')dnl
1516 dnl             $1  $2
1517 ANYTRUE_IN_MASK(8,  D)
1518 ANYTRUE_IN_MASK(16, X)
1519 dnl
dnl ALLTRUE_IN_MASK(lanes, reg): VectorTest rule selected when the node
dnl predicate is BoolTest::overflow.
dnl   $1 = number of byte lanes (8 or 16)
dnl   $2 = vector register class suffix (D or X)
dnl The encoding takes the unsigned minimum over the byte lanes of src1 with
dnl uminv, moves byte 0 of the result into dst, compares it with 0xff and
dnl sets dst with csetw EQ. src2 is matched but not read; the flags register
dnl is declared KILLed.
1520 define(`ALLTRUE_IN_MASK', `
1521 instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
1522 %{
1523   predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
1524   match(Set dst (VectorTest src1 src2 ));
1525   ins_cost(INSN_COST);
1526   effect(TEMP tmp, KILL cr);
1527   format %{ "uminv $tmp, T$1B, $src1\n\t"
1528             "umov  $dst, $tmp, B, 0\n\t"
1529             "cmp   $dst, 0xff\n\t"
1530             "cset  $dst\t# alltrue $1B" %}
1531   ins_encode %{
1532     // No need to use src2.
1533     __ uminv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
1534     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
1535     __ cmpw($dst$$Register, 0xff);
1536     __ csetw($dst$$Register, Assembler::EQ);
1537   %}
1538   ins_pipe(pipe_slow);
1539 %}')dnl
1540 dnl             $1  $2
1541 ALLTRUE_IN_MASK(8,  D)
1542 ALLTRUE_IN_MASK(16, X)
1543 
1544 // --------------------------------- ABS --------------------------------------
1545 dnl
dnl VABS(mnemonic, asm, lanes, type, reg, simd, width): vector absolute value.
dnl   $1 = mnemonic printed by the format string
dnl   $2 = assembler method called in ins_encode
dnl   $3 = number of lanes
dnl   $4 = Java element type letter, used in the rule name and in AbsV$4
dnl   $5 = vector register class suffix (D or X)
dnl   $6 = SIMD arrangement letter (B, H, S or D)
dnl   $7 = pipeline class width suffix (64 or 128)
dnl The 8B instantiation also accepts vectors of length 4. The F and D type
dnl variants cost INSN_COST * 3 and use the vunop_fp pipe; the others cost
dnl INSN_COST and use the vlogical pipe.
1546 define(`VABS', `
1547 instruct vabs$3$4`'(vec$5 dst, vec$5 src)
1548 %{
1549   predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 || )n->as_Vector()->length() == $3);
1550   match(Set dst (AbsV$4 src));
1551   ins_cost(ifelse($4, F, INSN_COST * 3, $4, D, INSN_COST * 3, INSN_COST));
1552   format %{ "$1  $dst, T$3$6, $src\t# vector ($3$6)" %}
1553   ins_encode %{
1554     __ $2(as_FloatRegister($dst$$reg), __ T$3$6, as_FloatRegister($src$$reg));
1555   %}
1556   ins_pipe(ifelse($4, F, vunop_fp$7, $4, D, vunop_fp$7, vlogical$7));
1557 %}')dnl
1558 dnl  $1    $2    $3  $4 $5 $6 $7
1559 VABS(abs,  absr, 8,  B, D, B, 64)
1560 VABS(abs,  absr, 16, B, X, B, 128)
1561 VABS(abs,  absr, 4,  S, D, H, 64)
1562 VABS(abs,  absr, 8,  S, X, H, 128)
1563 VABS(abs,  absr, 2,  I, D, S, 64)
1564 VABS(abs,  absr, 4,  I, X, S, 128)
1565 VABS(abs,  absr, 2,  L, X, D, 128)
1566 VABS(fabs, fabs, 2,  F, D, S, 64)
1567 VABS(fabs, fabs, 4,  F, X, S, 128)
1568 VABS(fabs, fabs, 2,  D, X, D, 128)
1569 
1570 // --------------------------------- FABS DIFF --------------------------------
1571 dnl
dnl VFABD(mnemonic, asm, lanes, type, reg, simd, width): fused floating-point
dnl absolute difference. Matches AbsV applied to SubV of two vectors and emits
dnl one $2 instruction over src1 and src2.
dnl   $1 = mnemonic printed by the format string
dnl   $2 = assembler method called in ins_encode
dnl   $3 = number of lanes
dnl   $4 = Java element type letter (F or D)
dnl   $5 = vector register class suffix (D or X)
dnl   $6 = SIMD arrangement letter (S or D)
dnl   $7 = pipeline class width suffix (64 or 128)
1572 define(`VFABD', `
1573 instruct vabd$3$4`'(vec$5 dst, vec$5 src1, vec$5 src2)
1574 %{
1575   predicate(n->as_Vector()->length() == $3);
1576   match(Set dst (AbsV$4 (SubV$4 src1 src2)));
1577   ins_cost(INSN_COST * 3);
1578   format %{ "$1  $dst, T$3$6, $src1, $src2\t# vector ($3$6)" %}
1579   ins_encode %{
1580     __ $2(as_FloatRegister($dst$$reg), __ T$3$6,
1581             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
1582   %}
1583   ins_pipe(vunop_fp$7);
1584 %}')dnl
1585 dnl   $1    $2    $3 $4 $5 $6 $7
1586 VFABD(fabd, fabd, 2, F, D, S, 64)
1587 VFABD(fabd, fabd, 4, F, X, S, 128)
1588 VFABD(fabd, fabd, 2, D, X, D, 128)
1589 dnl
dnl VREPLICATE_REG(simd, lanes, type, reg, srcop, conv): broadcast a scalar
dnl register into every lane with dup.
dnl   $1 = SIMD arrangement letter used in the dup arrangement
dnl   $2 = number of lanes
dnl   $3 = Java element type letter, used in the rule name and in Replicate$3
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = operand class of the scalar source
dnl   $6 = conversion applied to the source register number in ins_encode
dnl The rule applies only when UseSVE == 0. The 8B instantiation accepts
dnl lengths 8 or 4, and the 4S instantiation accepts lengths 4 or 2.
dnl The pipe class is chosen from the source operand class, then from the
dnl element type, with a 128 or 64 suffix taken from the register class.
1590 define(`VREPLICATE_REG', `
1591 instruct replicate$2$3`'(vec$4 dst, $5 src)
1592 %{
1593   predicate(UseSVE == 0 && ifelse($2$3, 8B,
1594                                   `(n->as_Vector()->length() == 8 ||
1595                             n->as_Vector()->length() == 4)',
1596                                   $2$3, 4S,
1597                                   `(n->as_Vector()->length() == 4 ||
1598                             n->as_Vector()->length() == 2)',
1599                             n->as_Vector()->length() == $2));
1600   match(Set dst (Replicate$3 src));
1601   ins_cost(INSN_COST);
1602   format %{ "dup  $dst, $src\t# vector ($2$3)" %}
1603   ins_encode %{
1604     __ dup(as_FloatRegister($dst$$reg), __ T$2$1, $6($src$$reg));
1605   %}
1606   ins_pipe(ifelse($5, iRegIorL2I, vdup_reg_reg,
1607                   $5, iRegL, vdup_reg_reg,
1608                   $3, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($4, X, 128, 64));
1609 %}')dnl
dnl VREPLICATE_IMM(simd, lanes, type, reg, conop, mask): broadcast an
dnl immediate constant into every lane with a vector mov.
dnl   $1 = SIMD arrangement letter (not referenced by this template; the
dnl        arrangement is derived from $3 through iTYPE2SIMD)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter, used in the rule name and in Replicate$3
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = operand class of the constant
dnl   $6 = optional text appended to the constant expression (a bit mask for
dnl        the byte and short variants, empty otherwise)
dnl The rule applies only when UseSVE == 0, with the same length predicates
dnl as the register variant.
dnl The table after the definition instantiates both the register and the
dnl immediate replicate templates.
1610 define(`VREPLICATE_IMM', `
1611 instruct replicate$2$3_imm`'(vec$4 dst, $5 con)
1612 %{
1613   predicate(UseSVE == 0 && ifelse($2$3, 8B,
1614                                   `(n->as_Vector()->length() == 8 ||
1615                             n->as_Vector()->length() == 4)',
1616                                   $2$3, 4S,
1617                                   `(n->as_Vector()->length() == 4 ||
1618                             n->as_Vector()->length() == 2)',
1619                             n->as_Vector()->length() == $2));
1620   match(Set dst (Replicate$3 con));
1621   ins_cost(INSN_COST);
1622   format %{ "movi  $dst, $con\t`#' vector ($2`'ifelse($3, S, H, $3))" %}
1623   ins_encode %{
1624     __ mov(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3), $con$$constant`'$6);
1625   %}
1626   ins_pipe(vmovi_reg_imm`'ifelse($4, X, 128, 64));
1627 %}')dnl
1628 dnl            $1 $2  $3 $4 $5          $6
1629 VREPLICATE_REG(B, 8,  B, D, iRegIorL2I, as_Register)
1630 VREPLICATE_REG(B, 16, B, X, iRegIorL2I, as_Register)
1631 VREPLICATE_IMM(B, 8,  B, D, immI, ` & 0xff')
1632 VREPLICATE_IMM(B, 16, B, X, immI, ` & 0xff')
1633 VREPLICATE_REG(H, 4,  S, D, iRegIorL2I, as_Register)
1634 VREPLICATE_REG(H, 8,  S, X, iRegIorL2I, as_Register)
1635 VREPLICATE_IMM(H, 4,  S, D, immI, ` & 0xffff')
1636 VREPLICATE_IMM(H, 8,  S, X, immI, ` & 0xffff')
1637 VREPLICATE_REG(S, 2,  I, D, iRegIorL2I, as_Register)
1638 VREPLICATE_REG(S, 4,  I, X, iRegIorL2I, as_Register)
1639 VREPLICATE_IMM(S, 2,  I, D, immI)
1640 VREPLICATE_IMM(S, 4,  I, X, immI)
1641 VREPLICATE_REG(D, 2,  L, X, iRegL,      as_Register)
1642 VREPLICATE_IMM(D, 2,  L, X, immL)
1643 VREPLICATE_REG(S, 2,  F, D, vRegF,      as_FloatRegister)
1644 VREPLICATE_REG(S, 4,  F, X, vRegF,      as_FloatRegister)
1645 VREPLICATE_REG(D, 2,  D, X, vRegD,      as_FloatRegister)
1646 dnl
1647 
1648 // ====================REDUCTION ARITHMETIC====================================
1649 dnl
dnl REDUCE_ADD_INT(lanes, type, reg): AddReductionVI over an int vector.
dnl   $1 = number of lanes (2 or 4)
dnl   $2 = element type letter used in the rule name
dnl   $3 = vector register class suffix (D or X)
dnl With 2 lanes the vector is added pairwise with itself (addpv T2S); with
dnl 4 lanes addv T4S is used. In both cases S lane 0 of vtmp is then moved
dnl to itmp and added to the scalar input isrc with addw.
1650 define(`REDUCE_ADD_INT', `
1651 instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 vtmp, iRegINoSp itmp)
1652 %{
1653   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
1654   match(Set dst (AddReductionVI isrc vsrc));
1655   ins_cost(INSN_COST);
1656   effect(TEMP vtmp, TEMP itmp);
1657   format %{ ifelse($1, 2, `"addpv  $vtmp, T2S, $vsrc, $vsrc\n\t"',`"addv  $vtmp, T4S, $vsrc\n\t"')
1658             "umov  $itmp, $vtmp, S, 0\n\t"
1659             "addw  $dst, $itmp, $isrc\t# add reduction$1I"
1660   %}
1661   ins_encode %{
1662     ifelse($1, 2, `__ addpv(as_FloatRegister($vtmp$$reg), __ T2S,
1663              as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));', `__ addv(as_FloatRegister($vtmp$$reg), __ T4S,
1664             as_FloatRegister($vsrc$$reg));')
1665     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
1666     __ addw($dst$$Register, $itmp$$Register, $isrc$$Register);
1667   %}
1668   ins_pipe(pipe_class_default);
1669 %}')dnl
1670 dnl            $1 $2 $3
1671 REDUCE_ADD_INT(2, I, D)
1672 REDUCE_ADD_INT(4, I, X)
1673 dnl
dnl REDUCE_MUL_INT(lanes, type, reg): MulReductionVI over an int vector.
dnl   $1 = number of lanes (2 or 4)
dnl   $2 = element type letter used in the rule name
dnl   $3 = vector register class suffix (D or X)
dnl The operand list and the effect clause differ by lane count: the 2-lane
dnl rule takes a single integer temp (tmp), the 4-lane rule takes a vector
dnl temp (vtmp) and an integer temp (itmp). dst is a TEMP in both.
dnl 2 lanes: multiply isrc by S lane 0, then by S lane 1.
dnl 4 lanes: ins/mulv first combine the vector into two S lanes of vtmp
dnl (presumably upper half times lower half - confirm against the ins operand
dnl order), then the same two scalar multiplies are applied to vtmp.
1674 define(`REDUCE_MUL_INT', `
1675 instruct reduce_mul$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, ifelse($1, 2, iRegINoSp tmp`)', vecX vtmp`,' iRegINoSp itmp`)')
1676 %{
1677   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
1678   match(Set dst (MulReductionVI isrc vsrc));
1679   ins_cost(INSN_COST);
1680   effect(TEMP ifelse($1, 2, tmp, vtmp), TEMP ifelse($1, 2, dst, itmp`,' TEMP dst));
1681   format %{ ifelse($1, 2, `"umov  $tmp, $vsrc, S, 0\n\t"
1682             "mul   $dst, $tmp, $isrc\n\t"
1683             "umov  $tmp, $vsrc, S, 1\n\t"
1684             "mul   $dst, $tmp, $dst\t# mul reduction2I"',`"ins   $vtmp, D, $vsrc, 0, 1\n\t"
1685             "mulv  $vtmp, T2S, $vtmp, $vsrc\n\t"
1686             "umov  $itmp, $vtmp, S, 0\n\t"
1687             "mul   $dst, $itmp, $isrc\n\t"
1688             "umov  $itmp, $vtmp, S, 1\n\t"
1689             "mul   $dst, $itmp, $dst\t# mul reduction4I"')
1690   %}
1691   ins_encode %{
1692     ifelse($1, 2, `__ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
1693     __ mul($dst$$Register, $tmp$$Register, $isrc$$Register);
1694     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
1695     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);', `__ ins(as_FloatRegister($vtmp$$reg), __ D,
1696            as_FloatRegister($vsrc$$reg), 0, 1);
1697     __ mulv(as_FloatRegister($vtmp$$reg), __ T2S,
1698             as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
1699     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
1700     __ mul($dst$$Register, $itmp$$Register, $isrc$$Register);
1701     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 1);
1702     __ mul($dst$$Register, $itmp$$Register, $dst$$Register);')
1703   %}
1704   ins_pipe(pipe_class_default);
1705 %}')dnl
1706 dnl            $1 $2 $3
1707 REDUCE_MUL_INT(2, I, D)
1708 REDUCE_MUL_INT(4, I, X)
1709 dnl
dnl REDUCE_MULORADD_FORD(op, lanes, type, prefix, reg, kind): float/double
dnl add or multiply reduction, done lane by lane with scalar instructions.
dnl   $1 = scalar assembler method and printed mnemonic (fadds, fmuls, ...)
dnl   $2 = number of lanes (2 or 4)
dnl   $3 = element type letter (F or D)
dnl   $4 = lower-case prefix of the scalar source operand name ($4src)
dnl   $5 = vector register class suffix (D or X)
dnl   $6 = add or mul; selects AddReductionV or MulReductionV and the rule name
dnl Sequence: dst = op(scalar src, vsrc), then for each further lane k the
dnl lane is copied into tmp with ins and folded in with dst = op(dst, tmp).
dnl NOTE(review): the 4-lane branch hard-codes the S lane size and the
dnl reduction4F tag; every 4-lane instantiation in the table uses type F,
dnl so the expansion is correct for the current table.
1710 define(`REDUCE_MULORADD_FORD', `
1711 instruct reduce_$6$2$3`'(vReg$3 dst, vReg$3 $4src, vec$5 vsrc, vec$5 tmp)
1712 %{
1713   match(Set dst (ifelse($6, add, Add, Mul)ReductionV$3 $4src vsrc));
1714   ins_cost(INSN_COST);
1715   effect(TEMP tmp, TEMP dst);
1716   format %{ "$1 $dst, $$4src, $vsrc\n\t"
1717             "ins   $tmp, ifelse($3, F, S, D), $vsrc, 0, 1\n\t"
1718             ifelse($2, 2, `"$1 $dst, $dst, $tmp\t# $6 reduction$2$3"',
1719             `"$1 $dst, $dst, $tmp\n\t"
1720             "ins   $tmp, S, $vsrc, 0, 2\n\t"
1721             "$1 $dst, $dst, $tmp\n\t"
1722             "ins   $tmp, S, $vsrc, 0, 3\n\t"
1723             "$1 $dst, $dst, $tmp\t# $6 reduction4F"')
1724   %}
1725   ins_encode %{
1726     __ $1(as_FloatRegister($dst$$reg),
1727              as_FloatRegister($$4src$$reg), as_FloatRegister($vsrc$$reg));
1728     __ ins(as_FloatRegister($tmp$$reg), __ ifelse($3, F, S, D),
1729            as_FloatRegister($vsrc$$reg), 0, 1);
1730     __ $1(as_FloatRegister($dst$$reg),
1731              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));ifelse($2, 4, `
1732     __ ins(as_FloatRegister($tmp$$reg), __ ifelse($3, F, S, D),
1733            as_FloatRegister($vsrc$$reg), 0, 2);
1734     __ $1(as_FloatRegister($dst$$reg),
1735              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1736     __ ins(as_FloatRegister($tmp$$reg), __ S,
1737            as_FloatRegister($vsrc$$reg), 0, 3);
1738     __ $1(as_FloatRegister($dst$$reg),
1739              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));')
1740   %}
1741   ins_pipe(pipe_class_default);
1742 %}')dnl
1743 dnl                  $1     $2 $3 $4 $5 $6
1744 REDUCE_MULORADD_FORD(fadds, 2, F, f, D, add)
1745 REDUCE_MULORADD_FORD(fadds, 4, F, f, X, add)
1746 REDUCE_MULORADD_FORD(fmuls, 2, F, f, D, mul)
1747 REDUCE_MULORADD_FORD(fmuls, 4, F, f, X, mul)
1748 REDUCE_MULORADD_FORD(faddd, 2, D, d, X, add)
1749 REDUCE_MULORADD_FORD(fmuld, 2, D, d, X, mul)
1750 
1751 // ====================VECTOR ARITHMETIC=======================================
1752 
1753 // --------------------------------- ADD --------------------------------------
dnl VADD(asm, lanes, type, reg, simd): element-wise vector add.
dnl   $1 = assembler method and printed mnemonic (addv or fadd)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter, used in the rule name and in AddV$3
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = SIMD arrangement letter; L is mapped to D in ins_encode
dnl Predicate selection: 8B accepts lengths 4 or 8, 4S accepts lengths 2 or 4,
dnl the case where $2$5 is 2D (the double variant) gets no predicate at all,
dnl and every other variant requires length == $2.
dnl Floating-point types use the vdop_fp pipe, the others use vdop.
1754 define(`VADD', `
1755 instruct vadd$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
1756 %{ifelse($2$3, 8B, `
1757   predicate(n->as_Vector()->length() == 4 ||
1758             n->as_Vector()->length() == 8);',
1759          $2$3, 4S, `
1760   predicate(n->as_Vector()->length() == 2 ||
1761             n->as_Vector()->length() == 4);',
1762          $2$5, 2D, , `
1763   predicate(n->as_Vector()->length() == $2);')
1764   match(Set dst (AddV$3 src1 src2));
1765   ins_cost(INSN_COST);
1766   format %{ "$1  $dst,$src1,$src2\t# vector ($2$5)" %}
1767   ins_encode %{
1768     __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
1769             as_FloatRegister($src1$$reg),
1770             as_FloatRegister($src2$$reg));
1771   %}
1772   ins_pipe(vdop`'ifelse($3, F, _fp, $3, D, _fp)`'ifelse($4, D, 64, 128));
1773 %}')dnl
1774 dnl  $1    $2  $3 $4 $5
1775 VADD(addv, 8,  B, D, B)
1776 VADD(addv, 16, B, X, B)
1777 VADD(addv, 4,  S, D, H)
1778 VADD(addv, 8,  S, X, H)
1779 VADD(addv, 2,  I, D, S)
1780 VADD(addv, 4,  I, X, S)
1781 VADD(addv, 2,  L, X, L)
1782 VADD(fadd, 2,  F, D, S)
1783 VADD(fadd, 4,  F, X, S)
1784 VADD(fadd, 2,  D, X, D)
1785 
1786 // --------------------------------- SUB --------------------------------------
dnl VSUB(asm, lanes, type, reg, simd): element-wise vector subtract.
dnl   $1 = assembler method and printed mnemonic (subv or fsub)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter, used in the rule name and in SubV$3
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = SIMD arrangement letter; L is mapped to D in ins_encode
dnl Predicate selection: 8B accepts lengths 4 or 8, 4S accepts lengths 2 or 4,
dnl every other variant requires length == $2.
dnl Floating-point types use the vdop_fp pipe, the others use vdop.
1787 define(`VSUB', `
1788 instruct vsub$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
1789 %{ifelse($2$3, 8B, `
1790   predicate(n->as_Vector()->length() == 4 ||
1791             n->as_Vector()->length() == 8);',
1792          $2$3, 4S, `
1793   predicate(n->as_Vector()->length() == 2 ||
1794             n->as_Vector()->length() == 4);',`
1795   predicate(n->as_Vector()->length() == $2);')
1796   match(Set dst (SubV$3 src1 src2));
1797   ins_cost(INSN_COST);
1798   format %{ "$1  $dst,$src1,$src2\t# vector ($2$5)" %}
1799   ins_encode %{
1800     __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
1801             as_FloatRegister($src1$$reg),
1802             as_FloatRegister($src2$$reg));
1803   %}
1804   ins_pipe(vdop`'ifelse($3, F, _fp, $3, D, _fp)`'ifelse($4, D, 64, 128));
1805 %}')dnl
1806 dnl  $1    $2  $3 $4 $5
1807 VSUB(subv, 8,  B, D, B)
1808 VSUB(subv, 16, B, X, B)
1809 VSUB(subv, 4,  S, D, H)
1810 VSUB(subv, 8,  S, X, H)
1811 VSUB(subv, 2,  I, D, S)
1812 VSUB(subv, 4,  I, X, S)
1813 VSUB(subv, 2,  L, X, L)
1814 VSUB(fsub, 2,  F, D, S)
1815 VSUB(fsub, 4,  F, X, S)
1816 VSUB(fsub, 2,  D, X, D)
1817 
1818 // --------------------------------- MUL --------------------------------------
dnl VMUL(asm, lanes, type, reg, simd): element-wise vector multiply.
dnl   $1 = assembler method and printed mnemonic (mulv or fmul)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter, used in the rule name and in MulV$3
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = SIMD arrangement letter
dnl Predicate selection: 8B accepts lengths 4 or 8, 4S accepts lengths 2 or 4,
dnl every other variant requires length == $2.
dnl Floating-point types use the vmuldiv_fp pipe, the others use vmul.
dnl The table has no long (2L) instantiation.
1819 define(`VMUL', `
1820 instruct vmul$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
1821 %{ifelse($2$3, 8B, `
1822   predicate(n->as_Vector()->length() == 4 ||
1823             n->as_Vector()->length() == 8);',
1824          $2$3, 4S, `
1825   predicate(n->as_Vector()->length() == 2 ||
1826             n->as_Vector()->length() == 4);',`
1827   predicate(n->as_Vector()->length() == $2);')
1828   match(Set dst (MulV$3 src1 src2));
1829   ins_cost(INSN_COST);
1830   format %{ "$1  $dst,$src1,$src2\t# vector ($2$5)" %}
1831   ins_encode %{
1832     __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
1833             as_FloatRegister($src1$$reg),
1834             as_FloatRegister($src2$$reg));
1835   %}
1836   ins_pipe(vmul`'ifelse($3, F, div_fp, $3, D, div_fp)`'ifelse($4, D, 64, 128));
1837 %}')dnl
1838 dnl  $1    $2  $3 $4 $5
1839 VMUL(mulv, 8,  B, D, B)
1840 VMUL(mulv, 16, B, X, B)
1841 VMUL(mulv, 4,  S, D, H)
1842 VMUL(mulv, 8,  S, X, H)
1843 VMUL(mulv, 2,  I, D, S)
1844 VMUL(mulv, 4,  I, X, S)
1845 VMUL(fmul, 2,  F, D, S)
1846 VMUL(fmul, 4,  F, X, S)
1847 VMUL(fmul, 2,  D, X, D)
1848 
1849 // --------------------------------- MLA --------------------------------------
dnl VMLA(asm, lanes, type, reg, simd): multiply-accumulate, dst += src1 * src2.
dnl   $1 = assembler method and printed mnemonic (mlav or fmla)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = SIMD arrangement letter
dnl Match pattern: mlav matches AddV of dst and MulV(src1, src2); any other
dnl method matches FmaV of dst and Binary(src1, src2).
dnl Predicate selection: 4S accepts lengths 2 or 4, the fmla variants require
dnl UseFMA and length == $2, the remaining variants require length == $2.
dnl Only the fmla variants emit the explanatory comment line before the rule.
1850 define(`VMLA', `ifelse($1, fmla, `
1851 // dst + src1 * src2')
1852 instruct vmla$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
1853 %{ifelse($2$3, 4S, `
1854   predicate(n->as_Vector()->length() == 2 ||
1855             n->as_Vector()->length() == 4);', $1, fmla, `
1856   predicate(UseFMA && n->as_Vector()->length() == $2);', `
1857   predicate(n->as_Vector()->length() == $2);')
1858   match(Set dst (ifelse($1, mlav, `AddV'$3` dst (MulV$3 src1 src2)', FmaV$3  `dst (Binary src1 src2)')));
1859   ins_cost(INSN_COST);
1860   format %{ "$1  $dst,$src1,$src2\t# vector ($2$5)" %}
1861   ins_encode %{
1862     __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
1863             as_FloatRegister($src1$$reg),
1864             as_FloatRegister($src2$$reg));
1865   %}
1866   ins_pipe(vm`'ifelse($3, F, uldiv_fp, $3, D, uldiv_fp, la)`'ifelse($4, D, 64, 128));
1867 %}')dnl
1868 dnl  $1   $2  $3 $4 $5
1869 VMLA(mlav, 4,  S, D, H)
1870 VMLA(mlav, 8,  S, X, H)
1871 VMLA(mlav, 2,  I, D, S)
1872 VMLA(mlav, 4,  I, X, S)
1873 VMLA(fmla, 2,  F, D, S)
1874 VMLA(fmla, 4,  F, X, S)
1875 VMLA(fmla, 2,  D, X, D)
1876 
1877 // --------------------------------- MLS --------------------------------------
dnl VMLS(asm, lanes, type, reg, simd): multiply-subtract, dst -= src1 * src2.
dnl   $1 = assembler method and printed mnemonic (mlsv or fmls)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = SIMD arrangement letter
dnl Match pattern: mlsv matches SubV of dst and MulV(src1, src2). Any other
dnl method emits two match clauses, FmaV with the negation on src1 and FmaV
dnl with the negation on src2.
dnl Predicate selection: 4S accepts lengths 2 or 4, the fmls variants require
dnl UseFMA and length == $2, the remaining variants require length == $2.
dnl Only the fmls variants emit the explanatory comment line before the rule.
1878 define(`VMLS', `ifelse($1, fmls, `
1879 // dst - src1 * src2')
1880 instruct vmls$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) %{ifelse($2$3, 4S, `
1881   predicate(n->as_Vector()->length() == 2 ||
1882             n->as_Vector()->length() == 4);', $1, fmls, `
1883   predicate(UseFMA && n->as_Vector()->length() == $2);', `
1884   predicate(n->as_Vector()->length() == $2);')
1885   match(Set dst (ifelse($1, mlsv, `SubV'$3` dst (MulV$3 src1 src2)', FmaV$3  `dst (Binary (NegV'$3 `src1) src2)));
1886   match(Set dst (FmaV$3  dst (Binary src1 (NegV'$3 `src2))')));
1887   ins_cost(INSN_COST);
1888   format %{ "$1  $dst,$src1,$src2\t# vector ($2$5)" %}
1889   ins_encode %{
1890     __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
1891             as_FloatRegister($src1$$reg),
1892             as_FloatRegister($src2$$reg));
1893   %}
1894   ins_pipe(vm`'ifelse($3, F, uldiv_fp, $3, D, uldiv_fp, la)`'ifelse($4, D, 64, 128));
1895 %}')dnl
1896 dnl  $1   $2  $3 $4 $5
1897 VMLS(mlsv, 4,  S, D, H)
1898 VMLS(mlsv, 8,  S, X, H)
1899 VMLS(mlsv, 2,  I, D, S)
1900 VMLS(mlsv, 4,  I, X, S)
1901 VMLS(fmls, 2,  F, D, S)
1902 VMLS(fmls, 4,  F, X, S)
1903 VMLS(fmls, 2,  D, X, D)
1904 
1905 // --------------- Vector Multiply-Add Shorts into Integer --------------------
1906 
dnl vmuladdS2I: MulAddVS2VI rule, selected when the first input is a vector
dnl of shorts. Two smullv instructions are issued, one with the T4H
dnl arrangement into tmp and one with the T8H arrangement into dst, and the
dnl two products are then combined with a pairwise add (addpv T4S) into dst.
dnl dst is TEMP_DEF and tmp is a TEMP.
1907 instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{
1908   predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
1909   match(Set dst (MulAddVS2VI src1 src2));
1910   ins_cost(INSN_COST);
1911   effect(TEMP_DEF dst, TEMP tmp);
1912   format %{ "smullv  $tmp, $src1, $src2\t# vector (4H)\n\t"
1913             "smullv  $dst, $src1, $src2\t# vector (8H)\n\t"
1914             "addpv   $dst, $tmp, $dst\t# vector (4S)" %}
1915   ins_encode %{
1916     __ smullv(as_FloatRegister($tmp$$reg), __ T4H,
1917               as_FloatRegister($src1$$reg),
1918               as_FloatRegister($src2$$reg));
1919     __ smullv(as_FloatRegister($dst$$reg), __ T8H,
1920               as_FloatRegister($src1$$reg),
1921               as_FloatRegister($src2$$reg));
1922     __ addpv(as_FloatRegister($dst$$reg), __ T4S,
1923              as_FloatRegister($tmp$$reg),
1924              as_FloatRegister($dst$$reg));
1925   %}
1926   ins_pipe(vmuldiv_fp128);
1927 %}
1928 
1929 // --------------------------------- DIV --------------------------------------
dnl VDIV(asm, lanes, type, reg, simd): element-wise floating-point divide.
dnl   $1 = assembler method and printed mnemonic (fdiv)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter (F or D), used in DivV$3
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = SIMD arrangement letter
dnl The rule requires length == $2 and uses the vmuldiv_fp pipe.
1930 define(`VDIV', `
1931 instruct vdiv$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
1932 %{
1933   predicate(n->as_Vector()->length() == $2);
1934   match(Set dst (DivV$3 src1 src2));
1935   ins_cost(INSN_COST);
1936   format %{ "$1  $dst,$src1,$src2\t# vector ($2$5)" %}
1937   ins_encode %{
1938     __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
1939             as_FloatRegister($src1$$reg),
1940             as_FloatRegister($src2$$reg));
1941   %}
1942   ins_pipe(vmuldiv_fp`'ifelse($4, D, 64, 128));
1943 %}')dnl
1944 dnl  $1    $2  $3 $4 $5
1945 VDIV(fdiv, 2,  F, D, S)
1946 VDIV(fdiv, 4,  F, X, S)
1947 VDIV(fdiv, 2,  D, X, D)
1948 
1949 // --------------------------------- SQRT -------------------------------------
dnl VSQRT(asm, lanes, type, reg, simd): element-wise floating-point square root.
dnl   $1 = assembler method and printed mnemonic (fsqrt)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter (F or D), used in SqrtV$3
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = SIMD arrangement letter used in ins_encode
dnl The 2F variant uses the vunop_fp pipe, the others use vsqrt_fp.
dnl NOTE(review): unlike the neighbouring templates this one sets no ins_cost,
dnl and its format tag prints $2$3 rather than the $2$5 arrangement; confirm
dnl whether both are intentional.
1950 define(`VSQRT', `
1951 instruct vsqrt$2$3`'(vec$4 dst, vec$4 src)
1952 %{
1953   predicate(n->as_Vector()->length() == $2);
1954   match(Set dst (SqrtV$3 src));
1955   format %{ "$1  $dst, $src\t# vector ($2$3)" %}
1956   ins_encode %{
1957     __ $1(as_FloatRegister($dst$$reg), __ T$2$5, as_FloatRegister($src$$reg));
1958   %}
1959   ins_pipe(v`'ifelse($2$3, 2F, unop, sqrt)_fp`'ifelse($4, D, 64, 128));
1960 %}')dnl
1961 dnl   $1     $2  $3 $4 $5
1962 VSQRT(fsqrt, 2,  F, D, S)
1963 VSQRT(fsqrt, 4,  F, X, S)
1964 VSQRT(fsqrt, 2,  D, X, D)
1965 
1966 // --------------------------------- NEG --------------------------------------
dnl VNEGI(reg, arrangements): NegVI rule shared by several element sizes.
dnl   $1 = vector register class suffix (D or X)
dnl   $2 = arrangement list printed in the format string only
dnl The D variant matches vectors shorter than 16 bytes, the X variant
dnl matches vectors of exactly 16 bytes. The SIMD arrangement is not fixed by
dnl the template: ins_encode derives it from the element type of the node
dnl via esize2arrangement, passing false for D and true for X.
1967 define(`VNEGI', `
1968 instruct vnegI$1(vec$1 dst, vec$1 src)
1969 %{
1970   predicate(n->as_Vector()->length_in_bytes() ifelse($1, D, <, ==) 16);
1971   match(Set dst (NegVI src));
1972   ins_cost(INSN_COST);
1973   format %{ "negr  $dst, $src\t# vector ($2)" %}
1974   ins_encode %{
1975     BasicType bt = Matcher::vector_element_basic_type(this);
1976     Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), ifelse($1, D, false, true));
1977     __ negr(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg));
1978   %}
1979   ins_pipe(vunop_fp`'ifelse($1, D, 64, 128));
1980 %}')dnl
1981 dnl  $1  $2
1982 VNEGI(D, 8B/4H/2S)
1983 VNEGI(X, 16B/8H/4S)
1984 dnl
dnl VNEG(asm, lanes, type, reg, simd): vector negate for long, float and double.
dnl   $1 = assembler method and printed mnemonic (negr or fneg)
dnl   $2 = number of lanes
dnl   $3 = Java element type letter, used in the rule name and in NegV$3
dnl   $4 = vector register class suffix (D or X)
dnl   $5 = SIMD arrangement letter
dnl The long variant costs INSN_COST, the float and double variants cost
dnl INSN_COST * 3. All variants use the vunop_fp pipe.
1985 define(`VNEG', `
1986 instruct vneg$2$3`'(vec$4 dst, vec$4 src)
1987 %{
1988   predicate(n->as_Vector()->length() == $2);
1989   match(Set dst (NegV$3 src));
1990   ins_cost(INSN_COST`'ifelse($3, L, `',` * 3'));
1991   format %{ "$1  $dst,$src\t# vector ($2$5)" %}
1992   ins_encode %{
1993     __ $1(as_FloatRegister($dst$$reg), __ T$2$5,
1994             as_FloatRegister($src$$reg));
1995   %}
1996   ins_pipe(vunop_fp`'ifelse($4, D, 64, 128));
1997 %}')dnl
1998 dnl  $1    $2  $3 $4 $5
1999 VNEG(negr, 2,  L, X, D)
2000 VNEG(fneg, 2,  F, D, S)
2001 VNEG(fneg, 4,  F, X, S)
2002 VNEG(fneg, 2,  D, X, D)
2003 dnl
dnl VLOGICAL(mnemonic, asm, stem, node, bytes, simd, reg): bitwise vector
dnl logic rule, instantiated below for AND, OR and XOR.
dnl   $1 = mnemonic printed by the format string
dnl   $2 = assembler method called in ins_encode
dnl   $3 = stem of the rule name (and, or, xor)
dnl   $4 = ideal node prefix; the rule matches $4V
dnl   $5 = vector size in bytes (8 or 16)
dnl   $6 = SIMD arrangement letter (B)
dnl   $7 = vector register class suffix (D or X)
dnl The predicate tests length_in_bytes; the 8-byte variant also accepts
dnl 4-byte vectors.
2004 define(`VLOGICAL', `
2005 instruct v$3$5$6`'(vec$7 dst, vec$7 src1, vec$7 src2)
2006 %{
2007   predicate(ifelse($5, 8, n->as_Vector()->length_in_bytes() == 4 ||`
2008             ')n->as_Vector()->length_in_bytes() == $5);
2009   match(Set dst ($4V src1 src2));
2010   ins_cost(INSN_COST);
2011   format %{ "$1  $dst,$src1,$src2\t# vector ($5$6)" %}
2012   ins_encode %{
2013     __ $2(as_FloatRegister($dst$$reg), __ T$5$6,
2014             as_FloatRegister($src1$$reg),
2015             as_FloatRegister($src2$$reg));
2016   %}
2017   ins_pipe(vlogical`'ifelse($7, D, 64, 128));
2018 %}')dnl
2019 
2020 // --------------------------------- AND --------------------------------------
2021 dnl      $1   $2    $3   $4   $5  $6 $7
2022 VLOGICAL(and, andr, and, And, 8,  B, D)
2023 VLOGICAL(and, andr, and, And, 16, B, X)
2024 
2025 // --------------------------------- OR ---------------------------------------
2026 VLOGICAL(orr, orr,  or,  Or,  8,  B, D)
2027 VLOGICAL(orr, orr,  or,  Or,  16, B, X)
2028 
2029 // --------------------------------- XOR --------------------------------------
2030 VLOGICAL(xor, eor,  xor, Xor, 8,  B, D)
2031 VLOGICAL(xor, eor,  xor, Xor, 16, B, X)
2032 
2033 // ------------------------------ Shift ---------------------------------------
2034 dnl
dnl VSLCNT(bytes, simd, reg): LShiftCntV rule; broadcasts the scalar shift
dnl count into every lane of a vector with dup.
dnl   $1 = vector size in bytes (8 or 16)
dnl   $2 = SIMD arrangement letter (B)
dnl   $3 = vector register class suffix (D or X)
dnl The rule applies only when UseSVE == 0. The 8-byte variant also accepts
dnl 4-byte vectors.
2035 define(`VSLCNT', `
2036 instruct vslcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
2037   predicate(UseSVE == 0 && ifelse($1, 8,
2038                                   (n->as_Vector()->length_in_bytes() == 4 ||`
2039                             'n->as_Vector()->length_in_bytes() == $1),
2040                                   n->as_Vector()->length_in_bytes() == $1));
2041   match(Set dst (LShiftCntV cnt));
2042   ins_cost(INSN_COST);
2043   format %{ "dup  $dst, $cnt\t# shift count vector ($1$2)" %}
2044   ins_encode %{
2045     __ dup(as_FloatRegister($dst$$reg), __ T$1$2, as_Register($cnt$$reg));
2046   %}
2047   ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
2048 %}')dnl
2049 dnl
dnl VSRCNT(bytes, simd, reg): RShiftCntV rule; negates the scalar shift count
dnl into rscratch1 and broadcasts it into every lane of a vector with dup.
dnl   $1 = vector size in bytes (8 or 16)
dnl   $2 = SIMD arrangement letter (B)
dnl   $3 = vector register class suffix (D or X)
dnl The rule applies only when UseSVE == 0. The 8-byte variant also accepts
dnl 4-byte vectors.
dnl The two instructions in the format string are separated by a newline and
dnl a tab, like every other multi-instruction format in this file, so they
dnl print on separate lines.
2050 define(`VSRCNT', `
2051 instruct vsrcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
2052   predicate(UseSVE == 0 && ifelse($1, 8,
2053                                   (n->as_Vector()->length_in_bytes() == 4 ||`
2054                             'n->as_Vector()->length_in_bytes() == $1),
2055                                   n->as_Vector()->length_in_bytes() == $1));
2056   match(Set dst (RShiftCntV cnt));
2057   ins_cost(INSN_COST * 2);
2058   format %{ "negw  rscratch1, $cnt\n\t"
2059             "dup   $dst, rscratch1\t# shift count vector ($1$2)" %}
2060   ins_encode %{
2061     __ negw(rscratch1, as_Register($cnt$$reg));
2062     __ dup(as_FloatRegister($dst$$reg), __ T$1$2, rscratch1);
2063   %}
2064   ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
2065 %}')dnl
2066 dnl
2067 
2068 // Vector shift count
2069 // Note-1: Low 8 bits of each element are used, so it doesn't matter if we
2070 //         treat it as ints or bytes here.
2071 // Note-2: Shift value is negated for RShiftCntV additionally. See the comments
2072 //         on vsra8B rule for more details.
2073 dnl    $1  $2 $3
2074 VSLCNT(8,  B, D)
2075 VSLCNT(16, B, X)
2076 VSRCNT(8,  B, D)
2077 VSRCNT(16, B, X)
2078 dnl
dnl PREDICATE(key, lanes, extra): emits the predicate clause shared by the
dnl shift templates.
dnl   $1 = lanes-plus-type key (for example 8B or 4S)
dnl   $2 = number of lanes
dnl   $3 = optional extra condition; when non-empty it is and-ed onto the
dnl        length test
dnl Key 8B accepts lengths 4 or 8, key 4S accepts lengths 2 or 4, every other
dnl key requires length == $2.
2079 define(`PREDICATE',
2080 `ifelse($1, 8B,
2081             ifelse($3, `', `predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);',
2082                            `predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&`
2083             '$3);'),
2084         $1, 4S,
2085             ifelse($3, `', `predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);',
2086                            `predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&`
2087             '$3);'),
2088         ifelse($3, `', `predicate(n->as_Vector()->length() == $2);',
2089                        `predicate(n->as_Vector()->length() == $2 && $3);'))')dnl
2090 dnl
dnl VSLL(lanes, type, simd, reg): vector left shift by a vector of counts.
dnl   $1 = number of lanes
dnl   $2 = Java element type letter, used in the rule name and in LShiftV$2
dnl   $3 = SIMD arrangement letter
dnl   $4 = vector register class suffix (D or X)
dnl Emits one sshl with the shift vector as the count operand. The predicate
dnl carries no extra condition beyond the length test.
2091 define(`VSLL', `
2092 instruct vsll$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2093   PREDICATE(`$1$2', $1, )
2094   match(Set dst (LShiftV$2 src shift));
2095   ins_cost(INSN_COST);
2096   format %{ "sshl  $dst,$src,$shift\t# vector ($1$3)" %}
2097   ins_encode %{
2098     __ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
2099             as_FloatRegister($src$$reg),
2100             as_FloatRegister($shift$$reg));
2101   %}
2102   ins_pipe(vshift`'ifelse($4, D, 64, 128));
2103 %}')dnl
2104 dnl
dnl VSRA(lanes, type, simd, reg): arithmetic right shift for the case where
dnl the node is not a variable shift (is_var_shift() is false).
dnl   $1 = number of lanes
dnl   $2 = Java element type letter, used in the rule name and in RShiftV$2
dnl   $3 = SIMD arrangement letter
dnl   $4 = vector register class suffix (D or X)
dnl Emits one sshl with the shift vector used as is; per Note-2 of the
dnl vector shift count comment, the count was already negated when the
dnl RShiftCntV vector was built.
2105 define(`VSRA', `
2106 instruct vsra$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2107   PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
2108   match(Set dst (RShiftV$2 src shift));
2109   ins_cost(INSN_COST);
2110   format %{ "sshl  $dst,$src,$shift\t# vector ($1$3)" %}
2111   ins_encode %{
2112     __ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
2113             as_FloatRegister($src$$reg),
2114             as_FloatRegister($shift$$reg));
2115   %}
2116   ins_pipe(vshift`'ifelse($4, D, 64, 128));
2117 %}')dnl
2118 dnl
dnl VSRA_VAR(lanes, type, simd, reg): arithmetic right shift for the case
dnl where the node is a variable shift (is_var_shift() is true).
dnl   $1 = number of lanes
dnl   $2 = Java element type letter, used in the rule name and in RShiftV$2
dnl   $3 = SIMD arrangement letter
dnl   $4 = vector register class suffix (D or X)
dnl The shift vector is negated byte-wise into dst (negr on 8B or 16B), then
dnl sshl shifts src by dst. dst is TEMP_DEF because it holds the negated
dnl counts while src is still being read.
dnl The two instructions in the format string are separated by a newline and
dnl a tab so they print on separate lines.
2119 define(`VSRA_VAR', `
2120 instruct vsra$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2121   PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
2122   match(Set dst (RShiftV$2 src shift));
2123   ins_cost(INSN_COST * 2);
2124   effect(TEMP_DEF dst);
2125   format %{ "negr  $dst,$shift\n\t"
2126             "sshl  $dst,$src,$dst\t# vector ($1$3)" %}
2127   ins_encode %{
2128     __ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
2129             as_FloatRegister($shift$$reg));
2130     __ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
2131             as_FloatRegister($src$$reg),
2132             as_FloatRegister($dst$$reg));
2133   %}
2134   ins_pipe(vshift`'ifelse($4, D, 64, 128));
2135 %}')dnl
2136 dnl
dnl VSRL(lanes, type, simd, reg): logical right shift for the case where the
dnl node is not a variable shift (is_var_shift() is false).
dnl   $1 = number of lanes
dnl   $2 = Java element type letter, used in the rule name and in URShiftV$2
dnl   $3 = SIMD arrangement letter
dnl   $4 = vector register class suffix (D or X)
dnl Emits one ushl with the shift vector used as is; per Note-2 of the
dnl vector shift count comment, the count was already negated when the
dnl RShiftCntV vector was built.
2137 define(`VSRL', `
2138 instruct vsrl$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2139   PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
2140   match(Set dst (URShiftV$2 src shift));
2141   ins_cost(INSN_COST);
2142   format %{ "ushl  $dst,$src,$shift\t# vector ($1$3)" %}
2143   ins_encode %{
2144     __ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
2145             as_FloatRegister($src$$reg),
2146             as_FloatRegister($shift$$reg));
2147   %}
2148   ins_pipe(vshift`'ifelse($4, D, 64, 128));
2149 %}')dnl
2150 dnl
dnl VSRL_VAR(lanes, type, simd, reg): logical right shift for the case where
dnl the node is a variable shift (is_var_shift() is true).
dnl   $1 = number of lanes
dnl   $2 = Java element type letter, used in the rule name and in URShiftV$2
dnl   $3 = SIMD arrangement letter
dnl   $4 = vector register class suffix (D or X)
dnl The shift vector is negated byte-wise into dst (negr on 8B or 16B), then
dnl ushl shifts src by dst. dst is TEMP_DEF because it holds the negated
dnl counts while src is still being read.
dnl The two instructions in the format string are separated by a newline and
dnl a tab so they print on separate lines.
2151 define(`VSRL_VAR', `
2152 instruct vsrl$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2153   PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
2154   match(Set dst (URShiftV$2 src shift));
2155   ins_cost(INSN_COST * 2);
2156   effect(TEMP_DEF dst);
2157   format %{ "negr  $dst,$shift\n\t"
2158             "ushl  $dst,$src,$dst\t# vector ($1$3)" %}
2159   ins_encode %{
2160     __ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
2161             as_FloatRegister($shift$$reg));
2162     __ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
2163             as_FloatRegister($src$$reg),
2164             as_FloatRegister($dst$$reg));
2165   %}
2166   ins_pipe(vshift`'ifelse($4, D, 64, 128));
2167 %}')dnl
2168 dnl
dnl VSLL_IMM($1,    $2,        $3,          $4       )
dnl VSLL_IMM(lanes, elem_type, simd_suffix, reg_class)
dnl
dnl Left shift (LShiftV$2) by an immediate count wrapped in LShiftCntV.
dnl For byte and short elements (elem_type B or S) a count of 8 or 16 and
dnl above clears dst instead of emitting shl; the other element types pass
dnl the constant straight to shl.
define(`VSLL_IMM', `
instruct vsll$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
  PREDICATE(`$1$2', $1, assert_not_var_shift(n))
  match(Set dst (LShiftV$2 src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl  $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int amount = (int)$shift$$constant;ifelse(index(`BS', `$2'), `-1', `
    __ shl(dst_reg, __ T$1$3, src_reg, amount);', `
    if (amount < ifelse($2, B, 8, 16)) {
      __ shl(dst_reg, __ T$1$3, src_reg, amount);
    } else {
      // The count covers the whole lane, so the result is zero: src ^ src.
      __ eor(dst_reg, __ ifelse($4, D, T8B, T16B), src_reg, src_reg);
    }')
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
2200 dnl
dnl VSRA_IMM($1,    $2,        $3,          $4       )
dnl VSRA_IMM(lanes, elem_type, simd_suffix, reg_class)
dnl
dnl Arithmetic right shift (RShiftV$2) by an immediate count wrapped in
dnl RShiftCntV. For byte and short elements (elem_type B or S) the count is
dnl clamped to 7 or 15 before sshr is emitted; the other element types pass
dnl the constant straight to sshr.
define(`VSRA_IMM', `
instruct vsra$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
  PREDICATE(`$1$2', $1, assert_not_var_shift(n))
  match(Set dst (RShiftV$2 src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr  $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    int amount = (int)$shift$$constant;ifelse(index(`BS', `$2'), `-1', `', `
    // An arithmetic shift by the lane width or more yields the same lanes
    // as a shift by width - 1, which is the largest count used here.
    const int max_amount = ifelse($2, B, 7, 15);
    if (amount > max_amount) {
      amount = max_amount;
    }')
    __ sshr(dst_reg, __ T$1$3, src_reg, amount);
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
2222 dnl
dnl VSRL_IMM($1,    $2,        $3,          $4       )
dnl VSRL_IMM(lanes, elem_type, simd_suffix, reg_class)
dnl
dnl Logical right shift (URShiftV$2) by an immediate count wrapped in
dnl RShiftCntV. For byte and short elements (elem_type B or S) a count of
dnl 8 or 16 and above clears dst instead of emitting ushr; the other element
dnl types pass the constant straight to ushr.
define(`VSRL_IMM', `
instruct vsrl$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
  PREDICATE(`$1$2', $1, assert_not_var_shift(n))
  match(Set dst (URShiftV$2 src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr  $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int amount = (int)$shift$$constant;ifelse(index(`BS', `$2'), `-1', `
    __ ushr(dst_reg, __ T$1$3, src_reg, amount);', `
    if (amount < ifelse($2, B, 8, 16)) {
      __ ushr(dst_reg, __ T$1$3, src_reg, amount);
    } else {
      // The count covers the whole lane, so the result is zero: src ^ src.
      __ eor(dst_reg, __ ifelse($4, D, T8B, T16B), src_reg, src_reg);
    }')
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
2254 dnl
dnl VSRLA_IMM($1,    $2,        $3,          $4       )
dnl VSRLA_IMM(lanes, elem_type, simd_suffix, reg_class)
dnl
dnl Logical right shift by an immediate followed by an add into dst
dnl (AddV$2 dst (URShiftV$2 src count)), emitted as a single usra.
dnl For byte and short elements (elem_type B or S) nothing is emitted when
dnl the count is 8 or 16 and above.
define(`VSRLA_IMM', `
instruct vsrla$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
  predicate(n->as_Vector()->length() == $1);
  match(Set dst (AddV$2 dst (URShiftV$2 src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra  $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int amount = (int)$shift$$constant;ifelse(index(`BS', `$2'), `-1', `
    __ usra(dst_reg, __ T$1$3, src_reg, amount);', `
    // A logical shift by the lane width or more adds zero to dst, so no
    // instruction is emitted in that case.
    if (amount < ifelse($2, B, 8, 16)) {
      __ usra(dst_reg, __ T$1$3, src_reg, amount);
    }')
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
2278 dnl
dnl VSRAA_IMM($1,    $2,        $3,          $4       )
dnl VSRAA_IMM(lanes, elem_type, simd_suffix, reg_class)
dnl
dnl Arithmetic right shift by an immediate followed by an add into dst
dnl (AddV$2 dst (RShiftV$2 src count)), emitted as a single ssra.
dnl For byte and short elements (elem_type B or S) the count is clamped to
dnl 7 or 15 first.
define(`VSRAA_IMM', `
instruct vsraa$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
  predicate(n->as_Vector()->length() == $1);
  match(Set dst (AddV$2 dst (RShiftV$2 src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra  $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    int amount = (int)$shift$$constant;ifelse(index(`BS', `$2'), `-1', `', `
    // An arithmetic shift by the lane width or more yields the same lanes
    // as a shift by width - 1, which is the largest count used here.
    const int max_amount = ifelse($2, B, 7, 15);
    if (amount > max_amount) {
      amount = max_amount;
    }')
    __ ssra(dst_reg, __ T$1$3, src_reg, amount);
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
2300 dnl
dnl NOTE(review): PREDICATE is not quoted in the call below, so m4 expands it
dnl before undefine sees a name. The shift macros invoked after this line
dnl still expand PREDICATE, so the macro presumably stays defined here and
dnl this call has no effect - confirm against the PREDICATE definition.
undefine(PREDICATE)dnl
dnl
dnl Byte left shifts by a shift-count vector.
dnl  $1 = lane count, $2 = element type letter, $3 = SIMD arrangement letter,
dnl  $4 = register class (D selects vecD, X selects vecX)
dnl  $1  $2 $3 $4
VSLL(8,  B, B, D)
VSLL(16, B, B, X)
2306 
2307 // Right shifts with vector shift count on aarch64 SIMD are implemented
2308 // as left shift by negative shift count.
2309 // There are two cases for vector shift count.
2310 //
2311 // Case 1: The vector shift count is from replication.
2312 //        |            |
2313 //    LoadVector  RShiftCntV
2314 //        |       /
2315 //     RShiftVI
2316 //
// Case 2: The vector shift count is loaded as a vector.
// The middle-end does not support this case yet, but
// panama/vectorIntrinsics (JEP 338: Vector API) does.
2320 //        |            |
2321 //    LoadVector  LoadVector
2322 //        |       /
2323 //     RShiftVI
2324 //
// The negation is performed in the RShiftCntV rule for case 1, whereas it is
// done in the RShiftV* rules for case 2. Case 1 is handled this way because it
// offers an optimization opportunity: multiple neg instructions in an inner
// loop can be hoisted to the outer loop and merged into one neg instruction.
2329 //
// Note that ShiftVNode::is_var_shift() indicates whether the vector shift
// count is a variable vector (case 2) or not (a vector generated by
// RShiftCntV, i.e. case 1).
dnl Instantiation of the vector shift rules. Columns for every macro below:
dnl   $1  lane count (compared with n->as_Vector()->length())
dnl   $2  element type letter appended to the ideal node name (B, S, I, L)
dnl   $3  SIMD arrangement letter, combined with $1 into T<$1><$3>
dnl   $4  register class: D selects vecD, X selects vecX
dnl The order of the invocations is the order of the generated rules.
dnl
dnl Byte elements.
dnl  $1  $2 $3 $4
VSRA(8,  B, B, D)
VSRA_VAR(8,  B, B, D)
VSRA(16, B, B, X)
VSRA_VAR(16, B, B, X)
VSRL(8,  B, B, D)
VSRL_VAR(8,  B, B, D)
VSRL(16, B, B, X)
VSRL_VAR(16, B, B, X)
VSLL_IMM(8,  B, B, D)
VSLL_IMM(16, B, B, X)
VSRA_IMM(8,  B, B, D)
VSRA_IMM(16, B, B, X)
VSRL_IMM(8,  B, B, D)
VSRL_IMM(16, B, B, X)
dnl Short elements (arrangement letter H).
VSLL(4,  S, H, D)
VSLL(8,  S, H, X)
VSRA(4,  S, H, D)
VSRA_VAR(4,  S, H, D)
VSRA(8,  S, H, X)
VSRA_VAR(8,  S, H, X)
VSRL(4,  S, H, D)
VSRL_VAR(4,  S, H, D)
VSRL(8,  S, H, X)
VSRL_VAR(8,  S, H, X)
VSLL_IMM(4,  S, H, D)
VSLL_IMM(8,  S, H, X)
VSRA_IMM(4,  S, H, D)
VSRA_IMM(8,  S, H, X)
VSRL_IMM(4,  S, H, D)
VSRL_IMM(8,  S, H, X)
dnl Int elements (arrangement letter S).
VSLL(2,  I, S, D)
VSLL(4,  I, S, X)
VSRA(2,  I, S, D)
VSRA_VAR(2,  I, S, D)
VSRA(4,  I, S, X)
VSRA_VAR(4,  I, S, X)
VSRL(2,  I, S, D)
VSRL_VAR(2,  I, S, D)
VSRL(4,  I, S, X)
VSRL_VAR(4,  I, S, X)
VSLL_IMM(2,  I, S, D)
VSLL_IMM(4,  I, S, X)
VSRA_IMM(2,  I, S, D)
VSRA_IMM(4,  I, S, X)
VSRL_IMM(2,  I, S, D)
VSRL_IMM(4,  I, S, X)
dnl Long elements (arrangement letter D, vecX only).
VSLL(2,  L, D, X)
VSRA(2,  L, D, X)
VSRA_VAR(2,  L, D, X)
VSRL(2,  L, D, X)
VSRL_VAR(2,  L, D, X)
VSLL_IMM(2,  L, D, X)
VSRA_IMM(2,  L, D, X)
VSRL_IMM(2,  L, D, X)
dnl Arithmetic shift right by immediate and accumulate (ssra).
VSRAA_IMM(8,  B, B, D)
VSRAA_IMM(16, B, B, X)
VSRAA_IMM(4,  S, H, D)
VSRAA_IMM(8,  S, H, X)
VSRAA_IMM(2,  I, S, D)
VSRAA_IMM(4,  I, S, X)
VSRAA_IMM(2,  L, D, X)
dnl Logical shift right by immediate and accumulate (usra).
VSRLA_IMM(8,  B, B, D)
VSRLA_IMM(16, B, B, X)
VSRLA_IMM(4,  S, H, D)
VSRLA_IMM(8,  S, H, X)
VSRLA_IMM(2,  I, S, D)
VSRLA_IMM(4,  I, S, X)
VSRLA_IMM(2,  L, D, X)
2402 dnl
dnl VMINMAX($1,   $2,      $3,    $4,          $5,          $6       )
dnl VMINMAX(insn, op_name, lanes, format_type, simd_suffix, reg_class)
dnl
dnl Floating point vector min/max: matches $2V and emits f$1 on T$3$5.
dnl The rule name ends in F when simd_suffix is S and in D otherwise; the
dnl same test picks T_FLOAT or T_DOUBLE for the predicate.
define(`VMINMAX', `
instruct v$1$3`'ifelse($5, S, F, D)`'(vec$6 dst, vec$6 src1, vec$6 src2)
%{
  predicate(n->as_Vector()->length() == $3 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'ifelse($5, S, FLOAT, DOUBLE));
  match(Set dst ($2V src1 src2));
  ins_cost(INSN_COST);
  format %{ "f$1  $dst,$src1,$src2\t# vector ($3$4)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ f$1(dst_reg, __ T$3$5, lhs_reg, rhs_reg);
  %}
  ins_pipe(vdop_fp`'ifelse($6, D, 64, 128));
%}')dnl
dnl Columns: $1 instruction suffix (f$1), $2 ideal node prefix ($2V),
dnl $3 lane count, $4 letter shown in the format text, $5 SIMD arrangement
dnl letter, $6 register class (D selects vecD, X selects vecX).
dnl NOTE(review): the 4-lane float rules pass S for $4, so their format text
dnl reads 4S while the 2-lane float rules read 2F - confirm this is intended.
dnl     $1   $2   $3 $4 $5 $6
VMINMAX(max, Max, 2, F, S, D)
VMINMAX(max, Max, 4, S, S, X)
VMINMAX(max, Max, 2, D, D, X)
VMINMAX(min, Min, 2, F, S, D)
VMINMAX(min, Min, 4, S, S, X)
VMINMAX(min, Min, 2, D, D, X)
2424 
// Rounds both double lanes of src with the mode given by the constant rmode:
// rmode_rint -> frintn, rmode_floor -> frintm, rmode_ceil -> frintp.
instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "frint  $dst, $src, $rmode" %}
  ins_encode %{
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint:
        __ frintn(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_floor:
        __ frintm(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_ceil:
        __ frintp(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
      default:
        // Any other mode would emit no instruction at all and leave dst
        // without a value, so stop here instead of continuing silently.
        ShouldNotReachHere();
    }
  %}
  ins_pipe(vdop_fp128);
%}
2447 dnl
dnl VPOPCOUNT($1,   $2,      $3,      $4,   $5,   $6  )
dnl VPOPCOUNT(type, dst_reg, src_reg, cond, cost, size)
dnl
dnl Vector population count rule for PopCountV$1.
dnl   type     I or L; selects PopCountVI or PopCountVL
dnl   dst_reg  register class letter of dst (D or X)
dnl   src_reg  register class letter of src (D or X)
dnl   cond     operator comparing the vector length in bytes with 16
dnl   cost     multiplier applied to INSN_COST
dnl   size     arrangement text shown in the format string
dnl The combination type=L, dst_reg=D additionally requires a T_INT result
dnl and narrows it with xtn; type=L, dst_reg=X requires a T_LONG result.
define(`VPOPCOUNT', `dnl
ifelse($1$2, `LD', `
// If the PopCountVL is generated by auto-vectorization, the dst basic
// type is T_INT. And once we have unified the type definition for
// Vector API and auto-vectorization, this rule can be merged with
// "vpopcountLX" rule.', `')
instruct vpopcount$1$2`'(vec$2 dst, vec$3 src) %{
  predicate(n->as_Vector()->length_in_bytes() $4 16`'ifelse($1$2, `LD', ` &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT', $1$2, `LX', ` &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
  match(Set dst (PopCountV$1 src));
  ins_cost($5 * INSN_COST);
  format %{ "vpopcount$1  $dst, $src\t# vector ($6)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");dnl
ifelse($1, `I', `
    BasicType bt = Matcher::vector_element_basic_type(this);', `')
    // Count the set bits of every byte lane.
    __ cnt(as_FloatRegister($dst$$reg), __ T`'ifelse($3, D, 8, 16)B,
           as_FloatRegister($src$$reg));dnl
ifelse($1, `L', `
    // Sum adjacent lanes pairwise, doubling the lane width each time
    // (16B -> 8H -> 4S -> 2D), so each 64-bit lane ends up with its count.
    __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
              as_FloatRegister($dst$$reg));
    __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
              as_FloatRegister($dst$$reg));
    __ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
              as_FloatRegister($dst$$reg));', `
    // Byte elements are done after cnt; short and int elements need one
    // pairwise sum per doubling of the lane width.
    if (bt == T_SHORT || bt == T_INT) {
      __ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 8, 16)B,
                as_FloatRegister($dst$$reg));
    }
    if (bt == T_INT) {
      __ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 4, 8)H,
                as_FloatRegister($dst$$reg));
    }')dnl
ifelse($1$2, `LD', `
    // The result type of this rule is T_INT, so narrow 2D down to 2S.
    __ xtn(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($dst$$reg), __ T2D);', `')
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl Columns: $1 type (I or L), $2 dst register class, $3 src register class,
dnl $4 operator comparing the length in bytes with 16, $5 cost multiplier,
dnl $6 arrangement text for the format string.
dnl       $1 $2 $3 $4  $5 $6
dnl PopCountVI for vectors shorter than 16 bytes and for 16-byte vectors.
VPOPCOUNT(I, D, D, <,  3, 8B/4H/2S)
VPOPCOUNT(I, X, X, ==, 3, 16B/8H/4S)
dnl PopCountVL: the first rule reads a vecX and writes 2S into a vecD, the
dnl second one keeps the result as 2D.
VPOPCOUNT(L, D, X, <,  5, 2S)
VPOPCOUNT(L, X, X, ==, 4, 2D)
2493 dnl
dnl VMASK_TRUECOUNT($1,     $2 )
dnl VMASK_TRUECOUNT(suffix, reg)
dnl
dnl Counts the set lanes of a boolean vector mask (VectorMaskTrueCount).
dnl suffix is the byte arrangement (8B or 16B) and is also appended to the
dnl rule name; reg is the operand class of src and of the temporary.
define(`VMASK_TRUECOUNT', `
instruct vmask_truecount$1(iRegINoSp dst, $2 src, $2 tmp) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
  match(Set dst (VectorMaskTrueCount src));
  effect(TEMP tmp);
  ins_cost(2 * INSN_COST);
  format %{ "addv $tmp, $src\n\t"
            "umov $dst, $tmp, B, 0\t# vector ($1)" %}
  ins_encode %{
    // Every lane of "src" is one byte holding 0x00 or 0x01, so the sum
    // over all byte lanes is the number of set lanes. The sum lands in
    // byte 0 of "tmp" and is moved to the general register from there.
    const FloatRegister mask_reg = as_FloatRegister($src$$reg);
    const FloatRegister sum_reg  = as_FloatRegister($tmp$$reg);
    __ addv(sum_reg, __ T$1, mask_reg);
    __ umov($dst$$Register, sum_reg, __ B, 0);
  %}
  ins_pipe(pipe_slow);
%}')dnl
2512 dnl
2513 dnl
dnl ARGLIST($1    )
dnl ARGLIST(suffix)
dnl
dnl Operand list of the vmask_firsttrue rules on vecD: the _LT8B variant
dnl takes the flags register cr in addition to dst and src.
define(`ARGLIST',
`iRegINoSp dst, vecD src`'ifelse($1, `_LT8B', `, rFlagsReg cr')')
2516 dnl
dnl VMASK_FIRSTTRUE_D($1,     $2,   $3,   $4  )
dnl VMASK_FIRSTTRUE_D(suffix, cond, cost, size)
dnl
dnl VectorMaskFirstTrue on a vecD mask. cond is the operator comparing the
dnl mask length with 8, cost multiplies INSN_COST and size is the text shown
dnl in the format string. The _LT8B suffix adds a clamp to the vector length
dnl and therefore kills the flags register.
define(`VMASK_FIRSTTRUE_D', `
instruct vmask_firsttrue$1(ARGLIST($1)) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN &&
            n->in(1)->bottom_type()->is_vect()->length() $2 8);
  match(Set dst (VectorMaskFirstTrue src));dnl
ifelse($1, `_LT8B', `
  effect(KILL cr);')
  ins_cost($3 * INSN_COST);
  format %{ "vmask_firsttrue $dst, $src\t# vector ($4)" %}
  ins_encode %{
    // Result: index of the lowest set lane of the mask, or VLENGTH when
    // no lane is set.
    //
    // Every lane of "src" is one byte holding 0x00 or 0x01. Once the 64
    // bits are in a general register and bit-reversed, the count of
    // leading zero bits divided by 8 is the number of clear lanes in
    // front of the first set one.
    const Register res = $dst$$Register;
    __ fmovd(res, as_FloatRegister($src$$reg));
    __ rbit(res, res);
    __ clz(res, res);
    __ lsrw(res, res, 3);dnl
ifelse($1, `_LT8B', `
    // Fewer than 8 lanes: a result at or beyond the vector length is
    // replaced by the vector length.
    __ movw(rscratch1, Matcher::vector_length(this, $src));
    __ cmpw(res, rscratch1);
    __ cselw(res, rscratch1, res, Assembler::GE);')
  %}
  ins_pipe(pipe_slow);
%}')dnl
2548 dnl
dnl ARGLIST is expanded only when VMASK_FIRSTTRUE_D is invoked, so it must
dnl stay defined until after the invocations below.
dnl
// vector mask reductions
VMASK_TRUECOUNT(8B,  vecD)
VMASK_TRUECOUNT(16B, vecX)
VMASK_FIRSTTRUE_D(_LT8B, <,  7, 4I/4S/2I)
VMASK_FIRSTTRUE_D(8B,    ==, 4, 8B)
dnl Drop the helper now that its last user has been expanded. The name is
dnl quoted so that m4 hands it to undefine instead of expanding it first;
dnl without the quotes the call would not remove the macro.
undefine(`ARGLIST')dnl
2556 
instruct vmask_firsttrue16B(iRegINoSp dst, vecX src) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
  match(Set dst (VectorMaskFirstTrue src));
  ins_cost(6 * INSN_COST);
  format %{ "vmask_firsttrue $dst, $src\t# vector (16B)" %}
  ins_encode %{
    // Result: index of the lowest set lane of the mask, or 16 (VLENGTH)
    // when no lane is set.
    //
    // Every lane of "src" is one byte holding 0x00 or 0x01.
    const Register      res  = $dst$$Register;
    const FloatRegister mask = as_FloatRegister($src$$reg);
    Label HALF_SELECTED;

    // Start with the lower 64 bits; their first lane has index 0.
    __ fmovd(res, mask);
    __ movw(rscratch1, zr);
    __ cbnz(res, HALF_SELECTED);

    // Lower half is empty: continue with the upper 64 bits, whose first
    // lane has index 8.
    __ fmovhid(res, mask);
    __ movw(rscratch1, 8);

    // After bit reversal the leading zero bits divided by 8 give the
    // lane offset inside the selected half; add the base index of it.
    __ bind(HALF_SELECTED);
    __ rbit(res, res);
    __ clz(res, res);
    __ addw(res, rscratch1, res, Assembler::LSR, 3);
  %}
  ins_pipe(pipe_slow);
%}
2588 
instruct vmask_lasttrue8B(iRegINoSp dst, vecD src) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
  match(Set dst (VectorMaskLastTrue src));
  ins_cost(4 * INSN_COST);
  format %{ "vmask_lasttrue $dst, $src\t# vector (8B)" %}
  ins_encode %{
    // Result: index of the highest set lane of the mask, or -1 when no
    // lane is set.
    //
    // Every lane of "src" is one byte holding 0x00 or 0x01. The count of
    // leading zero bits divided by 8 is the distance from the top lane,
    // so it is subtracted from 7 (VLENGTH - 1).
    const Register res = $dst$$Register;
    __ fmovd(res, as_FloatRegister($src$$reg));
    __ clz(res, res);
    __ movw(rscratch1, 7);
    __ subw(res, rscratch1, res, Assembler::LSR, 3);
  %}
  ins_pipe(pipe_slow);
%}
2610 
instruct vmask_lasttrue16B(iRegINoSp dst, vecX src) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
  match(Set dst (VectorMaskLastTrue src));
  ins_cost(5 * INSN_COST);
  format %{ "vmask_lasttrue $dst, $src\t# vector (16B)" %}
  ins_encode %{
    // Result: index of the highest set lane of the mask, or -1 when no
    // lane is set.
    //
    // Every lane of "src" is one byte holding 0x00 or 0x01.
    const Register      res  = $dst$$Register;
    const FloatRegister mask = as_FloatRegister($src$$reg);
    Label HALF_SELECTED;

    // Start with the upper 64 bits; their top lane has index 15.
    __ fmovhid(res, mask);
    __ movw(rscratch1, 16 - 1);
    __ cbnz(res, HALF_SELECTED);

    // Upper half is empty: continue with the lower 64 bits, whose top
    // lane has index 7.
    __ fmovd(res, mask);
    __ movw(rscratch1, 8 - 1);

    // The leading zero bits divided by 8 give the distance from the top
    // lane of the selected half; subtract it from that lane index.
    __ bind(HALF_SELECTED);
    __ clz(res, res);
    __ subw(res, rscratch1, res, Assembler::LSR, 3);
  %}
  ins_pipe(pipe_slow);
%}
2641 
instruct vmask_tolong8B(iRegLNoSp dst, vecD src) %{
  match(Set dst (VectorMaskToLong src));
  ins_cost(5 * INSN_COST);
  format %{ "vmask_tolong $dst, $src\t# convert mask to long (8B)" %}
  ins_encode %{
    // Every lane of "src" is one byte holding 0x00 or 0x01. The 64 bits
    // are moved to the general register and compressed there by
    // bytemask_compress (presumably one result bit per mask byte;
    // TODO confirm against the macro assembler).
    const Register bits = as_Register($dst$$reg);
    __ fmovd(bits, as_FloatRegister($src$$reg));
    __ bytemask_compress(bits);
  %}
  ins_pipe(pipe_slow);
%}
2655 
instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
  match(Set dst (VectorMaskToLong src));
  ins_cost(11 * INSN_COST);
  format %{ "vmask_tolong $dst, $src\t# convert mask to long (16B)" %}
  ins_encode %{
    // Every lane of "src" is one byte holding 0x00 or 0x01. Each 64-bit
    // half is compressed on its own; the result of the upper half is
    // then merged in shifted left by 8.
    const Register      low_bits  = as_Register($dst$$reg);
    const Register      high_bits = rscratch1;
    const FloatRegister mask      = as_FloatRegister($src$$reg);

    __ umov(low_bits, mask, __ D, 0);
    __ umov(high_bits, mask, __ D, 1);
    __ bytemask_compress(low_bits);
    __ bytemask_compress(high_bits);
    __ orr(low_bits, low_bits, high_bits, Assembler::LSL, 8);
  %}
  ins_pipe(pipe_slow);
%}
2673 
2674 dnl
dnl CLTZ_D($1     )
dnl CLTZ_D(op_name)
dnl
dnl Count leading or trailing zeros per lane of an 8-byte vector; op_name is
dnl LeadingZerosV or TrailingZerosV and is appended to Count in the match
dnl rule. The trailing variant costs three times INSN_COST.
define(`CLTZ_D', `
instruct count$1D(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (Count$1 src));
  ins_cost(ifelse($1, `TrailingZerosV', `3 * INSN_COST', `INSN_COST'));
  format %{ "count$1 $dst, $src\t# vector (8B/4H/2S)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_Arrangement lanes =
        __ esize2arrangement((unsigned)type2aelembytes(elem_bt), false);
ifelse($1, `TrailingZerosV', `dnl
    // Trailing zeros equal the leading zeros of the bit-reversed lanes.
    __ neon_reverse_bits(dst_reg, src_reg, elem_bt, false);
    __ clz(dst_reg, lanes, dst_reg);', `dnl
    __ clz(dst_reg, lanes, src_reg);')
  %}
  ins_pipe(pipe_slow);
%}')dnl
2692 dnl
dnl CLTZ_X($1     )
dnl CLTZ_X(op_name)
dnl
dnl Count leading or trailing zeros per lane of a 16-byte vector; op_name is
dnl LeadingZerosV or TrailingZerosV and is appended to Count in the match
dnl rule. The trailing variant costs three times INSN_COST. Long lanes are
dnl processed one at a time through rscratch1.
define(`CLTZ_X', `
instruct count$1X(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (Count$1 src));
  ins_cost(ifelse($1, `TrailingZerosV', `3 * INSN_COST', `INSN_COST'));
  format %{ "count$1 $dst, $src\t# vector (16B/8H/4S/2D)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_Arrangement lanes =
        __ esize2arrangement((unsigned)type2aelembytes(elem_bt), true);
ifelse($1, `TrailingZerosV', `dnl
    // Trailing zeros equal the leading zeros of the bit-reversed lanes,
    // so the count below reads the reversed value from dst.
    __ neon_reverse_bits(dst_reg, src_reg, elem_bt, true);
    const FloatRegister in_reg = dst_reg;', `dnl
    const FloatRegister in_reg = src_reg;')
    if (elem_bt == T_LONG) {
      // Each of the two 64-bit lanes goes through the scalar clz in
      // rscratch1 and is written back to the same lane of dst.
      for (int lane = 0; lane < 2; lane++) {
        __ umov(rscratch1, in_reg, __ D, lane);
        __ clz(rscratch1, rscratch1);
        __ mov(dst_reg, __ D, lane, rscratch1);
      }
    } else {
      __ clz(dst_reg, lanes, in_reg);
    }
  %}
  ins_pipe(pipe_slow);
%}')dnl
2719 dnl
//------------------------- CountLeadingZerosV -----------------------------
dnl Generates countLeadingZerosVD (vecD) and countLeadingZerosVX (vecX).
CLTZ_D(LeadingZerosV)
CLTZ_X(LeadingZerosV)

//------------------------- CountTrailingZerosV ----------------------------
dnl Generates countTrailingZerosVD (vecD) and countTrailingZerosVX (vecX).
CLTZ_D(TrailingZerosV)
CLTZ_X(TrailingZerosV)
2727 
2728 dnl
dnl REVERSE($1,        $2,      $3,   $4  )
dnl REVERSE(insn_name, op_name, type, insn)
dnl
dnl Rule named insn_name that matches op_name on a vecD (type D, 8 bytes) or
dnl vecX (any other type, 16 bytes) vector and forwards to the macro
dnl assembler routine insn. The cost is doubled when op_name is ReverseV.
define(`REVERSE', `
instruct $1(vec$3 dst, vec$3 src) %{
  predicate(n->as_Vector()->length_in_bytes() == ifelse($3, D, 8, 16));
  match(Set dst ($2 src));
  ins_cost(ifelse($2, `ReverseV', `2 * INSN_COST', `INSN_COST'));
  format %{ "$2 $dst, $src\t# vector ($3)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // False for the vecD rule, true otherwise.
    const bool is_x_reg = ifelse($3, D, false, true);
    __ $4(dst_reg, src_reg, elem_bt, is_x_reg);
  %}
  ins_pipe(pipe_slow);
%}')dnl
2743 dnl
//------------------------------ ReverseV -----------------------------------
dnl Columns: rule name, ideal node, register class, macro assembler routine.
REVERSE(vreverseD, ReverseV, D, neon_reverse_bits)
REVERSE(vreverseX, ReverseV, X, neon_reverse_bits)

//---------------------------- ReverseBytesV --------------------------------
dnl Same columns; these rules forward to neon_reverse_bytes instead.
REVERSE(vreverseBytesD, ReverseBytesV, D, neon_reverse_bytes)
REVERSE(vreverseBytesX, ReverseBytesV, X, neon_reverse_bytes)